diff options
Diffstat (limited to 'yt_dlp/utils.py')
-rw-r--r-- | yt_dlp/utils.py | 247 |
1 files changed, 21 insertions, 226 deletions
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 14dbbf59f..324b54e78 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -50,7 +50,6 @@ from .compat import ( compat_brotli, compat_chr, compat_cookiejar, - compat_ctypes_WINFUNCTYPE, compat_etree_fromstring, compat_expanduser, compat_html_entities, @@ -288,37 +287,9 @@ def preferredencoding(): def write_json_file(obj, fn): """ Encode obj as JSON and write it to fn, atomically if possible """ - fn = encodeFilename(fn) - if sys.version_info < (3, 0) and sys.platform != 'win32': - encoding = get_filesystem_encoding() - # os.path.basename returns a bytes object, but NamedTemporaryFile - # will fail if the filename contains non ascii characters unless we - # use a unicode object - path_basename = lambda f: os.path.basename(fn).decode(encoding) - # the same for os.path.dirname - path_dirname = lambda f: os.path.dirname(fn).decode(encoding) - else: - path_basename = os.path.basename - path_dirname = os.path.dirname - - args = { - 'suffix': '.tmp', - 'prefix': path_basename(fn) + '.', - 'dir': path_dirname(fn), - 'delete': False, - } - - # In Python 2.x, json.dump expects a bytestream. - # In Python 3.x, it writes to a character stream - if sys.version_info < (3, 0): - args['mode'] = 'wb' - else: - args.update({ - 'mode': 'w', - 'encoding': 'utf-8', - }) - - tf = tempfile.NamedTemporaryFile(**compat_kwargs(args)) + tf = tempfile.NamedTemporaryFile( + prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn), + suffix='.tmp', delete=False, mode='w', encoding='utf-8') try: with tf: @@ -345,20 +316,11 @@ def write_json_file(obj, fn): raise -if sys.version_info >= (2, 7): - def find_xpath_attr(node, xpath, key, val=None): - """ Find the xpath xpath[@key=val] """ - assert re.match(r'^[a-zA-Z_-]+$', key) - expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val)) - return node.find(expr) -else: - def find_xpath_attr(node, xpath, key, val=None): - for f in node.findall(compat_xpath(xpath)): - if key not in f.attrib: - continue - if val is None or f.attrib.get(key) == val: - return f - return None +def find_xpath_attr(node, xpath, key, val=None): + """ Find the xpath xpath[@key=val] """ + assert re.match(r'^[a-zA-Z_-]+$', key) + expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val)) + return node.find(expr) # On python2.6 the xml.etree.ElementTree.Element methods don't support # the namespace parameter @@ -626,8 +588,6 @@ def extract_attributes(html_element): 'empty': '', 'noval': None, 'entity': '&', 'sq': '"', 'dq': '\'' }. - NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, - but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. """ parser = HTMLAttributeParser() try: @@ -763,8 +723,6 @@ def sanitize_path(s, force=False): if sys.platform == 'win32': force = False drive_or_unc, _ = os.path.splitdrive(s) - if sys.version_info < (2, 7) and not drive_or_unc: - drive_or_unc, _ = os.path.splitunc(s) elif force: drive_or_unc = '' else: @@ -922,51 +880,23 @@ def get_subprocess_encoding(): def encodeFilename(s, for_subprocess=False): - """ - @param s The name of the file - """ - - assert type(s) == compat_str - - # Python 3 has a Unicode API - if sys.version_info >= (3, 0): - return s - - # Pass '' directly to use Unicode APIs on Windows 2000 and up - # (Detecting Windows NT 4 is tricky because 'major >= 4' would - # match Windows 9x series as well. Besides, NT 4 is obsolete.) - if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: - return s - - # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible - if sys.platform.startswith('java'): - return s - - return s.encode(get_subprocess_encoding(), 'ignore') + assert type(s) == str + return s def decodeFilename(b, for_subprocess=False): - - if sys.version_info >= (3, 0): - return b - - if not isinstance(b, bytes): - return b - - return b.decode(get_subprocess_encoding(), 'ignore') + return b def encodeArgument(s): - if not isinstance(s, compat_str): - # Legacy code that uses byte strings - # Uncomment the following line after fixing all post processors - # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) - s = s.decode('ascii') - return encodeFilename(s, True) + # Legacy code that uses byte strings + # Uncomment the following line after fixing all post processors + # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) + return s if isinstance(s, str) else s.decode('ascii') def decodeArgument(b): - return decodeFilename(b, True) + return b def decodeOption(optval): @@ -1263,11 +1193,6 @@ class XAttrUnavailableError(YoutubeDLError): def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): - # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting - # expected HTTP responses to meet HTTP/1.0 or later (see also - # https://github.com/ytdl-org/youtube-dl/issues/6727) - if sys.version_info < (3, 0): - kwargs['strict'] = True hc = http_class(*args, **compat_kwargs(kwargs)) source_address = ydl_handler._params.get('source_address') @@ -1309,20 +1234,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): raise socket.error('getaddrinfo returns an empty list') if hasattr(hc, '_create_connection'): hc._create_connection = _create_connection - sa = (source_address, 0) - if hasattr(hc, 'source_address'): # Python 2.7+ - hc.source_address = sa - else: # Python 2.6 - def _hc_connect(self, *args, **kwargs): - sock = _create_connection( - (self.host, self.port), self.timeout, sa) - if is_https: - self.sock = ssl.wrap_socket( - sock, self.key_file, self.cert_file, - ssl_version=ssl.PROTOCOL_TLSv1) - else: - self.sock = sock - hc.connect = functools.partial(_hc_connect, hc) + hc.source_address = (source_address, 0) return hc @@ -1413,11 +1325,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): req.headers = handle_youtubedl_headers(req.headers) - if sys.version_info < (2, 7) and '#' in req.get_full_url(): - # Python 2.6 is brain-dead when it comes to fragments - req._Request__original = req._Request__original.partition('#')[0] - req._Request__r_type = req._Request__r_type.partition('#')[0] - return req def http_response(self, req, resp): @@ -1461,15 +1368,10 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): location = resp.headers.get('Location') if location: # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 - if sys.version_info >= (3, 0): - location = location.encode('iso-8859-1').decode('utf-8') - else: - location = location.decode('utf-8') + location = location.encode('iso-8859-1').decode('utf-8') location_escaped = escape_url(location) if location != location_escaped: del resp.headers['Location'] - if sys.version_info < (3, 0): - location_escaped = location_escaped.encode('utf-8') resp.headers['Location'] = location_escaped return resp @@ -1668,19 +1570,6 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor): compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar) def http_response(self, request, response): - # Python 2 will choke on next HTTP request in row if there are non-ASCII - # characters in Set-Cookie HTTP header of last response (see - # https://github.com/ytdl-org/youtube-dl/issues/6769). - # In order to at least prevent crashing we will percent encode Set-Cookie - # header before HTTPCookieProcessor starts processing it. - # if sys.version_info < (3, 0) and response.headers: - # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): - # set_cookie = response.headers.get(set_cookie_header) - # if set_cookie: - # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") - # if set_cookie != set_cookie_escaped: - # del response.headers[set_cookie_header] - # response.headers[set_cookie_header] = set_cookie_escaped return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response) https_request = compat_urllib_request.HTTPCookieProcessor.http_request @@ -1724,12 +1613,6 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): # essentially all clients do redirect in this case, so we do # the same. - # On python 2 urlh.geturl() may sometimes return redirect URL - # as byte string instead of unicode. This workaround allows - # to force it always return unicode. - if sys.version_info[0] < 3: - newurl = compat_str(newurl) - # Be conciliant with URIs containing a space. This is mainly # redundant with the more complete encoding done in http_error_302(), # but it is kept for compatibility with other callers. @@ -2013,91 +1896,12 @@ def get_windows_version(): return None -def _windows_write_string(s, out): - """ Returns True if the string was written using special methods, - False if it has yet to be written out.""" - # Adapted from http://stackoverflow.com/a/3259271/35070 - - import ctypes.wintypes - - WIN_OUTPUT_IDS = { - 1: -11, - 2: -12, - } - - try: - fileno = out.fileno() - except AttributeError: - # If the output stream doesn't have a fileno, it's virtual - return False - except io.UnsupportedOperation: - # Some strange Windows pseudo files? - return False - if fileno not in WIN_OUTPUT_IDS: - return False - - GetStdHandle = compat_ctypes_WINFUNCTYPE( - ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)( - ('GetStdHandle', ctypes.windll.kernel32)) - h = GetStdHandle(WIN_OUTPUT_IDS[fileno]) - - WriteConsoleW = compat_ctypes_WINFUNCTYPE( - ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR, - ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD), - ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32)) - written = ctypes.wintypes.DWORD(0) - - GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32)) - FILE_TYPE_CHAR = 0x0002 - FILE_TYPE_REMOTE = 0x8000 - GetConsoleMode = compat_ctypes_WINFUNCTYPE( - ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, - ctypes.POINTER(ctypes.wintypes.DWORD))( - ('GetConsoleMode', ctypes.windll.kernel32)) - INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value - - def not_a_console(handle): - if handle == INVALID_HANDLE_VALUE or handle is None: - return True - return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR - or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) - - if not_a_console(h): - return False - - def next_nonbmp_pos(s): - try: - return next(i for i, c in enumerate(s) if ord(c) > 0xffff) - except StopIteration: - return len(s) - - while s: - count = min(next_nonbmp_pos(s), 1024) - - ret = WriteConsoleW( - h, s, count if count else 2, ctypes.byref(written), None) - if ret == 0: - raise OSError('Failed to write string') - if not count: # We just wrote a non-BMP character - assert written.value == 2 - s = s[1:] - else: - assert written.value > 0 - s = s[written.value:] - return True - - def write_string(s, out=None, encoding=None): if out is None: out = sys.stderr assert type(s) == compat_str - if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'): - if _windows_write_string(s, out): - return - - if ('b' in getattr(out, 'mode', '') - or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr + if 'b' in getattr(out, 'mode', ''): byt = s.encode(encoding or preferredencoding(), 'ignore') out.write(byt) elif hasattr(out, 'buffer'): @@ -2985,8 +2789,6 @@ def lowercase_escape(s): def escape_rfc3986(s): """Escape non-ASCII characters as suggested by RFC 3986""" - if sys.version_info < (3, 0) and isinstance(s, compat_str): - s = s.encode('utf-8') return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") @@ -3335,12 +3137,7 @@ def args_to_str(args): def error_to_compat_str(err): - err_str = str(err) - # On python 2 error byte string must be decoded with proper - # encoding rather than ascii - if sys.version_info[0] < 3: - err_str = err_str.decode(preferredencoding()) - return err_str + return str(err) def error_to_str(err): @@ -5144,7 +4941,7 @@ def get_executable_path(): from zipimport import zipimporter if hasattr(sys, 'frozen'): # Running from PyInstaller path = os.path.dirname(sys.executable) - elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP + elif isinstance(__loader__, zipimporter): # Running from ZIP path = os.path.join(os.path.dirname(__file__), '../..') else: path = os.path.join(os.path.dirname(__file__), '..') @@ -5436,8 +5233,6 @@ class Config: try: # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56 contents = optionf.read() - if sys.version_info < (3,): - contents = contents.decode(preferredencoding()) res = compat_shlex_split(contents, comments=True) finally: optionf.close() |