1043 files changed, 5159 insertions(+), 7636 deletions(-)
diff --git a/.gitignore b/.gitignore
index 31fdc484b..48cef694f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,6 +78,8 @@ updates_key.pem
 *.egg-info
 .tox
 *.class
+*.isorted
+
 # Generated
 AUTHORS
 README.txt
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6f27a10f7..0ed1eb457 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -98,10 +98,10 @@ If you want to add support for a new site, first of all **make sure** this site
 After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):

-1. [Fork this repository](https://git.conocimientoslibres.ga/software/hypervideo)
+1. [Fork this repository](https://c.hgit.ga/software/hypervideo)
 2. Check out the source code with:

-        git clone https://git.conocimientoslibres.ga/software/hypervideo
+        git clone https://c.hgit.ga/software/hypervideo

 3. Start a new git branch with
diff --git a/Changelog.md b/Changelog.md
index 1caea2bfd..243f3d244 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -15,7 +15,7 @@
 * Use certificates from `certifi` if installed by [coletdjnz](https://github.com/coletdjnz)
 * Treat multiple `--match-filters` as OR
-* File locking improvevemnts:
+* File locking improvements:
     * Do not lock downloading file on Windows
     * Do not prevent download if locking is unsupported
     * Do not truncate files before locking by [jakeogh](https://github.com/jakeogh), [pukkandan](https://github.com/pukkandan)
diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
index 46b4b2ff5..268e8a2ae 100755
--- a/devscripts/bash-completion.py
+++ b/devscripts/bash-completion.py
@@ -1,11 +1,9 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
 import os
-from os.path import dirname as dirn
 import sys
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 import yt_dlp

 BASH_COMPLETION_FILE = "completions/bash/yt-dlp"
@@ -26,5 +24,5 @@ def build_completion(opt_parser):
         f.write(filled_template)


-parser = yt_dlp.parseOpts()[0]
+parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
 build_completion(parser)
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
deleted file mode 100644
index cd544b816..000000000
--- a/devscripts/buildserver.py
+++ /dev/null
@@ -1,435 +0,0 @@
-# UNUSED
-
-#!/usr/bin/python3
-
-import argparse
-import ctypes
-import functools
-import shutil
-import subprocess
-import sys
-import tempfile
-import threading
-import traceback
-import os.path
-
-sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
-from yt_dlp.compat import (
-    compat_input,
-    compat_http_server,
-    compat_str,
-    compat_urlparse,
-)
-
-# These are not used outside of buildserver.py thus not in compat.py
-
-try:
-    import winreg as compat_winreg
-except ImportError:  # Python 2
-    import _winreg as compat_winreg
-
-try:
-    import socketserver as compat_socketserver
-except ImportError:  # Python 2
-    import SocketServer as compat_socketserver
-
-
-class BuildHTTPServer(compat_socketserver.ThreadingMixIn, compat_http_server.HTTPServer):
-    allow_reuse_address = True
-
-
-advapi32 = ctypes.windll.advapi32
-
-SC_MANAGER_ALL_ACCESS = 0xf003f
-SC_MANAGER_CREATE_SERVICE = 0x02
-SERVICE_WIN32_OWN_PROCESS = 0x10
-SERVICE_AUTO_START = 0x2
-SERVICE_ERROR_NORMAL = 0x1
-DELETE = 0x00010000
-SERVICE_STATUS_START_PENDING = 0x00000002
-SERVICE_STATUS_RUNNING = 0x00000004
-SERVICE_ACCEPT_STOP = 0x1
-
-SVCNAME = 'youtubedl_builder'
-
-LPTSTR = ctypes.c_wchar_p
-START_CALLBACK = ctypes.WINFUNCTYPE(None, ctypes.c_int, ctypes.POINTER(LPTSTR))
-
-
-class SERVICE_TABLE_ENTRY(ctypes.Structure):
-    _fields_ = [
-        ('lpServiceName', LPTSTR),
-        ('lpServiceProc', START_CALLBACK)
-    ]
-
-
-HandlerEx = ctypes.WINFUNCTYPE(
-    ctypes.c_int,     # return
-    ctypes.c_int,     # dwControl
-    ctypes.c_int,     # dwEventType
-    ctypes.c_void_p,  # lpEventData,
-    ctypes.c_void_p,  # lpContext,
-)
-
-
-def _ctypes_array(c_type, py_array):
-    ar = (c_type * len(py_array))()
-    ar[:] = py_array
-    return ar
-
-
-def win_OpenSCManager():
-    res = advapi32.OpenSCManagerW(None, None, SC_MANAGER_ALL_ACCESS)
-    if not res:
-        raise Exception('Opening service manager failed - '
-                        'are you running this as administrator?')
-    return res
-
-
-def win_install_service(service_name, cmdline):
-    manager = win_OpenSCManager()
-    try:
-        h = advapi32.CreateServiceW(
-            manager, service_name, None,
-            SC_MANAGER_CREATE_SERVICE, SERVICE_WIN32_OWN_PROCESS,
-            SERVICE_AUTO_START, SERVICE_ERROR_NORMAL,
-            cmdline, None, None, None, None, None)
-        if not h:
-            raise OSError('Service creation failed: %s' % ctypes.FormatError())
-
-        advapi32.CloseServiceHandle(h)
-    finally:
-        advapi32.CloseServiceHandle(manager)
-
-
-def win_uninstall_service(service_name):
-    manager = win_OpenSCManager()
-    try:
-        h = advapi32.OpenServiceW(manager, service_name, DELETE)
-        if not h:
-            raise OSError('Could not find service %s: %s' % (
-                service_name, ctypes.FormatError()))
-
-        try:
-            if not advapi32.DeleteService(h):
-                raise OSError('Deletion failed: %s' % ctypes.FormatError())
-        finally:
-            advapi32.CloseServiceHandle(h)
-    finally:
-        advapi32.CloseServiceHandle(manager)
-
-
-def win_service_report_event(service_name, msg, is_error=True):
-    with open('C:/sshkeys/log', 'a', encoding='utf-8') as f:
-        f.write(msg + '\n')
-
-    event_log = advapi32.RegisterEventSourceW(None, service_name)
-    if not event_log:
-        raise OSError('Could not report event: %s' % ctypes.FormatError())
-
-    try:
-        type_id = 0x0001 if is_error else 0x0004
-        event_id = 0xc0000000 if is_error else 0x40000000
-        lines = _ctypes_array(LPTSTR, [msg])
-
-        if not advapi32.ReportEventW(
-                event_log, type_id, 0, event_id, None, len(lines), 0,
-                lines, None):
-            raise OSError('Event reporting failed: %s' % ctypes.FormatError())
-    finally:
-        advapi32.DeregisterEventSource(event_log)
-
-
-def win_service_handler(stop_event, *args):
-    try:
-        raise ValueError('Handler called with args ' + repr(args))
-        TODO
-    except Exception as e:
-        tb = traceback.format_exc()
-        msg = str(e) + '\n' + tb
-        win_service_report_event(service_name, msg, is_error=True)
-        raise
-
-
-def win_service_set_status(handle, status_code):
-    svcStatus = SERVICE_STATUS()
-    svcStatus.dwServiceType = SERVICE_WIN32_OWN_PROCESS
-    svcStatus.dwCurrentState = status_code
-    svcStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP
-
-    svcStatus.dwServiceSpecificExitCode = 0
-
-    if not advapi32.SetServiceStatus(handle, ctypes.byref(svcStatus)):
-        raise OSError('SetServiceStatus failed: %r' % ctypes.FormatError())
-
-
-def win_service_main(service_name, real_main, argc, argv_raw):
-    try:
-        # args = [argv_raw[i].value for i in range(argc)]
-        stop_event = threading.Event()
-        handler = HandlerEx(functools.partial(stop_event, win_service_handler))
-        h = advapi32.RegisterServiceCtrlHandlerExW(service_name, handler, None)
-        if not h:
-            raise OSError('Handler registration failed: %s' %
-                          ctypes.FormatError())
-
-        TODO
-    except Exception as e:
-        tb = traceback.format_exc()
-        msg = str(e) + '\n' + tb
-        win_service_report_event(service_name, msg, is_error=True)
-        raise
-
-
-def win_service_start(service_name, real_main):
-    try:
-        cb = START_CALLBACK(
-            functools.partial(win_service_main, service_name, real_main))
-        dispatch_table = _ctypes_array(SERVICE_TABLE_ENTRY, [
-            SERVICE_TABLE_ENTRY(
-                service_name,
-                cb
-            ),
-            SERVICE_TABLE_ENTRY(None, ctypes.cast(None, START_CALLBACK))
-        ])
-
-        if not advapi32.StartServiceCtrlDispatcherW(dispatch_table):
-            raise OSError('ctypes start failed: %s' % ctypes.FormatError())
-    except Exception as e:
-        tb = traceback.format_exc()
-        msg = str(e) + '\n' + tb
-        win_service_report_event(service_name, msg, is_error=True)
-        raise
-
-
-def main(args=None):
-    parser = argparse.ArgumentParser()
-    parser.add_argument('-i', '--install',
-                        action='store_const', dest='action', const='install',
-                        help='Launch at Windows startup')
-    parser.add_argument('-u', '--uninstall',
-                        action='store_const', dest='action', const='uninstall',
-                        help='Remove Windows service')
-    parser.add_argument('-s', '--service',
-                        action='store_const', dest='action', const='service',
-                        help='Run as a Windows service')
-    parser.add_argument('-b', '--bind', metavar='<host:port>',
-                        action='store', default='0.0.0.0:8142',
-                        help='Bind to host:port (default %default)')
-    options = parser.parse_args(args=args)
-
-    if options.action == 'install':
-        fn = os.path.abspath(__file__).replace('v:', '\\\\vboxsrv\\vbox')
-        cmdline = '%s %s -s -b %s' % (sys.executable, fn, options.bind)
-        win_install_service(SVCNAME, cmdline)
-        return
-
-    if options.action == 'uninstall':
-        win_uninstall_service(SVCNAME)
-        return
-
-    if options.action == 'service':
-        win_service_start(SVCNAME, main)
-        return
-
-    host, port_str = options.bind.split(':')
-    port = int(port_str)
-
-    print('Listening on %s:%d' % (host, port))
-    srv = BuildHTTPServer((host, port), BuildHTTPRequestHandler)
-    thr = threading.Thread(target=srv.serve_forever)
-    thr.start()
-    compat_input('Press ENTER to shut down')
-    srv.shutdown()
-    thr.join()
-
-
-def rmtree(path):
-    for name in os.listdir(path):
-        fname = os.path.join(path, name)
-        if os.path.isdir(fname):
-            rmtree(fname)
-        else:
-            os.chmod(fname, 0o666)
-            os.remove(fname)
-    os.rmdir(path)
-
-
-class BuildError(Exception):
-    def __init__(self, output, code=500):
-        self.output = output
-        self.code = code
-
-    def __str__(self):
-        return self.output
-
-
-class HTTPError(BuildError):
-    pass
-
-
-class PythonBuilder(object):
-    def __init__(self, **kwargs):
-        python_version = kwargs.pop('python', '3.4')
-        python_path = None
-        for node in ('Wow6432Node\\', ''):
-            try:
-                key = compat_winreg.OpenKey(
-                    compat_winreg.HKEY_LOCAL_MACHINE,
-                    r'SOFTWARE\%sPython\PythonCore\%s\InstallPath' % (node, python_version))
-                try:
-                    python_path, _ = compat_winreg.QueryValueEx(key, '')
-                finally:
-                    compat_winreg.CloseKey(key)
-                break
-            except Exception:
-                pass
-
-        if not python_path:
-            raise BuildError('No such Python version: %s' % python_version)
-
-        self.pythonPath = python_path
-
-        super(PythonBuilder, self).__init__(**kwargs)
-
-
-class GITInfoBuilder(object):
-    def __init__(self, **kwargs):
-        try:
-            self.user, self.repoName = kwargs['path'][:2]
-            self.rev = kwargs.pop('rev')
-        except ValueError:
-            raise BuildError('Invalid path')
-        except KeyError as e:
-            raise BuildError('Missing mandatory parameter "%s"' % e.args[0])
-
-        path = os.path.join(os.environ['APPDATA'], 'Build archive', self.repoName, self.user)
-        if not os.path.exists(path):
-            os.makedirs(path)
-        self.basePath = tempfile.mkdtemp(dir=path)
-        self.buildPath = os.path.join(self.basePath, 'build')
-
-        super(GITInfoBuilder, self).__init__(**kwargs)
-
-
-class GITBuilder(GITInfoBuilder):
-    def build(self):
-        try:
-            subprocess.check_output(['git', 'clone', 'git://github.com/%s/%s.git' % (self.user, self.repoName), self.buildPath])
-            subprocess.check_output(['git', 'checkout', self.rev], cwd=self.buildPath)
-        except subprocess.CalledProcessError as e:
-            raise BuildError(e.output)
-
-        super(GITBuilder, self).build()
-
-
-class YoutubeDLBuilder(object):
-    authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org']
-
-    def __init__(self, **kwargs):
-        if self.repoName != 'yt-dlp':
-            raise BuildError('Invalid repository "%s"' % self.repoName)
-        if self.user not in self.authorizedUsers:
-            raise HTTPError('Unauthorized user "%s"' % self.user, 401)
-
-        super(YoutubeDLBuilder, self).__init__(**kwargs)
-
-    def build(self):
-        try:
-            proc = subprocess.Popen([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'], stdin=subprocess.PIPE, cwd=self.buildPath)
-            proc.wait()
-            #subprocess.check_output([os.path.join(self.pythonPath, 'python.exe'), 'setup.py', 'py2exe'],
-            #                        cwd=self.buildPath)
-        except subprocess.CalledProcessError as e:
-            raise BuildError(e.output)
-
-        super(YoutubeDLBuilder, self).build()
-
-
-class DownloadBuilder(object):
-    def __init__(self, **kwargs):
-        self.handler = kwargs.pop('handler')
-        self.srcPath = os.path.join(self.buildPath, *tuple(kwargs['path'][2:]))
-        self.srcPath = os.path.abspath(os.path.normpath(self.srcPath))
-        if not self.srcPath.startswith(self.buildPath):
-            raise HTTPError(self.srcPath, 401)
-
-        super(DownloadBuilder, self).__init__(**kwargs)
-
-    def build(self):
-        if not os.path.exists(self.srcPath):
-            raise HTTPError('No such file', 404)
-        if os.path.isdir(self.srcPath):
-            raise HTTPError('Is a directory: %s' % self.srcPath, 401)
-
-        self.handler.send_response(200)
-        self.handler.send_header('Content-Type', 'application/octet-stream')
-        self.handler.send_header('Content-Disposition', 'attachment; filename=%s' % os.path.split(self.srcPath)[-1])
-        self.handler.send_header('Content-Length', str(os.stat(self.srcPath).st_size))
-        self.handler.end_headers()
-
-        with open(self.srcPath, 'rb') as src:
-            shutil.copyfileobj(src, self.handler.wfile)
-
-        super(DownloadBuilder, self).build()
-
-
-class CleanupTempDir(object):
-    def build(self):
-        try:
-            rmtree(self.basePath)
-        except Exception as e:
-            print('WARNING deleting "%s": %s' % (self.basePath, e))
-
-        super(CleanupTempDir, self).build()
-
-
-class Null(object):
-    def __init__(self, **kwargs):
-        pass
-
-    def start(self):
-        pass
-
-    def close(self):
-        pass
-
-    def build(self):
-        pass
-
-
-class Builder(PythonBuilder, GITBuilder, YoutubeDLBuilder, DownloadBuilder, CleanupTempDir, Null):
-    pass
-
-
-class BuildHTTPRequestHandler(compat_http_server.BaseHTTPRequestHandler):
-    actionDict = {'build': Builder, 'download': Builder}  # They're the same, no more caching.
-
-    def do_GET(self):
-        path = compat_urlparse.urlparse(self.path)
-        paramDict = dict([(key, value[0]) for key, value in compat_urlparse.parse_qs(path.query).items()])
-        action, _, path = path.path.strip('/').partition('/')
-        if path:
-            path = path.split('/')
-            if action in self.actionDict:
-                try:
-                    builder = self.actionDict[action](path=path, handler=self, **paramDict)
-                    builder.start()
-                    try:
-                        builder.build()
-                    finally:
-                        builder.close()
-                except BuildError as e:
-                    self.send_response(e.code)
-                    msg = compat_str(e).encode('UTF-8')
-                    self.send_header('Content-Type', 'text/plain; charset=UTF-8')
-                    self.send_header('Content-Length', len(msg))
-                    self.end_headers()
-                    self.wfile.write(msg)
-            else:
-                self.send_response(500, 'Unknown build method "%s"' % action)
-        else:
-            self.send_response(500, 'Malformed URL')
-
-if __name__ == '__main__':
-    main()
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
index 50f6bebc6..08f663e4b 100644
--- a/devscripts/check-porn.py
+++ b/devscripts/check-porn.py
@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
 """
 This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
 if we are not 'age_limit' tagging some porn site
@@ -12,11 +10,12 @@ pass the list filename as the only argument
 # Allow direct execution
 import os
 import sys
+
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 from test.helper import gettestcases
-from yt_dlp.utils import compat_urllib_parse_urlparse
-from yt_dlp.utils import compat_urllib_request
+
+from yt_dlp.utils import compat_urllib_parse_urlparse, compat_urllib_request

 if len(sys.argv) > 1:
     METHOD = 'LIST'
@@ -29,7 +28,7 @@ for test in gettestcases():
     try:
         webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
     except Exception:
-        print('\nFail: {0}'.format(test['name']))
+        print('\nFail: {}'.format(test['name']))
         continue

     webpage = webpage.decode('utf8', 'replace')
@@ -39,7 +38,7 @@ for test in gettestcases():
     elif METHOD == 'LIST':
         domain = compat_urllib_parse_urlparse(test['url']).netloc
         if not domain:
-            print('\nFail: {0}'.format(test['name']))
+            print('\nFail: {}'.format(test['name']))
             continue
         domain = '.'.join(domain.split('.')[-2:])

@@ -47,11 +46,11 @@ for test in gettestcases():
     if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or test['info_dict']['age_limit'] != 18):
-        print('\nPotential missing age_limit check: {0}'.format(test['name']))
+        print('\nPotential missing age_limit check: {}'.format(test['name']))
     elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and test['info_dict']['age_limit'] == 18):
-        print('\nPotential false negative: {0}'.format(test['name']))
+        print('\nPotential false negative: {}'.format(test['name']))

     else:
         sys.stdout.write('.')
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
index fb45e0280..d9c0048e2 100755
--- a/devscripts/fish-completion.py
+++ b/devscripts/fish-completion.py
@@ -1,12 +1,10 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
 import optparse
 import os
-from os.path import dirname as dirn
 import sys
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 import yt_dlp
 from yt_dlp.utils import shell_quote

@@ -46,5 +44,5 @@ def build_completion(opt_parser):
         f.write(filled_template)


-parser = yt_dlp.parseOpts()[0]
+parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
 build_completion(parser)
diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
index 0979eee5b..c7d83f1a7 100644
--- a/devscripts/generate_aes_testdata.py
+++ b/devscripts/generate_aes_testdata.py
@@ -1,15 +1,13 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
 import codecs
-import subprocess
-
 import os
+import subprocess
 import sys
+
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-from yt_dlp.utils import intlist_to_bytes
 from yt_dlp.aes import aes_encrypt, key_expansion
+from yt_dlp.utils import intlist_to_bytes

 secret_msg = b'Secret message goes here'
diff --git a/devscripts/lazy_load_template.py b/devscripts/lazy_load_template.py
index da89e070d..cdafaf1ef 100644
--- a/devscripts/lazy_load_template.py
+++ b/devscripts/lazy_load_template.py
@@ -1,31 +1,33 @@
-# coding: utf-8
+import importlib
+import random
 import re

-from ..utils import bug_reports_message, write_string
+from ..utils import (
+    age_restricted,
+    bug_reports_message,
+    classproperty,
+    write_string,
+)


 class LazyLoadMetaClass(type):
     def __getattr__(cls, name):
-        if '_real_class' not in cls.__dict__:
+        # "_TESTS" bloat the lazy_extractors
+        if '_real_class' not in cls.__dict__ and name != 'get_testcases':
             write_string(
-                f'WARNING: Falling back to normal extractor since lazy extractor '
-                f'{cls.__name__} does not have attribute {name}{bug_reports_message()}')
-        return getattr(cls._get_real_class(), name)
+                'WARNING: Falling back to normal extractor since lazy extractor '
+                f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
+        return getattr(cls.real_class, name)


 class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
-    _module = None
-    _WORKING = True
-
-    @classmethod
-    def _get_real_class(cls):
+    @classproperty
+    def real_class(cls):
         if '_real_class' not in cls.__dict__:
-            mod = __import__(cls._module, fromlist=(cls.__name__,))
-            cls._real_class = getattr(mod, cls.__name__)
+            cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__)
         return cls._real_class

     def __new__(cls, *args, **kwargs):
-        real_cls = cls._get_real_class()
-        instance = real_cls.__new__(real_cls)
+        instance = cls.real_class.__new__(cls.real_class)
         instance.__init__(*args, **kwargs)
         return instance
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
index 8b3670d85..361e17d8c 100755
--- a/devscripts/make_contributing.py
+++ b/devscripts/make_contributing.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
-import io
 import optparse
 import re

@@ -16,7 +13,7 @@ def main():

     infile, outfile = args

-    with io.open(infile, encoding='utf-8') as inf:
+    with open(infile, encoding='utf-8') as inf:
         readme = inf.read()

     bug_text = re.search(
@@ -26,7 +23,7 @@ def main():

     out = bug_text + dev_text

-    with io.open(outfile, 'w', encoding='utf-8') as outf:
+    with open(outfile, 'w', encoding='utf-8') as outf:
         outf.write(out)
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index b58fb85e3..8c481bc2d 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -1,105 +1,125 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals, print_function
-
-from inspect import getsource
-import io
 import os
-from os.path import dirname as dirn
+import optparse
 import sys
+from inspect import getsource

-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
-
-lazy_extractors_filename = sys.argv[1] if len(sys.argv) > 1 else 'yt_dlp/extractor/lazy_extractors.py'
-if os.path.exists(lazy_extractors_filename):
-    os.remove(lazy_extractors_filename)
-
-# Block plugins from loading
-plugins_dirname = 'ytdlp_plugins'
-plugins_blocked_dirname = 'ytdlp_plugins_blocked'
-if os.path.exists(plugins_dirname):
-    os.rename(plugins_dirname, plugins_blocked_dirname)
-
-from yt_dlp.extractor import _ALL_CLASSES
-from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
-
-if os.path.exists(plugins_blocked_dirname):
-    os.rename(plugins_blocked_dirname, plugins_dirname)
-
-with open('devscripts/lazy_load_template.py', 'rt') as f:
-    module_template = f.read()
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-CLASS_PROPERTIES = ['ie_key', 'working', '_match_valid_url', 'suitable', '_match_id', 'get_temp_id']
-module_contents = [
-    module_template,
-    *[getsource(getattr(InfoExtractor, k)) for k in CLASS_PROPERTIES],
-    '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n']
-ie_template = '''
+NO_ATTR = object()
+STATIC_CLASS_PROPERTIES = ['IE_NAME', 'IE_DESC', 'SEARCH_KEY', '_WORKING', '_NETRC_MACHINE', 'age_limit']
+CLASS_METHODS = [
+    'ie_key', 'working', 'description', 'suitable', '_match_valid_url', '_match_id', 'get_temp_id', 'is_suitable'
+]
+IE_TEMPLATE = '''
 class {name}({bases}):
-    _module = '{module}'
+    _module = {module!r}
 '''
-
-
-def get_base_name(base):
-    if base is InfoExtractor:
-        return 'LazyLoadExtractor'
-    elif base is SearchInfoExtractor:
-        return 'LazyLoadSearchExtractor'
-    else:
-        return base.__name__
-
-
-def build_lazy_ie(ie, name):
-    s = ie_template.format(
-        name=name,
-        bases=', '.join(map(get_base_name, ie.__bases__)),
-        module=ie.__module__)
+with open('devscripts/lazy_load_template.py', encoding='utf-8') as f:
+    MODULE_TEMPLATE = f.read()
+
+
+def main():
+    parser = optparse.OptionParser(usage='%prog [OUTFILE.py]')
+    args = parser.parse_args()[1] or ['yt_dlp/extractor/lazy_extractors.py']
+    if len(args) != 1:
+        parser.error('Expected only an output filename')
+
+    lazy_extractors_filename = args[0]
+    if os.path.exists(lazy_extractors_filename):
+        os.remove(lazy_extractors_filename)
+
+    _ALL_CLASSES = get_all_ies()  # Must be before import
+
+    from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor
+
+    DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
+    module_src = '\n'.join((
+        MODULE_TEMPLATE,
+        '    _module = None',
+        *extra_ie_code(DummyInfoExtractor),
+        '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
+        *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
+    ))
+
+    with open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
+        f.write(f'{module_src}\n')
+
+
+def get_all_ies():
+    PLUGINS_DIRNAME = 'ytdlp_plugins'
+    BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
+    if os.path.exists(PLUGINS_DIRNAME):
+        os.rename(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
+    try:
+        from yt_dlp.extractor import _ALL_CLASSES
+    finally:
+        if os.path.exists(BLOCKED_DIRNAME):
+            os.rename(BLOCKED_DIRNAME, PLUGINS_DIRNAME)
+    return _ALL_CLASSES
+
+
+def extra_ie_code(ie, base=None):
+    for var in STATIC_CLASS_PROPERTIES:
+        val = getattr(ie, var)
+        if val != (getattr(base, var) if base else NO_ATTR):
+            yield f'    {var} = {val!r}'
+    yield ''
+
+    for name in CLASS_METHODS:
+        f = getattr(ie, name)
+        if not base or f.__func__ != getattr(base, name).__func__:
+            yield getsource(f)
+
+
+def build_ies(ies, bases, attr_base):
+    names = []
+    for ie in sort_ies(ies, bases):
+        yield build_lazy_ie(ie, ie.__name__, attr_base)
+        if ie in ies:
+            names.append(ie.__name__)
+
+    yield f'\n_ALL_CLASSES = [{", ".join(names)}]'
+
+
+def sort_ies(ies, ignored_bases):
+    """find the correct sorting and add the required base classes so that subclasses can be correctly created"""
+    classes, returned_classes = ies[:-1], set()
+    assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
+    while classes:
+        for c in classes[:]:
+            bases = set(c.__bases__) - {object, *ignored_bases}
+            restart = False
+            for b in bases:
+                if b not in classes and b not in returned_classes:
+                    assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
+                    classes.insert(0, b)
+                    restart = True
+            if restart:
+                break
+            if bases <= returned_classes:
+                yield c
+                returned_classes.add(c)
+                classes.remove(c)
+                break
+    yield ies[-1]
+
+
+def build_lazy_ie(ie, name, attr_base):
+    bases = ', '.join({
+        'InfoExtractor': 'LazyLoadExtractor',
+        'SearchInfoExtractor': 'LazyLoadSearchExtractor',
+    }.get(base.__name__, base.__name__) for base in ie.__bases__)
+
+    s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
     valid_url = getattr(ie, '_VALID_URL', None)
     if not valid_url and hasattr(ie, '_make_valid_url'):
         valid_url = ie._make_valid_url()
     if valid_url:
         s += f'    _VALID_URL = {valid_url!r}\n'
-    if not ie._WORKING:
-        s += '    _WORKING = False\n'
-    if ie.suitable.__func__ is not InfoExtractor.suitable.__func__:
-        s += f'\n{getsource(ie.suitable)}'
-    return s
-
-
-# find the correct sorting and add the required base classes so that subclasses
-# can be correctly created
-classes = _ALL_CLASSES[:-1]
-ordered_cls = []
-while classes:
-    for c in classes[:]:
-        bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor))
-        stop = False
-        for b in bases:
-            if b not in classes and b not in ordered_cls:
-                if b.__name__ == 'GenericIE':
-                    exit()
-                classes.insert(0, b)
-                stop = True
-        if stop:
-            break
-        if all(b in ordered_cls for b in bases):
-            ordered_cls.append(c)
-            classes.remove(c)
-            break
-ordered_cls.append(_ALL_CLASSES[-1])
-
-names = []
-for ie in ordered_cls:
-    name = ie.__name__
-    src = build_lazy_ie(ie, name)
-    module_contents.append(src)
-    if ie in _ALL_CLASSES:
-        names.append(name)
-
-module_contents.append(
-    '\n_ALL_CLASSES = [{0}]'.format(', '.join(names)))
-
-module_src = '\n'.join(module_contents) + '\n'
-
-with io.open(lazy_extractors_filename, 'wt', encoding='utf-8') as f:
-    f.write(module_src)
+    return s + '\n'.join(extra_ie_code(ie, attr_base))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
index 47d6d27b7..fd234bf58 100755..100644
--- a/devscripts/make_readme.py
+++ b/devscripts/make_readme.py
@@ -2,30 +2,29 @@
 # yt-dlp --help | make_readme.py
 # This must be run in a console of correct width
-
-from __future__ import unicode_literals
-
-import io
-import sys
 import re
+import sys

 README_FILE = 'README.md'

-helptext = sys.stdin.read()
+OPTIONS_START = 'General Options:'
+OPTIONS_END = 'CONFIGURATION'
+EPILOG_START = 'See full documentation'
+
+
+helptext = sys.stdin.read()
 if isinstance(helptext, bytes):
-    helptext = helptext.decode('utf-8')
+    helptext = helptext.decode()

-with io.open(README_FILE, encoding='utf-8') as f:
-    oldreadme = f.read()
+start, end = helptext.index(f'\n  {OPTIONS_START}'), helptext.index(f'\n{EPILOG_START}')
+options = re.sub(r'(?m)^  (\w.+)$', r'## \1', helptext[start + 1: end + 1])

-header = oldreadme[:oldreadme.index('# OPTIONS')]
-footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
+with open(README_FILE, encoding='utf-8') as f:
+    readme = f.read()

-options = helptext[helptext.index('  General Options:') + 19:]
-options = re.sub(r'(?m)^  (\w.+)$', r'## \1', options)
-options = '# OPTIONS\n' + options + '\n'
+header = readme[:readme.index(f'## {OPTIONS_START}')]
+footer = readme[readme.index(f'# {OPTIONS_END}'):]

-with io.open(README_FILE, 'w', encoding='utf-8') as f:
-    f.write(header)
-    f.write(options)
-    f.write(footer)
+with open(README_FILE, 'w', encoding='utf-8') as f:
+    for part in (header, options, footer):
+        f.write(part)
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 729f60a0e..d8c53c5e1 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -1,47 +1,23 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
-import io
 import optparse
 import os
 import sys

+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-# Import yt_dlp
-ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
-sys.path.insert(0, ROOT_DIR)
-import yt_dlp
+from yt_dlp.extractor import list_extractor_classes


 def main():
     parser = optparse.OptionParser(usage='%prog OUTFILE.md')
-    options, args = parser.parse_args()
+    _, args = parser.parse_args()
     if len(args) != 1:
         parser.error('Expected an output filename')

-    outfile, = args
-
-    def gen_ies_md(ies):
-        for ie in ies:
-            ie_md = '**{0}**'.format(ie.IE_NAME)
-            if ie.IE_DESC is False:
-                continue
-            if ie.IE_DESC is not None:
-                ie_md += ': {0}'.format(ie.IE_DESC)
-            search_key = getattr(ie, 'SEARCH_KEY', None)
-            if search_key is not None:
-                ie_md += f'; "{ie.SEARCH_KEY}:" prefix'
-            if not ie.working():
-                ie_md += ' (Currently broken)'
-            yield ie_md
-
-    ies = sorted(yt_dlp.gen_extractors(), key=lambda i: i.IE_NAME.lower())
-    out = '# Supported sites\n' + ''.join(
-        ' - ' + md + '\n'
-        for md in gen_ies_md(ies))
-
-    with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(out)
+    out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False)
+
+    with open(args[0], 'w', encoding='utf-8') as outf:
+        outf.write(f'# Supported sites\n{out}\n')


 if __name__ == '__main__':
diff --git a/devscripts/posix-locale.sh b/devscripts/posix-locale.sh
deleted file mode 100755
index 0aa7a592d..000000000
--- a/devscripts/posix-locale.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-
-# source this file in your shell to get a POSIX locale (which will break many programs, but that's kind of the point)
-
-export LC_ALL=POSIX
-export LANG=POSIX
-export LANGUAGE=POSIX
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index b763d2d9a..df9abe5ae 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
-import io
 import optparse
 import os.path
 import re
@@ -32,14 +29,14 @@ def main():

     outfile, = args

-    with io.open(README_FILE, encoding='utf-8') as f:
+    with open(README_FILE, encoding='utf-8') as f:
         readme = f.read()

     readme = filter_excluded_sections(readme)
     readme = move_sections(readme)
     readme = filter_options(readme)

-    with io.open(outfile, 'w', encoding='utf-8') as outf:
+    with open(outfile, 'w', encoding='utf-8') as outf:
         outf.write(PREFIX + readme)
diff --git a/devscripts/run_tests.bat b/devscripts/run_tests.bat
index b8bb393d9..190d23918 100644
--- a/devscripts/run_tests.bat
+++ b/devscripts/run_tests.bat
@@ -13,4 +13,5 @@ if ["%~1"]==[""] (
     exit /b 1
 )

+set PYTHONWARNINGS=error
 pytest %test_set%
diff --git a/devscripts/run_tests.sh b/devscripts/run_tests.sh
index c9a75ba00..e9904ae35 100755
--- a/devscripts/run_tests.sh
+++ b/devscripts/run_tests.sh
@@ -11,4 +11,4 @@ else
     exit 1
 fi

-python3 -m pytest "$test_set"
+python3 -bb -Werror -m pytest "$test_set"
diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
index 780df0de6..59faea06a 100755
--- a/devscripts/zsh-completion.py
+++ b/devscripts/zsh-completion.py
@@ -1,11 +1,9 @@
 #!/usr/bin/env python3
-from __future__ import unicode_literals
-
 import os
-from os.path import dirname as dirn
 import sys
-sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 import yt_dlp

 ZSH_COMPLETION_FILE = "completions/zsh/_yt-dlp"
@@ -45,5 +43,5 @@ def build_completion(opt_parser):
         f.write(template)


-parser = yt_dlp.parseOpts()[0]
+parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
 build_completion(parser)
diff --git a/pyinst.py b/pyinst.py
--- a/pyinst.py
+++ b/pyinst.py
@@ -1,32 +1,41 @@
 #!/usr/bin/env python3
-# coding: utf-8
 import os
 import platform
 import sys

-from PyInstaller.utils.hooks import collect_submodules
+from PyInstaller.__main__ import run as run_pyinstaller

 OS_NAME = platform.system()
 if OS_NAME == 'Windows':
     from PyInstaller.utils.win32.versioninfo import (
-        VarStruct, VarFileInfo, StringStruct, StringTable,
-        StringFileInfo, FixedFileInfo, VSVersionInfo, SetVersion,
+        FixedFileInfo,
+        SetVersion,
+        StringFileInfo,
+        StringStruct,
+        StringTable,
+        VarFileInfo,
+        VarStruct,
+        VSVersionInfo,
     )
 elif OS_NAME == 'Darwin':
     pass
 else:
-    raise Exception('{OS_NAME} is not supported')
+    raise Exception(f'{OS_NAME} is not supported')

 ARCH = platform.architecture()[0][:2]


 def main():
     opts = parse_options()
-    version = read_version()
+    version = read_version('yt_dlp/version.py')

-    suffix = '_macos' if OS_NAME == 'Darwin' else '_x86' if ARCH == '32' else ''
-    final_file = 'dist/%syt-dlp%s%s' % (
-        'yt-dlp/' if '--onedir' in opts else '', suffix, '.exe' if OS_NAME == 'Windows' else '')
+    onedir = '--onedir' in opts or '-D' in opts
+    if not onedir and '-F' not in opts and '--onefile' not in opts:
+        opts.append('--onefile')
+
+    name = 'yt-dlp%s' % ('_macos' if OS_NAME == 'Darwin' else '_x86' if ARCH == '32' else '')
+    final_file = ''.join((
+        'dist/', f'{name}/' if onedir else '', name, '.exe' if OS_NAME == 'Windows' else ''))

     print(f'Building yt-dlp v{version} {ARCH}bit for {OS_NAME} with options {opts}')
     print('Remember to update the version using "devscripts/update-version.py"')
@@ -36,20 +45,20 @@ def main():
     print(f'Destination: {final_file}\n')

     opts = [
-        f'--name=yt-dlp{suffix}',
+        f'--name={name}',
         '--icon=devscripts/logo.ico',
         '--upx-exclude=vcruntime140.dll',
         '--noconfirm',
+        # NB: Modules that are only imported dynamically must be added here.
+        # --collect-submodules may not work correctly if user has a yt-dlp installed via PIP
+        '--hidden-import=yt_dlp.compat._legacy',
         *dependency_options(),
         *opts,
         'yt_dlp/__main__.py',
     ]
-    print(f'Running PyInstaller with {opts}')
-
-    import PyInstaller.__main__
-
-    PyInstaller.__main__.run(opts)

+    print(f'Running PyInstaller with {opts}')
+    run_pyinstaller(opts)
     set_version_info(final_file, version)
@@ -60,12 +69,14 @@ def parse_options():
     if ARCH != opts[0]:
         raise Exception(f'{opts[0]}bit executable cannot be built on a {ARCH}bit system')
     opts = opts[1:]
-    return opts or ['--onefile']
+    return opts


-def read_version():
-    exec(compile(open('yt_dlp/version.py').read(), 'yt_dlp/version.py', 'exec'))
-    return locals()['__version__']
+# Get the version from yt_dlp/version.py without importing the package
+def read_version(fname):
+    with open(fname, encoding='utf-8') as f:
+        exec(compile(f.read(), fname, 'exec'))
+        return locals()['__version__']


 def version_to_list(version):
@@ -74,10 +85,12 @@ def dependency_options():
-    dependencies = [pycryptodome_module(), 'mutagen', 'brotli', 'certifi'] + collect_submodules('websockets')
-    excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
+    # Due to the current implementation, these are auto-detected, but explicitly add them just in case
+    dependencies = [pycryptodome_module(), 'mutagen', 'brotli', 'certifi', 'websockets']
+    excluded_modules = ['test', 'ytdlp_plugins', 'youtube_dl', 'youtube_dlc']

     yield from (f'--hidden-import={module}' for module in dependencies)
+    yield '--collect-submodules=websockets'
     yield from (f'--exclude-module={module}' for module in excluded_modules)
diff --git a/setup.cfg b/setup.cfg
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,5 +2,5 @@ universal = True

 [flake8]
-exclude = yt_dlp/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv,devscripts/create-github-release.py,devscripts/release.sh,devscripts/show-downloads-statistics.py
-ignore = E402,E501,E731,E741,W503
\ No newline at end of file
+exclude = devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
+ignore = E402,E501,E731,E741,W503
diff --git a/supportedsites.md b/supportedsites.md
index eac7842a3..7663c09d4 100644
--- a/supportedsites.md
+++ b/supportedsites.md
@@ -24,12 +24,12 @@
 - **abcnews:video**
 - **abcotvs**: ABC Owned Television Stations
 - **abcotvs:clips**
- - **AbemaTV**
+ - **AbemaTV**: [<abbr title="netrc machine"><em>abematv</em></abbr>]
 - **AbemaTVTitle**
 - **AcademicEarth:Course**
 - **acast**
 - **acast:channel**
- - **ADN**: Anime Digital Network
+ - **ADN**: [<abbr title="netrc machine"><em>animedigitalnetwork</em></abbr>] Anime Digital Network
 - **AdobeConnect**
 - **adobetv**
 - **adobetv:channel**
@@ -40,8 +40,8 @@
 - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
 - **aenetworks:collection**
 - **aenetworks:show**
- - **afreecatv**: afreecatv.com
- - **afreecatv:live**: afreecatv.com
+ - **afreecatv**: [<abbr title="netrc machine"><em>afreecatv</em></abbr>] afreecatv.com
+ - **afreecatv:live**: [<abbr title="netrc machine"><em>afreecatv</em></abbr>] afreecatv.com
 - **afreecatv:user**
 - **AirMozilla**
 - **AliExpressLive**
@@ -50,8 +50,8 @@
 - **AlphaPorno**
 - **Alsace20TV**
 - **Alsace20TVEmbed**
- - **Alura**
- - **AluraCourse**
+ - **Alura**: [<abbr title="netrc machine"><em>alura</em></abbr>]
+ - **AluraCourse**: [<abbr title="netrc machine"><em>aluracourse</em></abbr>]
 - **Amara**
 - **AmazonStore**
 - **AMCNetworks**
@@ -60,9 +60,9 @@
 - **AmHistoryChannel**
 - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
 - **AnimalPlanet**
- - **AnimeLab**
- - **AnimeLabShows**
- - **AnimeOnDemand**
+ - **AnimeLab**: [<abbr title="netrc machine"><em>animelab</em></abbr>]
+ - **AnimeLabShows**: [<abbr title="netrc machine"><em>animelab</em></abbr>]
+ - **AnimeOnDemand**: [<abbr title="netrc machine"><em>animeondemand</em></abbr>]
 - **ant1newsgr:article**: ant1news.gr articles
 - **ant1newsgr:embed**: ant1news.gr embedded videos
 - **ant1newsgr:watch**: ant1news.gr videos
@@ -88,7 +88,7 @@
 - **ArteTVPlaylist**
 - **AsianCrush**
 - **AsianCrushPlaylist**
- - **AtresPlayer**
+ - **AtresPlayer**: [<abbr title="netrc machine"><em>atresplayer</em></abbr>]
 - **ATTTechChannel**
 - **ATVAt**
 - **AudiMedia**
@@ -114,13 +114,13 @@
 - **Bandcamp:weekly**
 - **bangumi.bilibili.com**: BiliBili番剧
 - **BannedVideo**
- - **bbc**: BBC
- - **bbc.co.uk**: BBC iPlayer
+ - **bbc**: [<abbr title="netrc machine"><em>bbc</em></abbr>] BBC
+ - **bbc.co.uk**: [<abbr title="netrc machine"><em>bbc</em></abbr>] BBC iPlayer
 - **bbc.co.uk:article**: BBC articles
 - **bbc.co.uk:iplayer:episodes**
 - **bbc.co.uk:iplayer:group**
 - **bbc.co.uk:playlist**
- - **BBVTV**
+ - **BBVTV**: [<abbr title="netrc machine"><em>bbvtv</em></abbr>]
 - **Beatport**
 - **Beeg**
 - **BehindKink**
@@ -142,8 +142,9 @@
 - **BilibiliChannel**
 - **BiliBiliPlayer**
 - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix
- - **BiliIntl**
- - **BiliIntlSeries**
+ - **BiliIntl**: [<abbr title="netrc machine"><em>biliintl</em></abbr>]
+ - **BiliIntlSeries**: [<abbr title="netrc machine"><em>biliintl</em></abbr>]
+ - **BiliLive**
 - **BioBioChileTV**
 - **Biography**
 - **BIQLE**
@@ -252,10 +253,10 @@
 - **CrooksAndLiars**
 - **CrowdBunker**
 - **CrowdBunkerChannel**
- - **crunchyroll**
- - **crunchyroll:beta**
- - **crunchyroll:playlist**
- - **crunchyroll:playlist:beta**
+ - **crunchyroll**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
+ - **crunchyroll:beta**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
+ - **crunchyroll:playlist**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
+ - **crunchyroll:playlist:beta**: [<abbr title="netrc machine"><em>crunchyroll</em></abbr>]
 - **CSpan**: C-SPAN
 - **CSpanCongress**
 - **CtsNews**: 華視新聞
@@ -263,18 +264,18 @@
 - **CTVNews**
 - **cu.ntv.co.jp**: Nippon Television Network
 - **CultureUnplugged**
- - **curiositystream**
- - **curiositystream:collections**
- - **curiositystream:series**
+ - **curiositystream**: [<abbr title="netrc machine"><em>curiositystream</em></abbr>]
+ - **curiositystream:collections**: [<abbr title="netrc machine"><em>curiositystream</em></abbr>]
+ - **curiositystream:series**: [<abbr title="netrc machine"><em>curiositystream</em></abbr>]
 - **CWTV**
- - **Cybrary**
- - **CybraryCourse**
+ - **Cybrary**: [<abbr title="netrc machine"><em>cybrary</em></abbr>]
+ - **CybraryCourse**: [<abbr title="netrc machine"><em>cybrary</em></abbr>]
 - **Daftsex**
 - **DagelijkseKost**: dagelijksekost.een.be
 - **DailyMail**
- - **dailymotion**
- - **dailymotion:playlist**
- - **dailymotion:user**
+ - **dailymotion**: [<abbr title="netrc machine"><em>dailymotion</em></abbr>]
+ - **dailymotion:playlist**: [<abbr title="netrc machine"><em>dailymotion</em></abbr>]
+ - **dailymotion:user**: [<abbr title="netrc machine"><em>dailymotion</em></abbr>]
 - **damtomo:record**
 - **damtomo:video**
 - **daum.net**
@@ -291,7 +292,7 @@
 - **DestinationAmerica**
 - **DHM**: Filmarchiv - Deutsches Historisches Museum
 - **Digg**
- - **DigitalConcertHall**: DigitalConcertHall extractor
+ - **DigitalConcertHall**: [<abbr title="netrc machine"><em>digitalconcerthall</em></abbr>] DigitalConcertHall extractor
 - **DigitallySpeaking**
 - **Digiteka**
 - **Discovery**
@@ -315,7 +316,7 @@
 - **DRBonanza**
 - **Drooble**
 - **Dropbox**
- - **Dropout**
+ - **Dropout**: [<abbr title="netrc machine"><em>dropout</em></abbr>]
 - **DropoutSeason**
 - **DrTuber**
 - **drtv**
@@ -334,7 +335,7 @@
 - **egghead:lesson**: egghead.io lesson
 - **ehftv**
 - **eHow**
- - **EinsUndEinsTV**
+ - **EinsUndEinsTV**: [<abbr title="netrc machine"><em>1und1tv</em></abbr>]
 - **Einthusan**
 - **eitb.tv**
 - **EllenTube**
@@ -348,7 +349,7 @@
 - **Epicon**
 - **EpiconSeries**
 - **Eporner**
- - **EroProfile**
+ - **EroProfile**: [<abbr title="netrc machine"><em>eroprofile</em></abbr>]
 - **EroProfile:album**
 - **ertflix**: ERTFLIX videos
 - **ertflix:codename**: ERTFLIX videos by codename
@@ -361,17 +362,17 @@
 - **Europa**
 - **EuropeanTour**
 - **EUScreen**
- - **EWETV**
+ - **EWETV**: [<abbr title="netrc machine"><em>ewetv</em></abbr>]
 - **ExpoTV**
 - **Expressen**
 - **ExtremeTube**
 - **EyedoTV**
- - **facebook**
+ - **facebook**: [<abbr title="netrc machine"><em>facebook</em></abbr>]
 - **FacebookPluginsVideo**
- - **fancode:live**
- - **fancode:vod**
+ - **fancode:live**: [<abbr title="netrc machine"><em>fancode</em></abbr>]
+ - **fancode:vod**: [<abbr title="netrc machine"><em>fancode</em></abbr>]
 - **faz.net**
- - **fc2**
+ - **fc2**: [<abbr title="netrc machine"><em>fc2</em></abbr>]
 - **fc2:embed**
 - **fc2:live**
 - **Fczenit**
@@ -401,19 +402,19 @@
 - **FranceTVSite**
 - **Freesound**
 - **freespeech.org**
- - **FrontendMasters**
- - **FrontendMastersCourse**
- - **FrontendMastersLesson**
+ - **FrontendMasters**: [<abbr title="netrc machine"><em>frontendmasters</em></abbr>]
+ - **FrontendMastersCourse**: [<abbr title="netrc machine"><em>frontendmasters</em></abbr>]
+ - **FrontendMastersLesson**: [<abbr title="netrc machine"><em>frontendmasters</em></abbr>]
 - **FujiTVFODPlus7**
- - **Funimation**
- - **funimation:page**
- - **funimation:show**
+ - **Funimation**: [<abbr title="netrc machine"><em>funimation</em></abbr>]
+ - **funimation:page**: [<abbr title="netrc machine"><em>funimation</em></abbr>]
+ - **funimation:show**: [<abbr title="netrc machine"><em>funimation</em></abbr>]
 - **Funk**
 - **Fusion**
 - **Fux**
 - **Gab**
 - **GabTV**
- - **Gaia**
+ - **Gaia**: [<abbr title="netrc machine"><em>gaia</em></abbr>]
 - **GameInformer**
 - **GameJolt**
 - **GameJoltCommunity**
@@ -425,20 +426,19 @@
 - **GameStar**
 - **Gaskrank**
 - **Gazeta**
- - **GDCVault**
+ - **GDCVault**: [<abbr title="netrc machine"><em>gdcvault</em></abbr>]
 - **GediDigital**
- - **gem.cbc.ca**
+ - **gem.cbc.ca**: [<abbr title="netrc machine"><em>cbcgem</em></abbr>]
 - **gem.cbc.ca:live**
 - **gem.cbc.ca:playlist**
- - **generic**: Generic downloader that works on some sites
 - **Gettr**
 - **GettrStreaming**
 - **Gfycat**
 - **GiantBomb**
 - **Giga**
- - **GlattvisionTV**
+ - **GlattvisionTV**: [<abbr title="netrc machine"><em>glattvisiontv</em></abbr>]
 - **Glide**: Glide mobile video messages (glide.me)
- - **Globo**
+ - **Globo**: [<abbr title="netrc machine"><em>globo</em></abbr>]
 - **GloboArticle**
 - **glomex**: Glomex videos
 - **glomex:embed**: Glomex embedded videos
@@ -466,7 +466,7 @@
 - **hgtv.com:show**
 - **HGTVDe**
 - **HGTVUsa**
- - **HiDive**
+ - **HiDive**: [<abbr title="netrc machine"><em>hidive</em></abbr>]
 - **HistoricFilms**
 - **history:player**
 - **history:topic**: History.com Topic
@@ -481,8 +481,8 @@
 - **Howcast**
 - **HowStuffWorks**
 - **hrfernsehen**
- - **HRTi**
- - **HRTiPlaylist**
+ - **HRTi**: [<abbr title="netrc machine"><em>hrti</em></abbr>]
+ - **HRTiPlaylist**: [<abbr title="netrc machine"><em>hrti</em></abbr>]
 - **HSEProduct**
 - **HSEShow**
 - **Huajiao**: 花椒直播
@@ -506,19 +506,19 @@
 - **Inc**
 - **IndavideoEmbed**
 - **InfoQ**
- - **Instagram**
- - **instagram:story**
- - **instagram:tag**: Instagram hashtag search URLs
- - **instagram:user**: Instagram user profile
+ - **Instagram**: [<abbr title="netrc machine"><em>instagram</em></abbr>]
+ - **instagram:story**: [<abbr title="netrc machine"><em>instagram</em></abbr>]
+ - **instagram:tag**: [<abbr title="netrc machine"><em>instagram</em></abbr>] Instagram hashtag search URLs
+ - **instagram:user**: [<abbr title="netrc machine"><em>instagram</em></abbr>] Instagram user profile
 - **InstagramIOS**: IOS instagram:// URL
 - **Internazionale**
 - **InternetVideoArchive**
 - **InvestigationDiscovery**
- - **IPrima**
+ - **IPrima**: [<abbr title="netrc machine"><em>iprima</em></abbr>]
 - **IPrimaCNN**
 - **iq.com**: International version of iQiyi
 - **iq.com:album**
- - **iqiyi**: 爱奇艺
+ - **iqiyi**: [<abbr title="netrc machine"><em>iqiyi</em></abbr>] 爱奇艺
 - **ITProTV**
 - **ITProTVCourse**
 - **ITTF**
@@ -576,9 +576,9 @@
 - **LcpPlay**
 - **Le**: 乐视网
 - **Lecture2Go**
- - **Lecturio**
- - **LecturioCourse**
- - **LecturioDeCourse**
+ - **Lecturio**: [<abbr title="netrc machine"><em>lecturio</em></abbr>]
+ - **LecturioCourse**: [<abbr title="netrc machine"><em>lecturio</em></abbr>]
+ - **LecturioDeCourse**: [<abbr title="netrc machine"><em>lecturio</em></abbr>]
 - **LEGO**
 - **Lemonde**
 - **Lenta**
@@ -592,10 +592,10 @@
 - **limelight:channel_list**
 - **LineLive**
 - **LineLiveChannel**
- - **LinkedIn**
- - **linkedin:learning**
- - **linkedin:learning:course**
- - **LinuxAcademy**
+ - **LinkedIn**: [<abbr title="netrc machine"><em>linkedin</em></abbr>]
+ - **linkedin:learning**: [<abbr title="netrc machine"><em>linkedin</em></abbr>]
+ - **linkedin:learning:course**: [<abbr title="netrc machine"><em>linkedin</em></abbr>]
+ - **LinuxAcademy**: [<abbr title="netrc machine"><em>linuxacademy</em></abbr>]
 - **LiTV**
 - **LiveJournal**
 - **livestream**
@@ -606,8 +606,8 @@
 - **LocalNews8**
 - **LoveHomePorn**
 - **lrt.lt**
- - **lynda**: lynda.com videos
- - **lynda:course**: lynda.com online courses
+ - **lynda**: [<abbr title="netrc machine"><em>lynda</em></abbr>] lynda.com videos
+ - **lynda:course**: [<abbr title="netrc machine"><em>lynda</em></abbr>] lynda.com online courses
 - **m6**
 - **MagentaMusik360**
 - **mailru**: Видео@Mail.Ru
@@ -674,7 +674,7 @@
 - **MLBVideo**
 - **MLSSoccer**
 - **Mnet**
- - **MNetTV**
+ - **MNetTV**: [<abbr title="netrc machine"><em>mnettv</em></abbr>]
 - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
 - **Mofosex**
 - **MofosexEmbed**
@@ -746,8 +746,8 @@
 - **ndr:embed**
 - **ndr:embed:base**
 - **NDTV**
- - **Nebula**
- - **nebula:collection**
+ - **Nebula**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>]
+ - **nebula:collection**: [<abbr title="netrc machine"><em>watchnebula</em></abbr>]
 - **NerdCubedFeed**
 - **netease:album**: 网易云音乐 - 专辑
 - **netease:djradio**: 网易云音乐 - 电台
@@ -756,7 +756,7 @@
 - **netease:program**: 网易云音乐 - 电台节目
 - **netease:singer**: 网易云音乐 - 歌手
 - **netease:song**: 网易云音乐
- - **NetPlus**
+ - **NetPlus**: [<abbr title="netrc machine"><em>netplus</em></abbr>]
 - **Netzkino**
 - **Newgrounds**
 - **Newgrounds:playlist**
@@ -770,8 +770,8 @@
 - **NexxEmbed**
 - **NFB**
 - **NFHSNetwork**
- - **nfl.com** (Currently broken)
- - **nfl.com:article** (Currently broken)
+ - **nfl.com**: (**Currently broken**)
+ - **nfl.com:article**: (**Currently broken**)
 - **NhkForSchoolBangumi**
 - **NhkForSchoolProgramList**
 - **NhkForSchoolSubject**: Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)
@@ -783,7 +783,7 @@
 - **nickelodeon:br**
 - **nickelodeonru**
 - **nicknight**
- - **niconico**: ニコニコ動画
+ - **niconico**: [<abbr title="netrc machine"><em>niconico</em></abbr>] ニコニコ動画
 - **niconico:history**: NicoNico user history. Requires cookies.
 - **niconico:playlist**
 - **niconico:series**
@@ -796,7 +796,7 @@
 - **Nitter**
 - **njoy**: N-JOY
 - **njoy:embed**
- - **NJPWWorld**: 新日本プロレスワールド
+ - **NJPWWorld**: [<abbr title="netrc machine"><em>njpwworld</em></abbr>] 新日本プロレスワールド
 - **NobelPrize**
 - **NonkTube**
 - **NoodleMagazine**
@@ -869,9 +869,9 @@
 - **orf:tvthek**: ORF TVthek
 - **orf:vorarlberg**: Radio Vorarlberg
 - **orf:wien**: Radio Wien
- - **OsnatelTV**
+ - **OsnatelTV**: [<abbr title="netrc machine"><em>osnateltv</em></abbr>]
 - **OutsideTV**
- - **PacktPub**
+ - **PacktPub**: [<abbr title="netrc machine"><em>packtpub</em></abbr>]
 - **PacktPubCourse**
 - **PalcoMP3:artist**
 - **PalcoMP3:song**
@@ -893,7 +893,7 @@
 - **peer.tv**
 - **PeerTube**
 - **PeerTube:Playlist**
- - **peloton**
+ - **peloton**: [<abbr title="netrc machine"><em>peloton</em></abbr>]
 - **peloton:live**: Peloton Live
 - **People**
 - **PerformGroup**
@@ -902,7 +902,7 @@
 - **PhilharmonieDeParis**: Philharmonie de Paris
 - **phoenix.de**
 - **Photobucket**
- - **Piapro**
+ - **Piapro**: [<abbr title="netrc machine"><em>piapro</em></abbr>]
 - **Picarto**
 - **PicartoVod**
 - **Piksel**
@@ -913,26 +913,26 @@
 - **pixiv:sketch:user**
 - **Pladform**
 - **PlanetMarathi**
- - **Platzi**
- - **PlatziCourse**
+ - **Platzi**: [<abbr title="netrc machine"><em>platzi</em></abbr>]
+ - **PlatziCourse**: [<abbr title="netrc machine"><em>platzi</em></abbr>]
 - **play.fm**
 - **player.sky.it**
- - **PlayPlusTV**
+ - **PlayPlusTV**: [<abbr title="netrc machine"><em>playplustv</em></abbr>]
 - **PlayStuff**
 - **PlaysTV**
 - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz
 - **Playvid**
 - **PlayVids**
 - **Playwire**
- - **pluralsight**
+ - **pluralsight**: [<abbr title="netrc machine"><em>pluralsight</em></abbr>]
 - **pluralsight:course**
 - **PlutoTV**
 - **podomatic**
 - **Pokemon**
 - **PokemonSoundLibrary**
 - **PokemonWatch**
- - **PokerGo**
- - **PokerGoCollection**
+ - **PokerGo**: [<abbr title="netrc machine"><em>pokergo</em></abbr>]
+ - **PokerGoCollection**: [<abbr title="netrc machine"><em>pokergo</em></abbr>]
 - **PolsatGo**
 - **PolskieRadio**
 - **polskieradio:kierowcow**
@@ -947,11 +947,11 @@
 - **Pornez**
 - **PornFlip**
 - **PornHd**
- - **PornHub**: PornHub and Thumbzilla
- - **PornHubPagedVideoList**
- - **PornHubPlaylist**
- - **PornHubUser**
- - **PornHubUserVideosUpload**
+ - **PornHub**: [<abbr title="netrc machine"><em>pornhub</em></abbr>] PornHub and Thumbzilla
+ - **PornHubPagedVideoList**: [<abbr title="netrc machine"><em>pornhub</em></abbr>]
+ - **PornHubPlaylist**: [<abbr title="netrc machine"><em>pornhub</em></abbr>]
+ - **PornHubUser**: [<abbr title="netrc machine"><em>pornhub</em></abbr>]
+ - **PornHubUserVideosUpload**: [<abbr title="netrc machine"><em>pornhub</em></abbr>]
 - **Pornotube**
 - **PornoVoisines**
 - **PornoXO**
@@ -973,7 +973,7 @@
 - **qqmusic:playlist**: QQ音乐 - 歌单
 - **qqmusic:singer**: QQ音乐 - 歌手
 - **qqmusic:toplist**: QQ音乐 - 排行榜
- - **QuantumTV**
+ - **QuantumTV**: [<abbr title="netrc machine"><em>quantumtv</em></abbr>]
 - **Qub**
 - **R7**
 - **R7Article**
@@ -1029,8 +1029,8 @@
 - **Rokfin**
 - **rokfin:channel**
 - **rokfin:stack**
- - **RoosterTeeth**
- - **RoosterTeethSeries**
+ - **RoosterTeeth**: [<abbr title="netrc machine"><em>roosterteeth</em></abbr>]
+ - **RoosterTeethSeries**: [<abbr title="netrc machine"><em>roosterteeth</em></abbr>]
 - **RottenTomatoes**
 - **Rozhlas**
 - **RTBF**
@@ -1069,12 +1069,12 @@
 - **Ruutu**
 - **Ruv**
 - **ruv.is:spila**
- - **safari**: safaribooksonline.com online video
- - **safari:api**
- - **safari:course**: safaribooksonline.com online courses
+ - **safari**: [<abbr title="netrc machine"><em>safari</em></abbr>] safaribooksonline.com online video
+ - **safari:api**: [<abbr title="netrc machine"><em>safari</em></abbr>]
+ - **safari:course**: [<abbr title="netrc machine"><em>safari</em></abbr>] safaribooksonline.com online courses
 - **Saitosan**
- - **SAKTV**
- - **SaltTV**
+ - **SAKTV**: [<abbr title="netrc machine"><em>saktv</em></abbr>]
+ - **SaltTV**: [<abbr title="netrc machine"><em>salttv</em></abbr>]
 - **SampleFocus**
 - **Sapo**: SAPO Vídeos
 - **savefrom.net**
@@ -1086,8 +1086,8 @@
 - **ScreencastOMatic**
 - **ScrippsNetworks**
 - **scrippsnetworks:watch**
- - **SCTE**
- - **SCTECourse**
+ - **SCTE**: [<abbr title="netrc machine"><em>scte</em></abbr>]
+ - **SCTECourse**: [<abbr title="netrc machine"><em>scte</em></abbr>]
 - **Seeker**
 - **SenateGov**
 - **SenateISVP**
@@ -1096,7 +1096,7 @@
 - **Sexu**
 - **SeznamZpravy**
 - **SeznamZpravyArticle**
- - **Shahid**
+ - **Shahid**: [<abbr title="netrc machine"><em>shahid</em></abbr>]
 - **ShahidShow**
 - **Shared**: shared.sx
 - **ShemarooMe**
@@ -1121,15 +1121,15 @@
 - **Slutload**
 - **Snotr**
 - **Sohu**
- - **SonyLIV**
+ - **SonyLIV**: [<abbr title="netrc machine"><em>sonyliv</em></abbr>]
 - **SonyLIVSeries**
- - **soundcloud**
- - **soundcloud:playlist**
- - **soundcloud:related**
- - **soundcloud:search**: Soundcloud search; "scsearch:" prefix
- - **soundcloud:set**
- - **soundcloud:trackstation**
- - **soundcloud:user**
+ - **soundcloud**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>]
+ - **soundcloud:playlist**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>]
+ - **soundcloud:related**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>]
+ - **soundcloud:search**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>] Soundcloud search; "scsearch:" prefix
+ - **soundcloud:set**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>]
+ - **soundcloud:trackstation**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>]
+ - **soundcloud:user**: [<abbr title="netrc machine"><em>soundcloud</em></abbr>]
 - **SoundcloudEmbed**
 - **soundgasm**
 - **soundgasm:profile**
@@ -1147,8 +1147,8 @@
 - **Sport5**
 - **SportBox**
 - **SportDeutschland**
- - **spotify**
- - **spotify:show**
+ - **spotify**: Spotify episodes
+ - **spotify:show**: Spotify shows
 - **Spreaker**
 - **SpreakerPage**
 - **SpreakerShow**
@@ -1190,13 +1190,13 @@
 - **Tass**
 - **TBS**
 - **TDSLifeway**
- - **Teachable**
- - **TeachableCourse**
+ - **Teachable**: [<abbr title="netrc machine"><em>teachable</em></abbr>]
+ - **TeachableCourse**: [<abbr title="netrc machine"><em>teachable</em></abbr>]
 - **teachertube**: teachertube.com videos
 - **teachertube:user:collection**: teachertube.com user and collection videos
 - **TeachingChannel**
 - **Teamcoco**
- - **TeamTreeHouse**
+ - **TeamTreeHouse**: [<abbr title="netrc machine"><em>teamtreehouse</em></abbr>]
 - **TechTalks**
 - **techtv.mit.edu**
 - **TedEmbed**
@@ -1218,8 +1218,8 @@
 - **TeleQuebecVideo**
 - **TeleTask**
 - **Telewebion**
- - **TennisTV**
- - **TenPlay**
+ - **TennisTV**: [<abbr title="netrc machine"><em>tennistv</em></abbr>]
+ - **TenPlay**: [<abbr title="netrc machine"><em>10play</em></abbr>]
 - **TF1**
 - **TFO**
 - **TheIntercept**
@@ -1250,10 +1250,10 @@
 - **Tokentube**
 - **Tokentube:channel**
 - **ToonGoggles**
- - **tou.tv**
+ - **tou.tv**: [<abbr title="netrc machine"><em>toutv</em></abbr>]
 - **Toypics**: Toypics video
 - **ToypicsUser**: Toypics user profile
- - **TrailerAddict** (Currently broken)
+ - **TrailerAddict**: (**Currently broken**)
 - **TravelChannel**
 - **Trilulilu**
 - **Trovo**
@@ -1264,9 +1264,9 @@
 - **TruNews**
 - **TruTV**
 - **Tube8**
- - **TubiTv**
+ - **TubiTv**: [<abbr title="netrc machine"><em>tubitv</em></abbr>]
 - **TubiTvShow**
- - **Tumblr**
+ - **Tumblr**: [<abbr title="netrc machine"><em>tumblr</em></abbr>]
 - **tunein:clip**
 - **tunein:program**
 - **tunein:station**
@@ -1314,23 +1314,23 @@
 - **TwitCasting**
 - **TwitCastingLive**
 - **TwitCastingUser**
- - **twitch:clips**
- - **twitch:stream**
- - **twitch:vod**
- - **TwitchCollection**
- - **TwitchVideos**
- - **TwitchVideosClips**
- - **TwitchVideosCollections**
+ - **twitch:clips**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
+ - **twitch:stream**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
+ - **twitch:vod**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
+ - **TwitchCollection**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
+ - **TwitchVideos**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
+ - **TwitchVideosClips**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
+ - **TwitchVideosCollections**: [<abbr title="netrc machine"><em>twitch</em></abbr>]
 - **twitter**
 - **twitter:amplify**
 - **twitter:broadcast**
 - **twitter:card**
 - **twitter:shortener**
- - **udemy**
- - **udemy:course**
+ - **udemy**: [<abbr title="netrc machine"><em>udemy</em></abbr>]
+ - **udemy:course**: [<abbr title="netrc machine"><em>udemy</em></abbr>]
 - **UDNEmbed**: 聯合影音
- - **UFCArabia**
- - **UFCTV**
+ - **UFCArabia**: [<abbr title="netrc machine"><em>ufcarabia</em></abbr>]
+ - **UFCTV**: [<abbr title="netrc machine"><em>ufctv</em></abbr>]
 - **ukcolumn**
 - **UKTVPlay**
 - **umg:de**: Universal Music Deutschland
@@ -1358,7 +1358,7 @@
 - **VevoPlaylist**
 - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
 - **vh1.com**
- - **vhx:embed**
+ - **vhx:embed**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
 - **Viafree**
 - **vice**
 - **vice:article**
@@ -1378,26 +1378,26 @@
 - **videomore:season**
 - **videomore:video**
 - **VideoPress**
- - **Vidio**
- - **VidioLive**
- - **VidioPremier**
+ - **Vidio**: [<abbr title="netrc machine"><em>vidio</em></abbr>]
+ - **VidioLive**: [<abbr title="netrc machine"><em>vidio</em></abbr>]
+ - **VidioPremier**: [<abbr title="netrc machine"><em>vidio</em></abbr>]
 - **VidLii**
- - **vier**: vier.be and vijf.be
+ - **vier**: [<abbr title="netrc machine"><em>vier</em></abbr>] vier.be and vijf.be
 - **vier:videos**
 - **viewlift**
 - **viewlift:embed**
 - **Viidea**
- - **viki**
- - **viki:channel**
- - **vimeo**
- - **vimeo:album**
- - **vimeo:channel**
- - **vimeo:group**
- - **vimeo:likes**: Vimeo user likes
- - **vimeo:ondemand**
- - **vimeo:review**: Review pages on vimeo
- - **vimeo:user**
- - **vimeo:watchlater**: Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)
+ - **viki**: [<abbr title="netrc machine"><em>viki</em></abbr>]
+ - **viki:channel**: [<abbr title="netrc machine"><em>viki</em></abbr>]
+ - **vimeo**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
+ - **vimeo:album**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
+ - **vimeo:channel**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
+ - **vimeo:group**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
+ - **vimeo:likes**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Vimeo user likes
+ - **vimeo:ondemand**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
+ - **vimeo:review**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Review pages on vimeo
+ - **vimeo:user**: [<abbr title="netrc machine"><em>vimeo</em></abbr>]
+ - **vimeo:watchlater**: [<abbr title="netrc machine"><em>vimeo</em></abbr>] Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)
 - **Vimm:recording**
 - **Vimm:stream**
 - **Vimple**: Vimple - one-click video hosting
@@ -1405,15 +1405,15 @@
 - **vine:user**
 - **Viqeo**
 - **Viu**
- - **viu:ott**
+ - **viu:ott**: [<abbr title="netrc machine"><em>viu</em></abbr>]
 - **viu:playlist**
 - **Vivo**: vivo.sx
- - **vk**: VK
- - **vk:uservideos**: VK - User's Videos
- - **vk:wallpost**
- - **vlive**
- - **vlive:channel**
- - **vlive:post**
+ - **vk**: [<abbr title="netrc machine"><em>vk</em></abbr>] VK
+ - **vk:uservideos**: [<abbr title="netrc machine"><em>vk</em></abbr>] VK - User's Videos
+ - **vk:wallpost**: [<abbr title="netrc machine"><em>vk</em></abbr>]
+ - **vlive**: [<abbr title="netrc machine"><em>vlive</em></abbr>]
+ - **vlive:channel**: [<abbr title="netrc machine"><em>vlive</em></abbr>]
+ - **vlive:post**: [<abbr title="netrc machine"><em>vlive</em></abbr>]
 - **vm.tiktok**
 - **Vodlocker**
 - **VODPl**
@@ -1428,12 +1428,12 @@
 - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
 - **Vrak**
 - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza
- - **VrtNU**: VrtNU.be
- - **vrv**
+ - **VrtNU**: [<abbr title="netrc machine"><em>vrtnu</em></abbr>] VrtNU.be
+ - **vrv**: [<abbr title="netrc machine"><em>vrv</em></abbr>]
 - **vrv:series**
 - **VShare**
 - **VTM**
- - **VTXTV**
+ - **VTXTV**: [<abbr title="netrc machine"><em>vtxtv</em></abbr>]
 - **VuClip**
 - **Vupload**
 - **VVVVID**
@@ -1442,7 +1442,7 @@
 - **Vzaar**
 - **Wakanim**
 - **Walla**
- - **WalyTV**
+ - **WalyTV**: [<abbr title="netrc machine"><em>walytv</em></abbr>]
 - **wasdtv:clip**
 - **wasdtv:record**
 - **wasdtv:stream**
@@ -1452,7 +1452,7 @@
 - **WatchBox**
 - **WatchIndianPorn**: Watch Indian Porn
 - **WDR**
- - **wdr:mobile** (Currently broken)
+ - **wdr:mobile**: (**Currently broken**)
 - **WDRElefant**
 - **WDRPage**
 - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix
@@ -1537,13 +1537,13 @@
 - **YoutubeLivestreamEmbed**: YouTube livestream embeds
 - **YoutubeYtBe**: youtu.be
 - **Zapiks**
- - **Zattoo**
- - **ZattooLive**
- - **ZattooMovies**
- - **ZattooRecordings**
+ - **Zattoo**: [<abbr title="netrc machine"><em>zattoo</em></abbr>]
+ - **ZattooLive**: [<abbr title="netrc machine"><em>zattoo</em></abbr>]
+ - **ZattooMovies**: [<abbr title="netrc machine"><em>zattoo</em></abbr>]
+ - **ZattooRecordings**: [<abbr title="netrc machine"><em>zattoo</em></abbr>]
 - **ZDF**
 - **ZDFChannel**
- - **Zee5**
+ - **Zee5**: [<abbr title="netrc machine"><em>zee5</em></abbr>]
 - **zee5:series**
 - **ZenYandex**
 - **ZenYandexChannel**
@@ -1552,3 +1552,4 @@
 - **zingmp3:album**
 - **zoom**
 - **Zype**
+ - **generic**: Generic downloader that works on some sites
diff --git a/test/helper.py b/test/helper.py
index 28c21b2eb..2333ace98 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -1,26 +1,16 @@
-from __future__ import unicode_literals
-
 import errno
-import io
 import hashlib
 import json
 import os.path
 import re
-import types
 import ssl
 import sys
+import types

 import yt_dlp.extractor
 from yt_dlp import YoutubeDL
-from yt_dlp.compat import (
-    compat_os_name,
-    compat_str,
-)
-from yt_dlp.utils import (
-    preferredencoding,
-    write_string,
-)
-
+from yt_dlp.compat import compat_os_name, compat_str
+from yt_dlp.utils import preferredencoding, write_string

 if 'pytest' in sys.modules:
     import pytest
@@ -35,10 +25,10 @@ def get_params(override=None):
                                    'parameters.json')
     LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                          'local_parameters.json')
-    with io.open(PARAMETERS_FILE, encoding='utf-8') as pf:
+    with open(PARAMETERS_FILE, encoding='utf-8') as pf:
         parameters = json.load(pf)
     if os.path.exists(LOCAL_PARAMETERS_FILE):
-        with io.open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
+        with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
             parameters.update(json.load(pf))
     if override:
         parameters.update(override)
@@ -63,8 +53,8 @@ def report_warning(message):
         _msg_header = '\033[0;33mWARNING:\033[0m'
     else:
         _msg_header = 'WARNING:'
-    output = '%s %s\n' % (_msg_header, message)
-    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
+    output = f'{_msg_header} {message}\n'
+    if 'b' in getattr(sys.stderr, 'mode', ''):
         output = output.encode(preferredencoding())
     sys.stderr.write(output)
@@ -74,7 +64,7 @@ class FakeYDL(YoutubeDL):
         # Different instances of the downloader can't share the same dictionary
         # some test set the "sublang" parameter, which would break the md5 checks.
         params = get_params(override=override)
-        super(FakeYDL, self).__init__(params, auto_init=False)
+        super().__init__(params, auto_init=False)
         self.result = []

     def to_screen(self, s, skip_eol=None):
@@ -99,11 +89,10 @@ class FakeYDL(YoutubeDL):

 def gettestcases(include_onlymatching=False):
     for ie in yt_dlp.extractor.gen_extractors():
-        for tc in ie.get_testcases(include_onlymatching):
-            yield tc
+        yield from ie.get_testcases(include_onlymatching)


-md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
+md5 = lambda s: hashlib.md5(s.encode()).hexdigest()


 def expect_value(self, got, expected, field):
@@ -113,33 +102,30 @@ def expect_value(self, got, expected, field):

         self.assertTrue(
             isinstance(got, compat_str),
-            'Expected a %s object, but got %s for field %s' % (
-                compat_str.__name__, type(got).__name__, field))
+            f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}')
         self.assertTrue(
             match_rex.match(got),
-            'field %s (value: %r) should match %r' % (field, got, match_str))
+            f'field {field} (value: {got!r}) should match {match_str!r}')
     elif isinstance(expected, compat_str) and expected.startswith('startswith:'):
         start_str = expected[len('startswith:'):]
         self.assertTrue(
             isinstance(got, compat_str),
-            'Expected a %s object, but got %s for field %s' % (
-                compat_str.__name__, type(got).__name__, field))
+            f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}')
         self.assertTrue(
             got.startswith(start_str),
-            'field %s (value: %r) should start with %r' % (field, got, start_str))
+            f'field {field} (value: {got!r}) should start with {start_str!r}')
     elif isinstance(expected, compat_str) and expected.startswith('contains:'):
         contains_str = expected[len('contains:'):]
         self.assertTrue(
             isinstance(got, compat_str),
-            'Expected a %s object, but got %s for field %s' % (
-                compat_str.__name__, type(got).__name__, field))
+            f'Expected a {compat_str.__name__} object, but got {type(got).__name__} for field {field}')
         self.assertTrue(
             contains_str in got,
-            'field %s (value: %r) should contain %r' % (field, got, contains_str))
+            f'field {field} (value: {got!r}) should contain {contains_str!r}')
     elif isinstance(expected, type):
         self.assertTrue(
             isinstance(got, expected),
-            'Expected type %r for field %s, but got value %r of type %r' % (expected, field, got, type(got)))
+            f'Expected type {expected!r} for field {field}, but got
value {got!r} of type {type(got)!r}') elif isinstance(expected, dict) and isinstance(got, dict): expect_dict(self, got, expected) elif isinstance(expected, list) and isinstance(got, list): @@ -159,13 +145,12 @@ def expect_value(self, got, expected, field): if isinstance(expected, compat_str) and expected.startswith('md5:'): self.assertTrue( isinstance(got, compat_str), - 'Expected field %s to be a unicode object, but got value %r of type %r' % (field, got, type(got))) + f'Expected field {field} to be a unicode object, but got value {got!r} of type {type(got)!r}') got = 'md5:' + md5(got) elif isinstance(expected, compat_str) and re.match(r'^(?:min|max)?count:\d+', expected): self.assertTrue( isinstance(got, (list, dict)), - 'Expected field %s to be a list or a dict, but it is of type %s' % ( - field, type(got).__name__)) + f'Expected field {field} to be a list or a dict, but it is of type {type(got).__name__}') op, _, expected_num = expected.partition(':') expected_num = int(expected_num) if op == 'mincount': @@ -185,7 +170,7 @@ def expect_value(self, got, expected, field): return self.assertEqual( expected, got, - 'Invalid value for field %s, expected %r, got %r' % (field, expected, got)) + f'Invalid value for field {field}, expected {expected!r}, got {got!r}') def expect_dict(self, got_dict, expected_dict): @@ -260,13 +245,13 @@ def expect_info_dict(self, got_dict, expected_dict): info_dict_str = '' if len(missing_keys) != len(expected_dict): info_dict_str += ''.join( - ' %s: %s,\n' % (_repr(k), _repr(v)) + f' {_repr(k)}: {_repr(v)},\n' for k, v in test_info_dict.items() if k not in missing_keys) if info_dict_str: info_dict_str += '\n' info_dict_str += ''.join( - ' %s: %s,\n' % (_repr(k), _repr(test_info_dict[k])) + f' {_repr(k)}: {_repr(test_info_dict[k])},\n' for k in missing_keys) write_string( '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr) @@ -295,21 +280,21 @@ def assertRegexpMatches(self, text, regexp, msg=None): def assertGreaterEqual(self, got, expected, msg=None): if not (got >= expected): if msg is None: - msg = '%r not greater than or equal to %r' % (got, expected) + msg = f'{got!r} not greater than or equal to {expected!r}' self.assertTrue(got >= expected, msg) def assertLessEqual(self, got, expected, msg=None): if not (got <= expected): if msg is None: - msg = '%r not less than or equal to %r' % (got, expected) + msg = f'{got!r} not less than or equal to {expected!r}' self.assertTrue(got <= expected, msg) def assertEqual(self, got, expected, msg=None): if not (got == expected): if msg is None: - msg = '%r not equal to %r' % (got, expected) + msg = f'{got!r} not equal to {expected!r}' self.assertTrue(got == expected, msg) diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index 866ded243..257ea7dd3 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1,21 +1,23 @@ #!/usr/bin/env python3 - -from __future__ import unicode_literals - # Allow direct execution -import io import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import threading from test.helper import FakeYDL, expect_dict, expect_value, http_server_port + from yt_dlp.compat import compat_etree_fromstring, compat_http_server -from yt_dlp.extractor.common import InfoExtractor from yt_dlp.extractor import YoutubeIE, get_info_extractor -from yt_dlp.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError -import threading - +from yt_dlp.extractor.common import InfoExtractor 
+from yt_dlp.utils import ( + ExtractorError, + RegexNotFoundError, + encode_data_uri, + strip_jsonp, +) TEAPOT_RESPONSE_STATUS = 418 TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>" @@ -1011,8 +1013,7 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES: - with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, encoding='utf-8') as f: formats, subs = self.ie._parse_m3u8_formats_and_subtitles( f.read(), m3u8_url, ext='mp4') self.ie._sort_formats(formats) @@ -1357,10 +1358,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: - with io.open('./test/testdata/mpd/%s.mpd' % mpd_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f: formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( - compat_etree_fromstring(f.read().encode('utf-8')), + compat_etree_fromstring(f.read().encode()), mpd_base_url=mpd_base_url, mpd_url=mpd_url) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) @@ -1549,10 +1549,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES: - with io.open('./test/testdata/ism/%s.Manifest' % ism_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/ism/%s.Manifest' % ism_file, encoding='utf-8') as f: formats, subtitles = self.ie._parse_ism_formats_and_subtitles( - compat_etree_fromstring(f.read().encode('utf-8')), ism_url=ism_url) + compat_etree_fromstring(f.read().encode()), ism_url=ism_url) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) expect_value(self, subtitles, expected_subtitles, None) @@ -1576,10 +1575,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for f4m_file, f4m_url, expected_formats in _TEST_CASES: - with io.open('./test/testdata/f4m/%s.f4m' % f4m_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f: formats = self.ie._parse_f4m_formats( - compat_etree_fromstring(f.read().encode('utf-8')), + compat_etree_fromstring(f.read().encode()), f4m_url, None) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) @@ -1624,10 +1622,9 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ ] for xspf_file, xspf_url, expected_entries in _TEST_CASES: - with io.open('./test/testdata/xspf/%s.xspf' % xspf_file, - mode='r', encoding='utf-8') as f: + with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f: entries = self.ie._parse_xspf( - compat_etree_fromstring(f.read().encode('utf-8')), + compat_etree_fromstring(f.read().encode()), xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url) expect_value(self, entries, expected_entries, None) for i in range(len(entries)): diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index c9108c5b6..1133f6165 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -1,31 +1,38 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import unicode_literals - # Allow direct execution import os import sys import unittest + sys.path.insert(0, 
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import copy import json - from test.helper import FakeYDL, assertRegexpMatches + from yt_dlp import YoutubeDL -from yt_dlp.compat import compat_os_name, compat_setenv, compat_str, compat_urllib_error +from yt_dlp.compat import ( + compat_os_name, + compat_setenv, + compat_str, + compat_urllib_error, +) from yt_dlp.extractor import YoutubeIE from yt_dlp.extractor.common import InfoExtractor from yt_dlp.postprocessor.common import PostProcessor -from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func, LazyList +from yt_dlp.utils import ( + ExtractorError, + LazyList, + int_or_none, + match_filter_func, +) TEST_URL = 'http://localhost/sample.mp4' class YDL(FakeYDL): def __init__(self, *args, **kwargs): - super(YDL, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.downloaded_info_dicts = [] self.msgs = [] @@ -551,11 +558,11 @@ class TestYoutubeDL(unittest.TestCase): def s_formats(lang, autocaption=False): return [{ 'ext': ext, - 'url': 'http://localhost/video.%s.%s' % (lang, ext), + 'url': f'http://localhost/video.{lang}.{ext}', '_auto': autocaption, } for ext in ['vtt', 'srt', 'ass']] - subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es']) - auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es']) + subtitles = {l: s_formats(l) for l in ['en', 'fr', 'es']} + auto_captions = {l: s_formats(l, True) for l in ['it', 'pt', 'es']} info_dict = { 'id': 'test', 'title': 'Test', @@ -580,7 +587,7 @@ class TestYoutubeDL(unittest.TestCase): result = get_info({'writesubtitles': True}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['en'])) + self.assertEqual(set(subs.keys()), {'en'}) self.assertTrue(subs['en'].get('data') is None) self.assertEqual(subs['en']['ext'], 'ass') @@ -591,39 +598,39 @@ class TestYoutubeDL(unittest.TestCase): result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['es', 'fr'])) + self.assertEqual(set(subs.keys()), {'es', 'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['all', '-en']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['es', 'fr'])) + self.assertEqual(set(subs.keys()), {'es', 'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['en', 'fr', '-en']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['fr'])) + self.assertEqual(set(subs.keys()), {'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['-en', 'en']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['en'])) + self.assertEqual(set(subs.keys()), {'en'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['e.+']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['es', 'en'])) + self.assertEqual(set(subs.keys()), {'es', 'en'}) result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) subs = result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['es', 'pt'])) + self.assertEqual(set(subs.keys()), {'es', 'pt'}) self.assertFalse(subs['es']['_auto']) self.assertTrue(subs['pt']['_auto']) result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) subs = 
result['requested_subtitles'] self.assertTrue(subs) - self.assertEqual(set(subs.keys()), set(['es', 'pt'])) + self.assertEqual(set(subs.keys()), {'es', 'pt'}) self.assertTrue(subs['es']['_auto']) self.assertTrue(subs['pt']['_auto']) @@ -654,7 +661,7 @@ class TestYoutubeDL(unittest.TestCase): 'duration': 100000, 'playlist_index': 1, 'playlist_autonumber': 2, - '_last_playlist_index': 100, + '__last_playlist_index': 100, 'n_entries': 10, 'formats': [{'id': 'id 1'}, {'id': 'id 2'}, {'id': 'id 3'}] } @@ -1082,7 +1089,7 @@ class TestYoutubeDL(unittest.TestCase): class _YDL(YDL): def __init__(self, *args, **kwargs): - super(_YDL, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) def trouble(self, s, tb=None): pass diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py index c514413a4..6280e1f2c 100644 --- a/test/test_YoutubeDLCookieJar.py +++ b/test/test_YoutubeDLCookieJar.py @@ -1,13 +1,10 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import unicode_literals - import os import re import sys import tempfile import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.utils import YoutubeDLCookieJar @@ -20,7 +17,7 @@ class TestYoutubeDLCookieJar(unittest.TestCase): tf = tempfile.NamedTemporaryFile(delete=False) try: cookiejar.save(filename=tf.name, ignore_discard=True, ignore_expires=True) - temp = tf.read().decode('utf-8') + temp = tf.read().decode() self.assertTrue(re.search( r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp)) self.assertTrue(re.search( diff --git a/test/test_aes.py b/test/test_aes.py index 5c9273f8a..2b7b7cf54 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -1,30 +1,30 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - # Allow direct execution import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import base64 + from yt_dlp.aes import ( - aes_decrypt, - aes_encrypt, - aes_ecb_encrypt, - aes_ecb_decrypt, + BLOCK_SIZE_BYTES, aes_cbc_decrypt, aes_cbc_decrypt_bytes, aes_cbc_encrypt, aes_ctr_decrypt, aes_ctr_encrypt, + aes_decrypt, + aes_decrypt_text, + aes_ecb_decrypt, + aes_ecb_encrypt, + aes_encrypt, aes_gcm_decrypt_and_verify, aes_gcm_decrypt_and_verify_bytes, - aes_decrypt_text, - BLOCK_SIZE_BYTES, ) -from yt_dlp.compat import compat_pycrypto_AES +from yt_dlp.dependencies import Cryptodome_AES from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes -import base64 # the encrypted data can be generate with 'devscripts/generate_aes_testdata.py' @@ -45,7 +45,7 @@ class TestAES(unittest.TestCase): data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd' decrypted = intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) - if compat_pycrypto_AES: + if Cryptodome_AES: decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) @@ -75,25 +75,25 @@ class TestAES(unittest.TestCase): decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) - if compat_pycrypto_AES: + if Cryptodome_AES: decrypted = aes_gcm_decrypt_and_verify_bytes( data, intlist_to_bytes(self.key), 
authentication_tag, intlist_to_bytes(self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_decrypt_text(self): - password = intlist_to_bytes(self.key).decode('utf-8') + password = intlist_to_bytes(self.key).decode() encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae' - ).decode('utf-8') + ).decode() decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) - password = intlist_to_bytes(self.key).decode('utf-8') + password = intlist_to_bytes(self.key).decode() encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83' - ).decode('utf-8') + ).decode() decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py index 70f9f4845..e1012f69b 100644 --- a/test/test_age_restriction.py +++ b/test/test_age_restriction.py @@ -1,13 +1,12 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - # Allow direct execution import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import try_rm, is_download_test +from test.helper import is_download_test, try_rm from yt_dlp import YoutubeDL diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 2d89366d4..b6019554e 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -1,22 +1,16 @@ #!/usr/bin/env python3 - -from __future__ import unicode_literals - # Allow direct execution +import collections import os import sys import unittest -import collections + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import gettestcases -from yt_dlp.extractor import ( - FacebookIE, - gen_extractors, - YoutubeIE, -) +from yt_dlp.extractor import FacebookIE, YoutubeIE, gen_extractors class TestAllURLsMatching(unittest.TestCase): @@ -81,11 +75,11 @@ class TestAllURLsMatching(unittest.TestCase): url = tc['url'] for ie in ies: if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): - self.assertTrue(ie.suitable(url), '%s should match URL %r' % (type(ie).__name__, url)) + self.assertTrue(ie.suitable(url), f'{type(ie).__name__} should match URL {url!r}') else: self.assertFalse( ie.suitable(url), - '%s should not match URL %r . That URL belongs to %s.' % (type(ie).__name__, url, tc['name'])) + f'{type(ie).__name__} should not match URL {url!r} . 
That URL belongs to {tc["name"]}.') def test_keywords(self): self.assertMatch(':ytsubs', ['youtube:subscriptions']) @@ -120,7 +114,7 @@ class TestAllURLsMatching(unittest.TestCase): for (ie_name, ie_list) in name_accu.items(): self.assertEqual( len(ie_list), 1, - 'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list))) + f'Multiple extractors with the same IE_NAME "{ie_name}" ({", ".join(ie_list)})') if __name__ == '__main__': diff --git a/test/test_cache.py b/test/test_cache.py index 8c4f85387..14e54ba20 100644 --- a/test/test_cache.py +++ b/test/test_cache.py @@ -1,18 +1,15 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import unicode_literals - -import shutil - # Allow direct execution import os +import shutil import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL + from yt_dlp.cache import Cache diff --git a/test/test_compat.py b/test/test_compat.py index c9bc4d7fb..224175c65 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -1,26 +1,20 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import unicode_literals - # Allow direct execution import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from yt_dlp import compat from yt_dlp.compat import ( - compat_getenv, - compat_setenv, - compat_etree_Element, compat_etree_fromstring, compat_expanduser, - compat_shlex_split, + compat_getenv, + compat_setenv, compat_str, compat_struct_unpack, - compat_urllib_parse_quote, - compat_urllib_parse_quote_plus, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, @@ -28,6 +22,12 @@ from yt_dlp.compat import ( class TestCompat(unittest.TestCase): + def test_compat_passthrough(self): + with self.assertWarns(DeprecationWarning): + compat.compat_basestring + + compat.asyncio.events # Must not raise error + def test_compat_getenv(self): test_str = 'тест' compat_setenv('yt_dlp_COMPAT_GETENV', test_str) @@ -42,39 +42,12 @@ class TestCompat(unittest.TestCase): def test_compat_expanduser(self): old_home = os.environ.get('HOME') - test_str = r'C:\Documents and Settings\тест\Application Data' - compat_setenv('HOME', test_str) - self.assertEqual(compat_expanduser('~'), test_str) - compat_setenv('HOME', old_home or '') - - def test_all_present(self): - import yt_dlp.compat - all_names = yt_dlp.compat.__all__ - present_names = set(filter( - lambda c: '_' in c and not c.startswith('_'), - dir(yt_dlp.compat))) - set(['unicode_literals']) - self.assertEqual(all_names, sorted(present_names)) - - def test_compat_urllib_parse_quote(self): - self.assertEqual(compat_urllib_parse_quote('abc def'), 'abc%20def') - self.assertEqual(compat_urllib_parse_quote('/user/abc+def'), '/user/abc%2Bdef') - self.assertEqual(compat_urllib_parse_quote('/user/abc+def', safe='+'), '%2Fuser%2Fabc+def') - self.assertEqual(compat_urllib_parse_quote(''), '') - self.assertEqual(compat_urllib_parse_quote('%'), '%25') - self.assertEqual(compat_urllib_parse_quote('%', safe='%'), '%') - self.assertEqual(compat_urllib_parse_quote('津波'), '%E6%B4%A5%E6%B3%A2') - self.assertEqual( - compat_urllib_parse_quote('''<meta property="og:description" content="▁▂▃▄%▅
▆▇█" /> -%<a href="https://ar.wikipedia.org/wiki/تسونام
ي">%a''', safe='<>=":%/ \r\n'), - '''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%%E2%96%85%E2%96%86%E2%96%87%E2%96%88" /> -%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a''') - self.assertEqual( - compat_urllib_parse_quote('''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%25Things%''', safe='% '), - '''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%''') - - def test_compat_urllib_parse_quote_plus(self): - self.assertEqual(compat_urllib_parse_quote_plus('abc def'), 'abc+def') - self.assertEqual(compat_urllib_parse_quote_plus('/abc def'), '%2Fabc+def') + test_str = R'C:\Documents and Settings\тест\Application Data' + try: + compat_setenv('HOME', test_str) + self.assertEqual(compat_expanduser('~'), test_str) + finally: + compat_setenv('HOME', old_home or '') def test_compat_urllib_parse_unquote(self): self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def') @@ -109,17 +82,6 @@ class TestCompat(unittest.TestCase): self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def') self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def') - def test_compat_shlex_split(self): - self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) - self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) - self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文']) - - def test_compat_etree_Element(self): - try: - compat_etree_Element.items - except AttributeError: - self.fail('compat_etree_Element is not a type') - def test_compat_etree_fromstring(self): xml = ''' <root foo="bar" spam="中文"> @@ -128,7 +90,7 @@ class TestCompat(unittest.TestCase): <foo><bar>spam</bar></foo> </root> ''' - doc = compat_etree_fromstring(xml.encode('utf-8')) + doc = compat_etree_fromstring(xml.encode()) self.assertTrue(isinstance(doc.attrib['foo'], compat_str)) self.assertTrue(isinstance(doc.attrib['spam'], compat_str)) self.assertTrue(isinstance(doc.find('normal').text, compat_str)) diff --git a/test/test_cookies.py b/test/test_cookies.py index 842ebcb99..5bfaec367 100644 --- a/test/test_cookies.py +++ b/test/test_cookies.py @@ -6,10 +6,10 @@ from yt_dlp.cookies import ( LinuxChromeCookieDecryptor, MacChromeCookieDecryptor, WindowsChromeCookieDecryptor, - parse_safari_cookies, - pbkdf2_sha1, _get_linux_desktop_environment, _LinuxDesktopEnvironment, + parse_safari_cookies, + pbkdf2_sha1, ) diff --git a/test/test_download.py b/test/test_download.py index 818a670fb..9a83bee2f 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 - -from __future__ import unicode_literals - # Allow direct execution +import hashlib +import json import os +import socket import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import ( @@ -19,25 +20,19 @@ from test.helper import ( try_rm, ) - -import hashlib -import io -import json -import socket - import yt_dlp.YoutubeDL from yt_dlp.compat import ( compat_http_client, - compat_urllib_error, compat_HTTPError, + compat_urllib_error, ) +from yt_dlp.extractor import get_info_extractor from yt_dlp.utils import ( DownloadError, ExtractorError, - format_bytes, UnavailableVideoError, + format_bytes, ) -from yt_dlp.extractor import get_info_extractor RETRIES = 3 @@ -46,7 +41,7 @@ class 
YoutubeDL(yt_dlp.YoutubeDL): def __init__(self, *args, **kwargs): self.to_stderr = self.to_screen self.processed_info_dicts = [] - super(YoutubeDL, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) def report_warning(self, message): # Don't accept warnings during tests @@ -54,7 +49,7 @@ class YoutubeDL(yt_dlp.YoutubeDL): def process_info(self, info_dict): self.processed_info_dicts.append(info_dict.copy()) - return super(YoutubeDL, self).process_info(info_dict) + return super().process_info(info_dict) def _file_md5(fn): @@ -80,7 +75,7 @@ class TestDownload(unittest.TestCase): def strclass(cls): """From 2.7's unittest; 2.6 had _strclass so we can't import it.""" - return '%s.%s' % (cls.__module__, cls.__name__) + return f'{cls.__module__}.{cls.__name__}' add_ie = getattr(self, self._testMethodName).add_ie return '%s (%s)%s:' % (self._testMethodName, @@ -179,7 +174,7 @@ def generator(test_case, tname): report_warning('%s failed due to network errors, skipping...' % tname) return - print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num)) + print(f'Retrying: {try_num} failed tries\n\n##########\n\n') try_num += 1 else: @@ -245,7 +240,7 @@ def generator(test_case, tname): self.assertTrue( os.path.exists(info_json_fn), 'Missing info file %s' % info_json_fn) - with io.open(info_json_fn, encoding='utf-8') as infof: + with open(info_json_fn, encoding='utf-8') as infof: info_dict = json.load(infof) expect_info_dict(self, info_dict, tc.get('info_dict', {})) finally: diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py index 03ae8c62a..c33308064 100644 --- a/test/test_downloader_http.py +++ b/test/test_downloader_http.py @@ -1,20 +1,19 @@ #!/usr/bin/env python3 -# coding: utf-8 -from __future__ import unicode_literals - # Allow direct execution import os import re import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import threading from test.helper import http_server_port, try_rm + from yt_dlp import YoutubeDL from yt_dlp.compat import compat_http_server from yt_dlp.downloader.http import HttpFD from yt_dlp.utils import encodeFilename -import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -66,7 +65,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): assert False -class FakeLogger(object): +class FakeLogger: def debug(self, msg): pass diff --git a/test/test_execution.py b/test/test_execution.py index cf6b6b913..6efd432e9 100644 --- a/test/test_execution.py +++ b/test/test_execution.py @@ -1,13 +1,10 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import unicode_literals - -import unittest - -import sys +import contextlib import os import subprocess +import sys +import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.utils import encodeArgument @@ -26,15 +23,14 @@ class TestExecution(unittest.TestCase): subprocess.check_call([sys.executable, '-c', 'import yt_dlp'], cwd=rootDir) def test_module_exec(self): - if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution - subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, '-m', 'yt_dlp', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_main_exec(self): - subprocess.check_call([sys.executable, 'yt_dlp/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL) + subprocess.check_call([sys.executable, 
'yt_dlp/__main__.py', '--ignore-config', '--version'], cwd=rootDir, stdout=_DEV_NULL) def test_cmdline_umlauts(self): p = subprocess.Popen( - [sys.executable, 'yt_dlp/__main__.py', encodeArgument('ä'), '--version'], + [sys.executable, 'yt_dlp/__main__.py', '--ignore-config', encodeArgument('ä'), '--version'], cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE) _, stderr = p.communicate() self.assertFalse(stderr) @@ -44,10 +40,8 @@ class TestExecution(unittest.TestCase): subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', 'yt_dlp/extractor/lazy_extractors.py'], cwd=rootDir, stdout=_DEV_NULL) subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=_DEV_NULL) finally: - try: + with contextlib.suppress(OSError): os.remove('yt_dlp/extractor/lazy_extractors.py') - except (IOError, OSError): - pass if __name__ == '__main__': diff --git a/test/test_http.py b/test/test_http.py index 40df167e0..146df7500 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -1,18 +1,17 @@ #!/usr/bin/env python3 -# coding: utf-8 -from __future__ import unicode_literals - # Allow direct execution import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import ssl +import threading from test.helper import http_server_port + from yt_dlp import YoutubeDL from yt_dlp.compat import compat_http_server, compat_urllib_request -import ssl -import threading TEST_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -32,17 +31,6 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): self.send_header('Content-Type', 'video/mp4') self.end_headers() self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]') - elif self.path == '/302': - if sys.version_info[0] == 3: - # XXX: Python 3 http server does not allow non-ASCII header values - self.send_response(404) - self.end_headers() - return - - new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server) - self.send_response(302) - self.send_header(b'Location', new_url.encode('utf-8')) - self.end_headers() elif self.path == '/%E4%B8%AD%E6%96%87.html': self.send_response(200) self.send_header('Content-Type', 'text/html; charset=utf-8') @@ -52,7 +40,7 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): assert False -class FakeLogger(object): +class FakeLogger: def debug(self, msg): pass @@ -72,40 +60,75 @@ class TestHTTP(unittest.TestCase): self.server_thread.daemon = True self.server_thread.start() - def test_unicode_path_redirection(self): - # XXX: Python 3 http server does not allow non-ASCII header values - if sys.version_info[0] == 3: - return - - ydl = YoutubeDL({'logger': FakeLogger()}) - r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port) - self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port) - class TestHTTPS(unittest.TestCase): def setUp(self): certfn = os.path.join(TEST_DIR, 'testcert.pem') self.httpd = compat_http_server.HTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) - self.httpd.socket = ssl.wrap_socket( - self.httpd.socket, certfile=certfn, server_side=True) + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.load_cert_chain(certfn, None) + self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True) self.port = http_server_port(self.httpd) self.server_thread = threading.Thread(target=self.httpd.serve_forever) self.server_thread.daemon = True self.server_thread.start() def test_nocheckcertificate(self): - if sys.version_info >= 
(2, 7, 9): # No certificate checking anyways - ydl = YoutubeDL({'logger': FakeLogger()}) - self.assertRaises( - Exception, - ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port) + ydl = YoutubeDL({'logger': FakeLogger()}) + self.assertRaises( + Exception, + ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port) ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) +class TestClientCert(unittest.TestCase): + def setUp(self): + certfn = os.path.join(TEST_DIR, 'testcert.pem') + self.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate') + cacertfn = os.path.join(self.certdir, 'ca.crt') + self.httpd = compat_http_server.HTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler) + sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + sslctx.verify_mode = ssl.CERT_REQUIRED + sslctx.load_verify_locations(cafile=cacertfn) + sslctx.load_cert_chain(certfn, None) + self.httpd.socket = sslctx.wrap_socket(self.httpd.socket, server_side=True) + self.port = http_server_port(self.httpd) + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + def _run_test(self, **params): + ydl = YoutubeDL({ + 'logger': FakeLogger(), + # Disable client-side validation of unacceptable self-signed testcert.pem + # The test is of a check on the server side, so unaffected + 'nocheckcertificate': True, + **params, + }) + r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) + self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) + + def test_certificate_combined_nopass(self): + self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithkey.crt')) + + def test_certificate_nocombined_nopass(self): + self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'), + client_certificate_key=os.path.join(self.certdir, 'client.key')) + + def test_certificate_combined_pass(self): + self._run_test(client_certificate=os.path.join(self.certdir, 'clientwithencryptedkey.crt'), + client_certificate_password='foobar') + + def test_certificate_nocombined_pass(self): + self._run_test(client_certificate=os.path.join(self.certdir, 'client.crt'), + client_certificate_key=os.path.join(self.certdir, 'clientencrypted.key'), + client_certificate_password='foobar') + + def _build_proxy_handler(name): class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): proxy_name = name @@ -117,7 +140,7 @@ def _build_proxy_handler(name): self.send_response(200) self.send_header('Content-Type', 'text/plain; charset=utf-8') self.end_headers() - self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8')) + self.wfile.write(f'{self.proxy_name}: {self.path}'.encode()) return HTTPTestRequestHandler @@ -138,26 +161,26 @@ class TestProxy(unittest.TestCase): self.geo_proxy_thread.start() def test_proxy(self): - geo_proxy = '127.0.0.1:{0}'.format(self.geo_port) + geo_proxy = f'127.0.0.1:{self.geo_port}' ydl = YoutubeDL({ - 'proxy': '127.0.0.1:{0}'.format(self.port), + 'proxy': f'127.0.0.1:{self.port}', 'geo_verification_proxy': geo_proxy, }) url = 'http://foo.com/bar' - response = ydl.urlopen(url).read().decode('utf-8') - self.assertEqual(response, 'normal: {0}'.format(url)) + response = ydl.urlopen(url).read().decode() + self.assertEqual(response, f'normal: {url}') req = 
compat_urllib_request.Request(url) req.add_header('Ytdl-request-proxy', geo_proxy) - response = ydl.urlopen(req).read().decode('utf-8') - self.assertEqual(response, 'geo: {0}'.format(url)) + response = ydl.urlopen(req).read().decode() + self.assertEqual(response, f'geo: {url}') def test_proxy_with_idn(self): ydl = YoutubeDL({ - 'proxy': '127.0.0.1:{0}'.format(self.port), + 'proxy': f'127.0.0.1:{self.port}', }) url = 'http://中文.tw/' - response = ydl.urlopen(url).read().decode('utf-8') + response = ydl.urlopen(url).read().decode() # b'xn--fiq228c' is '中文'.encode('idna') self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index e230b045f..872c58c8f 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -1,11 +1,9 @@ #!/usr/bin/env python3 - -from __future__ import unicode_literals - # Allow direct execution import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.jsinterp import JSInterpreter diff --git a/test/test_netrc.py b/test/test_netrc.py index 94a703406..f7a0b33d2 100644 --- a/test/test_netrc.py +++ b/test/test_netrc.py @@ -1,9 +1,7 @@ -# coding: utf-8 -from __future__ import unicode_literals - import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) diff --git a/test/test_overwrites.py b/test/test_overwrites.py index f5d10a409..a6d5bae40 100644 --- a/test/test_overwrites.py +++ b/test/test_overwrites.py @@ -1,18 +1,15 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - import os -from os.path import join import subprocess import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import is_download_test, try_rm - root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -download_file = join(root_dir, 'test.webm') +download_file = os.path.join(root_dir, 'test.webm') @is_download_test @@ -46,7 +43,7 @@ class TestOverwrites(unittest.TestCase): self.assertTrue(os.path.getsize(download_file) > 1) def tearDown(self): - try_rm(join(root_dir, 'test.webm')) + try_rm(os.path.join(root_dir, 'test.webm')) if __name__ == '__main__': diff --git a/test/test_post_hooks.py b/test/test_post_hooks.py index 1555a23e0..e84a08f29 100644 --- a/test/test_post_hooks.py +++ b/test/test_post_hooks.py @@ -1,20 +1,19 @@ #!/usr/bin/env python3 - -from __future__ import unicode_literals - import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import get_params, try_rm, is_download_test +from test.helper import get_params, is_download_test, try_rm + import yt_dlp.YoutubeDL from yt_dlp.utils import DownloadError class YoutubeDL(yt_dlp.YoutubeDL): def __init__(self, *args, **kwargs): - super(YoutubeDL, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.to_stderr = self.to_screen diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index bbe998993..9d8a4dcc5 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -1,7 +1,4 @@ #!/usr/bin/env python3 - -from __future__ import unicode_literals - # Allow direct execution import os import sys @@ -16,7 +13,7 @@ from yt_dlp.postprocessor import ( FFmpegThumbnailsConvertorPP, MetadataFromFieldPP, MetadataParserPP, - ModifyChaptersPP + ModifyChaptersPP, ) diff --git a/test/test_socks.py b/test/test_socks.py index 
cf1f613ab..a8b068cdd 100644 --- a/test/test_socks.py +++ b/test/test_socks.py @@ -1,25 +1,16 @@ #!/usr/bin/env python3 -# coding: utf-8 -from __future__ import unicode_literals - # Allow direct execution import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import random import subprocess +from test.helper import FakeYDL, get_params, is_download_test -from test.helper import ( - FakeYDL, - get_params, - is_download_test, -) -from yt_dlp.compat import ( - compat_str, - compat_urllib_request, -) +from yt_dlp.compat import compat_str, compat_urllib_request @is_download_test @@ -41,7 +32,7 @@ class TestMultipleSocks(unittest.TestCase): 'proxy': params['primary_proxy'] }) self.assertEqual( - ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8'), + ydl.urlopen('http://yt-dl.org/ip').read().decode(), params['primary_server_ip']) def test_proxy_https(self): @@ -52,7 +43,7 @@ class TestMultipleSocks(unittest.TestCase): 'proxy': params['primary_proxy'] }) self.assertEqual( - ydl.urlopen('https://yt-dl.org/ip').read().decode('utf-8'), + ydl.urlopen('https://yt-dl.org/ip').read().decode(), params['primary_server_ip']) def test_secondary_proxy_http(self): @@ -63,7 +54,7 @@ class TestMultipleSocks(unittest.TestCase): req = compat_urllib_request.Request('http://yt-dl.org/ip') req.add_header('Ytdl-request-proxy', params['secondary_proxy']) self.assertEqual( - ydl.urlopen(req).read().decode('utf-8'), + ydl.urlopen(req).read().decode(), params['secondary_server_ip']) def test_secondary_proxy_https(self): @@ -74,7 +65,7 @@ class TestMultipleSocks(unittest.TestCase): req = compat_urllib_request.Request('https://yt-dl.org/ip') req.add_header('Ytdl-request-proxy', params['secondary_proxy']) self.assertEqual( - ydl.urlopen(req).read().decode('utf-8'), + ydl.urlopen(req).read().decode(), params['secondary_server_ip']) @@ -105,7 +96,7 @@ class TestSocks(unittest.TestCase): ydl = FakeYDL({ 'proxy': '%s://127.0.0.1:%d' % (protocol, self.port), }) - return ydl.urlopen('http://yt-dl.org/ip').read().decode('utf-8') + return ydl.urlopen('http://yt-dl.org/ip').read().decode() def test_socks4(self): self.assertTrue(isinstance(self._get_ip('socks4'), compat_str)) diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 95e33e54a..182bd7a4b 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -1,33 +1,31 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - # Allow direct execution import os import sys import unittest -sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, md5, is_download_test +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +from test.helper import FakeYDL, is_download_test, md5 from yt_dlp.extractor import ( - YoutubeIE, - DailymotionIE, - TedTalkIE, - VimeoIE, - WallaIE, - CeskaTelevizeIE, - LyndaIE, NPOIE, + NRKTVIE, PBSIE, + CeskaTelevizeIE, ComedyCentralIE, - NRKTVIE, + DailymotionIE, + DemocracynowIE, + LyndaIE, RaiPlayIE, - VikiIE, - ThePlatformIE, - ThePlatformFeedIE, RTVEALaCartaIE, - DemocracynowIE, + TedTalkIE, + ThePlatformFeedIE, + ThePlatformIE, + VikiIE, + VimeoIE, + WallaIE, + YoutubeIE, ) @@ -53,8 +51,8 @@ class BaseTestSubtitles(unittest.TestCase): for sub_info in subtitles.values(): if sub_info.get('data') is None: uf = self.DL.urlopen(sub_info['url']) - sub_info['data'] = uf.read().decode('utf-8') - return dict((l, sub_info['data']) for l, sub_info in subtitles.items()) + sub_info['data'] 
= uf.read().decode() + return {l: sub_info['data'] for l, sub_info in subtitles.items()} @is_download_test @@ -163,7 +161,7 @@ class TestVimeoSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr'])) + self.assertEqual(set(subtitles.keys()), {'de', 'en', 'es', 'fr'}) self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') @@ -186,7 +184,7 @@ class TestWallaSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['heb'])) + self.assertEqual(set(subtitles.keys()), {'heb'}) self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920') def test_nosubtitles(self): @@ -208,7 +206,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['cs'])) + self.assertEqual(set(subtitles.keys()), {'cs'}) self.assertTrue(len(subtitles['cs']) > 20000) def test_nosubtitles(self): @@ -229,7 +227,7 @@ class TestLyndaSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7') @@ -242,7 +240,7 @@ class TestNPOSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['nl'])) + self.assertEqual(set(subtitles.keys()), {'nl'}) self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4') @@ -252,13 +250,13 @@ class TestMTVSubtitles(BaseTestSubtitles): IE = ComedyCentralIE def getInfoDict(self): - return super(TestMTVSubtitles, self).getInfoDict()['entries'][0] + return super().getInfoDict()['entries'][0] def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961') @@ -271,7 +269,7 @@ class TestNRKSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['no'])) + self.assertEqual(set(subtitles.keys()), {'no'}) self.assertEqual(md5(subtitles['no']), '544fa917d3197fcbee64634559221cc2') @@ -284,7 +282,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['it'])) + self.assertEqual(set(subtitles.keys()), {'it'}) self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a') def test_subtitles_array_key(self): @@ -292,7 +290,7 @@ class TestRaiPlaySubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['it'])) + 
self.assertEqual(set(subtitles.keys()), {'it'}) self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd') @@ -305,7 +303,7 @@ class TestVikiSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a') @@ -320,7 +318,7 @@ class TestThePlatformSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b') @@ -333,7 +331,7 @@ class TestThePlatformFeedSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade') @@ -348,7 +346,7 @@ class TestRtveSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['es'])) + self.assertEqual(set(subtitles.keys()), {'es'}) self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca') @@ -361,7 +359,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c') def test_subtitles_in_page(self): @@ -369,7 +367,7 @@ class TestDemocracynowSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), 'acaca989e24a9e45a6719c9b3d60815c') @@ -382,7 +380,7 @@ class TestPBSSubtitles(BaseTestSubtitles): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['en'])) + self.assertEqual(set(subtitles.keys()), {'en'}) def test_subtitles_dfxp_format(self): self.DL.params['writesubtitles'] = True diff --git a/test/test_utils.py b/test/test_utils.py index 1f826c2f2..184c39cff 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,12 +1,10 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import unicode_literals - # Allow direct execution +import contextlib import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -16,75 +14,95 @@ import itertools import json import xml.etree.ElementTree +from yt_dlp.compat import ( + compat_chr, + compat_etree_fromstring, + compat_getenv, + compat_HTMLParseError, + compat_os_name, + compat_setenv, +) from yt_dlp.utils import ( + Config, + DateRange, + ExtractorError, + InAdvancePagedList, + LazyList, + OnDemandPagedList, age_restricted, args_to_str, - encode_base_n, + base_url, caesar, clean_html, clean_podcast_url, - Config, + cli_bool_option, + 
cli_option, + cli_valueless_option, date_from_str, datetime_from_str, - DateRange, detect_exe_version, determine_ext, + dfxp2srt, dict_get, + encode_base_n, encode_compat_str, encodeFilename, escape_rfc3986, escape_url, + expand_path, extract_attributes, - ExtractorError, find_xpath_attr, fix_xml_ampersands, - format_bytes, float_or_none, - get_element_by_class, + format_bytes, get_element_by_attribute, - get_elements_by_class, - get_elements_by_attribute, - get_element_html_by_class, + get_element_by_class, get_element_html_by_attribute, - get_elements_html_by_class, + get_element_html_by_class, + get_element_text_and_html_by_tag, + get_elements_by_attribute, + get_elements_by_class, get_elements_html_by_attribute, + get_elements_html_by_class, get_elements_text_and_html_by_attribute, - get_element_text_and_html_by_tag, - InAdvancePagedList, int_or_none, intlist_to_bytes, + iri_to_uri, is_html, js_to_json, limit_length, locked_file, + lowercase_escape, + match_str, merge_dicts, mimetype2ext, month_by_name, multipart_encode, ohdave_rsa_encrypt, - OnDemandPagedList, orderedSet, parse_age_limit, + parse_bitrate, + parse_codecs, + parse_count, + parse_dfxp_time_expr, parse_duration, parse_filesize, - parse_count, parse_iso8601, - parse_resolution, - parse_bitrate, parse_qs, + parse_resolution, pkcs1pad, + prepend_extension, read_batch_urls, + remove_end, + remove_quotes, + remove_start, + render_table, + replace_extension, + rot47, sanitize_filename, sanitize_path, sanitize_url, sanitized_Request, - expand_path, - prepend_extension, - replace_extension, - remove_start, - remove_end, - remove_quotes, - rot47, shell_quote, smuggle_url, str_to_int, @@ -96,38 +114,18 @@ from yt_dlp.utils import ( unified_strdate, unified_timestamp, unsmuggle_url, + update_url_query, uppercase_escape, - lowercase_escape, url_basename, url_or_none, - base_url, - urljoin, urlencode_postdata, + urljoin, urshift, - update_url_query, version_tuple, - xpath_with_ns, + xpath_attr, xpath_element, xpath_text, - xpath_attr, - render_table, - match_str, - parse_dfxp_time_expr, - dfxp2srt, - cli_option, - cli_valueless_option, - cli_bool_option, - parse_codecs, - iri_to_uri, - LazyList, -) -from yt_dlp.compat import ( - compat_chr, - compat_etree_fromstring, - compat_getenv, - compat_HTMLParseError, - compat_os_name, - compat_setenv, + xpath_with_ns, ) @@ -266,15 +264,22 @@ class TestUtil(unittest.TestCase): def test_expand_path(self): def env(var): - return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var) + return f'%{var}%' if sys.platform == 'win32' else f'${var}' compat_setenv('yt_dlp_EXPATH_PATH', 'expanded') self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded') - self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME')) - self.assertEqual(expand_path('~'), compat_getenv('HOME')) - self.assertEqual( - expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')), - '%s/expanded' % compat_getenv('HOME')) + + old_home = os.environ.get('HOME') + test_str = R'C:\Documents and Settings\тест\Application Data' + try: + compat_setenv('HOME', test_str) + self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME')) + self.assertEqual(expand_path('~'), compat_getenv('HOME')) + self.assertEqual( + expand_path('~/%s' % env('yt_dlp_EXPATH_PATH')), + '%s/expanded' % compat_getenv('HOME')) + finally: + compat_setenv('HOME', old_home or '') def test_prepend_extension(self): self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext') @@ -538,9 +543,6 @@ class TestUtil(unittest.TestCase): 
self.assertEqual(str_to_int('123,456'), 123456) self.assertEqual(str_to_int('123.456'), 123456) self.assertEqual(str_to_int(523), 523) - # Python 3 has no long - if sys.version_info < (3, 0): - eval('self.assertEqual(str_to_int(123456L), 123456)') self.assertEqual(str_to_int('noninteger'), None) self.assertEqual(str_to_int([]), None) @@ -669,8 +671,7 @@ class TestUtil(unittest.TestCase): def get_page(pagenum): firstid = pagenum * pagesize upto = min(size, pagenum * pagesize + pagesize) - for i in range(firstid, upto): - yield i + yield from range(firstid, upto) pl = OnDemandPagedList(get_page, pagesize) got = pl.getslice(*sliceargs) @@ -739,7 +740,7 @@ class TestUtil(unittest.TestCase): multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0], b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n') self.assertEqual( - multipart_encode({'欄位'.encode('utf-8'): '值'.encode('utf-8')}, boundary='AAAAAA')[0], + multipart_encode({'欄位'.encode(): '值'.encode()}, boundary='AAAAAA')[0], b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n') self.assertRaises( ValueError, multipart_encode, {b'field': b'value'}, boundary='value') @@ -1400,7 +1401,7 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') <p begin="3" dur="-1">Ignored, three</p> </div> </body> - </tt>'''.encode('utf-8') + </tt>'''.encode() srt_data = '''1 00:00:00,000 --> 00:00:01,000 The following line contains Chinese characters and special symbols @@ -1418,14 +1419,14 @@ Line ''' self.assertEqual(dfxp2srt(dfxp_data), srt_data) - dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?> + dfxp_data_no_default_namespace = b'''<?xml version="1.0" encoding="UTF-8"?> <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> <body> <div xml:lang="en"> <p begin="0" end="1">The first line</p> </div> </body> - </tt>'''.encode('utf-8') + </tt>''' srt_data = '''1 00:00:00,000 --> 00:00:01,000 The first line ''' self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) - dfxp_data_with_style = '''<?xml version="1.0" encoding="utf-8"?> + dfxp_data_with_style = b'''<?xml version="1.0" encoding="utf-8"?> <tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata"> <head> <styling> @@ -1451,7 +1452,7 @@ The first line <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p> </div> </body> -</tt>'''.encode('utf-8') +</tt>''' srt_data = '''1 00:00:02,080 --> 00:00:05,840 <font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font> @@ -1758,7 +1759,7 @@ Line 1 def test(ll, idx, val, cache): self.assertEqual(ll[idx], val) - self.assertEqual(getattr(ll, '_LazyList__cache'), list(cache)) + self.assertEqual(ll._cache, list(cache)) ll = LazyList(range(10)) test(ll, 0, 0, range(1)) @@ -1821,10 +1822,8 @@ Line 1 else: self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}') finally: - try: + with contextlib.suppress(OSError): os.remove(FILE) - except Exception: - pass if __name__ == '__main__': diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py index cc606115f..657994074 100644 --- a/test/test_verbose_output.py
+++ b/test/test_verbose_output.py @@ -1,13 +1,9 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import unicode_literals - -import unittest - -import sys import os import subprocess +import sys +import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -17,7 +13,8 @@ class TestVerboseOutput(unittest.TestCase): def test_private_info_arg(self): outp = subprocess.Popen( [ - sys.executable, 'yt_dlp/__main__.py', '-v', + sys.executable, 'yt_dlp/__main__.py', + '-v', '--ignore-config', '--username', 'johnsmith@gmail.com', '--password', 'my_secret_password', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -30,7 +27,8 @@ class TestVerboseOutput(unittest.TestCase): def test_private_info_shortarg(self): outp = subprocess.Popen( [ - sys.executable, 'yt_dlp/__main__.py', '-v', + sys.executable, 'yt_dlp/__main__.py', + '-v', '--ignore-config', '-u', 'johnsmith@gmail.com', '-p', 'my_secret_password', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -43,7 +41,8 @@ class TestVerboseOutput(unittest.TestCase): def test_private_info_eq(self): outp = subprocess.Popen( [ - sys.executable, 'yt_dlp/__main__.py', '-v', + sys.executable, 'yt_dlp/__main__.py', + '-v', '--ignore-config', '--username=johnsmith@gmail.com', '--password=my_secret_password', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -56,7 +55,8 @@ class TestVerboseOutput(unittest.TestCase): def test_private_info_shortarg_eq(self): outp = subprocess.Popen( [ - sys.executable, 'yt_dlp/__main__.py', '-v', + sys.executable, 'yt_dlp/__main__.py', + '-v', '--ignore-config', '-u=johnsmith@gmail.com', '-p=my_secret_password', ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 455192b1f..66611e236 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -1,18 +1,14 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - # Allow direct execution import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL, is_download_test -from yt_dlp.extractor import ( - YoutubeIE, - YoutubeTabIE, -) +from yt_dlp.extractor import YoutubeIE, YoutubeTabIE @is_download_test diff --git a/test/test_youtube_misc.py b/test/test_youtube_misc.py index 402681cad..36f8be689 100644 --- a/test/test_youtube_misc.py +++ b/test/test_youtube_misc.py @@ -1,10 +1,9 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - # Allow direct execution import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index bbbba073f..2c2013295 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -1,21 +1,20 @@ #!/usr/bin/env python3 - -from __future__ import unicode_literals - # Allow direct execution +import contextlib import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -import io import re import string - +import urllib.request from test.helper import FakeYDL, is_download_test + +from yt_dlp.compat import compat_str from yt_dlp.extractor import YoutubeIE from yt_dlp.jsinterp import JSInterpreter -from yt_dlp.compat import compat_str, compat_urlretrieve _SIG_TESTS = [ ( @@ 
-129,11 +128,9 @@ class TestSignature(unittest.TestCase): os.mkdir(self.TESTDATA_DIR) def tearDown(self): - try: + with contextlib.suppress(OSError): for f in os.listdir(self.TESTDATA_DIR): os.remove(f) - except OSError: - pass def t_factory(name, sig_func, url_pattern): @@ -147,8 +144,8 @@ def t_factory(name, sig_func, url_pattern): fn = os.path.join(self.TESTDATA_DIR, basename) if not os.path.exists(fn): - compat_urlretrieve(url, fn) - with io.open(fn, encoding='utf-8') as testf: + urllib.request.urlretrieve(url, fn) + with open(fn, encoding='utf-8') as testf: jscode = testf.read() self.assertEqual(sig_func(jscode, sig_input), expected_sig) diff --git a/test/testdata/certificate/ca.crt b/test/testdata/certificate/ca.crt new file mode 100644 index 000000000..ddf7be7ad --- /dev/null +++ b/test/testdata/certificate/ca.crt @@ -0,0 +1,10 @@ +-----BEGIN CERTIFICATE----- +MIIBfDCCASOgAwIBAgIUUgngoxFpuWft8gjj3uEFoqJyoJowCgYIKoZIzj0EAwIw +FDESMBAGA1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEwMVoXDTM4MTAxNTAz +MDEwMVowFDESMBAGA1UEAwwJeXRkbHB0ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0D +AQcDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCHYxFU +KpcCfVt9aueRyUFi1TNkkkEZ9D6fbqNTMFEwHQYDVR0OBBYEFBdY2rVNLFGM6r1F +iuamNDaiq0QoMB8GA1UdIwQYMBaAFBdY2rVNLFGM6r1FiuamNDaiq0QoMA8GA1Ud +EwEB/wQFMAMBAf8wCgYIKoZIzj0EAwIDRwAwRAIgXJg2jio1kow2g/iP54Qq+iI2 +m4EAvZiY0Im/Ni3PHawCIC6KCl6QcHANbeq8ckOXNGusjl6OWhvEM3uPBPhqskq1 +-----END CERTIFICATE----- diff --git a/test/testdata/certificate/ca.key b/test/testdata/certificate/ca.key new file mode 100644 index 000000000..38920d571 --- /dev/null +++ b/test/testdata/certificate/ca.key @@ -0,0 +1,5 @@ +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIG2L1bHdl3PnaLiJ7Zm8aAGCj4GiVbSbXQcrJAdL+yqOoAoGCCqGSM49 +AwEHoUQDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCH +YxFUKpcCfVt9aueRyUFi1TNkkkEZ9D6fbg== +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/ca.srl b/test/testdata/certificate/ca.srl new file mode 100644 index 000000000..de2d1eab3 --- /dev/null +++ b/test/testdata/certificate/ca.srl @@ -0,0 +1 @@ +4A260C33C4D34612646E6321E1E767DF1A95EF0B diff --git a/test/testdata/certificate/client.crt b/test/testdata/certificate/client.crt new file mode 100644 index 000000000..874622fae --- /dev/null +++ b/test/testdata/certificate/client.crt @@ -0,0 +1,9 @@ +-----BEGIN CERTIFICATE----- +MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG +A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow +FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA +BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS +XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD +aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY +D0dB8M1kJw== +-----END CERTIFICATE----- diff --git a/test/testdata/certificate/client.csr b/test/testdata/certificate/client.csr new file mode 100644 index 000000000..2d5d7a5c1 --- /dev/null +++ b/test/testdata/certificate/client.csr @@ -0,0 +1,7 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIHQMHcCAQAwFTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqG +SM49AwEHA0IABKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq +3ZuZ7rubyuMSXNuH+2Cl9msSpJB2LhJs5kegADAKBggqhkjOPQQDAgNJADBGAiEA +1LZ72mtPmVxhGtdMvpZ0fyA68H2RC5IMHpLq18T55UcCIQDKpkXXVTvAzS0JioCq +6kiYq8Oxx6ZMoI+11k75/Kip1g== +-----END CERTIFICATE REQUEST----- diff --git a/test/testdata/certificate/client.key b/test/testdata/certificate/client.key new file mode 100644 index 000000000..e47389b51 --- /dev/null +++ b/test/testdata/certificate/client.key @@ 
-0,0 +1,5 @@ +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49 +AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird +m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw== +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/clientencrypted.key b/test/testdata/certificate/clientencrypted.key new file mode 100644 index 000000000..0baee37e9 --- /dev/null +++ b/test/testdata/certificate/clientencrypted.key @@ -0,0 +1,8 @@ +-----BEGIN EC PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35 + +96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS +rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn +IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c= +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/clientwithencryptedkey.crt b/test/testdata/certificate/clientwithencryptedkey.crt new file mode 100644 index 000000000..f357e4c95 --- /dev/null +++ b/test/testdata/certificate/clientwithencryptedkey.crt @@ -0,0 +1,17 @@ +-----BEGIN CERTIFICATE----- +MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG +A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow +FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA +BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS +XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD +aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY +D0dB8M1kJw== +-----END CERTIFICATE----- +-----BEGIN EC PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35 + +96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS +rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn +IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c= +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/clientwithkey.crt b/test/testdata/certificate/clientwithkey.crt new file mode 100644 index 000000000..942f6e2a4 --- /dev/null +++ b/test/testdata/certificate/clientwithkey.crt @@ -0,0 +1,14 @@ +-----BEGIN CERTIFICATE----- +MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG +A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow +FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA +BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS +XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD +aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY +D0dB8M1kJw== +-----END CERTIFICATE----- +-----BEGIN EC PRIVATE KEY----- +MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49 +AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird +m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw== +-----END EC PRIVATE KEY----- diff --git a/test/testdata/certificate/instructions.md b/test/testdata/certificate/instructions.md new file mode 100644 index 000000000..b0e3fbd48 --- /dev/null +++ b/test/testdata/certificate/instructions.md @@ -0,0 +1,19 @@ +# Generate certificates for client cert tests + +## CA +```sh +openssl ecparam -name prime256v1 -genkey -noout -out ca.key +openssl req -new -x509 -sha256 -days 6027 -key ca.key -out ca.crt -subj "/CN=ytdlptest" +``` + +## Client +```sh +openssl ecparam -name prime256v1 -genkey -noout -out client.key +openssl ec -in client.key -out clientencrypted.key -passout pass:foobar -aes256 +openssl req -new -sha256 -key client.key -out client.csr -subj "/CN=ytdlptest2" +openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out 
client.crt -days 6027 -sha256 +cp client.crt clientwithkey.crt +cp client.crt clientwithencryptedkey.crt +cat client.key >> clientwithkey.crt +cat clientencrypted.key >> clientwithencryptedkey.crt +```
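
For reference, a minimal sketch of how these generated test files plug into the client-certificate options introduced elsewhere in this diff (see the `YoutubeDL` docstring hunk below). The URL is a placeholder, and `foobar` is the passphrase chosen in the openssl step above:

```python
# Sketch only: wires the test certificates into the new client_certificate options
from yt_dlp import YoutubeDL

params = {
    # certificate and private key concatenated in a single PEM file
    'client_certificate': 'test/testdata/certificate/clientwithkey.crt',
    # or keep them separate, optionally with an encrypted key:
    # 'client_certificate': 'test/testdata/certificate/client.crt',
    # 'client_certificate_key': 'test/testdata/certificate/clientencrypted.key',
    # 'client_certificate_password': 'foobar',
}
with YoutubeDL(params) as ydl:
    ydl.download(['https://example.com/video'])  # placeholder URL
```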
\ No newline at end of file diff --git a/yt-dlp.cmd b/yt-dlp.cmd index 2b651a41e..aa4500f9f 100644 --- a/yt-dlp.cmd +++ b/yt-dlp.cmd @@ -1 +1 @@ -@py "%~dp0yt_dlp\__main__.py" %*
\ No newline at end of file +@py -bb -Werror -Xdev "%~dp0yt_dlp\__main__.py" %* @@ -1,2 +1,2 @@ #!/bin/sh -exec python3 "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@" +exec "${PYTHON:-python3}" -bb -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@" diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index ab7cf3d6d..94f8dcaef 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1,8 +1,4 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import absolute_import, unicode_literals - import collections import contextlib import datetime @@ -16,6 +12,7 @@ import locale import operator import os import platform +import random import re import shutil import subprocess @@ -24,97 +21,115 @@ import tempfile import time import tokenize import traceback -import random import unicodedata - -from enum import Enum +import urllib.request from string import ascii_letters +from .cache import Cache from .compat import ( - compat_basestring, - compat_brotli, compat_get_terminal_size, - compat_kwargs, - compat_numeric_types, compat_os_name, - compat_pycrypto_AES, compat_shlex_quote, compat_str, - compat_tokenize_tokenize, compat_urllib_error, compat_urllib_request, - compat_urllib_request_DataHandler, windows_enable_vt_mode, ) from .cookies import load_cookies +from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name +from .downloader.rtmp import rtmpdump_version +from .extractor import _LAZY_LOADER +from .extractor import _PLUGIN_CLASSES as plugin_extractors +from .extractor import gen_extractor_classes, get_info_extractor +from .extractor.openload import PhantomJSwrapper +from .minicurses import format_text +from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors +from .postprocessor import ( + EmbedThumbnailPP, + FFmpegFixupDuplicateMoovPP, + FFmpegFixupDurationPP, + FFmpegFixupM3u8PP, + FFmpegFixupM4aPP, + FFmpegFixupStretchedPP, + FFmpegFixupTimestampPP, + FFmpegMergerPP, + FFmpegPostProcessor, + MoveFilesAfterDownloadPP, + get_postprocessor, +) from .utils import ( + DEFAULT_OUTTMPL, + LINK_TEMPLATES, + NO_DEFAULT, + NUMBER_RE, + OUTTMPL_TYPES, + POSTPROCESS_WHEN, + STR_FORMAT_RE_TMPL, + STR_FORMAT_TYPES, + ContentTooShortError, + DateRange, + DownloadCancelled, + DownloadError, + EntryNotInPlaylist, + ExistingVideoReached, + ExtractorError, + GeoRestrictedError, + HEADRequest, + InAdvancePagedList, + ISO3166Utils, + LazyList, + MaxDownloadsReached, + Namespace, + PagedList, + PerRequestProxyHandler, + Popen, + PostProcessingError, + ReExtractInfo, + RejectedVideoReached, + SameFileError, + UnavailableVideoError, + YoutubeDLCookieProcessor, + YoutubeDLHandler, + YoutubeDLRedirectHandler, age_restricted, args_to_str, - ContentTooShortError, date_from_str, - DateRange, - DEFAULT_OUTTMPL, determine_ext, determine_protocol, - DownloadCancelled, - DownloadError, encode_compat_str, encodeFilename, - EntryNotInPlaylist, error_to_compat_str, - ExistingVideoReached, expand_path, - ExtractorError, filter_dict, float_or_none, format_bytes, - format_field, format_decimal_suffix, + format_field, formatSeconds, - GeoRestrictedError, get_domain, - has_certifi, - HEADRequest, - InAdvancePagedList, int_or_none, iri_to_uri, - ISO3166Utils, join_nonempty, - LazyList, - LINK_TEMPLATES, locked_file, make_dir, make_HTTPS_handler, - MaxDownloadsReached, merge_headers, network_exceptions, - NO_DEFAULT, number_of_digits, orderedSet, - OUTTMPL_TYPES, - PagedList, parse_filesize, - PerRequestProxyHandler, platform_name, - Popen, - 
POSTPROCESS_WHEN, - PostProcessingError, preferredencoding, prepend_extension, - ReExtractInfo, register_socks_protocols, - RejectedVideoReached, remove_terminal_sequences, render_table, replace_extension, - SameFileError, sanitize_filename, sanitize_path, sanitize_url, sanitized_Request, std_headers, - STR_FORMAT_RE_TMPL, - STR_FORMAT_TYPES, str_or_none, strftime_or_none, subtitles_filename, @@ -123,52 +138,19 @@ from .utils import ( to_high_limit_path, traverse_obj, try_get, - UnavailableVideoError, url_basename, variadic, version_tuple, write_json_file, write_string, - YoutubeDLCookieProcessor, - YoutubeDLHandler, - YoutubeDLRedirectHandler, ) -from .cache import Cache -from .minicurses import format_text -from .extractor import ( - gen_extractor_classes, - get_info_extractor, - _LAZY_LOADER, - _PLUGIN_CLASSES as plugin_extractors -) -from .extractor.openload import PhantomJSwrapper -from .downloader import ( - FFmpegFD, - get_suitable_downloader, - shorten_protocol_name -) -from .downloader.rtmp import rtmpdump_version -from .postprocessor import ( - get_postprocessor, - EmbedThumbnailPP, - FFmpegFixupDuplicateMoovPP, - FFmpegFixupDurationPP, - FFmpegFixupM3u8PP, - FFmpegFixupM4aPP, - FFmpegFixupStretchedPP, - FFmpegFixupTimestampPP, - FFmpegMergerPP, - FFmpegPostProcessor, - MoveFilesAfterDownloadPP, - _PLUGIN_CLASSES as plugin_postprocessors -) -from .version import __version__ +from .version import RELEASE_GIT_HEAD, __version__ if compat_os_name == 'nt': import ctypes -class YoutubeDL(object): +class YoutubeDL: """YoutubeDL class. YoutubeDL objects are the ones responsible of downloading the @@ -329,13 +311,17 @@ class YoutubeDL(object): has been filtered out. break_per_url: Whether break_on_reject and break_on_existing should act on each input URL as opposed to for the entire queue - cookiefile: File name where cookies should be read from and dumped to + cookiefile: File name or text stream from where cookies should be read and dumped to cookiesfrombrowser: A tuple containing the name of the browser, the profile name/path from where cookies are loaded, and the name of the keyring. Eg: ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT') legacyserverconnect: Explicitly allow HTTPS connection to servers that do not support RFC 5746 secure renegotiation nocheckcertificate: Do not verify SSL certificates + client_certificate: Path to client certificate file in PEM format. May include the private key + client_certificate_key: Path to private key file for client certificate + client_certificate_password: Password for client certificate private key, if encrypted. + If not provided and the key is encrypted, yt-dlp will ask interactively prefer_insecure: Use HTTP instead of HTTPS to retrieve information. At the moment, this is only supported by YouTube. http_headers: A dictionary of custom headers to be used for all requests @@ -427,10 +413,14 @@ class YoutubeDL(object): sleep_interval_subtitles: Number of seconds to sleep before each subtitle download listformats: Print an overview of available video formats and exit. list_thumbnails: Print a table of all thumbnails and exit. - match_filter: A function that gets called with the info_dict of - every video. - If it returns a message, the video is ignored. - If it returns None, the video is downloaded. + match_filter: A function that gets called for every video with the signature + (info_dict, *, incomplete: bool) -> Optional[str] + For backward compatibility with youtube-dl, the signature + (info_dict) -> Optional[str] is also allowed.
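+ (Hypothetical illustration: lambda info, *, incomplete=False: 'too short' if (info.get('duration') or 0) < 60 else None would skip videos shorter than a minute, per the return-value rules below.)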
+ - If it returns a message, the video is ignored. + - If it returns None, the video is downloaded. + - If it returns utils.NO_DEFAULT, the user is interactively + asked whether to download the video. match_filter_func in utils.py is one example for this. no_color: Do not emit color codes in output. geo_bypass: Bypass geographic restriction via faking X-Forwarded-For @@ -504,7 +494,7 @@ class YoutubeDL(object): care about HLS. (only for youtube) """ - _NUMERIC_FIELDS = set(( + _NUMERIC_FIELDS = { 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx', 'timestamp', 'release_timestamp', 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', @@ -512,7 +502,7 @@ class YoutubeDL(object): 'start_time', 'end_time', 'chapter_number', 'season_number', 'episode_number', 'track_number', 'disc_number', 'release_year', - )) + } _format_fields = { # NB: Keep in sync with the docstring of extractor/common.py @@ -579,7 +569,7 @@ class YoutubeDL(object): def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: - self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion)) + self.report_warning(f'{option} is deprecated. Use {suggestion} instead') return True return False @@ -681,7 +671,7 @@ class YoutubeDL(object): pp_def = dict(pp_def_raw) when = pp_def.pop('when', 'post_process') self.add_post_processor( - get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)), + get_postprocessor(pp_def.pop('key'))(self, **pp_def), when=when) self._setup_opener() @@ -696,7 +686,7 @@ class YoutubeDL(object): with locked_file(fn, 'r', encoding='utf-8') as archive_file: for line in archive_file: self.archive.add(line.strip()) - except IOError as ioe: + except OSError as ioe: if ioe.errno != errno.ENOENT: raise return False @@ -782,9 +772,9 @@ class YoutubeDL(object): assert hasattr(self, '_output_process') assert isinstance(message, compat_str) line_count = message.count('\n') + 1 - self._output_process.stdin.write((message + '\n').encode('utf-8')) + self._output_process.stdin.write((message + '\n').encode()) self._output_process.stdin.flush() - res = ''.join(self._output_channel.readline().decode('utf-8') + res = ''.join(self._output_channel.readline().decode() for _ in range(line_count)) return res[:-len('\n')] @@ -893,16 +883,19 @@ class YoutubeDL(object): raise DownloadError(message, exc_info) self._download_retcode = 1 - class Styles(Enum): - HEADERS = 'yellow' - EMPHASIS = 'light blue' - ID = 'green' - DELIM = 'blue' - ERROR = 'red' - WARNING = 'yellow' - SUPPRESS = 'light black' + Styles = Namespace( + HEADERS='yellow', + EMPHASIS='light blue', + FILENAME='green', + ID='green', + DELIM='blue', + ERROR='red', + WARNING='yellow', + SUPPRESS='light black', + ) def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False): + text = str(text) if test_encoding: original_text = text # handle.encoding can be None. 
See https://github.com/yt-dlp/yt-dlp/issues/2711 @@ -910,8 +903,6 @@ class YoutubeDL(object): text = text.encode(encoding, 'ignore').decode(encoding) if fallback is not None and text != original_text: text = fallback - if isinstance(f, self.Styles): - f = f.value return format_text(text, f) if allow_colors else text if fallback is None else fallback def _format_screen(self, *args, **kwargs): @@ -972,7 +963,7 @@ class YoutubeDL(object): self.to_screen('Deleting existing file') def raise_no_formats(self, info, forced=False, *, msg=None): - has_drm = info.get('__has_drm') + has_drm = info.get('_has_drm') ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg) msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!' if forced or not ignored: @@ -993,11 +984,9 @@ class YoutubeDL(object): outtmpl_dict.update({ k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl_dict.get(k) is None}) - for key, val in outtmpl_dict.items(): + for _, val in outtmpl_dict.items(): if isinstance(val, bytes): - self.report_warning( - 'Parameter outtmpl is bytes, but should be a unicode string. ' - 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.') + self.report_warning('Parameter outtmpl is bytes, but should be a unicode string') return outtmpl_dict def get_output_path(self, dir_type='', filename=None): @@ -1007,12 +996,6 @@ class YoutubeDL(object): expand_path(paths.get('home', '').strip()), expand_path(paths.get(dir_type, '').strip()) if dir_type else '', filename or '') - - # Temporary fix for #4787 - # 'Treat' all problem characters by passing filename through preferredencoding - # to workaround encoding issues with subprocess on python2 @ Windows - if sys.version_info < (3, 0) and sys.platform == 'win32': - path = encodeFilename(path, True).decode(preferredencoding()) return sanitize_path(path, force=self.params.get('windowsfilenames')) @staticmethod @@ -1022,7 +1005,7 @@ class YoutubeDL(object): # '%%' intact for template dict substitution step. Working around # with boundary-alike separator hack. 
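# (Sketch of the hack with a hypothetical sep 'XYZ': '%%(id)s' first becomes '%XYZ%(id)s',
# which expand_path leaves alone, since a random 32-letter name is almost surely not a
# defined environment variable; stripping the separator afterwards restores the literal '%%'.)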
sep = ''.join([random.choice(ascii_letters) for _ in range(32)]) - outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep)) + outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$') # outtmpl should be expand_path'ed before template dict substitution # because meta fields may contain env variables we don't want to @@ -1070,7 +1053,7 @@ class YoutubeDL(object): formatSeconds(info_dict['duration'], '-' if sanitize else ':') if info_dict.get('duration', None) is not None else None) - info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads + info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads) info_dict['video_autonumber'] = self._num_videos if info_dict.get('resolution') is None: info_dict['resolution'] = self.format_resolution(info_dict, default=None) @@ -1078,7 +1061,7 @@ class YoutubeDL(object): # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences # of %(field)s to %(field)0Nd for backward compatibility field_size_compat_map = { - 'playlist_index': number_of_digits(info_dict.get('_last_playlist_index') or 0), + 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0), 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0), 'autonumber': self.params.get('autonumber_size') or 5, } @@ -1092,18 +1075,18 @@ class YoutubeDL(object): # Field is of the form key1.key2... # where keys (except first) can be string, int or slice FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)') - MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?') + MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})' MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys())) - INTERNAL_FORMAT_RE = re.compile(r'''(?x) + INTERNAL_FORMAT_RE = re.compile(rf'''(?x) (?P<negate>-)? - (?P<fields>{field}) - (?P<maths>(?:{math_op}{math_field})*) + (?P<fields>{FIELD_RE}) + (?P<maths>(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*) (?:>(?P<strf_format>.+?))? (?P<remaining> (?P<alternate>(?<!\\),[^|&)]+)? (?:&(?P<replacement>.*?))? (?:\|(?P<default>.*?))? 
- )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE)) + )$''') def _traverse_infodict(k): k = k.split('.') @@ -1182,7 +1165,7 @@ class YoutubeDL(object): fmt = outer_mobj.group('format') if fmt == 's' and value is not None and key in field_size_compat_map.keys(): - fmt = '0{:d}d'.format(field_size_compat_map[key]) + fmt = f'0{field_size_compat_map[key]:d}d' value = default if value is None else value if replacement is None else replacement @@ -1197,7 +1180,7 @@ class YoutubeDL(object): value = map(str, variadic(value) if '#' in flags else [value]) value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt elif fmt[-1] == 'B': # bytes - value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8') + value = f'%{str_fmt}'.encode() % str(value).encode() value, fmt = value.decode('utf-8', 'ignore'), 's' elif fmt[-1] == 'U': # unicode normalized value, fmt = unicodedata.normalize( @@ -1310,7 +1293,7 @@ class YoutubeDL(object): if date is not None: dateRange = self.params.get('daterange', DateRange()) if date not in dateRange: - return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) + return f'{date_from_str(date).isoformat()} upload date is not in range {dateRange}' view_count = info_dict.get('view_count') if view_count is not None: min_views = self.params.get('min_views') @@ -1329,7 +1312,16 @@ class YoutubeDL(object): except TypeError: # For backward compatibility ret = None if incomplete else match_filter(info_dict) - if ret is not None: + if ret is NO_DEFAULT: + while True: + filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME) + reply = input(self._format_screen( + f'Download "{filename}"? (Y/n): ', self.Styles.EMPHASIS)).lower().strip() + if reply in {'y', ''}: + return None + elif reply == 'n': + return f'Skipping {video_title}' + elif ret is not None: return ret return None @@ -1731,6 +1723,7 @@ class YoutubeDL(object): entries.append(entry) try: if entry is not None: + # TODO: Add auto-generated fields self._match_entry(entry, incomplete=True, silent=True) except (ExistingVideoReached, RejectedVideoReached): broken = True @@ -1774,21 +1767,22 @@ class YoutubeDL(object): x_forwarded_for = ie_result.get('__x_forwarded_for_ip') - self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries)) + self.to_screen(f'[{ie_result["extractor"]}] playlist {playlist}: {msg % n_entries}') failures = 0 max_failures = self.params.get('skip_playlist_after_errors') or float('inf') for i, entry_tuple in enumerate(entries, 1): playlist_index, entry = entry_tuple if 'playlist-index' in self.params.get('compat_opts', []): playlist_index = playlistitems[i - 1] if playlistitems else i + playliststart - 1 - self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) + self.to_screen('[download] Downloading video %s of %s' % ( + self._format_screen(i, self.Styles.ID), self._format_screen(n_entries, self.Styles.EMPHASIS))) # This __x_forwarded_for_ip thing is a bit ugly but requires # minimal changes if x_forwarded_for: entry['__x_forwarded_for_ip'] = x_forwarded_for extra = { 'n_entries': n_entries, - '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries), + '__last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries), 'playlist_count': ie_result.get('playlist_count'), 'playlist_index': playlist_index, 'playlist_autonumber': i, @@ -1949,7 +1943,7 @@ class YoutubeDL(object): def 
syntax_error(note, start): message = ( 'Invalid format specification: ' - '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1])) + '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1])) return SyntaxError(message) PICKFIRST = 'PICKFIRST' @@ -2053,7 +2047,7 @@ class YoutubeDL(object): raise syntax_error('Expected a selector', start) current_selector = FormatSelector(MERGE, (selector_1, selector_2), []) else: - raise syntax_error('Operator not recognized: "{0}"'.format(string), start) + raise syntax_error(f'Operator not recognized: "{string}"', start) elif type == tokenize.ENDMARKER: break if current_selector: @@ -2247,13 +2241,13 @@ class YoutubeDL(object): return selector_function(ctx_copy) return final_selector - stream = io.BytesIO(format_spec.encode('utf-8')) + stream = io.BytesIO(format_spec.encode()) try: - tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) + tokens = list(_remove_unused_ops(tokenize.tokenize(stream.readline))) except tokenize.TokenError: raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) - class TokenIterator(object): + class TokenIterator: def __init__(self, tokens): self.tokens = tokens self.counter = 0 @@ -2339,13 +2333,17 @@ class YoutubeDL(object): # TODO: move sanitization here if is_video: # playlists are allowed to lack "title" - info_dict['fulltitle'] = info_dict.get('title') - if 'title' not in info_dict: + title = info_dict.get('title', NO_DEFAULT) + if title is NO_DEFAULT: raise ExtractorError('Missing "title" field in extractor result', video_id=info_dict['id'], ie=info_dict['extractor']) - elif not info_dict.get('title'): - self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') - info_dict['title'] = f'{info_dict["extractor"]} video #{info_dict["id"]}' + info_dict['fulltitle'] = title + if not title: + if title == '': + self.write_debug('Extractor gave empty title. Creating a generic title') + else: + self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') + info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}' if info_dict.get('duration') is not None: info_dict['duration_string'] = formatSeconds(info_dict['duration']) @@ -2358,11 +2356,9 @@ class YoutubeDL(object): if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: # Working around out-of-range timestamp values (e.g. 
negative ones on Windows, # see http://bugs.python.org/issue1646728) - try: + with contextlib.suppress(ValueError, OverflowError, OSError): upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key]) info_dict[date_key] = upload_date.strftime('%Y%m%d') - except (ValueError, OverflowError, OSError): - pass live_keys = ('is_live', 'was_live') live_status = info_dict.get('live_status') @@ -2411,7 +2407,7 @@ class YoutubeDL(object): def sanitize_numeric_fields(info): for numeric_field in self._NUMERIC_FIELDS: field = info.get(numeric_field) - if field is None or isinstance(field, compat_numeric_types): + if field is None or isinstance(field, (int, float)): continue report_force_conversion(numeric_field, 'numeric', 'int') info[numeric_field] = int_or_none(field) @@ -2462,10 +2458,11 @@ class YoutubeDL(object): else: formats = info_dict['formats'] - info_dict['__has_drm'] = any(f.get('has_drm') for f in formats) + # or None ensures --clean-infojson removes it + info_dict['_has_drm'] = any(f.get('has_drm') for f in formats) or None if not self.params.get('allow_unplayable_formats'): formats = [f for f in formats if not f.get('has_drm')] - if info_dict['__has_drm'] and all( + if info_dict['_has_drm'] and all( f.get('acodec') == f.get('vcodec') == 'none' for f in formats): self.report_warning( 'This video is DRM protected and only images are available for download. ' @@ -2653,7 +2650,7 @@ class YoutubeDL(object): if max_downloads_reached: break - write_archive = set(f.get('__write_download_archive', False) for f in formats_to_download) + write_archive = {f.get('__write_download_archive', False) for f in formats_to_download} assert write_archive.issubset({True, False, 'ignore'}) if True in write_archive and False not in write_archive: self.record_download_archive(info_dict) @@ -2721,7 +2718,7 @@ class YoutubeDL(object): for lang in requested_langs: formats = available_subs.get(lang) if formats is None: - self.report_warning('%s subtitles not available for %s' % (lang, video_id)) + self.report_warning(f'{lang} subtitles not available for {video_id}') continue for ext in formats_preference: if ext == 'best': @@ -2764,7 +2761,7 @@ class YoutubeDL(object): tmpl = format_tmpl(tmpl) self.to_screen(f'[info] Writing {tmpl!r} to: {filename}') if self._ensure_dir_exists(filename): - with io.open(filename, 'a', encoding='utf-8') as f: + with open(filename, 'a', encoding='utf-8') as f: f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n') def __forced_printings(self, info_dict, filename, incomplete): @@ -2834,7 +2831,7 @@ class YoutubeDL(object): urls = '", "'.join( (f['url'].split(',')[0] + ',<data>' if f['url'].startswith('data:') else f['url']) for f in info.get('requested_formats', []) or [info]) - self.write_debug('Invoking downloader on "%s"' % urls) + self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"') # Note: Ideally info should be a deep-copied so that hooks cannot modify it. 
# But it may contain objects that are not deep-copyable @@ -2929,11 +2926,11 @@ class YoutubeDL(object): else: try: self.to_screen('[info] Writing video annotations to: ' + annofn) - with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: + with open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: annofile.write(info_dict['annotations']) except (KeyError, TypeError): self.report_warning('There are no annotations to write.') - except (OSError, IOError): + except OSError: self.report_error('Cannot write annotations file: ' + annofn) return @@ -2952,13 +2949,13 @@ class YoutubeDL(object): return True try: self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') - with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', - newline='\r\n' if link_type == 'url' else '\n') as linkfile: + with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', + newline='\r\n' if link_type == 'url' else '\n') as linkfile: template_vars = {'url': url} if link_type == 'desktop': template_vars['filename'] = linkfn[:-(len(link_type) + 1)] linkfile.write(LINK_TEMPLATES[link_type] % template_vars) - except (OSError, IOError): + except OSError: self.report_error(f'Cannot write internet shortcut {linkfn}') return False return True @@ -3023,10 +3020,10 @@ class YoutubeDL(object): return False # Check extension - exts = set(format.get('ext') for format in formats) + exts = {format.get('ext') for format in formats} COMPATIBLE_EXTS = ( - set(('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma')), - set(('webm',)), + {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'}, + {'webm'}, ) for ext_sets in COMPATIBLE_EXTS: if ext_sets.issuperset(exts): @@ -3059,7 +3056,7 @@ class YoutubeDL(object): os.path.splitext(filename)[0] if filename_real_ext in (old_ext, new_ext) else filename) - return '%s.%s' % (filename_wo_ext, ext) + return f'{filename_wo_ext}.{ext}' # Ensure filename always has a correct extension for successful merge full_filename = correct_ext(full_filename) @@ -3144,10 +3141,10 @@ class YoutubeDL(object): except network_exceptions as err: self.report_error('unable to download video data: %s' % error_to_compat_str(err)) return - except (OSError, IOError) as err: + except OSError as err: raise UnavailableVideoError(err) except (ContentTooShortError, ) as err: - self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})') return if success and full_filename != '-': @@ -3160,16 +3157,16 @@ class YoutubeDL(object): if fixup_policy in ('ignore', 'never'): return elif fixup_policy == 'warn': - do_fixup = False + do_fixup = 'warn' elif fixup_policy != 'force': assert fixup_policy in ('detect_or_warn', None) if not info_dict.get('__real_download'): do_fixup = False def ffmpeg_fixup(cndn, msg, cls): - if not cndn: + if not (do_fixup and cndn): return - if not do_fixup: + elif do_fixup == 'warn': self.report_warning(f'{vid}: {msg}') return pp = cls(self) @@ -3192,17 +3189,18 @@ class YoutubeDL(object): FFmpegFixupM4aPP) downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None - downloader = downloader.__name__ if downloader else None + downloader = downloader.FD_NAME if downloader else None if info_dict.get('requested_formats') is None: # Not necessary if doing merger - ffmpeg_fixup(downloader == 'HlsFD', + 
ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts') + or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', FFmpegFixupM3u8PP) ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD', 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP) - ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP) - ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP) + ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP) + ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP) fixup() try: @@ -3235,9 +3233,6 @@ class YoutubeDL(object): res = func(*args, **kwargs) except UnavailableVideoError as e: self.report_error(e) - except MaxDownloadsReached as e: - self.to_screen(f'[info] {e}') - raise except DownloadCancelled as e: self.to_screen(f'[info] {e}') if not self.params.get('break_per_url'): @@ -3292,9 +3287,9 @@ class YoutubeDL(object): info_dict.setdefault('_type', 'video') if remove_private_keys: - reject = lambda k, v: v is None or (k.startswith('_') and k != '_type') or k in { + reject = lambda k, v: v is None or k.startswith('__') or k in { 'requested_downloads', 'requested_formats', 'requested_subtitles', 'requested_entries', - 'entries', 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', + 'entries', 'filepath', '_filename', 'infojson_filename', 'original_url', 'playlist_autonumber', } else: reject = lambda k, v: False @@ -3316,6 +3311,17 @@ class YoutubeDL(object): ''' Alias of sanitize_info for backward compatibility ''' return YoutubeDL.sanitize_info(info_dict, actually_filter) + def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None): + for filename in set(filter(None, files_to_delete)): + if msg: + self.to_screen(msg % filename) + try: + os.remove(filename) + except OSError: + self.report_warning(f'Unable to delete file {filename}') + if filename in info.get('__files_to_move', []): # NB: Delete even if None + del info['__files_to_move'][filename] + @staticmethod def post_extract(info_dict): def actual_post_extract(info_dict): @@ -3348,14 +3354,8 @@ class YoutubeDL(object): for f in files_to_delete: infodict['__files_to_move'].setdefault(f, '') else: - for old_filename in set(files_to_delete): - self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename) - try: - os.remove(encodeFilename(old_filename)) - except (IOError, OSError): - self.report_warning('Unable to remove downloaded original file') - if old_filename in infodict['__files_to_move']: - del infodict['__files_to_move'][old_filename] + self._delete_downloaded_files( + *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)') return infodict def run_all_pps(self, key, info, *, additional_pps=None): @@ -3397,7 +3397,7 @@ class YoutubeDL(object): break else: return - return '%s %s' % (extractor.lower(), video_id) + return f'{extractor.lower()} {video_id}' def in_download_archive(self, info_dict): fn = self.params.get('download_archive') @@ -3594,7 +3594,7 @@ class YoutubeDL(object): def urlopen(self, req): """ Start an HTTP download """ - if isinstance(req, compat_basestring): + if isinstance(req, str): req = sanitized_Request(req) return self._opener.open(req, timeout=self._socket_timeout) @@ -3605,7 +3605,7 @@ class 
YoutubeDL(object): def get_encoding(stream): ret = str(getattr(stream, 'encoding', 'missing (%s)' % type(stream).__name__)) if not supports_terminal_sequences(stream): - from .compat import WINDOWS_VT_MODE + from .compat import WINDOWS_VT_MODE # Must be imported locally ret += ' (No VT)' if WINDOWS_VT_MODE is False else ' (No ANSI)' return ret @@ -3652,10 +3652,8 @@ class YoutubeDL(object): if re.match('[0-9a-f]+', out): write_debug('Git HEAD: %s' % out) except Exception: - try: + with contextlib.suppress(Exception): sys.exc_clear() - except Exception: - pass def python_implementation(): impl_name = platform.python_implementation() @@ -3672,7 +3670,7 @@ class YoutubeDL(object): exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} if ffmpeg_features: - exe_versions['ffmpeg'] += ' (%s)' % ','.join(ffmpeg_features) + exe_versions['ffmpeg'] += ' (%s)' % ','.join(sorted(ffmpeg_features)) exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() @@ -3681,20 +3679,12 @@ class YoutubeDL(object): ) or 'none' write_debug('exe versions: %s' % exe_str) - from .downloader.websocket import has_websockets - from .postprocessor.embedthumbnail import has_mutagen - from .cookies import SQLITE_AVAILABLE, SECRETSTORAGE_AVAILABLE - - lib_str = join_nonempty( - compat_brotli and compat_brotli.__name__, - has_certifi and 'certifi', - compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0], - SECRETSTORAGE_AVAILABLE and 'secretstorage', - has_mutagen and 'mutagen', - SQLITE_AVAILABLE and 'sqlite', - has_websockets and 'websockets', - delim=', ') or 'none' - write_debug('Optional libraries: %s' % lib_str) + from .compat.compat_utils import get_package_info + from .dependencies import available_dependencies + + write_debug('Optional libraries: %s' % (', '.join(sorted({ + join_nonempty(*get_package_info(m)) for m in available_dependencies.values() + })) or 'none')) self._setup_opener() proxy_map = {} @@ -3705,10 +3695,10 @@ class YoutubeDL(object): # Not implemented if False and self.params.get('call_home'): - ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8') + ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() write_debug('Public IP address: %s' % ipaddr) latest_version = self.urlopen( - 'https://yt-dl.org/latest/version').read().decode('utf-8') + 'https://yt-dl.org/latest/version').read().decode() if version_tuple(latest_version) > version_tuple(__version__): self.report_warning( 'You are using an outdated version (newest version: %s)! 
' @@ -3744,7 +3734,7 @@ class YoutubeDL(object): https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) redirect_handler = YoutubeDLRedirectHandler() - data_handler = compat_urllib_request_DataHandler() + data_handler = urllib.request.DataHandler() # When passing our own FileHandler instance, build_opener won't add the # default FileHandler and allows us to disable the file protocol, which @@ -3800,7 +3790,7 @@ class YoutubeDL(object): try: write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn) return True - except (OSError, IOError): + except OSError: self.report_error(f'Cannot write {label} metadata to JSON file {infofn}') return None @@ -3821,9 +3811,9 @@ class YoutubeDL(object): else: try: self.to_screen(f'[info] Writing {label} description to: {descfn}') - with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: + with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: descfile.write(ie_result['description']) - except (OSError, IOError): + except OSError: self.report_error(f'Cannot write {label} description file {descfn}') return None return True @@ -3857,12 +3847,12 @@ class YoutubeDL(object): try: # Use newline='' to prevent conversion of newline characters # See https://github.com/ytdl-org/youtube-dl/issues/10268 - with io.open(sub_filename, 'w', encoding='utf-8', newline='') as subfile: + with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile: subfile.write(sub_info['data']) sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) continue - except (OSError, IOError): + except OSError: self.report_error(f'Cannot write video subtitles file {sub_filename}') return None diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 0599af92c..386996e16 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -1,60 +1,52 @@ #!/usr/bin/python -# coding: utf-8 +f'You are using an unsupported version of Python. 
Only Python versions 3.6 and above are supported by yt-dlp' # noqa: F541 __license__ = 'CC0-1.0' -import codecs -import io import itertools import os -import random import re import sys +from .compat import compat_getpass, compat_os_name, compat_shlex_quote +from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS +from .downloader import FileDownloader +from .extractor import GenericIE, list_extractor_classes +from .extractor.adobepass import MSO_INFO +from .extractor.common import InfoExtractor from .options import parseOpts -from .compat import ( - compat_getpass, - compat_os_name, - compat_shlex_quote, - workaround_optparse_bug9161, +from .postprocessor import ( + FFmpegExtractAudioPP, + FFmpegSubtitlesConvertorPP, + FFmpegThumbnailsConvertorPP, + FFmpegVideoConvertorPP, + FFmpegVideoRemuxerPP, + MetadataFromFieldPP, + MetadataParserPP, ) -from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS +from .update import run_update from .utils import ( + NO_DEFAULT, + POSTPROCESS_WHEN, DateRange, - decodeOption, DownloadCancelled, DownloadError, + GeoUtils, + SameFileError, + decodeOption, expand_path, float_or_none, - GeoUtils, int_or_none, match_filter_func, - NO_DEFAULT, parse_duration, preferredencoding, read_batch_urls, render_table, - SameFileError, setproctitle, std_headers, traverse_obj, write_string, ) -from .downloader import ( - FileDownloader, -) -from .extractor import gen_extractors, list_extractors -from .extractor.common import InfoExtractor -from .extractor.adobepass import MSO_INFO -from .postprocessor import ( - FFmpegExtractAudioPP, - FFmpegSubtitlesConvertorPP, - FFmpegThumbnailsConvertorPP, - FFmpegVideoConvertorPP, - FFmpegVideoRemuxerPP, - MetadataFromFieldPP, - MetadataParserPP, -) from .YoutubeDL import YoutubeDL diff --git a/yt_dlp/__main__.py b/yt_dlp/__main__.py index c9f41473d..c9d275b86 100644 --- a/yt_dlp/__main__.py +++ b/yt_dlp/__main__.py @@ -1,9 +1,6 @@ #!/usr/bin/env python3 -from __future__ import unicode_literals - # Execute with -# $ python yt_dlp/__main__.py (2.6+) -# $ python -m yt_dlp (2.7+) +# $ python -m yt_dlp import sys diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index b37f0dd39..d0e6d7549 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -1,26 +1,17 @@ -from __future__ import unicode_literals - from math import ceil -from .compat import ( - compat_b64decode, - compat_ord, - compat_pycrypto_AES, -) -from .utils import ( - bytes_to_intlist, - intlist_to_bytes, -) - +from .compat import compat_b64decode, compat_ord +from .dependencies import Cryptodome_AES +from .utils import bytes_to_intlist, intlist_to_bytes -if compat_pycrypto_AES: +if Cryptodome_AES: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using pycryptodome """ - return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_CBC, iv).decrypt(data) + return Cryptodome_AES.new(key, Cryptodome_AES.MODE_CBC, iv).decrypt(data) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using pycryptodome """ - return compat_pycrypto_AES.new(key, compat_pycrypto_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) + return Cryptodome_AES.new(key, Cryptodome_AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) else: def aes_cbc_decrypt_bytes(data, key, iv): @@ -274,7 +265,7 @@ def aes_decrypt_text(data, password, key_size_bytes): NONCE_LENGTH_BYTES = 8 data = bytes_to_intlist(compat_b64decode(data)) - password = bytes_to_intlist(password.encode('utf-8')) + password = bytes_to_intlist(password.encode()) key = password[:key_size_bytes] + 
[0] * (key_size_bytes - len(password)) key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) @@ -503,7 +494,7 @@ def ghash(subkey, data): last_y = [0] * BLOCK_SIZE_BYTES for i in range(0, len(data), BLOCK_SIZE_BYTES): - block = data[i : i + BLOCK_SIZE_BYTES] # noqa: E203 + block = data[i: i + BLOCK_SIZE_BYTES] last_y = block_product(xor(last_y, block), subkey) return last_y diff --git a/yt_dlp/cache.py b/yt_dlp/cache.py index e5cb193bc..e3f8a7dab 100644 --- a/yt_dlp/cache.py +++ b/yt_dlp/cache.py @@ -1,7 +1,5 @@ -from __future__ import unicode_literals - +import contextlib import errno -import io import json import os import re @@ -9,13 +7,10 @@ import shutil import traceback from .compat import compat_getenv -from .utils import ( - expand_path, - write_json_file, -) +from .utils import expand_path, write_json_file -class Cache(object): +class Cache: def __init__(self, ydl): self._ydl = ydl @@ -31,7 +26,7 @@ class Cache(object): 'invalid section %r' % section assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key return os.path.join( - self._get_root_dir(), section, '%s.%s' % (key, dtype)) + self._get_root_dir(), section, f'{key}.{dtype}') @property def enabled(self): @@ -54,8 +49,7 @@ class Cache(object): write_json_file(data, fn) except Exception: tb = traceback.format_exc() - self._ydl.report_warning( - 'Writing cache to %r failed: %s' % (fn, tb)) + self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}') def load(self, section, key, dtype='json', default=None): assert dtype in ('json',) @@ -64,20 +58,17 @@ class Cache(object): return default cache_fn = self._get_cache_fn(section, key, dtype) - try: + with contextlib.suppress(OSError): try: - with io.open(cache_fn, 'r', encoding='utf-8') as cachef: + with open(cache_fn, encoding='utf-8') as cachef: self._ydl.write_debug(f'Loading {section}.{key} from cache') return json.load(cachef) except ValueError: try: file_size = os.path.getsize(cache_fn) - except (OSError, IOError) as oe: + except OSError as oe: file_size = str(oe) - self._ydl.report_warning( - 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size)) - except IOError: - pass # No cache available + self._ydl.report_warning(f'Cache retrieval from {cache_fn} failed ({file_size})') return default diff --git a/yt_dlp/compat.py b/yt_dlp/compat.py deleted file mode 100644 index 0a0d3b351..000000000 --- a/yt_dlp/compat.py +++ /dev/null @@ -1,330 +0,0 @@ -# coding: utf-8 - -import asyncio -import base64 -import collections -import ctypes -import getpass -import html -import html.parser -import http -import http.client -import http.cookiejar -import http.cookies -import http.server -import itertools -import optparse -import os -import re -import shlex -import shutil -import socket -import struct -import subprocess -import sys -import tokenize -import urllib -import xml.etree.ElementTree as etree -from subprocess import DEVNULL - - -# HTMLParseError has been deprecated in Python 3.3 and removed in -# Python 3.5. 
Introducing dummy exception for Python >3.5 for compatible -# and uniform cross-version exception handling -class compat_HTMLParseError(Exception): - pass - - -# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE -# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines -def compat_ctypes_WINFUNCTYPE(*args, **kwargs): - return ctypes.WINFUNCTYPE(*args, **kwargs) - - -class _TreeBuilder(etree.TreeBuilder): - def doctype(self, name, pubid, system): - pass - - -def compat_etree_fromstring(text): - return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) - - -compat_os_name = os._name if os.name == 'java' else os.name - - -if compat_os_name == 'nt': - def compat_shlex_quote(s): - return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') -else: - from shlex import quote as compat_shlex_quote - - -def compat_ord(c): - if type(c) is int: - return c - else: - return ord(c) - - -def compat_setenv(key, value, env=os.environ): - env[key] = value - - -if compat_os_name == 'nt' and sys.version_info < (3, 8): - # os.path.realpath on Windows does not follow symbolic links - # prior to Python 3.8 (see https://bugs.python.org/issue9949) - def compat_realpath(path): - while os.path.islink(path): - path = os.path.abspath(os.readlink(path)) - return path -else: - compat_realpath = os.path.realpath - - -def compat_print(s): - assert isinstance(s, compat_str) - print(s) - - -# Fix https://github.com/ytdl-org/youtube-dl/issues/4223 -# See http://bugs.python.org/issue9161 for what is broken -def workaround_optparse_bug9161(): - op = optparse.OptionParser() - og = optparse.OptionGroup(op, 'foo') - try: - og.add_option('-t') - except TypeError: - real_add_option = optparse.OptionGroup.add_option - - def _compat_add_option(self, *args, **kwargs): - enc = lambda v: ( - v.encode('ascii', 'replace') if isinstance(v, compat_str) - else v) - bargs = [enc(a) for a in args] - bkwargs = dict( - (k, enc(v)) for k, v in kwargs.items()) - return real_add_option(self, *bargs, **bkwargs) - optparse.OptionGroup.add_option = _compat_add_option - - -try: - compat_Pattern = re.Pattern -except AttributeError: - compat_Pattern = type(re.compile('')) - - -try: - compat_Match = re.Match -except AttributeError: - compat_Match = type(re.compile('').match('')) - - -try: - compat_asyncio_run = asyncio.run # >= 3.7 -except AttributeError: - def compat_asyncio_run(coro): - try: - loop = asyncio.get_event_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - loop.run_until_complete(coro) - - asyncio.run = compat_asyncio_run - - -try: # >= 3.7 - asyncio.tasks.all_tasks -except AttributeError: - asyncio.tasks.all_tasks = asyncio.tasks.Task.all_tasks - -try: - import websockets as compat_websockets -except ImportError: - compat_websockets = None - -# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl -# See https://github.com/yt-dlp/yt-dlp/issues/792 -# https://docs.python.org/3/library/os.path.html#os.path.expanduser -if compat_os_name in ('nt', 'ce') and 'HOME' in os.environ: - _userhome = os.environ['HOME'] - - def compat_expanduser(path): - if not path.startswith('~'): - return path - i = path.replace('\\', '/', 1).find('/') # ~user - if i < 0: - i = len(path) - userhome = os.path.join(os.path.dirname(_userhome), path[1:i]) if i > 1 else _userhome - return userhome + path[i:] -else: - compat_expanduser = os.path.expanduser - - -try: - from Cryptodome.Cipher import AES as compat_pycrypto_AES -except ImportError: - try: 
- from Crypto.Cipher import AES as compat_pycrypto_AES - except ImportError: - compat_pycrypto_AES = None - -try: - import brotlicffi as compat_brotli -except ImportError: - try: - import brotli as compat_brotli - except ImportError: - compat_brotli = None - -WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None - - -def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 - if compat_os_name != 'nt': - return - global WINDOWS_VT_MODE - startupinfo = subprocess.STARTUPINFO() - startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW - try: - subprocess.Popen('', shell=True, startupinfo=startupinfo) - WINDOWS_VT_MODE = True - except Exception: - pass - - -# Deprecated - -compat_basestring = str -compat_chr = chr -compat_filter = filter -compat_input = input -compat_integer_types = (int, ) -compat_kwargs = lambda kwargs: kwargs -compat_map = map -compat_numeric_types = (int, float, complex) -compat_str = str -compat_xpath = lambda xpath: xpath -compat_zip = zip - -compat_collections_abc = collections.abc -compat_HTMLParser = html.parser.HTMLParser -compat_HTTPError = urllib.error.HTTPError -compat_Struct = struct.Struct -compat_b64decode = base64.b64decode -compat_cookiejar = http.cookiejar -compat_cookiejar_Cookie = compat_cookiejar.Cookie -compat_cookies = http.cookies -compat_cookies_SimpleCookie = compat_cookies.SimpleCookie -compat_etree_Element = etree.Element -compat_etree_register_namespace = etree.register_namespace -compat_get_terminal_size = shutil.get_terminal_size -compat_getenv = os.getenv -compat_getpass = getpass.getpass -compat_html_entities = html.entities -compat_html_entities_html5 = compat_html_entities.html5 -compat_http_client = http.client -compat_http_server = http.server -compat_itertools_count = itertools.count -compat_parse_qs = urllib.parse.parse_qs -compat_shlex_split = shlex.split -compat_socket_create_connection = socket.create_connection -compat_struct_pack = struct.pack -compat_struct_unpack = struct.unpack -compat_subprocess_get_DEVNULL = lambda: DEVNULL -compat_tokenize_tokenize = tokenize.tokenize -compat_urllib_error = urllib.error -compat_urllib_parse = urllib.parse -compat_urllib_parse_quote = urllib.parse.quote -compat_urllib_parse_quote_plus = urllib.parse.quote_plus -compat_urllib_parse_unquote = urllib.parse.unquote -compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus -compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes -compat_urllib_parse_urlencode = urllib.parse.urlencode -compat_urllib_parse_urlparse = urllib.parse.urlparse -compat_urllib_parse_urlunparse = urllib.parse.urlunparse -compat_urllib_request = urllib.request -compat_urllib_request_DataHandler = urllib.request.DataHandler -compat_urllib_response = urllib.response -compat_urlparse = urllib.parse -compat_urlretrieve = urllib.request.urlretrieve -compat_xml_parse_error = etree.ParseError - - -# Set public objects - -__all__ = [ - 'WINDOWS_VT_MODE', - 'compat_HTMLParseError', - 'compat_HTMLParser', - 'compat_HTTPError', - 'compat_Match', - 'compat_Pattern', - 'compat_Struct', - 'compat_asyncio_run', - 'compat_b64decode', - 'compat_basestring', - 'compat_brotli', - 'compat_chr', - 'compat_collections_abc', - 'compat_cookiejar', - 'compat_cookiejar_Cookie', - 'compat_cookies', - 'compat_cookies_SimpleCookie', - 'compat_ctypes_WINFUNCTYPE', - 'compat_etree_Element', - 'compat_etree_fromstring', - 'compat_etree_register_namespace', - 'compat_expanduser', - 'compat_filter', - 'compat_get_terminal_size', - 'compat_getenv', - 
'compat_getpass', - 'compat_html_entities', - 'compat_html_entities_html5', - 'compat_http_client', - 'compat_http_server', - 'compat_input', - 'compat_integer_types', - 'compat_itertools_count', - 'compat_kwargs', - 'compat_map', - 'compat_numeric_types', - 'compat_ord', - 'compat_os_name', - 'compat_parse_qs', - 'compat_print', - 'compat_pycrypto_AES', - 'compat_realpath', - 'compat_setenv', - 'compat_shlex_quote', - 'compat_shlex_split', - 'compat_socket_create_connection', - 'compat_str', - 'compat_struct_pack', - 'compat_struct_unpack', - 'compat_subprocess_get_DEVNULL', - 'compat_tokenize_tokenize', - 'compat_urllib_error', - 'compat_urllib_parse', - 'compat_urllib_parse_quote', - 'compat_urllib_parse_quote_plus', - 'compat_urllib_parse_unquote', - 'compat_urllib_parse_unquote_plus', - 'compat_urllib_parse_unquote_to_bytes', - 'compat_urllib_parse_urlencode', - 'compat_urllib_parse_urlparse', - 'compat_urllib_parse_urlunparse', - 'compat_urllib_request', - 'compat_urllib_request_DataHandler', - 'compat_urllib_response', - 'compat_urlparse', - 'compat_urlretrieve', - 'compat_websockets', - 'compat_xml_parse_error', - 'compat_xpath', - 'compat_zip', - 'windows_enable_vt_mode', - 'workaround_optparse_bug9161', -] diff --git a/yt_dlp/compat/__init__.py b/yt_dlp/compat/__init__.py new file mode 100644 index 000000000..a0cd62110 --- /dev/null +++ b/yt_dlp/compat/__init__.py @@ -0,0 +1,90 @@ +import contextlib +import os +import subprocess +import sys +import warnings +import xml.etree.ElementTree as etree + +from . import re +from ._deprecated import * # noqa: F401, F403 +from .compat_utils import passthrough_module + + +# XXX: Implement this the same way as other DeprecationWarnings without circular import +passthrough_module(__name__, '._legacy', callback=lambda attr: warnings.warn( + DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=2)) +del passthrough_module + + +# HTMLParseError has been deprecated in Python 3.3 and removed in +# Python 3.5. 
Introducing dummy exception for Python >3.5 for compatible +# and uniform cross-version exception handling +class compat_HTMLParseError(Exception): + pass + + +class _TreeBuilder(etree.TreeBuilder): + def doctype(self, name, pubid, system): + pass + + +def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) + + +compat_os_name = os._name if os.name == 'java' else os.name + + +if compat_os_name == 'nt': + def compat_shlex_quote(s): + return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"') +else: + from shlex import quote as compat_shlex_quote # noqa: F401 + + +def compat_ord(c): + return c if isinstance(c, int) else ord(c) + + +if compat_os_name == 'nt' and sys.version_info < (3, 8): + # os.path.realpath on Windows does not follow symbolic links + # prior to Python 3.8 (see https://bugs.python.org/issue9949) + def compat_realpath(path): + while os.path.islink(path): + path = os.path.abspath(os.readlink(path)) + return path +else: + compat_realpath = os.path.realpath + + +# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl +# See https://github.com/yt-dlp/yt-dlp/issues/792 +# https://docs.python.org/3/library/os.path.html#os.path.expanduser +if compat_os_name in ('nt', 'ce'): + def compat_expanduser(path): + HOME = os.environ.get('HOME') + if not HOME: + return os.path.expanduser(path) + elif not path.startswith('~'): + return path + i = path.replace('\\', '/', 1).find('/') # ~user + if i < 0: + i = len(path) + userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME + return userhome + path[i:] +else: + compat_expanduser = os.path.expanduser + + +WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None + + +def windows_enable_vt_mode(): # TODO: Do this the proper way https://bugs.python.org/issue30075 + if compat_os_name != 'nt': + return + global WINDOWS_VT_MODE + startupinfo = subprocess.STARTUPINFO() + startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW + with contextlib.suppress(Exception): + subprocess.Popen('', shell=True, startupinfo=startupinfo).wait() + WINDOWS_VT_MODE = True diff --git a/yt_dlp/compat/_deprecated.py b/yt_dlp/compat/_deprecated.py new file mode 100644 index 000000000..390f76577 --- /dev/null +++ b/yt_dlp/compat/_deprecated.py @@ -0,0 +1,52 @@ +"""Deprecated - New code should avoid these""" + +import base64 +import getpass +import html +import html.parser +import http +import http.client +import http.cookiejar +import http.cookies +import http.server +import itertools +import os +import shutil +import struct +import tokenize +import urllib + +compat_b64decode = base64.b64decode +compat_chr = chr +compat_cookiejar = http.cookiejar +compat_cookiejar_Cookie = http.cookiejar.Cookie +compat_cookies_SimpleCookie = http.cookies.SimpleCookie +compat_get_terminal_size = shutil.get_terminal_size +compat_getenv = os.getenv +compat_getpass = getpass.getpass +compat_html_entities = html.entities +compat_html_entities_html5 = html.entities.html5 +compat_HTMLParser = html.parser.HTMLParser +compat_http_client = http.client +compat_http_server = http.server +compat_HTTPError = urllib.error.HTTPError +compat_itertools_count = itertools.count +compat_parse_qs = urllib.parse.parse_qs +compat_str = str +compat_struct_pack = struct.pack +compat_struct_unpack = struct.unpack +compat_tokenize_tokenize = tokenize.tokenize +compat_urllib_error = urllib.error +compat_urllib_parse_unquote = urllib.parse.unquote +compat_urllib_parse_unquote_plus = 
urllib.parse.unquote_plus +compat_urllib_parse_urlencode = urllib.parse.urlencode +compat_urllib_parse_urlparse = urllib.parse.urlparse +compat_urllib_request = urllib.request +compat_urlparse = compat_urllib_parse = urllib.parse + + +def compat_setenv(key, value, env=os.environ): + env[key] = value + + +__all__ = [x for x in globals() if x.startswith('compat_')] diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py new file mode 100644 index 000000000..ce24760e5 --- /dev/null +++ b/yt_dlp/compat/_legacy.py @@ -0,0 +1,57 @@ +""" Do not use! """ + +import collections +import ctypes +import http +import http.client +import http.cookiejar +import http.cookies +import http.server +import shlex +import socket +import struct +import urllib +import xml.etree.ElementTree as etree +from subprocess import DEVNULL + +from .asyncio import run as compat_asyncio_run # noqa: F401 +from .re import Pattern as compat_Pattern # noqa: F401 +from .re import match as compat_Match # noqa: F401 +from ..dependencies import Cryptodome_AES as compat_pycrypto_AES # noqa: F401 +from ..dependencies import brotli as compat_brotli # noqa: F401 +from ..dependencies import websockets as compat_websockets # noqa: F401 + + +# compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE +# will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines +def compat_ctypes_WINFUNCTYPE(*args, **kwargs): + return ctypes.WINFUNCTYPE(*args, **kwargs) + + +compat_basestring = str +compat_collections_abc = collections.abc +compat_cookies = http.cookies +compat_etree_Element = etree.Element +compat_etree_register_namespace = etree.register_namespace +compat_filter = filter +compat_input = input +compat_integer_types = (int, ) +compat_kwargs = lambda kwargs: kwargs +compat_map = map +compat_numeric_types = (int, float, complex) +compat_print = print +compat_shlex_split = shlex.split +compat_socket_create_connection = socket.create_connection +compat_Struct = struct.Struct +compat_subprocess_get_DEVNULL = lambda: DEVNULL +compat_urllib_parse_quote = urllib.parse.quote +compat_urllib_parse_quote_plus = urllib.parse.quote_plus +compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes +compat_urllib_parse_urlunparse = urllib.parse.urlunparse +compat_urllib_request_DataHandler = urllib.request.DataHandler +compat_urllib_response = urllib.response +compat_urlretrieve = urllib.request.urlretrieve +compat_xml_parse_error = etree.ParseError +compat_xpath = lambda xpath: xpath +compat_zip = zip +workaround_optparse_bug9161 = lambda: None diff --git a/yt_dlp/compat/asyncio.py b/yt_dlp/compat/asyncio.py new file mode 100644 index 000000000..c61e5c8fd --- /dev/null +++ b/yt_dlp/compat/asyncio.py @@ -0,0 +1,23 @@ +# flake8: noqa: F405 +from asyncio import * # noqa: F403 + +from .compat_utils import passthrough_module + +passthrough_module(__name__, 'asyncio') +del passthrough_module + +try: + run # >= 3.7 +except NameError: + def run(coro): + try: + loop = get_event_loop() + except RuntimeError: + loop = new_event_loop() + set_event_loop(loop) + loop.run_until_complete(coro) + +try: + all_tasks # >= 3.7 +except NameError: + all_tasks = Task.all_tasks diff --git a/yt_dlp/compat/compat_utils.py b/yt_dlp/compat/compat_utils.py new file mode 100644 index 000000000..b1d58f5b9 --- /dev/null +++ b/yt_dlp/compat/compat_utils.py @@ -0,0 +1,60 @@ +import collections +import contextlib +import importlib +import sys +import types + + +_NO_ATTRIBUTE = object() + +_Package = collections.namedtuple('Package', ('name', 'version')) + + +def 
get_package_info(module): + parent = module.__name__.split('.')[0] + parent_module = None + with contextlib.suppress(ImportError): + parent_module = importlib.import_module(parent) + + for attr in ('__version__', 'version_string', 'version'): + version = getattr(parent_module, attr, None) + if version is not None: + break + return _Package(getattr(module, '_yt_dlp__identifier', parent), str(version)) + + +def _is_package(module): + try: + module.__getattribute__('__path__') + except AttributeError: + return False + return True + + +def passthrough_module(parent, child, *, callback=lambda _: None): + parent_module = importlib.import_module(parent) + child_module = importlib.import_module(child, parent) + + class PassthroughModule(types.ModuleType): + def __getattr__(self, attr): + if _is_package(parent_module): + with contextlib.suppress(ImportError): + return importlib.import_module(f'.{attr}', parent) + + ret = _NO_ATTRIBUTE + with contextlib.suppress(AttributeError): + ret = getattr(child_module, attr) + + if _is_package(child_module): + with contextlib.suppress(ImportError): + ret = importlib.import_module(f'.{attr}', child) + + if ret is _NO_ATTRIBUTE: + raise AttributeError(f'module {parent} has no attribute {attr}') + + callback(attr) + return ret + + # Python 3.6 does not have module level __getattr__ + # https://peps.python.org/pep-0562/ + sys.modules[parent].__class__ = PassthroughModule diff --git a/yt_dlp/compat/re.py b/yt_dlp/compat/re.py new file mode 100644 index 000000000..e1d3a2645 --- /dev/null +++ b/yt_dlp/compat/re.py @@ -0,0 +1,18 @@ +# flake8: noqa: F405 +from re import * # F403 + +from .compat_utils import passthrough_module + +passthrough_module(__name__, 're') +del passthrough_module + +try: + Pattern # >= 3.7 +except NameError: + Pattern = type(compile('')) + + +try: + Match # >= 3.7 +except NameError: + Match = type(compile('').match('')) diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index 3476595d3..1598828f2 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -7,6 +7,7 @@ import struct import subprocess import sys import tempfile +import time from datetime import datetime, timedelta, timezone from enum import Enum, auto from hashlib import pbkdf2_hmac @@ -16,39 +17,14 @@ from .aes import ( aes_gcm_decrypt_and_verify_bytes, unpad_pkcs7, ) -from .compat import ( - compat_b64decode, - compat_cookiejar_Cookie, +from .compat import compat_b64decode, compat_cookiejar_Cookie +from .dependencies import ( + _SECRETSTORAGE_UNAVAILABLE_REASON, + secretstorage, + sqlite3, ) from .minicurses import MultilinePrinter, QuietMultilinePrinter -from .utils import ( - error_to_str, - expand_path, - Popen, - YoutubeDLCookieJar, -) - -try: - import sqlite3 - SQLITE_AVAILABLE = True -except ImportError: - # although sqlite3 is part of the standard library, it is possible to compile python without - # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 - SQLITE_AVAILABLE = False - - -try: - import secretstorage - SECRETSTORAGE_AVAILABLE = True -except ImportError: - SECRETSTORAGE_AVAILABLE = False - SECRETSTORAGE_UNAVAILABLE_REASON = ( - 'as the `secretstorage` module is not installed. ' - 'Please install by running `python3 -m pip install secretstorage`.') -except Exception as _err: - SECRETSTORAGE_AVAILABLE = False - SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. 
{_err}' - +from .utils import Popen, YoutubeDLCookieJar, error_to_str, expand_path CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi'} SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} @@ -74,6 +50,14 @@ class YDLLogger: if self._ydl: self._ydl.report_error(message) + class ProgressBar(MultilinePrinter): + _DELAY, _timer = 0.1, 0 + + def print(self, message): + if time.time() - self._timer > self._DELAY: + self.print_at_line(f'[Cookies] {message}', 0) + self._timer = time.time() + def progress_bar(self): """Return a context manager with a print method. (Optional)""" # Do not print to files/pipes, loggers, or when --no-progress is used @@ -85,10 +69,7 @@ class YDLLogger: return except BaseException: return - - printer = MultilinePrinter(file, preserve_output=False) - printer.print = lambda message: printer.print_at_line(f'[Cookies] {message}', 0) - return printer + return self.ProgressBar(file, preserve_output=False) def _create_progress_bar(logger): @@ -108,9 +89,12 @@ def load_cookies(cookie_file, browser_specification, ydl): cookie_jars.append(extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring)) if cookie_file is not None: - cookie_file = expand_path(cookie_file) + is_filename = YoutubeDLCookieJar.is_path(cookie_file) + if is_filename: + cookie_file = expand_path(cookie_file) + jar = YoutubeDLCookieJar(cookie_file) - if os.access(cookie_file, os.R_OK): + if not is_filename or os.access(cookie_file, os.R_OK): jar.load(ignore_discard=True, ignore_expires=True) cookie_jars.append(jar) @@ -125,12 +109,12 @@ def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), elif browser_name in CHROMIUM_BASED_BROWSERS: return _extract_chrome_cookies(browser_name, profile, keyring, logger) else: - raise ValueError('unknown browser: {}'.format(browser_name)) + raise ValueError(f'unknown browser: {browser_name}') def _extract_firefox_cookies(profile, logger): logger.info('Extracting cookies from firefox') - if not SQLITE_AVAILABLE: + if not sqlite3: logger.warning('Cannot extract cookies from firefox without sqlite3 support. 
' 'Please use a python interpreter compiled with sqlite3 support') return YoutubeDLCookieJar() @@ -144,8 +128,8 @@ def _extract_firefox_cookies(profile, logger): cookie_database_path = _find_most_recently_used_file(search_root, 'cookies.sqlite', logger) if cookie_database_path is None: - raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root)) - logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) + raise FileNotFoundError(f'could not find firefox cookies database in {search_root}') + logger.debug(f'Extracting cookies from: "{cookie_database_path}"') with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None @@ -164,7 +148,7 @@ def _extract_firefox_cookies(profile, logger): path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False, comment=None, comment_url=None, rest={}) jar.set_cookie(cookie) - logger.info('Extracted {} cookies from firefox'.format(len(jar))) + logger.info(f'Extracted {len(jar)} cookies from firefox') return jar finally: if cursor is not None: @@ -175,11 +159,11 @@ def _firefox_browser_dir(): if sys.platform in ('linux', 'linux2'): return os.path.expanduser('~/.mozilla/firefox') elif sys.platform == 'win32': - return os.path.expandvars(r'%APPDATA%\Mozilla\Firefox\Profiles') + return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') elif sys.platform == 'darwin': return os.path.expanduser('~/Library/Application Support/Firefox') else: - raise ValueError('unsupported platform: {}'.format(sys.platform)) + raise ValueError(f'unsupported platform: {sys.platform}') def _get_chromium_based_browser_settings(browser_name): @@ -199,12 +183,12 @@ def _get_chromium_based_browser_settings(browser_name): appdata_local = os.path.expandvars('%LOCALAPPDATA%') appdata_roaming = os.path.expandvars('%APPDATA%') browser_dir = { - 'brave': os.path.join(appdata_local, r'BraveSoftware\Brave-Browser\User Data'), - 'chrome': os.path.join(appdata_local, r'Google\Chrome\User Data'), - 'chromium': os.path.join(appdata_local, r'Chromium\User Data'), - 'edge': os.path.join(appdata_local, r'Microsoft\Edge\User Data'), - 'opera': os.path.join(appdata_roaming, r'Opera Software\Opera Stable'), - 'vivaldi': os.path.join(appdata_local, r'Vivaldi\User Data'), + 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'), + 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'), + 'chromium': os.path.join(appdata_local, R'Chromium\User Data'), + 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'), + 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'), + 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'), }[browser_name] elif sys.platform == 'darwin': @@ -219,7 +203,7 @@ def _get_chromium_based_browser_settings(browser_name): }[browser_name] else: - raise ValueError('unsupported platform: {}'.format(sys.platform)) + raise ValueError(f'unsupported platform: {sys.platform}') # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE: # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" @@ -242,11 +226,11 @@ def _get_chromium_based_browser_settings(browser_name): def _extract_chrome_cookies(browser_name, profile, keyring, logger): - logger.info('Extracting cookies from {}'.format(browser_name)) + logger.info(f'Extracting cookies from {browser_name}') - if not SQLITE_AVAILABLE: - logger.warning(('Cannot extract cookies from {} without sqlite3 support. 
' - 'Please use a python interpreter compiled with sqlite3 support').format(browser_name)) + if not sqlite3: + logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. ' + 'Please use a python interpreter compiled with sqlite3 support') return YoutubeDLCookieJar() config = _get_chromium_based_browser_settings(browser_name) @@ -260,13 +244,13 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): if config['supports_profiles']: search_root = os.path.join(config['browser_dir'], profile) else: - logger.error('{} does not support profiles'.format(browser_name)) + logger.error(f'{browser_name} does not support profiles') search_root = config['browser_dir'] cookie_database_path = _find_most_recently_used_file(search_root, 'Cookies', logger) if cookie_database_path is None: - raise FileNotFoundError('could not find {} cookies database in "{}"'.format(browser_name, search_root)) - logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) + raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') + logger.debug(f'Extracting cookies from: "{cookie_database_path}"') decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring) @@ -277,8 +261,7 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): cursor.connection.text_factory = bytes column_names = _get_column_names(cursor, 'cookies') secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' - cursor.execute('SELECT host_key, name, value, encrypted_value, path, ' - 'expires_utc, {} FROM cookies'.format(secure_column)) + cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies') jar = YoutubeDLCookieJar() failed_cookies = 0 unencrypted_cookies = 0 @@ -295,13 +278,13 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): unencrypted_cookies += 1 jar.set_cookie(cookie) if failed_cookies > 0: - failed_message = ' ({} could not be decrypted)'.format(failed_cookies) + failed_message = f' ({failed_cookies} could not be decrypted)' else: failed_message = '' - logger.info('Extracted {} cookies from {}{}'.format(len(jar), browser_name, failed_message)) - counts = decryptor.cookie_counts.copy() + logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}') + counts = decryptor._cookie_counts.copy() counts['unencrypted'] = unencrypted_cookies - logger.debug('cookie version breakdown: {}'.format(counts)) + logger.debug(f'cookie version breakdown: {counts}') return jar finally: if cursor is not None: @@ -309,10 +292,10 @@ def _extract_chrome_cookies(browser_name, profile, keyring, logger): def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure): - host_key = host_key.decode('utf-8') - name = name.decode('utf-8') - value = value.decode('utf-8') - path = path.decode('utf-8') + host_key = host_key.decode() + name = name.decode() + value = value.decode() + path = path.decode() is_encrypted = not value and encrypted_value if is_encrypted: @@ -353,12 +336,10 @@ class ChromeCookieDecryptor: - KeyStorageLinux::CreateService """ - def decrypt(self, encrypted_value): - raise NotImplementedError + _cookie_counts = {} - @property - def cookie_counts(self): - raise NotImplementedError + def decrypt(self, encrypted_value): + raise NotImplementedError('Must be implemented by sub classes') def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, 
keyring=None): @@ -369,8 +350,7 @@ def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring= elif sys.platform == 'win32': return WindowsChromeCookieDecryptor(browser_root, logger) else: - raise NotImplementedError('Chrome cookie decryption is not supported ' - 'on this platform: {}'.format(sys.platform)) + raise NotImplementedError(f'Chrome cookie decryption is not supported on this platform: {sys.platform}') class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): @@ -387,10 +367,6 @@ class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_linux.cc return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16) - @property - def cookie_counts(self): - return self._cookie_counts - def decrypt(self, encrypted_value): version = encrypted_value[:3] ciphertext = encrypted_value[3:] @@ -424,10 +400,6 @@ class MacChromeCookieDecryptor(ChromeCookieDecryptor): # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_mac.mm return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16) - @property - def cookie_counts(self): - return self._cookie_counts - def decrypt(self, encrypted_value): version = encrypted_value[:3] ciphertext = encrypted_value[3:] @@ -453,10 +425,6 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): self._v10_key = _get_windows_v10_key(browser_root, logger) self._cookie_counts = {'v10': 0, 'other': 0} - @property - def cookie_counts(self): - return self._cookie_counts - def decrypt(self, encrypted_value): version = encrypted_value[:3] ciphertext = encrypted_value[3:] @@ -485,14 +453,14 @@ class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): self._cookie_counts['other'] += 1 # any other prefix means the data is DPAPI encrypted # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/os_crypt_win.cc - return _decrypt_windows_dpapi(encrypted_value, self._logger).decode('utf-8') + return _decrypt_windows_dpapi(encrypted_value, self._logger).decode() def _extract_safari_cookies(profile, logger): if profile is not None: logger.error('safari does not support profiles') if sys.platform != 'darwin': - raise ValueError('unsupported platform: {}'.format(sys.platform)) + raise ValueError(f'unsupported platform: {sys.platform}') cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies') @@ -506,7 +474,7 @@ def _extract_safari_cookies(profile, logger): cookies_data = f.read() jar = parse_safari_cookies(cookies_data, logger=logger) - logger.info('Extracted {} cookies from safari'.format(len(jar))) + logger.info(f'Extracted {len(jar)} cookies from safari') return jar @@ -522,7 +490,7 @@ class DataParser: def read_bytes(self, num_bytes): if num_bytes < 0: - raise ParserError('invalid read of {} bytes'.format(num_bytes)) + raise ParserError(f'invalid read of {num_bytes} bytes') end = self.cursor + num_bytes if end > len(self._data): raise ParserError('reached end of input') @@ -533,7 +501,7 @@ class DataParser: def expect_bytes(self, expected_value, message): value = self.read_bytes(len(expected_value)) if value != expected_value: - raise ParserError('unexpected value: {} != {} ({})'.format(value, expected_value, message)) + raise ParserError(f'unexpected value: {value} != {expected_value} ({message})') def read_uint(self, big_endian=False): data_format = '>I' if big_endian else '<I' @@ -548,16 +516,15 @@ class DataParser: while True: c = 
self.read_bytes(1) if c == b'\x00': - return b''.join(buffer).decode('utf-8') + return b''.join(buffer).decode() else: buffer.append(c) def skip(self, num_bytes, description='unknown'): if num_bytes > 0: - self._logger.debug('skipping {} bytes ({}): {}'.format( - num_bytes, description, self.read_bytes(num_bytes))) + self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}') elif num_bytes < 0: - raise ParserError('invalid skip of {} bytes'.format(num_bytes)) + raise ParserError(f'invalid skip of {num_bytes} bytes') def skip_to(self, offset, description='unknown'): self.skip(offset - self.cursor, description) @@ -584,7 +551,7 @@ def _parse_safari_cookies_page(data, jar, logger): number_of_cookies = p.read_uint() record_offsets = [p.read_uint() for _ in range(number_of_cookies)] if number_of_cookies == 0: - logger.debug('a cookies page of size {} has no cookies'.format(len(data))) + logger.debug(f'a cookies page of size {len(data)} has no cookies') return p.skip_to(record_offsets[0], 'unknown page header field') @@ -730,7 +697,7 @@ def _choose_linux_keyring(logger): SelectBackend """ desktop_environment = _get_linux_desktop_environment(os.environ) - logger.debug('detected desktop environment: {}'.format(desktop_environment.name)) + logger.debug(f'detected desktop environment: {desktop_environment.name}') if desktop_environment == _LinuxDesktopEnvironment.KDE: linux_keyring = _LinuxKeyring.KWALLET elif desktop_environment == _LinuxDesktopEnvironment.OTHER: @@ -763,11 +730,11 @@ def _get_kwallet_network_wallet(logger): logger.warning('failed to read NetworkWallet') return default_wallet else: - network_wallet = stdout.decode('utf-8').strip() - logger.debug('NetworkWallet = "{}"'.format(network_wallet)) + network_wallet = stdout.decode().strip() + logger.debug(f'NetworkWallet = "{network_wallet}"') return network_wallet except Exception as e: - logger.warning('exception while obtaining NetworkWallet: {}'.format(e)) + logger.warning(f'exception while obtaining NetworkWallet: {e}') return default_wallet @@ -785,15 +752,15 @@ def _get_kwallet_password(browser_keyring_name, logger): try: proc = Popen([ 'kwallet-query', - '--read-password', '{} Safe Storage'.format(browser_keyring_name), - '--folder', '{} Keys'.format(browser_keyring_name), + '--read-password', f'{browser_keyring_name} Safe Storage', + '--folder', f'{browser_keyring_name} Keys', network_wallet ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) stdout, stderr = proc.communicate_or_kill() if proc.returncode != 0: - logger.error('kwallet-query failed with return code {}. Please consult ' - 'the kwallet-query man page for details'.format(proc.returncode)) + logger.error(f'kwallet-query failed with return code {proc.returncode}. 
Please consult ' + 'the kwallet-query man page for details') return b'' else: if stdout.lower().startswith(b'failed to read'): @@ -817,8 +784,8 @@ def _get_kwallet_password(browser_keyring_name, logger): def _get_gnome_keyring_password(browser_keyring_name, logger): - if not SECRETSTORAGE_AVAILABLE: - logger.error('secretstorage not available {}'.format(SECRETSTORAGE_UNAVAILABLE_REASON)) + if not secretstorage: + logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}') return b'' # the Gnome keyring does not seem to organise keys in the same way as KWallet, # using `dbus-monitor` during startup, it can be observed that chromium lists all keys @@ -827,7 +794,7 @@ def _get_gnome_keyring_password(browser_keyring_name, logger): with contextlib.closing(secretstorage.dbus_init()) as con: col = secretstorage.get_default_collection(con) for item in col.get_all_items(): - if item.get_label() == '{} Safe Storage'.format(browser_keyring_name): + if item.get_label() == f'{browser_keyring_name} Safe Storage': return item.get_secret() else: logger.error('failed to read from keyring') @@ -861,7 +828,7 @@ def _get_mac_keyring_password(browser_keyring_name, logger): ['security', 'find-generic-password', '-w', # write password to stdout '-a', browser_keyring_name, # match 'account' - '-s', '{} Safe Storage'.format(browser_keyring_name)], # match 'service' + '-s', f'{browser_keyring_name} Safe Storage'], # match 'service' stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) stdout, stderr = proc.communicate_or_kill() @@ -879,7 +846,7 @@ def _get_windows_v10_key(browser_root, logger): logger.error('could not find local state file') return None logger.debug(f'Found local state file at "{path}"') - with open(path, 'r', encoding='utf8') as f: + with open(path, encoding='utf8') as f: data = json.load(f) try: base64_key = data['os_crypt']['encrypted_key'] @@ -901,7 +868,7 @@ def pbkdf2_sha1(password, salt, iterations, key_length): def _decrypt_aes_cbc(ciphertext, key, logger, initialization_vector=b' ' * 16): plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector)) try: - return plaintext.decode('utf-8') + return plaintext.decode() except UnicodeDecodeError: logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) return None @@ -915,7 +882,7 @@ def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger): return None try: - return plaintext.decode('utf-8') + return plaintext.decode() except UnicodeDecodeError: logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True) return None @@ -966,8 +933,8 @@ def _open_database_copy(database_path, tmpdir): def _get_column_names(cursor, table_name): - table_info = cursor.execute('PRAGMA table_info({})'.format(table_name)).fetchall() - return [row[1].decode('utf-8') for row in table_info] + table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall() + return [row[1].decode() for row in table_info] def _find_most_recently_used_file(root, filename, logger): diff --git a/yt_dlp/dependencies.py b/yt_dlp/dependencies.py new file mode 100644 index 000000000..772cfb576 --- /dev/null +++ b/yt_dlp/dependencies.py @@ -0,0 +1,97 @@ +# flake8: noqa: F401 +"""Imports all optional dependencies for the project. 
+An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambigious namespace""" + +try: + import brotlicffi as brotli +except ImportError: + try: + import brotli + except ImportError: + brotli = None + + +try: + import certifi +except ImportError: + certifi = None +else: + from os.path import exists as _path_exists + + # The certificate may not be bundled in executable + if not _path_exists(certifi.where()): + certifi = None + + +try: + from Cryptodome.Cipher import AES as Cryptodome_AES +except ImportError: + try: + from Crypto.Cipher import AES as Cryptodome_AES + except ImportError: + Cryptodome_AES = None + else: + try: + # In pycrypto, mode defaults to ECB. See: + # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode + Cryptodome_AES.new(b'abcdefghijklmnop') + except TypeError: + pass + else: + Cryptodome_AES._yt_dlp__identifier = 'pycrypto' + + +try: + import mutagen +except ImportError: + mutagen = None + + +secretstorage = None +try: + import secretstorage + _SECRETSTORAGE_UNAVAILABLE_REASON = None +except ImportError: + _SECRETSTORAGE_UNAVAILABLE_REASON = ( + 'as the `secretstorage` module is not installed. ' + 'Please install by running `python3 -m pip install secretstorage`') +except Exception as _err: + _SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. {_err}' + + +try: + import sqlite3 +except ImportError: + # although sqlite3 is part of the standard library, it is possible to compile python without + # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 + sqlite3 = None + + +try: + import websockets +except (ImportError, SyntaxError): + # websockets 3.10 on python 3.6 causes SyntaxError + # See https://github.com/yt-dlp/yt-dlp/issues/2633 + websockets = None + + +try: + import xattr # xattr or pyxattr +except ImportError: + xattr = None +else: + if hasattr(xattr, 'set'): # pyxattr + xattr._yt_dlp__identifier = 'pyxattr' + + +all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')} + + +available_dependencies = {k: v for k, v in all_dependencies.items() if v} + + +__all__ = [ + 'all_dependencies', + 'available_dependencies', + *all_dependencies.keys(), +] diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 96d484dee..5aba303dd 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -1,10 +1,5 @@ -from __future__ import unicode_literals - from ..compat import compat_str -from ..utils import ( - determine_protocol, - NO_DEFAULT -) +from ..utils import NO_DEFAULT, determine_protocol def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False): @@ -29,21 +24,18 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N # Some of these require get_suitable_downloader from .common import FileDownloader from .dash import DashSegmentsFD +from .external import FFmpegFD, get_external_downloader from .f4m import F4mFD from .fc2 import FC2LiveFD from .hls import HlsFD from .http import HttpFD -from .rtmp import RtmpFD -from .rtsp import RtspFD from .ism import IsmFD from .mhtml import MhtmlFD from .niconico import NiconicoDmcFD +from .rtmp import RtmpFD +from .rtsp import RtspFD from .websocket import WebSocketFragmentFD from .youtube_live_chat import YoutubeLiveChatFD -from .external import ( - get_external_downloader, - FFmpegFD, -) PROTOCOL_MAP = { 'rtmp': RtmpFD, diff --git a/yt_dlp/downloader/common.py 
b/yt_dlp/downloader/common.py index cbfea7a65..1f14ebb3a 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -1,31 +1,32 @@ -from __future__ import division, unicode_literals - +import contextlib +import errno import os +import random import re import time -import random -import errno +from ..minicurses import ( + BreaklineStatusPrinter, + MultilineLogger, + MultilinePrinter, + QuietMultilinePrinter, +) from ..utils import ( + NUMBER_RE, + LockingUnsupportedError, + Namespace, decodeArgument, encodeFilename, error_to_compat_str, format_bytes, - LockingUnsupportedError, sanitize_open, shell_quote, timeconvert, timetuple_from_msec, ) -from ..minicurses import ( - MultilineLogger, - MultilinePrinter, - QuietMultilinePrinter, - BreaklineStatusPrinter -) -class FileDownloader(object): +class FileDownloader: """File Downloader class. File downloader objects are the ones responsible of downloading the @@ -72,12 +73,35 @@ class FileDownloader(object): def __init__(self, ydl, params): """Create a FileDownloader object with the given options.""" - self.ydl = ydl + self._set_ydl(ydl) self._progress_hooks = [] self.params = params self._prepare_multiline_status() self.add_progress_hook(self.report_progress) + def _set_ydl(self, ydl): + self.ydl = ydl + + for func in ( + 'deprecation_warning', + 'report_error', + 'report_file_already_downloaded', + 'report_warning', + 'to_console_title', + 'to_stderr', + 'trouble', + 'write_debug', + ): + if not hasattr(self, func): + setattr(self, func, getattr(ydl, func)) + + def to_screen(self, *args, **kargs): + self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) + + @property + def FD_NAME(self): + return re.sub(r'(?<!^)(?=[A-Z])', '_', type(self).__name__[:-2]).lower() + @staticmethod def format_seconds(seconds): time = timetuple_from_msec(seconds * 1000) @@ -152,34 +176,13 @@ class FileDownloader(object): @staticmethod def parse_bytes(bytestr): """Parse a string indicating a byte quantity into an integer.""" - matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) + matchobj = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr) if matchobj is None: return None number = float(matchobj.group(1)) multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return int(round(number * multiplier)) - def to_screen(self, *args, **kargs): - self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) - - def to_stderr(self, message): - self.ydl.to_stderr(message) - - def to_console_title(self, message): - self.ydl.to_console_title(message) - - def trouble(self, *args, **kargs): - self.ydl.trouble(*args, **kargs) - - def report_warning(self, *args, **kargs): - self.ydl.report_warning(*args, **kargs) - - def report_error(self, *args, **kargs): - self.ydl.report_error(*args, **kargs) - - def write_debug(self, *args, **kargs): - self.ydl.write_debug(*args, **kargs) - def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" rate_limit = self.params.get('ratelimit') @@ -219,7 +222,7 @@ class FileDownloader(object): while True: try: return func(self, *args, **kwargs) - except (IOError, OSError) as err: + except OSError as err: retry = retry + 1 if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL): if not fatal: @@ -265,10 +268,8 @@ class FileDownloader(object): # Ignore obviously invalid dates if filetime == 0: return - try: + with contextlib.suppress(Exception): os.utime(filename, (time.time(), filetime)) - except Exception: - 
pass return filetime def report_destination(self, filename): @@ -289,18 +290,18 @@ class FileDownloader(object): def _finish_multiline_status(self): self._multiline.end() - _progress_styles = { - 'downloaded_bytes': 'light blue', - 'percent': 'light blue', - 'eta': 'yellow', - 'speed': 'green', - 'elapsed': 'bold white', - 'total_bytes': '', - 'total_bytes_estimate': '', - } + ProgressStyles = Namespace( + downloaded_bytes='light blue', + percent='light blue', + eta='yellow', + speed='green', + elapsed='bold white', + total_bytes='', + total_bytes_estimate='', + ) def _report_progress_status(self, s, default_template): - for name, style in self._progress_styles.items(): + for name, style in self.ProgressStyles._asdict().items(): name = f'_{name}_str' if name not in s: continue @@ -393,10 +394,6 @@ class FileDownloader(object): '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...' % (error_to_compat_str(err), count, self.format_retries(retries))) - def report_file_already_downloaded(self, *args, **kwargs): - """Report file has already been fully downloaded.""" - return self.ydl.report_file_already_downloaded(*args, **kwargs) - def report_unable_to_resume(self): """Report it was impossible to resume download.""" self.to_screen('[download] Unable to resume') @@ -435,25 +432,16 @@ class FileDownloader(object): self._finish_multiline_status() return True, False - if subtitle is False: - min_sleep_interval = self.params.get('sleep_interval') - if min_sleep_interval: - max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) - self.to_screen( - '[download] Sleeping %s seconds ...' % ( - int(sleep_interval) if sleep_interval.is_integer() - else '%.2f' % sleep_interval)) - time.sleep(sleep_interval) + if subtitle: + sleep_interval = self.params.get('sleep_interval_subtitles') or 0 else: - sleep_interval_sub = 0 - if type(self.params.get('sleep_interval_subtitles')) is int: - sleep_interval_sub = self.params.get('sleep_interval_subtitles') - if sleep_interval_sub > 0: - self.to_screen( - '[download] Sleeping %s seconds ...' 
% ( - sleep_interval_sub)) - time.sleep(sleep_interval_sub) + min_sleep_interval = self.params.get('sleep_interval') or 0 + sleep_interval = random.uniform( + min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval) + if sleep_interval > 0: + self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...') + time.sleep(sleep_interval) + ret = self.real_download(filename, info_dict) self._finish_multiline_status() return ret, True @@ -486,4 +474,4 @@ class FileDownloader(object): if exe is None: exe = os.path.basename(str_args[0]) - self.write_debug('%s command line: %s' % (exe, shell_quote(str_args))) + self.write_debug(f'{exe} command line: {shell_quote(str_args)}') diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index a845ee7d3..e6efae485 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -1,9 +1,7 @@ -from __future__ import unicode_literals import time -from ..downloader import get_suitable_downloader from .fragment import FragmentFD - +from ..downloader import get_suitable_downloader from ..utils import urljoin @@ -46,7 +44,7 @@ class DashSegmentsFD(FragmentFD): if real_downloader: self.to_screen( - '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) + f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') info_dict['fragments'] = list(fragments_to_download) fd = real_downloader(self.ydl, self.params) return fd.real_download(filename, info_dict) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 71af705ea..85c6a6977 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import os.path import re import subprocess @@ -7,24 +5,22 @@ import sys import time from .fragment import FragmentFD -from ..compat import ( - compat_setenv, - compat_str, -) -from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS +from ..compat import compat_setenv, compat_str +from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..utils import ( + Popen, + _configuration_args, + check_executable, classproperty, + cli_bool_option, cli_option, cli_valueless_option, - cli_bool_option, - _configuration_args, determine_ext, - encodeFilename, encodeArgument, + encodeFilename, handle_youtubedl_headers, - check_executable, - Popen, remove_end, + traverse_obj, ) @@ -56,7 +52,7 @@ class ExternalFD(FragmentFD): } if filename != '-': fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) + self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) status.update({ 'downloaded_bytes': fsize, @@ -157,7 +153,7 @@ class ExternalFD(FragmentFD): fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index) try: src, _ = self.sanitize_open(fragment_filename, 'rb') - except IOError as err: + except OSError as err: if skip_unavailable_fragments and frag_index > 1: self.report_skip_fragment(frag_index, err) continue @@ -179,7 +175,7 @@ class CurlFD(ExternalFD): cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + cmd += ['--header', f'{key}: {val}'] cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') cmd += 
self._valueless_option('--silent', 'noprogress') @@ -216,7 +212,7 @@ class AxelFD(ExternalFD): cmd = [self.exe, '-o', tmpfilename] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): - cmd += ['-H', '%s: %s' % (key, val)] + cmd += ['-H', f'{key}: {val}'] cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -229,7 +225,7 @@ class WgetFD(ExternalFD): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + cmd += ['--header', f'{key}: {val}'] cmd += self._option('--limit-rate', 'ratelimit') retry = self._option('--tries', 'retries') if len(retry) == 2: @@ -240,7 +236,7 @@ class WgetFD(ExternalFD): proxy = self.params.get('proxy') if proxy: for var in ('http_proxy', 'https_proxy'): - cmd += ['--execute', '%s=%s' % (var, proxy)] + cmd += ['--execute', f'{var}={proxy}'] cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] @@ -271,7 +267,7 @@ class Aria2cFD(ExternalFD): if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): - cmd += ['--header', '%s: %s' % (key, val)] + cmd += ['--header', f'{key}: {val}'] cmd += self._option('--max-overall-download-limit', 'ratelimit') cmd += self._option('--interface', 'source_address') cmd += self._option('--all-proxy', 'proxy') @@ -289,10 +285,10 @@ class Aria2cFD(ExternalFD): dn = os.path.dirname(tmpfilename) if dn: if not os.path.isabs(dn): - dn = '.%s%s' % (os.path.sep, dn) + dn = f'.{os.path.sep}{dn}' cmd += ['--dir', dn + os.path.sep] if 'fragments' not in info_dict: - cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))] + cmd += ['--out', f'.{os.path.sep}{os.path.basename(tmpfilename)}'] cmd += ['--auto-file-renaming=false'] if 'fragments' in info_dict: @@ -303,7 +299,7 @@ class Aria2cFD(ExternalFD): fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index) url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename)) stream, _ = self.sanitize_open(url_list_file, 'wb') - stream.write('\n'.join(url_list).encode('utf-8')) + stream.write('\n'.join(url_list).encode()) stream.close() cmd += ['-i', url_list_file] else: @@ -320,7 +316,7 @@ class HttpieFD(ExternalFD): if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): - cmd += ['%s:%s' % (key, val)] + cmd += [f'{key}:{val}'] return cmd @@ -368,9 +364,11 @@ class FFmpegFD(ExternalFD): if not self.params.get('verbose'): args += ['-hide_banner'] - args += info_dict.get('_ffmpeg_args', []) + args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[]) - # This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead + # These exist only for compatibility. 
Extractors should use + # info_dict['downloader_options']['ffmpeg_args'] instead + args += info_dict.get('_ffmpeg_args') or [] seekable = info_dict.get('_seekable') if seekable is not None: # setting -seekable prevents ffmpeg from guessing if the server @@ -387,13 +385,15 @@ class FFmpegFD(ExternalFD): # if end_time: # args += ['-t', compat_str(end_time - start_time)] - if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]): - # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: - # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. - headers = handle_youtubedl_headers(info_dict['http_headers']) - args += [ + http_headers = None + if info_dict.get('http_headers'): + youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers']) + http_headers = [ + # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: + # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. '-headers', - ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] + ''.join(f'{key}: {val}\r\n' for key, val in youtubedl_headers.items()) + ] env = None proxy = self.params.get('proxy') @@ -446,6 +446,11 @@ class FFmpegFD(ExternalFD): args += ['-rtmp_conn', conn] for i, url in enumerate(urls): + # We need to specify headers for each http input stream + # otherwise, it will only be applied to the first. + # https://github.com/yt-dlp/yt-dlp/issues/2696 + if http_headers is not None and re.match(r'^https?://', url): + args += http_headers args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url] args += ['-c', 'copy'] diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py index 0008b7c28..3629d63f5 100644 --- a/yt_dlp/downloader/f4m.py +++ b/yt_dlp/downloader/f4m.py @@ -1,5 +1,3 @@ -from __future__ import division, unicode_literals - import io import itertools import time @@ -8,16 +6,13 @@ from .fragment import FragmentFD from ..compat import ( compat_b64decode, compat_etree_fromstring, - compat_urlparse, - compat_urllib_error, - compat_urllib_parse_urlparse, compat_struct_pack, compat_struct_unpack, + compat_urllib_error, + compat_urllib_parse_urlparse, + compat_urlparse, ) -from ..utils import ( - fix_xml_ampersands, - xpath_text, -) +from ..utils import fix_xml_ampersands, xpath_text class DataTruncatedError(Exception): @@ -261,8 +256,6 @@ class F4mFD(FragmentFD): A downloader for f4m manifests or AdobeHDS. """ - FD_NAME = 'f4m' - def _get_unencrypted_media(self, doc): media = doc.findall(_add_ns('media')) if not media: @@ -417,7 +410,7 @@ class F4mFD(FragmentFD): if box_type == b'mdat': self._append_fragment(ctx, box_data) break - except (compat_urllib_error.HTTPError, ) as err: + except compat_urllib_error.HTTPError as err: if live and (err.code == 404 or err.code == 410): # We didn't keep up with the live window. Continue # with the next available fragment. 
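
The FFmpegFD hunk above interleaves the `-headers` flags with the input URLs because ffmpeg applies `-headers` only to the input that immediately follows it; passing the flags once up front would leave every input after the first without its HTTP headers (https://github.com/yt-dlp/yt-dlp/issues/2696). A minimal sketch of the resulting argv shape, modelled on the loop in the patch — the header value, URLs, and output name below are hypothetical, not taken from the diff:

# Sketch only: illustrates the per-input header placement the patch ensures.
http_headers = ['-headers', 'Referer: https://example.com/\r\n']  # hypothetical header
urls = ['https://example.com/video.m3u8', 'https://example.com/audio.m3u8']  # hypothetical inputs
args = ['ffmpeg', '-hide_banner']
for url in urls:
    if url.startswith(('http://', 'https://')):
        args += http_headers  # repeated before every HTTP input, not just the first
    args += ['-i', url]
args += ['-c', 'copy', 'out.mp4']
print(args)

Run as-is, this prints an argv list in which each `-i` is preceded by its own `-headers` pair, which is the behaviour the change guarantees for multi-input (e.g. separate video and audio stream) downloads.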
diff --git a/yt_dlp/downloader/fc2.py b/yt_dlp/downloader/fc2.py index 157bcf23e..f9763debb 100644 --- a/yt_dlp/downloader/fc2.py +++ b/yt_dlp/downloader/fc2.py @@ -1,5 +1,3 @@ -from __future__ import division, unicode_literals - import threading from .common import FileDownloader @@ -20,6 +18,9 @@ class FC2LiveFD(FileDownloader): heartbeat_state = [None, 1] def heartbeat(): + if heartbeat_state[1] < 0: + return + try: heartbeat_state[1] += 1 ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1]) @@ -38,4 +39,8 @@ class FC2LiveFD(FileDownloader): 'ws': None, 'protocol': 'live_ffmpeg', }) - return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict) + try: + return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict) + finally: + # stop heartbeating + heartbeat_state[1] = -1 diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index c45a8a476..4655f067f 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -1,29 +1,19 @@ -from __future__ import division, unicode_literals - +import concurrent.futures +import contextlib import http.client import json import math import os import time -try: - import concurrent.futures - can_threaded_download = True -except ImportError: - can_threaded_download = False - from .common import FileDownloader from .http import HttpFD from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 -from ..compat import ( - compat_os_name, - compat_urllib_error, - compat_struct_pack, -) +from ..compat import compat_os_name, compat_struct_pack, compat_urllib_error from ..utils import ( DownloadError, - error_to_compat_str, encodeFilename, + error_to_compat_str, sanitized_Request, traverse_obj, ) @@ -33,6 +23,8 @@ class HttpQuietDownloader(HttpFD): def to_screen(self, *args, **kargs): pass + console_title = to_screen + def report_retry(self, err, count, retries): super().to_screen( f'[download] Got server HTTP error: {err}. 
Retrying (attempt {count} of {self.format_retries(retries)}) ...') @@ -131,7 +123,7 @@ class FragmentFD(FileDownloader): 'request_data': request_data, 'ctx_id': ctx.get('ctx_id'), } - success = ctx['dl'].download(fragment_filename, fragment_info_dict) + success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: return False if fragment_info_dict.get('filetime'): @@ -140,6 +132,8 @@ class FragmentFD(FileDownloader): return True def _read_fragment(self, ctx): + if not ctx.get('fragment_filename_sanitized'): + return None try: down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') except FileNotFoundError: @@ -172,8 +166,7 @@ class FragmentFD(FileDownloader): total_frags_str += ' (not including %d ad)' % ad_frags else: total_frags_str = 'unknown (live)' - self.to_screen( - '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) + self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') self.report_destination(ctx['filename']) dl = HttpQuietDownloader( self.ydl, @@ -184,7 +177,7 @@ class FragmentFD(FileDownloader): 'ratelimit': self.params.get('ratelimit'), 'retries': self.params.get('retries', 0), 'nopart': self.params.get('nopart', False), - 'test': self.params.get('test', False), + 'test': False, } ) tmpfilename = self.temp_name(ctx['filename']) @@ -315,10 +308,8 @@ class FragmentFD(FileDownloader): if self.params.get('updatetime', True): filetime = ctx.get('fragment_filetime') if filetime: - try: + with contextlib.suppress(Exception): os.utime(ctx['filename'], (time.time(), filetime)) - except Exception: - pass downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) self._hook_progress({ @@ -342,8 +333,7 @@ class FragmentFD(FileDownloader): total_frags_str += ' (not including %d ad)' % ad_frags else: total_frags_str = 'unknown (live)' - self.to_screen( - '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) + self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') tmpfilename = self.temp_name(ctx['filename']) @@ -508,8 +498,7 @@ class FragmentFD(FileDownloader): max_workers = math.ceil( self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1)) - if can_threaded_download and max_workers > 1: - + if max_workers > 1: def _download_fragment(fragment): ctx_copy = ctx.copy() download_fragment(fragment, ctx_copy) @@ -527,8 +516,14 @@ class FragmentFD(FileDownloader): for fragment in fragments: if not interrupt_trigger[0]: break - download_fragment(fragment, ctx) - result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) + try: + download_fragment(fragment, ctx) + result = append_fragment( + decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) + except KeyboardInterrupt: + if info_dict.get('is_live'): + break + raise if not result: return False diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index f3f32b514..0bd2f121c 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -1,23 +1,14 @@ -from __future__ import unicode_literals - -import re -import io import binascii +import io +import re -from ..downloader import get_suitable_downloader -from .fragment import FragmentFD from .external import FFmpegFD - -from ..compat import ( - compat_pycrypto_AES, - compat_urlparse, -) -from ..utils import ( - parse_m3u8_attributes, - update_url_query, - bug_reports_message, -) +from .fragment import FragmentFD from .. 
import webvtt +from ..compat import compat_urlparse +from ..dependencies import Cryptodome_AES +from ..downloader import get_suitable_downloader +from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query class HlsFD(FragmentFD): @@ -70,7 +61,7 @@ class HlsFD(FragmentFD): s = urlh.read().decode('utf-8', 'ignore') can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None - if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s: + if can_download and not Cryptodome_AES and '#EXT-X-KEY:METHOD=AES-128' in s: if FFmpegFD.available(): can_download, message = False, 'The stream has AES-128 encryption and pycryptodome is not available' else: @@ -102,8 +93,7 @@ class HlsFD(FragmentFD): if real_downloader and not real_downloader.supports_manifest(s): real_downloader = None if real_downloader: - self.to_screen( - '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) + self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') def is_ad_fragment_start(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s @@ -201,6 +191,14 @@ class HlsFD(FragmentFD): if extra_query: frag_url = update_url_query(frag_url, extra_query) + if map_info.get('BYTERANGE'): + splitted_byte_range = map_info.get('BYTERANGE').split('@') + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] + byte_range = { + 'start': sub_range_start, + 'end': sub_range_start + int(splitted_byte_range[0]), + } + fragments.append({ 'frag_index': frag_index, 'url': frag_url, @@ -210,14 +208,6 @@ class HlsFD(FragmentFD): }) media_sequence += 1 - if map_info.get('BYTERANGE'): - splitted_byte_range = map_info.get('BYTERANGE').split('@') - sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] - byte_range = { - 'start': sub_range_start, - 'end': sub_range_start + int(splitted_byte_range[0]), - } - elif line.startswith('#EXT-X-KEY'): decrypt_url = decrypt_info.get('URI') decrypt_info = parse_m3u8_attributes(line[11:]) @@ -339,7 +329,7 @@ class HlsFD(FragmentFD): continue block.write_into(output) - return output.getvalue().encode('utf-8') + return output.getvalue().encode() def fin_fragments(): dedup_window = extra_state.get('webvtt_dedup_window') @@ -350,7 +340,7 @@ class HlsFD(FragmentFD): for cue in dedup_window: webvtt.CueBlock.from_json(cue).write_into(output) - return output.getvalue().encode('utf-8') + return output.getvalue().encode() self.download_and_append_fragments( ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py index a232168fa..12a2f0cc7 100644 --- a/yt_dlp/downloader/http.py +++ b/yt_dlp/downloader/http.py @@ -1,29 +1,31 @@ -from __future__ import unicode_literals - import os +import random +import socket import ssl import time -import random from .common import FileDownloader -from ..compat import ( - compat_urllib_error, - compat_http_client -) +from ..compat import compat_http_client, compat_urllib_error from ..utils import ( ContentTooShortError, + ThrottledDownload, + XAttrMetadataError, + XAttrUnavailableError, encodeFilename, int_or_none, parse_http_range, sanitized_Request, - ThrottledDownload, try_call, write_xattr, - XAttrMetadataError, - XAttrUnavailableError, ) -RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, 
compat_http_client.HTTPException) +RESPONSE_READ_EXCEPTIONS = ( + TimeoutError, + socket.timeout, # compat: py < 3.10 + ConnectionError, + ssl.SSLError, + compat_http_client.HTTPException +) class HttpFD(FileDownloader): @@ -155,7 +157,7 @@ class HttpFD(FileDownloader): ctx.resume_len = 0 ctx.open_mode = 'wb' ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None)) - except (compat_urllib_error.HTTPError, ) as err: + except compat_urllib_error.HTTPError as err: if err.code == 416: # Unable to resume (requested range not satisfiable) try: @@ -163,7 +165,7 @@ class HttpFD(FileDownloader): ctx.data = self.ydl.urlopen( sanitized_Request(url, request_data, headers)) content_length = ctx.data.info()['Content-Length'] - except (compat_urllib_error.HTTPError, ) as err: + except compat_urllib_error.HTTPError as err: if err.code < 500 or err.code >= 600: raise else: @@ -221,10 +223,12 @@ class HttpFD(FileDownloader): min_data_len = self.params.get('min_filesize') max_data_len = self.params.get('max_filesize') if min_data_len is not None and data_len < min_data_len: - self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + self.to_screen( + f'\r[download] File is smaller than min-filesize ({data_len} bytes < {min_data_len} bytes). Aborting.') return False if max_data_len is not None and data_len > max_data_len: - self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) + self.to_screen( + f'\r[download] File is larger than max-filesize ({data_len} bytes > {max_data_len} bytes). Aborting.') return False byte_counter = 0 + ctx.resume_len @@ -265,19 +269,19 @@ class HttpFD(FileDownloader): assert ctx.stream is not None ctx.filename = self.undo_temp_name(ctx.tmpfilename) self.report_destination(ctx.filename) - except (OSError, IOError) as err: + except OSError as err: self.report_error('unable to open for writing: %s' % str(err)) return False if self.params.get('xattr_set_filesize', False) and data_len is not None: try: - write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) + write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error('unable to set filesize xattr: %s' % str(err)) try: ctx.stream.write(data_block) - except (IOError, OSError) as err: + except OSError as err: self.to_stderr('\n') self.report_error('unable to write data: %s' % str(err)) return False diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py index 4d5618c83..9efc5e4d9 100644 --- a/yt_dlp/downloader/ism.py +++ b/yt_dlp/downloader/ism.py @@ -1,27 +1,22 @@ -from __future__ import unicode_literals - -import time import binascii import io +import struct +import time from .fragment import FragmentFD -from ..compat import ( - compat_Struct, - compat_urllib_error, -) +from ..compat import compat_urllib_error +u8 = struct.Struct('>B') +u88 = struct.Struct('>Bx') +u16 = struct.Struct('>H') +u1616 = struct.Struct('>Hxx') +u32 = struct.Struct('>I') +u64 = struct.Struct('>Q') -u8 = compat_Struct('>B') -u88 = compat_Struct('>Bx') -u16 = compat_Struct('>H') -u1616 = compat_Struct('>Hxx') -u32 = compat_Struct('>I') -u64 = compat_Struct('>Q') - -s88 = compat_Struct('>bx') -s16 = compat_Struct('>h') -s1616 = compat_Struct('>hxx') -s32 = compat_Struct('>i') +s88 = struct.Struct('>bx') +s16 = struct.Struct('>h') +s1616 = struct.Struct('>hxx') +s32 = 
struct.Struct('>i') unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) @@ -156,7 +151,7 @@ def write_piff_header(stream, params): sample_entry_payload += u16.pack(0x18) # depth sample_entry_payload += s16.pack(-1) # pre defined - codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8')) + codec_private_data = binascii.unhexlify(params['codec_private_data'].encode()) if fourcc in ('H264', 'AVC1'): sps, pps = codec_private_data.split(u32.pack(1))[1:] avcc_payload = u8.pack(1) # configuration version @@ -235,8 +230,6 @@ class IsmFD(FragmentFD): Download segments in a ISM manifest """ - FD_NAME = 'ism' - def real_download(self, filename, info_dict): segments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index 54e711792..ce2d39947 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -1,24 +1,14 @@ -# coding: utf-8 -from __future__ import unicode_literals - import io import quopri import re import uuid from .fragment import FragmentFD -from ..utils import ( - escapeHTML, - formatSeconds, - srt_subtitles_timecode, - urljoin, -) +from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin from ..version import __version__ as YT_DLP_VERSION class MhtmlFD(FragmentFD): - FD_NAME = 'mhtml' - _STYLESHEET = """\ html, body { margin: 0; @@ -62,7 +52,7 @@ body > figure > img { def _escape_mime(s): return '=?utf-8?Q?' + (b''.join( bytes((b,)) if b >= 0x20 else b'=%02X' % b - for b in quopri.encodestring(s.encode('utf-8'), header=True) + for b in quopri.encodestring(s.encode(), header=True) )).decode('us-ascii') + '?=' def _gen_cid(self, i, fragment, frag_boundary): @@ -159,7 +149,7 @@ body > figure > img { length=len(stub), title=self._escape_mime(title), stub=stub - ).encode('utf-8')) + ).encode()) extra_state['header_written'] = True for i, fragment in enumerate(fragments): @@ -181,7 +171,7 @@ body > figure > img { mime_type = b'image/png' if frag_content.startswith((b'GIF87a', b'GIF89a')): mime_type = b'image/gif' - if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP': + if frag_content.startswith(b'RIFF') and frag_content[8:12] == b'WEBP': mime_type = b'image/webp' frag_header = io.BytesIO() diff --git a/yt_dlp/downloader/niconico.py b/yt_dlp/downloader/niconico.py index 521dfece3..5e9dda03d 100644 --- a/yt_dlp/downloader/niconico.py +++ b/yt_dlp/downloader/niconico.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import threading from .common import FileDownloader @@ -12,8 +9,6 @@ from ..utils import sanitized_Request class NiconicoDmcFD(FileDownloader): """ Downloading niconico douga from DMC with heartbeat """ - FD_NAME = 'niconico_dmc' - def real_download(self, filename, info_dict): self.to_screen('[%s] Downloading from DMC' % self.FD_NAME) @@ -54,4 +49,4 @@ class NiconicoDmcFD(FileDownloader): with heartbeat_lock: timer[0].cancel() download_complete = True - return success + return success diff --git a/yt_dlp/downloader/rtmp.py b/yt_dlp/downloader/rtmp.py index 90f1acfd4..3464eeef9 100644 --- a/yt_dlp/downloader/rtmp.py +++ b/yt_dlp/downloader/rtmp.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import os import re import subprocess @@ -8,11 +6,11 @@ import time from .common import FileDownloader from ..compat import compat_str from ..utils import ( + Popen, check_executable, - encodeFilename, encodeArgument, + encodeFilename, 
get_exe_version, - Popen, ) diff --git a/yt_dlp/downloader/rtsp.py b/yt_dlp/downloader/rtsp.py index 7815d59d9..e89269fed 100644 --- a/yt_dlp/downloader/rtsp.py +++ b/yt_dlp/downloader/rtsp.py @@ -1,13 +1,8 @@ -from __future__ import unicode_literals - import os import subprocess from .common import FileDownloader -from ..utils import ( - check_executable, - encodeFilename, -) +from ..utils import check_executable, encodeFilename class RtspFD(FileDownloader): @@ -32,7 +27,7 @@ class RtspFD(FileDownloader): retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) - self.to_screen('\r[%s] %s bytes' % (args[0], fsize)) + self.to_screen(f'\r[{args[0]}] {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ 'downloaded_bytes': fsize, diff --git a/yt_dlp/downloader/websocket.py b/yt_dlp/downloader/websocket.py index daac34884..727a15828 100644 --- a/yt_dlp/downloader/websocket.py +++ b/yt_dlp/downloader/websocket.py @@ -1,19 +1,12 @@ +import contextlib import os import signal -import asyncio import threading -try: - import websockets -except (ImportError, SyntaxError): - # websockets 3.10 on python 3.6 causes SyntaxError - # See https://github.com/yt-dlp/yt-dlp/issues/2633 - has_websockets = False -else: - has_websockets = True - from .common import FileDownloader from .external import FFmpegFD +from ..compat import asyncio +from ..dependencies import websockets class FFmpegSinkFD(FileDownloader): @@ -26,14 +19,12 @@ class FFmpegSinkFD(FileDownloader): async def call_conn(proc, stdin): try: await self.real_connection(stdin, info_dict) - except (BrokenPipeError, OSError): + except OSError: pass finally: - try: + with contextlib.suppress(OSError): stdin.flush() stdin.close() - except OSError: - pass os.kill(os.getpid(), signal.SIGINT) class FFmpegStdinFD(FFmpegFD): diff --git a/yt_dlp/downloader/youtube_live_chat.py b/yt_dlp/downloader/youtube_live_chat.py index cfca686ee..cc528029d 100644 --- a/yt_dlp/downloader/youtube_live_chat.py +++ b/yt_dlp/downloader/youtube_live_chat.py @@ -1,24 +1,15 @@ -from __future__ import division, unicode_literals - import json import time from .fragment import FragmentFD from ..compat import compat_urllib_error -from ..utils import ( - try_get, - dict_get, - int_or_none, - RegexNotFoundError, -) from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE +from ..utils import RegexNotFoundError, dict_get, int_or_none, try_get class YoutubeLiveChatFD(FragmentFD): """ Downloads YouTube live chats fragment by fragment """ - FD_NAME = 'youtube_live_chat' - def real_download(self, filename, info_dict): video_id = info_dict['video_id'] self.to_screen('[%s] Downloading live chat' % self.FD_NAME) @@ -54,7 +45,7 @@ class YoutubeLiveChatFD(FragmentFD): replay_chat_item_action = action['replayChatItemAction'] offset = int(replay_chat_item_action['videoOffsetTimeMsec']) processed_fragment.extend( - json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n') + json.dumps(action, ensure_ascii=False).encode() + b'\n') if offset is not None: continuation = try_get( live_chat_continuation, @@ -96,7 +87,7 @@ class YoutubeLiveChatFD(FragmentFD): 'isLive': True, } processed_fragment.extend( - json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n') + json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n') continuation_data_getters = [ lambda x: x['continuations'][0]['invalidationContinuationData'], lambda x: x['continuations'][0]['timedContinuationData'], @@ -190,7 +181,7 @@ class 
YoutubeLiveChatFD(FragmentFD): request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params} headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) headers.update({'content-type': 'application/json'}) - fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n' + fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n' success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( url, frag_index, fragment_request_data, headers) else: diff --git a/yt_dlp/extractor/__init__.py b/yt_dlp/extractor/__init__.py index b35484246..afd3d05ac 100644 --- a/yt_dlp/extractor/__init__.py +++ b/yt_dlp/extractor/__init__.py @@ -1,24 +1,23 @@ +import contextlib import os from ..utils import load_plugins _LAZY_LOADER = False if not os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): - try: - from .lazy_extractors import * + with contextlib.suppress(ImportError): + from .lazy_extractors import * # noqa: F403 from .lazy_extractors import _ALL_CLASSES _LAZY_LOADER = True - except ImportError: - pass if not _LAZY_LOADER: - from .extractors import * - _ALL_CLASSES = [ + from .extractors import * # noqa: F403 + _ALL_CLASSES = [ # noqa: F811 klass for name, klass in globals().items() if name.endswith('IE') and name != 'GenericIE' ] - _ALL_CLASSES.append(GenericIE) + _ALL_CLASSES.append(GenericIE) # noqa: F405 _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals()) _ALL_CLASSES = list(_PLUGIN_CLASSES.values()) + _ALL_CLASSES @@ -38,15 +37,17 @@ def gen_extractors(): return [klass() for klass in gen_extractor_classes()] -def list_extractors(age_limit): - """ - Return a list of extractors that are suitable for the given age, - sorted by extractor ID. 
- """ +def list_extractor_classes(age_limit=None): + """Return a list of extractors that are suitable for the given age, sorted by extractor name""" + yield from sorted(filter( + lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, # noqa: F405 + gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower()) + yield GenericIE # noqa: F405 + - return sorted( - filter(lambda ie: ie.is_suitable(age_limit), gen_extractors()), - key=lambda ie: ie.IE_NAME.lower()) +def list_extractors(age_limit=None): + """Return a list of extractor instances that are suitable for the given age, sorted by extractor name""" + return [ie() for ie in list_extractor_classes(age_limit)] def get_info_extractor(ie_name): diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 6fe195e82..03f10ab23 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import hashlib import hmac import re diff --git a/yt_dlp/extractor/abcnews.py b/yt_dlp/extractor/abcnews.py index 296b8cec1..a57295b13 100644 --- a/yt_dlp/extractor/abcnews.py +++ b/yt_dlp/extractor/abcnews.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .amp import AMPIE from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/abcotvs.py b/yt_dlp/extractor/abcotvs.py index 5bff46634..44a9f8ca5 100644 --- a/yt_dlp/extractor/abcotvs.py +++ b/yt_dlp/extractor/abcotvs.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/abematv.py b/yt_dlp/extractor/abematv.py index a839f0c1f..1b9deeae8 100644 --- a/yt_dlp/extractor/abematv.py +++ b/yt_dlp/extractor/abematv.py @@ -1,37 +1,34 @@ -import io -import json -import time +import base64 +import binascii import hashlib import hmac +import io +import json import re import struct -from base64 import urlsafe_b64encode -from binascii import unhexlify +import time +import urllib.response +import uuid from .common import InfoExtractor from ..aes import aes_ecb_decrypt -from ..compat import ( - compat_urllib_response, - compat_urllib_parse_urlparse, - compat_urllib_request, -) +from ..compat import compat_urllib_parse_urlparse, compat_urllib_request from ..utils import ( ExtractorError, + bytes_to_intlist, decode_base, int_or_none, - random_uuidv4, + intlist_to_bytes, request_to_url, time_seconds, - update_url_query, traverse_obj, - intlist_to_bytes, - bytes_to_intlist, + update_url_query, urljoin, ) - # NOTE: network handler related code is temporary thing until network stack overhaul PRs are merged (#2861/#2862) + def add_opener(ydl, handler): ''' Add a handler for opening URLs, like _download_webpage ''' # https://github.com/python/cpython/blob/main/Lib/urllib/request.py#L426 @@ -130,7 +127,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler): encvideokey = bytes_to_intlist(struct.pack('>QQ', res >> 64, res & 0xffffffffffffffff)) h = hmac.new( - unhexlify(self.HKEY), + binascii.unhexlify(self.HKEY), (license_response['cid'] + self.ie._DEVICE_ID).encode('utf-8'), digestmod=hashlib.sha256) enckey = bytes_to_intlist(h.digest()) @@ -141,7 +138,7 @@ class AbemaLicenseHandler(compat_urllib_request.BaseHandler): url = request_to_url(url) ticket = compat_urllib_parse_urlparse(url).netloc response_data = self._get_videokey_from_ticket(ticket) - return compat_urllib_response.addinfourl(io.BytesIO(response_data), headers={ + return 
urllib.response.addinfourl(io.BytesIO(response_data), headers={ 'Content-Length': len(response_data), }, url=url, code=200) @@ -238,7 +235,7 @@ class AbemaTVIE(AbemaTVBaseIE): def mix_twist(nonce): nonlocal tmp - mix_once(urlsafe_b64encode(tmp).rstrip(b'=') + nonce) + mix_once(base64.urlsafe_b64encode(tmp).rstrip(b'=') + nonce) mix_once(self._SECRETKEY) mix_tmp(time_struct.tm_mon) @@ -247,13 +244,13 @@ class AbemaTVIE(AbemaTVBaseIE): mix_twist(ts_1hour_str) mix_tmp(time_struct.tm_hour % 5) - return urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8') + return base64.urlsafe_b64encode(tmp).rstrip(b'=').decode('utf-8') def _get_device_token(self): if self._USERTOKEN: return self._USERTOKEN - self._DEVICE_ID = random_uuidv4() + self._DEVICE_ID = str(uuid.uuid4()) aks = self._generate_aks(self._DEVICE_ID) user_data = self._download_json( 'https://api.abema.io/v1/users', None, note='Authorizing', diff --git a/yt_dlp/extractor/academicearth.py b/yt_dlp/extractor/academicearth.py index 34095501c..d9691cb5c 100644 --- a/yt_dlp/extractor/academicearth.py +++ b/yt_dlp/extractor/academicearth.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/acast.py b/yt_dlp/extractor/acast.py index 63587c5cf..f2f828f8e 100644 --- a/yt_dlp/extractor/acast.py +++ b/yt_dlp/extractor/acast.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/adn.py b/yt_dlp/extractor/adn.py index fca6e605d..18ddc5729 100644 --- a/yt_dlp/extractor/adn.py +++ b/yt_dlp/extractor/adn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import binascii import json @@ -85,7 +82,7 @@ class ADNIE(InfoExtractor): # http://animedigitalnetwork.fr/components/com_vodvideo/videojs/adn-vjs.min.js dec_subtitles = unpad_pkcs7(aes_cbc_decrypt_bytes( compat_b64decode(enc_subtitles[24:]), - binascii.unhexlify(self._K + 'ab9f52f5baae7c72'), + binascii.unhexlify(self._K + '7fac1178830cfe0c'), compat_b64decode(enc_subtitles[:24]))) subtitles_json = self._parse_json(dec_subtitles.decode(), None, fatal=False) if not subtitles_json: diff --git a/yt_dlp/extractor/adobeconnect.py b/yt_dlp/extractor/adobeconnect.py index e2e6f93f3..8963b128a 100644 --- a/yt_dlp/extractor/adobeconnect.py +++ b/yt_dlp/extractor/adobeconnect.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_parse_qs, diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py index 5d98301b8..a8e6c4363 100644 --- a/yt_dlp/extractor/adobepass.py +++ b/yt_dlp/extractor/adobepass.py @@ -1,14 +1,11 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re import time +import urllib.error import xml.etree.ElementTree as etree from .common import InfoExtractor from ..compat import ( - compat_kwargs, compat_urlparse, compat_getpass ) @@ -1365,7 +1362,7 @@ class AdobePassIE(InfoExtractor): headers.update(kwargs.get('headers', {})) kwargs['headers'] = headers return super(AdobePassIE, self)._download_webpage_handle( - *args, **compat_kwargs(kwargs)) + *args, **kwargs) @staticmethod def _get_mvpd_resource(provider_id, title, guid, rating): @@ -1439,27 +1436,29 @@ class AdobePassIE(InfoExtractor): if authn_token and is_expired(authn_token, 'simpleTokenExpires'): authn_token = None if not authn_token: - # TODO add support for other 
TV Providers mso_id = self.get_param('ap_mso') - if not mso_id: - raise_mvpd_required() - username, password = self._get_login_info('ap_username', 'ap_password', mso_id) - if not username or not password: - raise_mvpd_required() - mso_info = MSO_INFO[mso_id] + if mso_id: + username, password = self._get_login_info('ap_username', 'ap_password', mso_id) + if not username or not password: + raise_mvpd_required() + mso_info = MSO_INFO[mso_id] - provider_redirect_page_res = self._download_webpage_handle( - self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, - 'Downloading Provider Redirect Page', query={ - 'noflash': 'true', - 'mso_id': mso_id, - 'requestor_id': requestor_id, - 'no_iframe': 'false', - 'domain_name': 'adobe.com', - 'redirect_url': url, - }) + provider_redirect_page_res = self._download_webpage_handle( + self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, + 'Downloading Provider Redirect Page', query={ + 'noflash': 'true', + 'mso_id': mso_id, + 'requestor_id': requestor_id, + 'no_iframe': 'false', + 'domain_name': 'adobe.com', + 'redirect_url': url, + }) + elif not self._cookies_passed: + raise_mvpd_required() - if mso_id == 'Comcast_SSO': + if not mso_id: + pass + elif mso_id == 'Comcast_SSO': # Comcast page flow varies by video site and whether you # are on Comcast's network. provider_redirect_page, urlh = provider_redirect_page_res @@ -1715,12 +1714,17 @@ class AdobePassIE(InfoExtractor): if mso_id != 'Rogers': post_form(mvpd_confirm_page_res, 'Confirming Login') - session = self._download_webpage( - self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, - 'Retrieving Session', data=urlencode_postdata({ - '_method': 'GET', - 'requestor_id': requestor_id, - }), headers=mvpd_headers) + try: + session = self._download_webpage( + self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, + 'Retrieving Session', data=urlencode_postdata({ + '_method': 'GET', + 'requestor_id': requestor_id, + }), headers=mvpd_headers) + except ExtractorError as e: + if not mso_id and isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 401: + raise_mvpd_required() + raise if '<pendingLogout' in session: self._downloader.cache.store(self._MVPD_CACHE, requestor_id, {}) count += 1 diff --git a/yt_dlp/extractor/adobetv.py b/yt_dlp/extractor/adobetv.py index 3cfa1ff55..941254243 100644 --- a/yt_dlp/extractor/adobetv.py +++ b/yt_dlp/extractor/adobetv.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/adultswim.py b/yt_dlp/extractor/adultswim.py index c97cfc161..1368954bc 100644 --- a/yt_dlp/extractor/adultswim.py +++ b/yt_dlp/extractor/adultswim.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .turner import TurnerBaseIE diff --git a/yt_dlp/extractor/aenetworks.py b/yt_dlp/extractor/aenetworks.py index 8025de5a3..86a10f2dc 100644 --- a/yt_dlp/extractor/aenetworks.py +++ b/yt_dlp/extractor/aenetworks.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .theplatform import ThePlatformIE from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 28946e9dd..b0fd158f6 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -1,11 +1,7 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re from .common import InfoExtractor -from ..compat import compat_xpath from ..utils import ( ExtractorError, OnDemandPagedList, @@ 
-282,7 +278,7 @@ class AfreecaTVIE(InfoExtractor): else: raise ExtractorError('Unable to download video info') - video_element = video_xml.findall(compat_xpath('./track/video'))[-1] + video_element = video_xml.findall('./track/video')[-1] if video_element is None or video_element.text is None: raise ExtractorError( 'Video %s does not exist' % video_id, expected=True) @@ -312,7 +308,7 @@ class AfreecaTVIE(InfoExtractor): if not video_url: entries = [] - file_elements = video_element.findall(compat_xpath('./file')) + file_elements = video_element.findall('./file') one = len(file_elements) == 1 for file_num, file_element in enumerate(file_elements, start=1): file_url = url_or_none(file_element.text) diff --git a/yt_dlp/extractor/airmozilla.py b/yt_dlp/extractor/airmozilla.py index 9e38136b4..669556b98 100644 --- a/yt_dlp/extractor/airmozilla.py +++ b/yt_dlp/extractor/airmozilla.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/aliexpress.py b/yt_dlp/extractor/aliexpress.py index 9722fe9ac..2e83f2eb6 100644 --- a/yt_dlp/extractor/aliexpress.py +++ b/yt_dlp/extractor/aliexpress.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py index 7bcdb7afb..124bab0d9 100644 --- a/yt_dlp/extractor/aljazeera.py +++ b/yt_dlp/extractor/aljazeera.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/allocine.py b/yt_dlp/extractor/allocine.py index 403a277e9..1f881e2a0 100644 --- a/yt_dlp/extractor/allocine.py +++ b/yt_dlp/extractor/allocine.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/alphaporno.py b/yt_dlp/extractor/alphaporno.py index 3a6d99f6b..8d5b472d3 100644 --- a/yt_dlp/extractor/alphaporno.py +++ b/yt_dlp/extractor/alphaporno.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_iso8601, diff --git a/yt_dlp/extractor/alsace20tv.py b/yt_dlp/extractor/alsace20tv.py index 4aae6fe74..d16ab496e 100644 --- a/yt_dlp/extractor/alsace20tv.py +++ b/yt_dlp/extractor/alsace20tv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/alura.py b/yt_dlp/extractor/alura.py index d2e2df270..b76ccb2a1 100644 --- a/yt_dlp/extractor/alura.py +++ b/yt_dlp/extractor/alura.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/amara.py b/yt_dlp/extractor/amara.py index 61d469574..5018710e0 100644 --- a/yt_dlp/extractor/amara.py +++ b/yt_dlp/extractor/amara.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .youtube import YoutubeIE from .vimeo import VimeoIE diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 07b1b1861..de4917adc 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import int_or_none diff --git 
a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py index e38e215d3..e04ecf65f 100644 --- a/yt_dlp/extractor/amcnetworks.py +++ b/yt_dlp/extractor/amcnetworks.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .theplatform import ThePlatformIE diff --git a/yt_dlp/extractor/americastestkitchen.py b/yt_dlp/extractor/americastestkitchen.py index 6e6099a03..f5747cf1e 100644 --- a/yt_dlp/extractor/americastestkitchen.py +++ b/yt_dlp/extractor/americastestkitchen.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/amp.py b/yt_dlp/extractor/amp.py index 24c684cad..73b72b085 100644 --- a/yt_dlp/extractor/amp.py +++ b/yt_dlp/extractor/amp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/animelab.py b/yt_dlp/extractor/animelab.py index 1c2cc47dd..fe2b70aed 100644 --- a/yt_dlp/extractor/animelab.py +++ b/yt_dlp/extractor/animelab.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( @@ -56,11 +53,6 @@ class AnimeLabBaseIE(InfoExtractor): class AnimeLabIE(AnimeLabBaseIE): _VALID_URL = r'https?://(?:www\.)?animelab\.com/player/(?P<id>[^/]+)' - # the following tests require authentication, but a free account will suffice - # just set 'usenetrc' to true in test/local_parameters.json if you use a .netrc file - # or you can set 'username' and 'password' there - # the tests also select a specific format so that the same video is downloaded - # regardless of whether the user is premium or not (needs testing on a premium account) _TEST = { 'url': 'https://www.animelab.com/player/fullmetal-alchemist-brotherhood-episode-42', 'md5': '05bde4b91a5d1ff46ef5b94df05b0f7f', @@ -79,9 +71,9 @@ class AnimeLabIE(AnimeLabBaseIE): 'season_id': '38', }, 'params': { + # Ensure the same video is downloaded whether the user is premium or not 'format': '[format_id=21711_yeshardsubbed_ja-JP][height=480]', }, - 'skip': 'All AnimeLab content requires authentication', } def _real_extract(self, url): diff --git a/yt_dlp/extractor/animeondemand.py b/yt_dlp/extractor/animeondemand.py index 2e674d58f..de49db4ea 100644 --- a/yt_dlp/extractor/animeondemand.py +++ b/yt_dlp/extractor/animeondemand.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ant1newsgr.py b/yt_dlp/extractor/ant1newsgr.py index 1075b461e..cd0f36856 100644 --- a/yt_dlp/extractor/ant1newsgr.py +++ b/yt_dlp/extractor/ant1newsgr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import urllib.parse diff --git a/yt_dlp/extractor/anvato.py b/yt_dlp/extractor/anvato.py index 0d444fc33..09dfffdb0 100644 --- a/yt_dlp/extractor/anvato.py +++ b/yt_dlp/extractor/anvato.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import hashlib import json @@ -8,6 +5,7 @@ import random import re import time +from .anvato_token_generator import NFLTokenGenerator from .common import InfoExtractor from ..aes import aes_encrypt from ..compat import compat_str @@ -22,16 +20,6 @@ from ..utils import ( unsmuggle_url, ) -# This import causes a ModuleNotFoundError on some systems for unknown reason. 
-# See issues: -# https://github.com/yt-dlp/yt-dlp/issues/35 -# https://github.com/ytdl-org/youtube-dl/issues/27449 -# https://github.com/animelover1984/youtube-dl/issues/17 -try: - from .anvato_token_generator import NFLTokenGenerator -except ImportError: - NFLTokenGenerator = None - def md5_text(s): if not isinstance(s, compat_str): diff --git a/yt_dlp/extractor/anvato_token_generator/__init__.py b/yt_dlp/extractor/anvato_token_generator/__init__.py index 6e223db9f..6530caf53 100644 --- a/yt_dlp/extractor/anvato_token_generator/__init__.py +++ b/yt_dlp/extractor/anvato_token_generator/__init__.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .nfl import NFLTokenGenerator __all__ = [ diff --git a/yt_dlp/extractor/anvato_token_generator/common.py b/yt_dlp/extractor/anvato_token_generator/common.py index b959a903b..3800b5808 100644 --- a/yt_dlp/extractor/anvato_token_generator/common.py +++ b/yt_dlp/extractor/anvato_token_generator/common.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - class TokenGenerator: def generate(self, anvack, mcp_id): raise NotImplementedError('This method must be implemented by subclasses') diff --git a/yt_dlp/extractor/anvato_token_generator/nfl.py b/yt_dlp/extractor/anvato_token_generator/nfl.py index 97a2b245f..9ee4aa002 100644 --- a/yt_dlp/extractor/anvato_token_generator/nfl.py +++ b/yt_dlp/extractor/anvato_token_generator/nfl.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import TokenGenerator diff --git a/yt_dlp/extractor/aol.py b/yt_dlp/extractor/aol.py index 4766a2c77..b67db2adc 100644 --- a/yt_dlp/extractor/aol.py +++ b/yt_dlp/extractor/aol.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .yahoo import YahooIE diff --git a/yt_dlp/extractor/apa.py b/yt_dlp/extractor/apa.py index 1736cdf56..847be6edf 100644 --- a/yt_dlp/extractor/apa.py +++ b/yt_dlp/extractor/apa.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/aparat.py b/yt_dlp/extractor/aparat.py index 1057233cf..cd6cd1c79 100644 --- a/yt_dlp/extractor/aparat.py +++ b/yt_dlp/extractor/aparat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( get_element_by_id, diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py index 494f8330c..d00b0f906 100644 --- a/yt_dlp/extractor/appleconnect.py +++ b/yt_dlp/extractor/appleconnect.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( str_to_int, diff --git a/yt_dlp/extractor/applepodcasts.py b/yt_dlp/extractor/applepodcasts.py index 9139ff777..49bbeab82 100644 --- a/yt_dlp/extractor/applepodcasts.py +++ b/yt_dlp/extractor/applepodcasts.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/appletrailers.py b/yt_dlp/extractor/appletrailers.py index 8140e332b..6b63f070d 100644 --- a/yt_dlp/extractor/appletrailers.py +++ b/yt_dlp/extractor/appletrailers.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py index 2ab3c1beb..c85d5297d 100644 --- a/yt_dlp/extractor/archiveorg.py +++ b/yt_dlp/extractor/archiveorg.py @@ -1,6 +1,3 @@ -# 
coding: utf-8 -from __future__ import unicode_literals - import re import json from .common import InfoExtractor @@ -479,7 +476,7 @@ class YoutubeWebArchiveIE(InfoExtractor): def _extract_yt_initial_variable(self, webpage, regex, video_id, name): return self._parse_json(self._search_regex( - (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE), + (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', regex), webpage, name, default='{}'), video_id, fatal=False) def _extract_webpage_title(self, webpage): @@ -597,7 +594,7 @@ class YoutubeWebArchiveIE(InfoExtractor): response = self._call_cdx_api( video_id, f'https://www.youtube.com/watch?v={video_id}', filters=['mimetype:text/html'], collapse=['timestamp:6', 'digest'], query={'matchType': 'prefix'}) or [] - all_captures = sorted([int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None]) + all_captures = sorted(int_or_none(r['timestamp']) for r in response if int_or_none(r['timestamp']) is not None) # Prefer the new polymer UI captures as we support extracting more metadata from them # WBM captures seem to all switch to this layout ~July 2020 diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index 8880e5c95..2e3f3cc5f 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 7ea339b39..f294679ef 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/arkena.py b/yt_dlp/extractor/arkena.py index 4f4f457c1..9da2bfd5e 100644 --- a/yt_dlp/extractor/arkena.py +++ b/yt_dlp/extractor/arkena.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index 050c252e3..96b134fa0 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_parse_qs, diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index c2f2c1bd3..443b0d4b9 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/asiancrush.py b/yt_dlp/extractor/asiancrush.py index 7f1940fca..23f310edb 100644 --- a/yt_dlp/extractor/asiancrush.py +++ b/yt_dlp/extractor/asiancrush.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/atresplayer.py b/yt_dlp/extractor/atresplayer.py index 465af4ed3..39d1f1cc5 100644 --- a/yt_dlp/extractor/atresplayer.py +++ b/yt_dlp/extractor/atresplayer.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/atttechchannel.py b/yt_dlp/extractor/atttechchannel.py index 8f93fb353..6ff4ec0ad 100644 --- a/yt_dlp/extractor/atttechchannel.py +++ b/yt_dlp/extractor/atttechchannel.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import 
unified_strdate diff --git a/yt_dlp/extractor/atvat.py b/yt_dlp/extractor/atvat.py index 481a09737..2311837e9 100644 --- a/yt_dlp/extractor/atvat.py +++ b/yt_dlp/extractor/atvat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime from .common import InfoExtractor diff --git a/yt_dlp/extractor/audimedia.py b/yt_dlp/extractor/audimedia.py index 6bd48ef15..c1c4f67d0 100644 --- a/yt_dlp/extractor/audimedia.py +++ b/yt_dlp/extractor/audimedia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/audioboom.py b/yt_dlp/extractor/audioboom.py index c51837b40..dc19a3874 100644 --- a/yt_dlp/extractor/audioboom.py +++ b/yt_dlp/extractor/audioboom.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/audiomack.py b/yt_dlp/extractor/audiomack.py index 19775cf0f..5c4160fe4 100644 --- a/yt_dlp/extractor/audiomack.py +++ b/yt_dlp/extractor/audiomack.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import time diff --git a/yt_dlp/extractor/audius.py b/yt_dlp/extractor/audius.py index fa64995d5..189d1224f 100644 --- a/yt_dlp/extractor/audius.py +++ b/yt_dlp/extractor/audius.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random from .common import InfoExtractor diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index f5e559c9f..d289f6be3 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 from .common import InfoExtractor diff --git a/yt_dlp/extractor/aws.py b/yt_dlp/extractor/aws.py index dccfeaf73..c2b22922b 100644 --- a/yt_dlp/extractor/aws.py +++ b/yt_dlp/extractor/aws.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime import hashlib import hmac diff --git a/yt_dlp/extractor/azmedien.py b/yt_dlp/extractor/azmedien.py index 0168340b9..d1686eed6 100644 --- a/yt_dlp/extractor/azmedien.py +++ b/yt_dlp/extractor/azmedien.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/baidu.py b/yt_dlp/extractor/baidu.py index 364fd9459..8786d67e0 100644 --- a/yt_dlp/extractor/baidu.py +++ b/yt_dlp/extractor/baidu.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import unescapeHTML diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index 3d4d36ec3..92f567c5d 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import math from .common import InfoExtractor diff --git a/yt_dlp/extractor/bandaichannel.py b/yt_dlp/extractor/bandaichannel.py index f1bcdef7a..2e3233376 100644 --- a/yt_dlp/extractor/bandaichannel.py +++ b/yt_dlp/extractor/bandaichannel.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from ..utils import extract_attributes diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py index 745055e2d..6f806d84e 100644 --- a/yt_dlp/extractor/bandcamp.py +++ b/yt_dlp/extractor/bandcamp.py @@ -1,6 +1,3 @@ -# 
coding: utf-8 -from __future__ import unicode_literals - import random import re import time @@ -439,7 +436,7 @@ class BandcampUserIE(InfoExtractor): uploader = self._match_id(url) webpage = self._download_webpage(url, uploader) - discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage) + discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\'](?![^"\'/]*?/merch)([^"\']+)', webpage) or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage)) return self.playlist_from_matches( diff --git a/yt_dlp/extractor/bannedvideo.py b/yt_dlp/extractor/bannedvideo.py index 3db1151f6..ec9bdd8ca 100644 --- a/yt_dlp/extractor/bannedvideo.py +++ b/yt_dlp/extractor/bannedvideo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 29ad7ded7..9cb019a49 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -1,6 +1,4 @@ -# coding: utf-8 -from __future__ import unicode_literals - +import xml.etree.ElementTree import functools import itertools import json @@ -8,7 +6,6 @@ import re from .common import InfoExtractor from ..compat import ( - compat_etree_Element, compat_HTTPError, compat_str, compat_urllib_error, @@ -318,7 +315,7 @@ class BBCCoUkIE(InfoExtractor): continue captions = self._download_xml( cc_url, programme_id, 'Downloading captions', fatal=False) - if not isinstance(captions, compat_etree_Element): + if not isinstance(captions, xml.etree.ElementTree.Element): continue subtitles['en'] = [ { diff --git a/yt_dlp/extractor/beatport.py b/yt_dlp/extractor/beatport.py index e1cf8b4fe..f71f1f308 100644 --- a/yt_dlp/extractor/beatport.py +++ b/yt_dlp/extractor/beatport.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/beeg.py b/yt_dlp/extractor/beeg.py index 717fff3a6..5957e370a 100644 --- a/yt_dlp/extractor/beeg.py +++ b/yt_dlp/extractor/beeg.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/behindkink.py b/yt_dlp/extractor/behindkink.py index 2c97f9817..ca4498150 100644 --- a/yt_dlp/extractor/behindkink.py +++ b/yt_dlp/extractor/behindkink.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import url_basename diff --git a/yt_dlp/extractor/bellmedia.py b/yt_dlp/extractor/bellmedia.py index 904c17ed0..8f9849d9b 100644 --- a/yt_dlp/extractor/bellmedia.py +++ b/yt_dlp/extractor/bellmedia.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/bet.py b/yt_dlp/extractor/bet.py index 2c7144235..6b867d135 100644 --- a/yt_dlp/extractor/bet.py +++ b/yt_dlp/extractor/bet.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor from ..utils import unified_strdate diff --git a/yt_dlp/extractor/bfi.py b/yt_dlp/extractor/bfi.py index 60c8944b5..76f0516a4 100644 --- a/yt_dlp/extractor/bfi.py +++ b/yt_dlp/extractor/bfi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/bfmtv.py b/yt_dlp/extractor/bfmtv.py index 501f69d80..48526e38b 100644 --- a/yt_dlp/extractor/bfmtv.py +++ b/yt_dlp/extractor/bfmtv.py @@ -1,6 
+1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/bibeltv.py b/yt_dlp/extractor/bibeltv.py index 56c2bfee8..fd20aadad 100644 --- a/yt_dlp/extractor/bibeltv.py +++ b/yt_dlp/extractor/bibeltv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/bigflix.py b/yt_dlp/extractor/bigflix.py index 28e3e59f6..6b2797ca0 100644 --- a/yt_dlp/extractor/bigflix.py +++ b/yt_dlp/extractor/bigflix.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py index ddf76ac55..f39e15002 100644 --- a/yt_dlp/extractor/bigo.py +++ b/yt_dlp/extractor/bigo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError, urlencode_postdata diff --git a/yt_dlp/extractor/bild.py b/yt_dlp/extractor/bild.py index b8dfbd42b..f3dea33c4 100644 --- a/yt_dlp/extractor/bild.py +++ b/yt_dlp/extractor/bild.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index a9574758c..ead0dd88b 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -1,5 +1,3 @@ -# coding: utf-8 - import base64 import hashlib import itertools @@ -20,6 +18,7 @@ from ..utils import ( float_or_none, mimetype2ext, parse_iso8601, + qualities, traverse_obj, parse_count, smuggle_url, @@ -998,3 +997,88 @@ class BiliIntlSeriesIE(BiliIntlBaseIE): self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'), categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none), thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view'))) + + +class BiliLiveIE(InfoExtractor): + _VALID_URL = r'https?://live.bilibili.com/(?P<id>\d+)' + + _TESTS = [{ + 'url': 'https://live.bilibili.com/196', + 'info_dict': { + 'id': '33989', + 'description': "周六杂谈回，其他时候随机游戏。 | \n录播：@下播型泛式录播组。 | \n直播通知群（全员禁言）：666906670，902092584，59971⑧481 （功能一样，别多加）", + 'ext': 'flv', + 'title': "太空狼人杀联动，不被爆杀就算赢", + 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg", + 'timestamp': 1650802769, + }, + 'skip': 'not live' + }, { + 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click', + 'only_matching': True + }] + + _FORMATS = { + 80: {'format_id': 'low', 'format_note': '流畅'}, + 150: {'format_id': 'high_res', 'format_note': '高清'}, + 250: {'format_id': 'ultra_high_res', 'format_note': '超清'}, + 400: {'format_id': 'blue_ray', 'format_note': '蓝光'}, + 10000: {'format_id': 'source', 'format_note': '原画'}, + 20000: {'format_id': '4K', 'format_note': '4K'}, + 30000: {'format_id': 'dolby', 'format_note': '杜比'}, + } + + _quality = staticmethod(qualities(list(_FORMATS))) + + def _call_api(self, path, room_id, query): + api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query) + if api_result.get('code') != 0: + raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata') + return api_result.get('data') or {} + + def _parse_formats(self, qn, fmt): + for codec in fmt.get('codec') or []: + if codec.get('current_qn') != qn: + continue + for url_info in codec['url_info']: + yield { + 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}', + 'ext': fmt.get('format_name'), + 'vcodec': codec.get('codec_name'), + 'quality': self._quality(qn), + **self._FORMATS[qn], + } + + def _real_extract(self, url): + room_id = self._match_id(url) + room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id}) + if room_data.get('live_status') == 0: + raise ExtractorError('Streamer is not live', expected=True) + + formats = [] + for qn in self._FORMATS.keys(): + stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, { + 'room_id': room_id, + 'qn': qn, + 'codec': '0,1', + 'format': '0,2', + 'mask': '0', + 'no_playurl': '0', + 'platform': 'web', + 'protocol': '0,1', + }) + for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []: + formats.extend(self._parse_formats(qn, fmt)) + self._sort_formats(formats) + + return { + 'id': room_id, + 'title': room_data.get('title'), + 'description': room_data.get('description'), + 'thumbnail': room_data.get('user_cover'), + 'timestamp': stream_data.get('live_time'), + 'formats': formats, + 'http_headers': { + 'Referer': url, + }, + } diff --git a/yt_dlp/extractor/biobiochiletv.py b/yt_dlp/extractor/biobiochiletv.py index dc86c57c5..180c9656e 100644 --- a/yt_dlp/extractor/biobiochiletv.py +++ b/yt_dlp/extractor/biobiochiletv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/biqle.py b/yt_dlp/extractor/biqle.py index 2b57bade3..3a4234491 100644 --- a/yt_dlp/extractor/biqle.py +++ b/yt_dlp/extractor/biqle.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .vk import VKIE from ..compat import compat_b64decode diff --git a/yt_dlp/extractor/bitchute.py b/yt_dlp/extractor/bitchute.py index dcae6f4cc..c831092d4 100644 --- a/yt_dlp/extractor/bitchute.py +++ b/yt_dlp/extractor/bitchute.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/bitwave.py b/yt_dlp/extractor/bitwave.py index e6e093f59..bd8eac1f1 100644 --- a/yt_dlp/extractor/bitwave.py +++ b/yt_dlp/extractor/bitwave.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/blackboardcollaborate.py b/yt_dlp/extractor/blackboardcollaborate.py index 8ae294198..8f41c897a 100644 --- a/yt_dlp/extractor/blackboardcollaborate.py +++ b/yt_dlp/extractor/blackboardcollaborate.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import parse_iso8601 diff --git a/yt_dlp/extractor/bleacherreport.py
b/yt_dlp/extractor/bleacherreport.py index d1bf8e829..8d8fabe33 100644 --- a/yt_dlp/extractor/bleacherreport.py +++ b/yt_dlp/extractor/bleacherreport.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .amp import AMPIE from ..utils import ( diff --git a/yt_dlp/extractor/blinkx.py b/yt_dlp/extractor/blinkx.py deleted file mode 100644 index d70a3b30f..000000000 --- a/yt_dlp/extractor/blinkx.py +++ /dev/null @@ -1,86 +0,0 @@ -from __future__ import unicode_literals - -import json - -from .common import InfoExtractor -from ..utils import ( - remove_start, - int_or_none, -) - - -class BlinkxIE(InfoExtractor): - _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' - IE_NAME = 'blinkx' - - _TEST = { - 'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', - 'md5': '337cf7a344663ec79bf93a526a2e06c7', - 'info_dict': { - 'id': 'Da0Gw3xc', - 'ext': 'mp4', - 'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', - 'uploader': 'IGN News', - 'upload_date': '20150217', - 'timestamp': 1424215740, - 'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', - 'duration': 47.743333, - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - display_id = video_id[:8] - - api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' - + 'video=%s' % video_id) - data_json = self._download_webpage(api_url, display_id) - data = json.loads(data_json)['api']['results'][0] - duration = None - thumbnails = [] - formats = [] - for m in data['media']: - if m['type'] == 'jpg': - thumbnails.append({ - 'url': m['link'], - 'width': int(m['w']), - 'height': int(m['h']), - }) - elif m['type'] == 'original': - duration = float(m['d']) - elif m['type'] == 'youtube': - yt_id = m['link'] - self.to_screen('Youtube video detected: %s' % yt_id) - return self.url_result(yt_id, 'Youtube', video_id=yt_id) - elif m['type'] in ('flv', 'mp4'): - vcodec = remove_start(m['vcodec'], 'ff') - acodec = remove_start(m['acodec'], 'ff') - vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) - abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) - tbr = vbr + abr if vbr and abr else None - format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) - formats.append({ - 'format_id': format_id, - 'url': m['link'], - 'vcodec': vcodec, - 'acodec': acodec, - 'abr': abr, - 'vbr': vbr, - 'tbr': tbr, - 'width': int_or_none(m.get('w')), - 'height': int_or_none(m.get('h')), - }) - - self._sort_formats(formats) - - return { - 'id': display_id, - 'fullid': video_id, - 'title': data['title'], - 'formats': formats, - 'uploader': data.get('channel_name'), - 'timestamp': data.get('pubdate_epoch'), - 'description': data.get('description'), - 'thumbnails': thumbnails, - 'duration': duration, - } diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py index dba131cb0..d7aa7f94e 100644 --- a/yt_dlp/extractor/blogger.py +++ b/yt_dlp/extractor/blogger.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from ..utils import ( diff --git a/yt_dlp/extractor/bloomberg.py b/yt_dlp/extractor/bloomberg.py index 2fbfad1ba..c0aaeae02 100644 --- a/yt_dlp/extractor/bloomberg.py +++ b/yt_dlp/extractor/bloomberg.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import 
InfoExtractor diff --git a/yt_dlp/extractor/bokecc.py b/yt_dlp/extractor/bokecc.py index 6a89d36f4..0c081750e 100644 --- a/yt_dlp/extractor/bokecc.py +++ b/yt_dlp/extractor/bokecc.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_parse_qs from ..utils import ExtractorError diff --git a/yt_dlp/extractor/bongacams.py b/yt_dlp/extractor/bongacams.py index 4e346e7b6..cbef0fc53 100644 --- a/yt_dlp/extractor/bongacams.py +++ b/yt_dlp/extractor/bongacams.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/bostonglobe.py b/yt_dlp/extractor/bostonglobe.py index 57882fbee..92f8ea2cb 100644 --- a/yt_dlp/extractor/bostonglobe.py +++ b/yt_dlp/extractor/bostonglobe.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/box.py b/yt_dlp/extractor/box.py index 8214086a6..5842de88a 100644 --- a/yt_dlp/extractor/box.py +++ b/yt_dlp/extractor/box.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/bpb.py b/yt_dlp/extractor/bpb.py index 8f6ef3cf0..388f1f94f 100644 --- a/yt_dlp/extractor/bpb.py +++ b/yt_dlp/extractor/bpb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -16,7 +13,6 @@ class BpbIE(InfoExtractor): _TEST = { 'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr', - # md5 fails in Python 2.6 due to buggy server response and wrong handling of urllib2 'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f', 'info_dict': { 'id': '297', diff --git a/yt_dlp/extractor/br.py b/yt_dlp/extractor/br.py index 0155827d8..faac442e8 100644 --- a/yt_dlp/extractor/br.py +++ b/yt_dlp/extractor/br.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/bravotv.py b/yt_dlp/extractor/bravotv.py index 139d51c09..d4895848e 100644 --- a/yt_dlp/extractor/bravotv.py +++ b/yt_dlp/extractor/bravotv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .adobepass import AdobePassIE diff --git a/yt_dlp/extractor/breakcom.py b/yt_dlp/extractor/breakcom.py index f38789f99..51c8c822f 100644 --- a/yt_dlp/extractor/breakcom.py +++ b/yt_dlp/extractor/breakcom.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py index e029aa627..a2b04fcce 100644 --- a/yt_dlp/extractor/breitbart.py +++ b/yt_dlp/extractor/breitbart.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index dcd332b43..936c34e15 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -1,9 +1,7 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import re import struct +import xml.etree.ElementTree from .adobepass import AdobePassIE from .common import InfoExtractor @@ -12,7 +10,6 @@ from ..compat import ( compat_HTTPError, compat_parse_qs, compat_urlparse, - compat_xml_parse_error, 
) from ..utils import ( clean_html, @@ -166,7 +163,7 @@ class BrightcoveLegacyIE(InfoExtractor): try: object_doc = compat_etree_fromstring(object_str.encode('utf-8')) - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: return fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars') diff --git a/yt_dlp/extractor/businessinsider.py b/yt_dlp/extractor/businessinsider.py index 73a57b1e4..4b3f5e68b 100644 --- a/yt_dlp/extractor/businessinsider.py +++ b/yt_dlp/extractor/businessinsider.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .jwplatform import JWPlatformIE diff --git a/yt_dlp/extractor/buzzfeed.py b/yt_dlp/extractor/buzzfeed.py index ec411091e..1b4cba63e 100644 --- a/yt_dlp/extractor/buzzfeed.py +++ b/yt_dlp/extractor/buzzfeed.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/byutv.py b/yt_dlp/extractor/byutv.py index f4d5086ed..eca2e294e 100644 --- a/yt_dlp/extractor/byutv.py +++ b/yt_dlp/extractor/byutv.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/c56.py b/yt_dlp/extractor/c56.py index a853c530c..1d98ea598 100644 --- a/yt_dlp/extractor/c56.py +++ b/yt_dlp/extractor/c56.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py index 77efdf45a..3200b5677 100644 --- a/yt_dlp/extractor/cableav.py +++ b/yt_dlp/extractor/cableav.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor diff --git a/yt_dlp/extractor/callin.py b/yt_dlp/extractor/callin.py index 1f3b7cfff..fc5da7028 100644 --- a/yt_dlp/extractor/callin.py +++ b/yt_dlp/extractor/callin.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import ( traverse_obj, diff --git a/yt_dlp/extractor/caltrans.py b/yt_dlp/extractor/caltrans.py index 9ac740f7e..e52dfb170 100644 --- a/yt_dlp/extractor/caltrans.py +++ b/yt_dlp/extractor/caltrans.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/cam4.py b/yt_dlp/extractor/cam4.py index 2a3931fd0..4256b28e0 100644 --- a/yt_dlp/extractor/cam4.py +++ b/yt_dlp/extractor/cam4.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/camdemy.py b/yt_dlp/extractor/camdemy.py index 8f0c6c545..c7079e422 100644 --- a/yt_dlp/extractor/camdemy.py +++ b/yt_dlp/extractor/camdemy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cammodels.py b/yt_dlp/extractor/cammodels.py index 3dc19377b..32fbffcc2 100644 --- a/yt_dlp/extractor/cammodels.py +++ b/yt_dlp/extractor/cammodels.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/camwithher.py b/yt_dlp/extractor/camwithher.py index bbc5205fd..a0b3749ed 100644 --- a/yt_dlp/extractor/camwithher.py +++ b/yt_dlp/extractor/camwithher.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git 
a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py index 0365cb2f6..f2ec9355f 100644 --- a/yt_dlp/extractor/canalalpha.py +++ b/yt_dlp/extractor/canalalpha.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/canalc2.py b/yt_dlp/extractor/canalc2.py index 407cc8084..c9bb94c40 100644 --- a/yt_dlp/extractor/canalc2.py +++ b/yt_dlp/extractor/canalc2.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/canalplus.py b/yt_dlp/extractor/canalplus.py index 211ea267a..b184398e2 100644 --- a/yt_dlp/extractor/canalplus.py +++ b/yt_dlp/extractor/canalplus.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( # ExtractorError, diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py index 8b9903774..8eff4a57c 100644 --- a/yt_dlp/extractor/canvas.py +++ b/yt_dlp/extractor/canvas.py @@ -1,4 +1,3 @@ -from __future__ import unicode_literals import json diff --git a/yt_dlp/extractor/carambatv.py b/yt_dlp/extractor/carambatv.py index 7e5cc90fb..087ea8aa0 100644 --- a/yt_dlp/extractor/carambatv.py +++ b/yt_dlp/extractor/carambatv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/cartoonnetwork.py b/yt_dlp/extractor/cartoonnetwork.py index 48b33617f..4dd7ac46d 100644 --- a/yt_dlp/extractor/cartoonnetwork.py +++ b/yt_dlp/extractor/cartoonnetwork.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .turner import TurnerBaseIE from ..utils import int_or_none diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index fba8bf965..cac3f1e9d 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json import base64 diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py index 2af36ea82..e32539c9e 100644 --- a/yt_dlp/extractor/cbs.py +++ b/yt_dlp/extractor/cbs.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .theplatform import ThePlatformFeedIE from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/cbsinteractive.py b/yt_dlp/extractor/cbsinteractive.py index 9d4f75435..7abeecf78 100644 --- a/yt_dlp/extractor/cbsinteractive.py +++ b/yt_dlp/extractor/cbsinteractive.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .cbs import CBSIE from ..utils import int_or_none diff --git a/yt_dlp/extractor/cbslocal.py b/yt_dlp/extractor/cbslocal.py index 3b7e1a8b9..c6495c95f 100644 --- a/yt_dlp/extractor/cbslocal.py +++ b/yt_dlp/extractor/cbslocal.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .anvato import AnvatoIE from .sendtonews import SendtoNewsIE from ..compat import compat_urlparse diff --git a/yt_dlp/extractor/cbsnews.py b/yt_dlp/extractor/cbsnews.py index 1285ed65e..76925b4f9 100644 --- a/yt_dlp/extractor/cbsnews.py +++ b/yt_dlp/extractor/cbsnews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import zlib diff --git a/yt_dlp/extractor/cbssports.py b/yt_dlp/extractor/cbssports.py index b8a6e5967..56a255149 100644 --- a/yt_dlp/extractor/cbssports.py +++ 
b/yt_dlp/extractor/cbssports.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - # from .cbs import CBSBaseIE from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/ccc.py b/yt_dlp/extractor/ccc.py index 36e6dff72..b11e1f74e 100644 --- a/yt_dlp/extractor/ccc.py +++ b/yt_dlp/extractor/ccc.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/ccma.py b/yt_dlp/extractor/ccma.py index 9dbaabfa0..ca739f8a1 100644 --- a/yt_dlp/extractor/ccma.py +++ b/yt_dlp/extractor/ccma.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/cctv.py b/yt_dlp/extractor/cctv.py index 0ed5f327b..623cbb342 100644 --- a/yt_dlp/extractor/cctv.py +++ b/yt_dlp/extractor/cctv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cda.py b/yt_dlp/extractor/cda.py index 72c47050f..9b257bee9 100644 --- a/yt_dlp/extractor/cda.py +++ b/yt_dlp/extractor/cda.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import codecs import re import json diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index ddf66b207..331b350f1 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cgtn.py b/yt_dlp/extractor/cgtn.py index 89f173887..aaafa02d1 100644 --- a/yt_dlp/extractor/cgtn.py +++ b/yt_dlp/extractor/cgtn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/channel9.py b/yt_dlp/extractor/channel9.py index 90024dbba..90a1ab2be 100644 --- a/yt_dlp/extractor/channel9.py +++ b/yt_dlp/extractor/channel9.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/charlierose.py b/yt_dlp/extractor/charlierose.py index 42c9af263..27f8b33e5 100644 --- a/yt_dlp/extractor/charlierose.py +++ b/yt_dlp/extractor/charlierose.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import remove_end diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index 8da51f919..d39210bf7 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/chilloutzone.py b/yt_dlp/extractor/chilloutzone.py index fd5202b9e..1a2f77c4e 100644 --- a/yt_dlp/extractor/chilloutzone.py +++ b/yt_dlp/extractor/chilloutzone.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py index e6841fb8b..7e8c0bfc9 100644 --- a/yt_dlp/extractor/chingari.py +++ b/yt_dlp/extractor/chingari.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json @@ -48,6 +45,8 @@ class ChingariBaseIE(InfoExtractor): return { 'id': id, + 'extractor_key': ChingariIE.ie_key(), + 'extractor': 
'Chingari', 'title': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), 'description': compat_urllib_parse_unquote_plus(clean_html(post_data.get('caption'))), 'duration': media_data.get('duration'), @@ -105,11 +104,11 @@ class ChingariUserIE(ChingariBaseIE): _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)' _TESTS = [{ 'url': 'https://chingari.io/dada1023', - 'playlist_mincount': 3, 'info_dict': { 'id': 'dada1023', }, - 'entries': [{ + 'params': {'playlistend': 3}, + 'playlist': [{ 'url': 'https://chingari.io/share/post?id=614781f3ade60b3a0bfff42a', 'info_dict': { 'id': '614781f3ade60b3a0bfff42a', diff --git a/yt_dlp/extractor/chirbit.py b/yt_dlp/extractor/chirbit.py index 8d75cdf19..452711d97 100644 --- a/yt_dlp/extractor/chirbit.py +++ b/yt_dlp/extractor/chirbit.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cinchcast.py b/yt_dlp/extractor/cinchcast.py index b861d54b0..393df3698 100644 --- a/yt_dlp/extractor/cinchcast.py +++ b/yt_dlp/extractor/cinchcast.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( unified_strdate, diff --git a/yt_dlp/extractor/cinemax.py b/yt_dlp/extractor/cinemax.py index 2c3ff8d4f..54cab2285 100644 --- a/yt_dlp/extractor/cinemax.py +++ b/yt_dlp/extractor/cinemax.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .hbo import HBOBaseIE diff --git a/yt_dlp/extractor/ciscolive.py b/yt_dlp/extractor/ciscolive.py index 349c5eb50..066857817 100644 --- a/yt_dlp/extractor/ciscolive.py +++ b/yt_dlp/extractor/ciscolive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/ciscowebex.py b/yt_dlp/extractor/ciscowebex.py index 882dae91b..e1aae9bda 100644 --- a/yt_dlp/extractor/ciscowebex.py +++ b/yt_dlp/extractor/ciscowebex.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/cjsw.py b/yt_dlp/extractor/cjsw.py index 1dea0d7c7..c37a3b848 100644 --- a/yt_dlp/extractor/cjsw.py +++ b/yt_dlp/extractor/cjsw.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/cliphunter.py b/yt_dlp/extractor/cliphunter.py index f2ca7a337..7e5fd3175 100644 --- a/yt_dlp/extractor/cliphunter.py +++ b/yt_dlp/extractor/cliphunter.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/clippit.py b/yt_dlp/extractor/clippit.py index a1a7a774c..006a713b2 100644 --- a/yt_dlp/extractor/clippit.py +++ b/yt_dlp/extractor/clippit.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_iso8601, diff --git a/yt_dlp/extractor/cliprs.py b/yt_dlp/extractor/cliprs.py index d55b26d59..567f77b94 100644 --- a/yt_dlp/extractor/cliprs.py +++ b/yt_dlp/extractor/cliprs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .onet import OnetBaseIE diff --git a/yt_dlp/extractor/clipsyndicate.py b/yt_dlp/extractor/clipsyndicate.py index 6cdb42f5a..606444321 100644 --- 
a/yt_dlp/extractor/clipsyndicate.py +++ b/yt_dlp/extractor/clipsyndicate.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( find_xpath_attr, diff --git a/yt_dlp/extractor/closertotruth.py b/yt_dlp/extractor/closertotruth.py index 517e121e0..e78e26a11 100644 --- a/yt_dlp/extractor/closertotruth.py +++ b/yt_dlp/extractor/closertotruth.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cloudflarestream.py b/yt_dlp/extractor/cloudflarestream.py index 2fdcfbb3a..0333d5def 100644 --- a/yt_dlp/extractor/cloudflarestream.py +++ b/yt_dlp/extractor/cloudflarestream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import re diff --git a/yt_dlp/extractor/cloudy.py b/yt_dlp/extractor/cloudy.py index 85ca20ecc..848643e26 100644 --- a/yt_dlp/extractor/cloudy.py +++ b/yt_dlp/extractor/cloudy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( str_to_int, diff --git a/yt_dlp/extractor/clubic.py b/yt_dlp/extractor/clubic.py index 98f9cb596..ce8621296 100644 --- a/yt_dlp/extractor/clubic.py +++ b/yt_dlp/extractor/clubic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/clyp.py b/yt_dlp/extractor/clyp.py index e6b2ac4d4..c64726ca2 100644 --- a/yt_dlp/extractor/clyp.py +++ b/yt_dlp/extractor/clyp.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/cmt.py b/yt_dlp/extractor/cmt.py index a4ddb9160..4eec066dd 100644 --- a/yt_dlp/extractor/cmt.py +++ b/yt_dlp/extractor/cmt.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .mtv import MTVIE # TODO Remove - Reason: Outdated Site diff --git a/yt_dlp/extractor/cnbc.py b/yt_dlp/extractor/cnbc.py index da3730cc8..68fd025b7 100644 --- a/yt_dlp/extractor/cnbc.py +++ b/yt_dlp/extractor/cnbc.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import smuggle_url diff --git a/yt_dlp/extractor/cnn.py b/yt_dlp/extractor/cnn.py index af11d95b4..96482eaf5 100644 --- a/yt_dlp/extractor/cnn.py +++ b/yt_dlp/extractor/cnn.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from .turner import TurnerBaseIE from ..utils import url_basename diff --git a/yt_dlp/extractor/comedycentral.py b/yt_dlp/extractor/comedycentral.py index 5a12ab5e6..05fc9f2b5 100644 --- a/yt_dlp/extractor/comedycentral.py +++ b/yt_dlp/extractor/comedycentral.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 9914910d0..ebeca4395 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,46 +1,46 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import collections import hashlib import itertools import json +import math import netrc import os import random -import re import sys import time -import math +import xml.etree.ElementTree from ..compat import ( compat_cookiejar_Cookie, compat_cookies_SimpleCookie, - compat_etree_Element, compat_etree_fromstring, compat_expanduser, 
compat_getpass, compat_http_client, compat_os_name, - compat_Pattern, compat_str, compat_urllib_error, compat_urllib_parse_unquote, compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, - compat_xml_parse_error, + re, ) from ..downloader import FileDownloader -from ..downloader.f4m import ( - get_base_url, - remove_encrypted_media, -) +from ..downloader.f4m import get_base_url, remove_encrypted_media from ..utils import ( + JSON_LD_RE, + NO_DEFAULT, + ExtractorError, + GeoRestrictedError, + GeoUtils, + RegexNotFoundError, + UnsupportedError, age_restricted, base_url, bug_reports_message, + classproperty, clean_html, determine_ext, determine_protocol, @@ -48,20 +48,15 @@ from ..utils import ( encode_data_uri, error_to_compat_str, extract_attributes, - ExtractorError, filter_dict, fix_xml_ampersands, float_or_none, format_field, - GeoRestrictedError, - GeoUtils, int_or_none, join_nonempty, js_to_json, - JSON_LD_RE, mimetype2ext, network_exceptions, - NO_DEFAULT, orderedSet, parse_bitrate, parse_codecs, @@ -69,7 +64,6 @@ from ..utils import ( parse_iso8601, parse_m3u8_attributes, parse_resolution, - RegexNotFoundError, sanitize_filename, sanitized_Request, str_or_none, @@ -78,7 +72,6 @@ from ..utils import ( traverse_obj, try_get, unescapeHTML, - UnsupportedError, unified_strdate, unified_timestamp, update_Request, @@ -93,7 +86,7 @@ from ..utils import ( ) -class InfoExtractor(object): +class InfoExtractor: """Information Extractor class. Information extractors are the classes that, given a URL, extract @@ -111,7 +104,9 @@ class InfoExtractor(object): For a video, the dictionaries must include the following fields: id: Video identifier. - title: Video title, unescaped. + title: Video title, unescaped. Set to an empty string if video has + no title as opposed to "None" which signifies that the + extractor failed to obtain a title Additionally, it must contain either a formats entry or a url one: @@ -216,8 +211,10 @@ class InfoExtractor(object): * no_resume The server does not support resuming the (HTTP or RTMP) download. Boolean. * has_drm The format has DRM and cannot be downloaded. Boolean - * downloader_options A dictionary of downloader options as - described in FileDownloader (For internal use only) + * downloader_options A dictionary of downloader options + (For internal use only) + * http_chunk_size Chunk size for HTTP downloads + * ffmpeg_args Extra arguments for ffmpeg downloader RTMP formats can also have the additional fields: page_url, app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn, rtmp_protocol, rtmp_real_time @@ -473,14 +470,18 @@ class InfoExtractor(object): _WORKING = True _NETRC_MACHINE = None IE_DESC = None + SEARCH_KEY = None - _LOGIN_HINTS = { - 'any': 'Use --cookies, --cookies-from-browser, --username and --password, or --netrc to provide account credentials', - 'cookies': ( - 'Use --cookies-from-browser or --cookies for the authentication. ' - 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'), - 'password': 'Use --username and --password, or --netrc to provide account credentials', - } + def _login_hint(self, method=NO_DEFAULT, netrc=None): + password_hint = f'--username and --password, or --netrc ({netrc or self._NETRC_MACHINE}) to provide account credentials' + return { + None: '', + 'any': f'Use --cookies, --cookies-from-browser, {password_hint}', + 'password': f'Use {password_hint}', + 'cookies': ( + 'Use --cookies-from-browser or --cookies for the authentication. 
' + 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'), + }[method if method is not NO_DEFAULT else 'any' if self.supports_login() else 'cookies'] def __init__(self, downloader=None): """Constructor. Receives an optional downloader (a YoutubeDL instance). @@ -543,7 +544,7 @@ class InfoExtractor(object): if username: self._perform_login(username, password) elif self.get_param('username') and False not in (self.IE_DESC, self._NETRC_MACHINE): - self.report_warning(f'Login with password is not supported for this website. {self._LOGIN_HINTS["cookies"]}') + self.report_warning(f'Login with password is not supported for this website. {self._login_hint("cookies")}') self._real_initialize() self._ready = True @@ -629,7 +630,7 @@ class InfoExtractor(object): if country: self._x_forwarded_for_ip = GeoUtils.random_ipv4(country) self._downloader.write_debug( - 'Using fake IP %s (%s) as X-Forwarded-For' % (self._x_forwarded_for_ip, country.upper())) + f'Using fake IP {self._x_forwarded_for_ip} ({country.upper()}) as X-Forwarded-For') def extract(self, url): """Extracts URL information and returns it in list of dicts.""" @@ -710,9 +711,9 @@ class InfoExtractor(object): """A string for getting the InfoExtractor with get_info_extractor""" return cls.__name__[:-2] - @property - def IE_NAME(self): - return compat_str(type(self).__name__[:-2]) + @classproperty + def IE_NAME(cls): + return cls.__name__[:-2] @staticmethod def __can_accept_status_code(err, expected_status): @@ -742,9 +743,9 @@ class InfoExtractor(object): self.report_download_webpage(video_id) elif note is not False: if video_id is None: - self.to_screen('%s' % (note,)) + self.to_screen(str(note)) else: - self.to_screen('%s: %s' % (video_id, note)) + self.to_screen(f'{video_id}: {note}') # Some sites check X-Forwarded-For HTTP header in order to figure out # the origin of the client behind proxy. This allows bypassing geo @@ -780,7 +781,7 @@ class InfoExtractor(object): if errnote is None: errnote = 'Unable to download webpage' - errmsg = '%s: %s' % (errnote, error_to_compat_str(err)) + errmsg = f'{errnote}: {error_to_compat_str(err)}' if fatal: raise ExtractorError(errmsg, cause=err) else: @@ -861,7 +862,7 @@ class InfoExtractor(object): dump = base64.b64encode(webpage_bytes).decode('ascii') self._downloader.to_screen(dump) if self.get_param('write_pages', False): - basen = '%s_%s' % (video_id, urlh.geturl()) + basen = f'{video_id}_{urlh.geturl()}' trim_length = self.get_param('trim_file_name') or 240 if len(basen) > trim_length: h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest() @@ -951,7 +952,7 @@ class InfoExtractor(object): fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return a tuple (xml as an compat_etree_Element, URL handle). + Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle). See _download_webpage docstring for arguments specification. """ @@ -972,7 +973,7 @@ class InfoExtractor(object): transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return the xml as an compat_etree_Element. + Return the xml as an xml.etree.ElementTree.Element. See _download_webpage docstring for arguments specification. 
""" @@ -988,7 +989,7 @@ class InfoExtractor(object): xml_string = transform_source(xml_string) try: return compat_etree_fromstring(xml_string.encode('utf-8')) - except compat_xml_parse_error as ve: + except xml.etree.ElementTree.ParseError as ve: errmsg = '%s: Failed to parse XML ' % video_id if fatal: raise ExtractorError(errmsg, cause=ve) @@ -1099,10 +1100,10 @@ class InfoExtractor(object): def to_screen(self, msg, *args, **kwargs): """Print msg to screen, prefixing it with '[ie_name]'""" - self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs) + self._downloader.to_screen(f'[{self.IE_NAME}] {msg}', *args, **kwargs) def write_debug(self, msg, *args, **kwargs): - self._downloader.write_debug('[%s] %s' % (self.IE_NAME, msg), *args, **kwargs) + self._downloader.write_debug(f'[{self.IE_NAME}] {msg}', *args, **kwargs) def get_param(self, name, default=None, *args, **kwargs): if self._downloader: @@ -1135,11 +1136,7 @@ class InfoExtractor(object): self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')): self.report_warning(msg) return - if method is NO_DEFAULT: - method = 'any' if self.supports_login() else 'cookies' - if method is not None: - assert method in self._LOGIN_HINTS, 'Invalid login method' - msg = '%s. %s' % (msg, self._LOGIN_HINTS[method]) + msg += format_field(self._login_hint(method), template='. %s') raise ExtractorError(msg, expected=True) def raise_geo_restricted( @@ -1205,7 +1202,7 @@ class InfoExtractor(object): """ if string is None: mobj = None - elif isinstance(pattern, (str, compat_Pattern)): + elif isinstance(pattern, (str, re.Pattern)): mobj = re.search(pattern, string, flags) else: for p in pattern: @@ -1258,7 +1255,7 @@ class InfoExtractor(object): else: raise netrc.NetrcParseError( 'No authenticators for %s' % netrc_machine) - except (IOError, netrc.NetrcParseError) as err: + except (OSError, netrc.NetrcParseError) as err: self.report_warning( 'parsing .netrc: %s' % error_to_compat_str(err)) @@ -1928,8 +1925,7 @@ class InfoExtractor(object): def _sort_formats(self, formats, field_preference=[]): if not formats: return - format_sort = self.FormatSort(self, field_preference) - formats.sort(key=lambda f: format_sort.calculate_preference(f)) + formats.sort(key=self.FormatSort(self, field_preference).calculate_preference) def _check_formats(self, formats, video_id): if formats: @@ -1990,17 +1986,19 @@ class InfoExtractor(object): def _extract_f4m_formats(self, manifest_url, video_id, preference=None, quality=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None, data=None, headers={}, query={}): - manifest = self._download_xml( + res = self._download_xml_handle( manifest_url, video_id, 'Downloading f4m manifest', 'Unable to download f4m manifest', # Some manifests may be malformed, e.g. 
prosiebensat1 generated manifests # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244) transform_source=transform_source, fatal=fatal, data=data, headers=headers, query=query) - - if manifest is False: + if res is False: return [] + manifest, urlh = res + manifest_url = urlh.geturl() + return self._parse_f4m_formats( manifest, manifest_url, video_id, preference=preference, quality=quality, f4m_id=f4m_id, transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id) @@ -2008,7 +2006,7 @@ class InfoExtractor(object): def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, quality=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None): - if not isinstance(manifest, compat_etree_Element) and not fatal: + if not isinstance(manifest, xml.etree.ElementTree.Element) and not fatal: return [] # currently yt-dlp cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy @@ -2408,12 +2406,14 @@ class InfoExtractor(object): return '/'.join(out) def _extract_smil_formats_and_subtitles(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None): - smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source) - - if smil is False: + res = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source) + if res is False: assert not fatal return [], {} + smil, urlh = res + smil_url = urlh.geturl() + namespace = self._parse_smil_namespace(smil) fmts = self._parse_smil_formats( @@ -2430,13 +2430,17 @@ class InfoExtractor(object): return fmts def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None): - smil = self._download_smil(smil_url, video_id, fatal=fatal) - if smil is False: + res = self._download_smil(smil_url, video_id, fatal=fatal) + if res is False: return {} + + smil, urlh = res + smil_url = urlh.geturl() + return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params) def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None): - return self._download_xml( + return self._download_xml_handle( smil_url, video_id, 'Downloading SMIL file', 'Unable to download SMIL file', fatal=fatal, transform_source=transform_source) @@ -2615,11 +2619,15 @@ class InfoExtractor(object): return subtitles def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True): - xspf = self._download_xml( + res = self._download_xml_handle( xspf_url, playlist_id, 'Downloading xpsf playlist', 'Unable to download xspf manifest', fatal=fatal) - if xspf is False: + if res is False: return [] + + xspf, urlh = res + xspf_url = urlh.geturl() + return self._parse_xspf( xspf, playlist_id, xspf_url=xspf_url, xspf_base_url=base_url(xspf_url)) @@ -2684,7 +2692,10 @@ class InfoExtractor(object): mpd_doc, urlh = res if mpd_doc is None: return [], {} - mpd_base_url = base_url(urlh.geturl()) + + # We could have been redirected to a new url when we retrieved our mpd file. 
+ mpd_url = urlh.geturl() + mpd_base_url = base_url(mpd_url) return self._parse_mpd_formats_and_subtitles( mpd_doc, mpd_id, mpd_base_url, mpd_url) @@ -2800,7 +2811,7 @@ class InfoExtractor(object): content_type = 'video' elif codecs['acodec'] != 'none': content_type = 'audio' - elif codecs.get('tcodec', 'none') != 'none': + elif codecs.get('scodec', 'none') != 'none': content_type = 'text' elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'): content_type = 'text' @@ -3334,7 +3345,7 @@ class InfoExtractor(object): http_f = f.copy() del http_f['manifest_url'] http_url = re.sub( - REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url']) + REPL_REGEX, protocol + fr'://{http_host}/\g<1>{qualities[i]}\3', f['url']) http_f.update({ 'format_id': http_f['format_id'].replace('hls-', protocol + '-'), 'url': http_url, @@ -3355,7 +3366,7 @@ class InfoExtractor(object): formats = [] def manifest_url(manifest): - m_url = '%s/%s' % (http_base_url, manifest) + m_url = f'{http_base_url}/{manifest}' if query: m_url += '?%s' % query return m_url @@ -3392,7 +3403,7 @@ class InfoExtractor(object): for protocol in ('rtmp', 'rtsp'): if protocol not in skip_protocols: formats.append({ - 'url': '%s:%s' % (protocol, url_base), + 'url': f'{protocol}:{url_base}', 'format_id': protocol, 'protocol': protocol, }) @@ -3558,7 +3569,7 @@ class InfoExtractor(object): def _int(self, v, name, fatal=False, **kwargs): res = int_or_none(v, **kwargs) if res is None: - msg = 'Failed to extract %s: Could not parse value %r' % (name, v) + msg = f'Failed to extract {name}: Could not parse value {v!r}' if fatal: raise ExtractorError(msg) else: @@ -3568,7 +3579,7 @@ class InfoExtractor(object): def _float(self, v, name, fatal=False, **kwargs): res = float_or_none(v, **kwargs) if res is None: - msg = 'Failed to extract %s: Could not parse value %r' % (name, v) + msg = f'Failed to extract {name}: Could not parse value {v!r}' if fatal: raise ExtractorError(msg) else: @@ -3606,9 +3617,7 @@ class InfoExtractor(object): for header, cookies in url_handle.headers.items(): if header.lower() != 'set-cookie': continue - if sys.version_info[0] >= 3: - cookies = cookies.encode('iso-8859-1') - cookies = cookies.decode('utf-8') + cookies = cookies.encode('iso-8859-1').decode('utf-8') cookie_value = re.search( r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies) if cookie_value: @@ -3616,34 +3625,55 @@ class InfoExtractor(object): self._set_cookie(domain, cookie, value) break - def get_testcases(self, include_onlymatching=False): - t = getattr(self, '_TEST', None) + @classmethod + def get_testcases(cls, include_onlymatching=False): + t = getattr(cls, '_TEST', None) if t: - assert not hasattr(self, '_TESTS'), \ - '%s has _TEST and _TESTS' % type(self).__name__ + assert not hasattr(cls, '_TESTS'), f'{cls.ie_key()}IE has _TEST and _TESTS' tests = [t] else: - tests = getattr(self, '_TESTS', []) + tests = getattr(cls, '_TESTS', []) for t in tests: if not include_onlymatching and t.get('only_matching', False): continue - t['name'] = type(self).__name__[:-len('IE')] + t['name'] = cls.ie_key() yield t - def is_suitable(self, age_limit): - """ Test whether the extractor is generally suitable for the given - age limit (i.e. 
pornographic sites are not, all others usually are) """ - - any_restricted = False - for tc in self.get_testcases(include_onlymatching=False): - if tc.get('playlist', []): - tc = tc['playlist'][0] - is_restricted = age_restricted( - tc.get('info_dict', {}).get('age_limit'), age_limit) - if not is_restricted: - return True - any_restricted = any_restricted or is_restricted - return not any_restricted + @classproperty + def age_limit(cls): + """Get age limit from the testcases""" + return max(traverse_obj( + tuple(cls.get_testcases(include_onlymatching=False)), + (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0]) + + @classmethod + def is_suitable(cls, age_limit): + """Test whether the extractor is generally suitable for the given age limit""" + return not age_restricted(cls.age_limit, age_limit) + + @classmethod + def description(cls, *, markdown=True, search_examples=None): + """Description of the extractor""" + desc = '' + if cls._NETRC_MACHINE: + if markdown: + desc += f' [<abbr title="netrc machine"><em>{cls._NETRC_MACHINE}</em></abbr>]' + else: + desc += f' [{cls._NETRC_MACHINE}]' + if cls.IE_DESC is False: + desc += ' [HIDDEN]' + elif cls.IE_DESC: + desc += f' {cls.IE_DESC}' + if cls.SEARCH_KEY: + desc += f'; "{cls.SEARCH_KEY}:" prefix' + if search_examples: + _COUNTS = ('', '5', '10', 'all') + desc += f' (Example: "{cls.SEARCH_KEY}{random.choice(_COUNTS)}:{random.choice(search_examples)}")' + if not cls.working(): + desc += ' (**Currently broken**)' if markdown else ' (Currently broken)' + + name = f' - **{cls.IE_NAME}**' if markdown else cls.IE_NAME + return f'{name}:{desc}' if desc else name def extract_subtitles(self, *args, **kwargs): if (self.get_param('writesubtitles', False) @@ -3688,7 +3718,7 @@ class InfoExtractor(object): def _merge_subtitle_items(subtitle_list1, subtitle_list2): """ Merge subtitle items for one language. Items with duplicated URLs/data will be dropped. 
""" - list1_data = set((item.get('url'), item.get('data')) for item in subtitle_list1) + list1_data = {(item.get('url'), item.get('data')) for item in subtitle_list1} ret = list(subtitle_list1) ret.extend(item for item in subtitle_list2 if (item.get('url'), item.get('data')) not in list1_data) return ret @@ -3712,11 +3742,15 @@ class InfoExtractor(object): def _get_automatic_captions(self, *args, **kwargs): raise NotImplementedError('This method must be implemented by subclasses') + @property + def _cookies_passed(self): + """Whether cookies have been passed to YoutubeDL""" + return self.get_param('cookiefile') is not None or self.get_param('cookiesfrombrowser') is not None + def mark_watched(self, *args, **kwargs): if not self.get_param('mark_watched', False): return - if (self.supports_login() and self._get_login_info()[0] is not None - or self.get_param('cookiefile') or self.get_param('cookiesfrombrowser')): + if self.supports_login() and self._get_login_info()[0] is not None or self._cookies_passed: self._mark_watched(*args, **kwargs) def _mark_watched(self, *args, **kwargs): @@ -3801,7 +3835,7 @@ class SearchInfoExtractor(InfoExtractor): else: n = int(prefix) if n <= 0: - raise ExtractorError('invalid download number %s for query "%s"' % (n, query)) + raise ExtractorError(f'invalid download number {n} for query "{query}"') elif n > self._MAX_RESULTS: self.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n)) n = self._MAX_RESULTS @@ -3818,6 +3852,6 @@ class SearchInfoExtractor(InfoExtractor): """Returns an iterator of search results""" raise NotImplementedError('This method must be implemented by subclasses') - @property - def SEARCH_KEY(self): - return self._SEARCH_KEY + @classproperty + def SEARCH_KEY(cls): + return cls._SEARCH_KEY diff --git a/yt_dlp/extractor/commonmistakes.py b/yt_dlp/extractor/commonmistakes.py index 051269652..62bd51fd7 100644 --- a/yt_dlp/extractor/commonmistakes.py +++ b/yt_dlp/extractor/commonmistakes.py @@ -1,7 +1,3 @@ -from __future__ import unicode_literals - -import sys - from .common import InfoExtractor from ..utils import ExtractorError @@ -35,9 +31,7 @@ class UnicodeBOMIE(InfoExtractor): IE_DESC = False _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$' - # Disable test for python 3.2 since BOM is broken in re in this version - # (see https://github.com/ytdl-org/youtube-dl/issues/9751) - _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{ + _TESTS = [{ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', 'only_matching': True, }] diff --git a/yt_dlp/extractor/commonprotocols.py b/yt_dlp/extractor/commonprotocols.py index 3708c6ad2..e8f19b9e0 100644 --- a/yt_dlp/extractor/commonprotocols.py +++ b/yt_dlp/extractor/commonprotocols.py @@ -1,10 +1,5 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor -from ..compat import ( - compat_urlparse, -) +from ..compat import compat_urlparse class RtmpIE(InfoExtractor): diff --git a/yt_dlp/extractor/condenast.py b/yt_dlp/extractor/condenast.py index 54e7af8b0..cf6e40cb8 100644 --- a/yt_dlp/extractor/condenast.py +++ b/yt_dlp/extractor/condenast.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/contv.py b/yt_dlp/extractor/contv.py index 84b462d40..50648a536 100644 --- a/yt_dlp/extractor/contv.py +++ b/yt_dlp/extractor/contv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import 
InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/corus.py b/yt_dlp/extractor/corus.py index 119461375..7b83c0390 100644 --- a/yt_dlp/extractor/corus.py +++ b/yt_dlp/extractor/corus.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .theplatform import ThePlatformFeedIE from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/coub.py b/yt_dlp/extractor/coub.py index e90aa1954..b462acaf0 100644 --- a/yt_dlp/extractor/coub.py +++ b/yt_dlp/extractor/coub.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py index d49f1ca74..5ef5afcc2 100644 --- a/yt_dlp/extractor/cozytv.py +++ b/yt_dlp/extractor/cozytv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import unified_strdate diff --git a/yt_dlp/extractor/cpac.py b/yt_dlp/extractor/cpac.py index 22741152c..65ac2497f 100644 --- a/yt_dlp/extractor/cpac.py +++ b/yt_dlp/extractor/cpac.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -12,13 +9,6 @@ from ..utils import ( urljoin, ) -# compat_range -try: - if callable(xrange): - range = xrange -except (NameError, TypeError): - pass - class CPACIE(InfoExtractor): IE_NAME = 'cpac' diff --git a/yt_dlp/extractor/cracked.py b/yt_dlp/extractor/cracked.py index f77a68ece..c6aabccc6 100644 --- a/yt_dlp/extractor/cracked.py +++ b/yt_dlp/extractor/cracked.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/crackle.py b/yt_dlp/extractor/crackle.py index db4962c42..319374f3b 100644 --- a/yt_dlp/extractor/crackle.py +++ b/yt_dlp/extractor/crackle.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals, division - import hashlib import hmac import re diff --git a/yt_dlp/extractor/craftsy.py b/yt_dlp/extractor/craftsy.py index ed2f4420e..307bfb946 100644 --- a/yt_dlp/extractor/craftsy.py +++ b/yt_dlp/extractor/craftsy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from .common import InfoExtractor diff --git a/yt_dlp/extractor/crooksandliars.py b/yt_dlp/extractor/crooksandliars.py index 7fb782db7..c831a3ae0 100644 --- a/yt_dlp/extractor/crooksandliars.py +++ b/yt_dlp/extractor/crooksandliars.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/crowdbunker.py b/yt_dlp/extractor/crowdbunker.py index 72906afef..75d90b5c5 100644 --- a/yt_dlp/extractor/crowdbunker.py +++ b/yt_dlp/extractor/crowdbunker.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/crunchyroll.py b/yt_dlp/extractor/crunchyroll.py index 7edb645f8..bb1dbbaad 100644 --- a/yt_dlp/extractor/crunchyroll.py +++ b/yt_dlp/extractor/crunchyroll.py @@ -1,18 +1,15 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import re import json import zlib +import xml.etree.ElementTree from hashlib import sha1 from math import pow, sqrt, floor from .common import InfoExtractor from .vrv import VRVBaseIE from ..compat import ( 
compat_b64decode, - compat_etree_Element, compat_etree_fromstring, compat_str, compat_urllib_parse_urlencode, @@ -395,7 +392,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if not isinstance(sub_doc, compat_etree_Element): + if not isinstance(sub_doc, xml.etree.ElementTree.Element): continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -525,7 +522,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_quality': stream_quality, 'current_page': url, }) - if isinstance(streamdata, compat_etree_Element): + if isinstance(streamdata, xml.etree.ElementTree.Element): stream_info = streamdata.find('./{default}preload/stream_info') if stream_info is not None: stream_infos.append(stream_info) @@ -536,7 +533,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if isinstance(stream_info, compat_etree_Element): + if isinstance(stream_info, xml.etree.ElementTree.Element): stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') @@ -611,7 +608,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text season = episode = episode_number = duration = None - if isinstance(metadata, compat_etree_Element): + if isinstance(metadata, xml.etree.ElementTree.Element): season = xpath_text(metadata, 'series_title') episode = xpath_text(metadata, 'episode_title') episode_number = int_or_none(xpath_text(metadata, 'episode_number')) diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py index f51159bbe..cb1523617 100644 --- a/yt_dlp/extractor/cspan.py +++ b/yt_dlp/extractor/cspan.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ctsnews.py b/yt_dlp/extractor/ctsnews.py index 679f1d92e..cec178f03 100644 --- a/yt_dlp/extractor/ctsnews.py +++ b/yt_dlp/extractor/ctsnews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import unified_timestamp from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/ctv.py b/yt_dlp/extractor/ctv.py index 756bcc2be..f125c1ce9 100644 --- a/yt_dlp/extractor/ctv.py +++ b/yt_dlp/extractor/ctv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/ctvnews.py b/yt_dlp/extractor/ctvnews.py index 952f4c747..ad3f0d8e4 100644 --- a/yt_dlp/extractor/ctvnews.py +++ b/yt_dlp/extractor/ctvnews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/cultureunplugged.py b/yt_dlp/extractor/cultureunplugged.py index 9002e4cef..2fb22800f 100644 --- a/yt_dlp/extractor/cultureunplugged.py +++ b/yt_dlp/extractor/cultureunplugged.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import time from .common import InfoExtractor diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index b8abcf7a5..5b76b29ff 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git 
a/yt_dlp/extractor/cwtv.py b/yt_dlp/extractor/cwtv.py index 73382431b..07239f39c 100644 --- a/yt_dlp/extractor/cwtv.py +++ b/yt_dlp/extractor/cwtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/cybrary.py b/yt_dlp/extractor/cybrary.py index c278f0fe0..7da581828 100644 --- a/yt_dlp/extractor/cybrary.py +++ b/yt_dlp/extractor/cybrary.py @@ -1,5 +1,4 @@ -\ufeff# coding: utf-8 -from .common import InfoExtractor +\ufefffrom .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/daftsex.py b/yt_dlp/extractor/daftsex.py index 6037fd9ca..0fe014f76 100644 --- a/yt_dlp/extractor/daftsex.py +++ b/yt_dlp/extractor/daftsex.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( diff --git a/yt_dlp/extractor/dailymail.py b/yt_dlp/extractor/dailymail.py index 67b88fd56..5451dbf00 100644 --- a/yt_dlp/extractor/dailymail.py +++ b/yt_dlp/extractor/dailymail.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py index 9cb56185b..3b090d5e0 100644 --- a/yt_dlp/extractor/dailymotion.py +++ b/yt_dlp/extractor/dailymotion.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import json import re diff --git a/yt_dlp/extractor/damtomo.py b/yt_dlp/extractor/damtomo.py index 456cd35a4..962d9741b 100644 --- a/yt_dlp/extractor/damtomo.py +++ b/yt_dlp/extractor/damtomo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/daum.py b/yt_dlp/extractor/daum.py index 4362e92cb..a1f197b0b 100644 --- a/yt_dlp/extractor/daum.py +++ b/yt_dlp/extractor/daum.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/dbtv.py b/yt_dlp/extractor/dbtv.py index 8e73176a6..2beccd8b5 100644 --- a/yt_dlp/extractor/dbtv.py +++ b/yt_dlp/extractor/dbtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dctp.py b/yt_dlp/extractor/dctp.py index e700f8d86..24bb6aca2 100644 --- a/yt_dlp/extractor/dctp.py +++ b/yt_dlp/extractor/dctp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/deezer.py b/yt_dlp/extractor/deezer.py index 7ba02e552..bee1c7501 100644 --- a/yt_dlp/extractor/deezer.py +++ b/yt_dlp/extractor/deezer.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/defense.py b/yt_dlp/extractor/defense.py index 9fe144e14..7d73ea862 100644 --- a/yt_dlp/extractor/defense.py +++ b/yt_dlp/extractor/defense.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/democracynow.py b/yt_dlp/extractor/democracynow.py index 5c9c0ecdc..af327e6c6 100644 --- a/yt_dlp/extractor/democracynow.py +++ b/yt_dlp/extractor/democracynow.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import
unicode_literals - import re import os.path diff --git a/yt_dlp/extractor/dfb.py b/yt_dlp/extractor/dfb.py index 97f70fc7b..5aca72988 100644 --- a/yt_dlp/extractor/dfb.py +++ b/yt_dlp/extractor/dfb.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import unified_strdate diff --git a/yt_dlp/extractor/dhm.py b/yt_dlp/extractor/dhm.py index aee72a6ed..3d42fc2b0 100644 --- a/yt_dlp/extractor/dhm.py +++ b/yt_dlp/extractor/dhm.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import parse_duration diff --git a/yt_dlp/extractor/digg.py b/yt_dlp/extractor/digg.py index 913c1750f..86e8a6fac 100644 --- a/yt_dlp/extractor/digg.py +++ b/yt_dlp/extractor/digg.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/digitalconcerthall.py b/yt_dlp/extractor/digitalconcerthall.py index 8398ae30e..c891ad0a6 100644 --- a/yt_dlp/extractor/digitalconcerthall.py +++ b/yt_dlp/extractor/digitalconcerthall.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/digiteka.py b/yt_dlp/extractor/digiteka.py index d63204778..5d244cb08 100644 --- a/yt_dlp/extractor/digiteka.py +++ b/yt_dlp/extractor/digiteka.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/discovery.py b/yt_dlp/extractor/discovery.py index fd3ad75c7..fd3fc8fb0 100644 --- a/yt_dlp/extractor/discovery.py +++ b/yt_dlp/extractor/discovery.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import random import string diff --git a/yt_dlp/extractor/discoverygo.py b/yt_dlp/extractor/discoverygo.py index 9e7b14a7d..7b4278c88 100644 --- a/yt_dlp/extractor/discoverygo.py +++ b/yt_dlp/extractor/discoverygo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/discoveryvr.py b/yt_dlp/extractor/discoveryvr.py deleted file mode 100644 index cb63c2649..000000000 --- a/yt_dlp/extractor/discoveryvr.py +++ /dev/null @@ -1,59 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import parse_duration - - -class DiscoveryVRIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)' - _TEST = { - 'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction', - 'md5': '32b1929798c464a54356378b7912eca4', - 'info_dict': { - 'id': 'discovery-vr-an-introduction', - 'ext': 'mp4', - 'title': 'Discovery VR - An Introduction', - 'description': 'md5:80d418a10efb8899d9403e61d8790f06', - } - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - bootstrap_data = self._search_regex( - r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";', - webpage, 'bootstrap data') - bootstrap_data = self._parse_json( - bootstrap_data.encode('utf-8').decode('unicode_escape'), - display_id) - videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos'] - video_data = next(video for video in videos if video.get('slug') == display_id) - - series = video_data.get('showTitle') - title = episode = video_data.get('title') or series - if series and series != title: - title = '%s - %s' % (series, title) - - 
formats = [] - for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')): - f_url = video_data.get(f) - if not f_url: - continue - formats.append({ - 'format_id': format_id, - 'url': f_url, - }) - - return { - 'id': display_id, - 'display_id': display_id, - 'title': title, - 'description': video_data.get('description'), - 'thumbnail': video_data.get('thumbnail'), - 'duration': parse_duration(video_data.get('runTime')), - 'formats': formats, - 'episode': episode, - 'series': series, - } diff --git a/yt_dlp/extractor/disney.py b/yt_dlp/extractor/disney.py index 0ad7b1f46..f9af59a57 100644 --- a/yt_dlp/extractor/disney.py +++ b/yt_dlp/extractor/disney.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dispeak.py b/yt_dlp/extractor/dispeak.py index 3d651f3ab..d4f3324e7 100644 --- a/yt_dlp/extractor/dispeak.py +++ b/yt_dlp/extractor/dispeak.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dlive.py b/yt_dlp/extractor/dlive.py index 7410eb6c8..31b4a568f 100644 --- a/yt_dlp/extractor/dlive.py +++ b/yt_dlp/extractor/dlive.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/doodstream.py b/yt_dlp/extractor/doodstream.py index f692127c2..f1001c778 100644 --- a/yt_dlp/extractor/doodstream.py +++ b/yt_dlp/extractor/doodstream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import string import random import time diff --git a/yt_dlp/extractor/dotsub.py b/yt_dlp/extractor/dotsub.py index 148605c0b..079f83750 100644 --- a/yt_dlp/extractor/dotsub.py +++ b/yt_dlp/extractor/dotsub.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index 26a8d645c..477f4687c 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import time import hashlib import re diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index a25f27c3a..5c4f3c892 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import uuid @@ -11,6 +8,7 @@ from ..utils import ( ExtractorError, float_or_none, int_or_none, + remove_start, strip_or_none, try_get, unified_timestamp, @@ -314,7 +312,7 @@ class DPlayIE(DPlayBaseIE): def _real_extract(self, url): mobj = self._match_valid_url(url) display_id = mobj.group('id') - domain = mobj.group('domain').lstrip('www.') + domain = remove_start(mobj.group('domain'), 'www.') country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country') host = 'disco-api.' 
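# Illustrative sketch (not part of the patch): the dplay.py hunk above swaps
# str.lstrip for remove_start, because lstrip('www.') strips any leading run of
# the characters {'w', '.'} rather than the literal prefix, and would mangle a
# (hypothetical) domain that itself starts with 'w':
def remove_start(s, start):
    # simplified equivalent of yt_dlp.utils.remove_start
    return s[len(start):] if s is not None and s.startswith(start) else s

assert 'www.wtv.example'.lstrip('www.') == 'tv.example'          # over-stripped
assert remove_start('www.wtv.example', 'www.') == 'wtv.example'  # exact prefix only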
+ domain if domain[0] == 'd' else 'eu2-prod.disco-api.com' return self._get_disco_api_info( diff --git a/yt_dlp/extractor/drbonanza.py b/yt_dlp/extractor/drbonanza.py index ea0f06d3d..dca8c89d0 100644 --- a/yt_dlp/extractor/drbonanza.py +++ b/yt_dlp/extractor/drbonanza.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( js_to_json, diff --git a/yt_dlp/extractor/dreisat.py b/yt_dlp/extractor/dreisat.py index 5a07c18f4..80a724607 100644 --- a/yt_dlp/extractor/dreisat.py +++ b/yt_dlp/extractor/dreisat.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .zdf import ZDFIE diff --git a/yt_dlp/extractor/drooble.py b/yt_dlp/extractor/drooble.py index 058425095..106e5c457 100644 --- a/yt_dlp/extractor/drooble.py +++ b/yt_dlp/extractor/drooble.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py index 2559657ad..6ac0c713a 100644 --- a/yt_dlp/extractor/dropbox.py +++ b/yt_dlp/extractor/dropbox.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import os.path import re diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index 2fa61950c..475825eb8 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from .vimeo import VHXEmbedIE from ..utils import ( diff --git a/yt_dlp/extractor/drtuber.py b/yt_dlp/extractor/drtuber.py index 540b86a16..3149e319f 100644 --- a/yt_dlp/extractor/drtuber.py +++ b/yt_dlp/extractor/drtuber.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py index 37e4d5b26..708b72fae 100644 --- a/yt_dlp/extractor/drtv.py +++ b/yt_dlp/extractor/drtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import binascii import hashlib import re @@ -26,7 +23,7 @@ class DRTVIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: - (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*| + (?:www\.)?dr\.dk/(?:tv/se|nyheder|(?:radio|lyd)(?:/ondemand)?)/(?:[^/]+/)*| (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode|program)/ ) (?P<id>[\da-z_-]+) @@ -54,6 +51,7 @@ class DRTVIE(InfoExtractor): 'release_year': 2016, }, 'expected_warnings': ['Unable to download f4m manifest'], + 'skip': 'this video has been removed', }, { # embed 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang', @@ -74,31 +72,41 @@ class DRTVIE(InfoExtractor): # with SignLanguage formats 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder', 'info_dict': { - 'id': 'historien-om-danmark-stenalder', + 'id': '00831690010', 'ext': 'mp4', 'title': 'Historien om Danmark: Stenalder', 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a', 'timestamp': 1546628400, 'upload_date': '20190104', - 'duration': 3502.56, + 'duration': 3504.618, 'formats': 'mincount:20', + 'release_year': 2017, + 'season_id': 'urn:dr:mu:bundle:5afc03ad6187a4065ca5fd35', + 'season_number': 1, + 'season': 'Historien om Danmark', + 'series': 'Historien om Danmark', }, 'params': { 'skip_download': True, }, }, { - 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', + 'url': 
'https://www.dr.dk/lyd/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9', 'only_matching': True, }, { 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769', 'info_dict': { 'id': '00951930010', 'ext': 'mp4', - 'title': 'Bonderøven (1:8)', - 'description': 'md5:3cf18fc0d3b205745d4505f896af8121', - 'timestamp': 1546542000, - 'upload_date': '20190103', + 'title': 'Bonderøven 2019 (1:8)', + 'description': 'md5:b6dcfe9b6f0bea6703e9a0092739a5bd', + 'timestamp': 1603188600, + 'upload_date': '20201020', 'duration': 2576.6, + 'season': 'Bonderøven 2019', + 'season_id': 'urn:dr:mu:bundle:5c201667a11fa01ca4528ce5', + 'release_year': 2019, + 'season_number': 2019, + 'series': 'Frank & Kastaniegaarden' }, 'params': { 'skip_download': True, @@ -112,6 +120,24 @@ class DRTVIE(InfoExtractor): }, { 'url': 'https://www.dr.dk/drtv/program/jagten_220924', 'only_matching': True, + }, { + 'url': 'https://www.dr.dk/lyd/p4aarhus/regionale-nyheder-ar4/regionale-nyheder-2022-05-05-12-30-3', + 'info_dict': { + 'id': 'urn:dr:mu:programcard:6265cb2571401424d0360113', + 'title': "Regionale nyheder", + 'ext': 'mp4', + 'duration': 120.043, + 'series': 'P4 Østjylland regionale nyheder', + 'timestamp': 1651746600, + 'season': 'Regionale nyheder', + 'release_year': 0, + 'season_id': 'urn:dr:mu:bundle:61c26889539f0201586b73c5', + 'description': '', + 'upload_date': '20220505', + }, + 'params': { + 'skip_download': True, + }, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/dtube.py b/yt_dlp/extractor/dtube.py index ad247b7dd..25a98f625 100644 --- a/yt_dlp/extractor/dtube.py +++ b/yt_dlp/extractor/dtube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from socket import timeout diff --git a/yt_dlp/extractor/duboku.py b/yt_dlp/extractor/duboku.py index a87597873..24403842d 100644 --- a/yt_dlp/extractor/duboku.py +++ b/yt_dlp/extractor/duboku.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dumpert.py b/yt_dlp/extractor/dumpert.py index d9d9afdec..dc61115ff 100644 --- a/yt_dlp/extractor/dumpert.py +++ b/yt_dlp/extractor/dumpert.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/dvtv.py b/yt_dlp/extractor/dvtv.py index 08663cffb..61d469f11 100644 --- a/yt_dlp/extractor/dvtv.py +++ b/yt_dlp/extractor/dvtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/dw.py b/yt_dlp/extractor/dw.py index 6eaee07b4..ee2365ddd 100644 --- a/yt_dlp/extractor/dw.py +++ b/yt_dlp/extractor/dw.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/eagleplatform.py b/yt_dlp/extractor/eagleplatform.py index f86731a0c..e2ecd4b7c 100644 --- a/yt_dlp/extractor/eagleplatform.py +++ b/yt_dlp/extractor/eagleplatform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ebaumsworld.py b/yt_dlp/extractor/ebaumsworld.py index c97682cd3..0854d0344 100644 --- a/yt_dlp/extractor/ebaumsworld.py +++ b/yt_dlp/extractor/ebaumsworld.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git
a/yt_dlp/extractor/echomsk.py b/yt_dlp/extractor/echomsk.py index 6b7cc652f..850eabbff 100644 --- a/yt_dlp/extractor/echomsk.py +++ b/yt_dlp/extractor/echomsk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/egghead.py b/yt_dlp/extractor/egghead.py index b6b86768c..d5c954961 100644 --- a/yt_dlp/extractor/egghead.py +++ b/yt_dlp/extractor/egghead.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/ehow.py b/yt_dlp/extractor/ehow.py index b1cd4f5d4..74469ce36 100644 --- a/yt_dlp/extractor/ehow.py +++ b/yt_dlp/extractor/ehow.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/eighttracks.py b/yt_dlp/extractor/eighttracks.py index 9a44f89f3..3dd9ab1b3 100644 --- a/yt_dlp/extractor/eighttracks.py +++ b/yt_dlp/extractor/eighttracks.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import random diff --git a/yt_dlp/extractor/einthusan.py b/yt_dlp/extractor/einthusan.py index 7af279a53..37be68c61 100644 --- a/yt_dlp/extractor/einthusan.py +++ b/yt_dlp/extractor/einthusan.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/eitb.py b/yt_dlp/extractor/eitb.py index ee5ead18b..01a47f6fd 100644 --- a/yt_dlp/extractor/eitb.py +++ b/yt_dlp/extractor/eitb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/ellentube.py b/yt_dlp/extractor/ellentube.py index d451bc048..bcd458cdf 100644 --- a/yt_dlp/extractor/ellentube.py +++ b/yt_dlp/extractor/ellentube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/elonet.py b/yt_dlp/extractor/elonet.py index 9c6aea28e..f99e12250 100644 --- a/yt_dlp/extractor/elonet.py +++ b/yt_dlp/extractor/elonet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import determine_ext diff --git a/yt_dlp/extractor/elpais.py b/yt_dlp/extractor/elpais.py index b89f6db62..7c6c88075 100644 --- a/yt_dlp/extractor/elpais.py +++ b/yt_dlp/extractor/elpais.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import strip_jsonp, unified_strdate diff --git a/yt_dlp/extractor/embedly.py b/yt_dlp/extractor/embedly.py index a5820b21e..a8d1f3c55 100644 --- a/yt_dlp/extractor/embedly.py +++ b/yt_dlp/extractor/embedly.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/engadget.py b/yt_dlp/extractor/engadget.py index 733bf322f..e7c5d7bf1 100644 --- a/yt_dlp/extractor/engadget.py +++ b/yt_dlp/extractor/engadget.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py index cd19325bc..89424785e 100644 --- a/yt_dlp/extractor/epicon.py +++ 
b/yt_dlp/extractor/epicon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/eporner.py b/yt_dlp/extractor/eporner.py index 25a0d9799..6bc70c5c6 100644 --- a/yt_dlp/extractor/eporner.py +++ b/yt_dlp/extractor/eporner.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( encode_base_n, diff --git a/yt_dlp/extractor/eroprofile.py b/yt_dlp/extractor/eroprofile.py index 5d5e7f244..2b61f3be7 100644 --- a/yt_dlp/extractor/eroprofile.py +++ b/yt_dlp/extractor/eroprofile.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ertgr.py b/yt_dlp/extractor/ertgr.py index 19ce23f01..507f0a5c1 100644 --- a/yt_dlp/extractor/ertgr.py +++ b/yt_dlp/extractor/ertgr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/escapist.py b/yt_dlp/extractor/escapist.py index 4cd815ebc..5d9c46f72 100644 --- a/yt_dlp/extractor/escapist.py +++ b/yt_dlp/extractor/escapist.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py index dc50f3b8b..8fad70e6b 100644 --- a/yt_dlp/extractor/espn.py +++ b/yt_dlp/extractor/espn.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/esri.py b/yt_dlp/extractor/esri.py index e9dcaeb1d..1736788db 100644 --- a/yt_dlp/extractor/esri.py +++ b/yt_dlp/extractor/esri.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/europa.py b/yt_dlp/extractor/europa.py index 60ab2ce13..ea20b4d4d 100644 --- a/yt_dlp/extractor/europa.py +++ b/yt_dlp/extractor/europa.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/europeantour.py b/yt_dlp/extractor/europeantour.py index e28f067be..1995a745d 100644 --- a/yt_dlp/extractor/europeantour.py +++ b/yt_dlp/extractor/europeantour.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py index 2759e7436..4435f08e0 100644 --- a/yt_dlp/extractor/euscreen.py +++ b/yt_dlp/extractor/euscreen.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/everyonesmixtape.py b/yt_dlp/extractor/everyonesmixtape.py deleted file mode 100644 index 80cb032be..000000000 --- a/yt_dlp/extractor/everyonesmixtape.py +++ /dev/null @@ -1,76 +0,0 @@ -from __future__ import unicode_literals - - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - sanitized_Request, -) - - -class EveryonesMixtapeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?everyonesmixtape\.com/#/mix/(?P<id>[0-9a-zA-Z]+)(?:/(?P<songnr>[0-9]))?$' - - _TESTS = [{ - 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi/5', - 'info_dict': { - 'id': '5bfseWNmlds', - 'ext': 'mp4', - 'title': "Passion Pit - \"Sleepyhead\" (Official Music Video)", - 'uploader': 'FKR.TV', - 
'uploader_id': 'frenchkissrecords', - 'description': "Music video for \"Sleepyhead\" from Passion Pit's debut EP Chunk Of Change.\nBuy on iTunes: https://itunes.apple.com/us/album/chunk-of-change-ep/id300087641\n\nDirected by The Wilderness.\n\nhttp://www.passionpitmusic.com\nhttp://www.frenchkissrecords.com", - 'upload_date': '20081015' - }, - 'params': { - 'skip_download': True, # This is simply YouTube - } - }, { - 'url': 'http://everyonesmixtape.com/#/mix/m7m0jJAbMQi', - 'info_dict': { - 'id': 'm7m0jJAbMQi', - 'title': 'Driving', - }, - 'playlist_count': 24 - }] - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - playlist_id = mobj.group('id') - - pllist_url = 'http://everyonesmixtape.com/mixtape.php?a=getMixes&u=-1&linked=%s&explore=' % playlist_id - pllist_req = sanitized_Request(pllist_url) - pllist_req.add_header('X-Requested-With', 'XMLHttpRequest') - - playlist_list = self._download_json( - pllist_req, playlist_id, note='Downloading playlist metadata') - try: - playlist_no = next(playlist['id'] - for playlist in playlist_list - if playlist['code'] == playlist_id) - except StopIteration: - raise ExtractorError('Playlist id not found') - - pl_url = 'http://everyonesmixtape.com/mixtape.php?a=getMix&id=%s&userId=null&code=' % playlist_no - pl_req = sanitized_Request(pl_url) - pl_req.add_header('X-Requested-With', 'XMLHttpRequest') - playlist = self._download_json( - pl_req, playlist_id, note='Downloading playlist info') - - entries = [{ - '_type': 'url', - 'url': t['url'], - 'title': t['title'], - } for t in playlist['tracks']] - - if mobj.group('songnr'): - songnr = int(mobj.group('songnr')) - 1 - return entries[songnr] - - playlist_title = playlist['mixData']['name'] - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': playlist_title, - 'entries': entries, - } diff --git a/yt_dlp/extractor/expotv.py b/yt_dlp/extractor/expotv.py index 95a897782..92eaf4248 100644 --- a/yt_dlp/extractor/expotv.py +++ b/yt_dlp/extractor/expotv.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/expressen.py b/yt_dlp/extractor/expressen.py index dc8b855d2..a1b8e9bc9 100644 --- a/yt_dlp/extractor/expressen.py +++ b/yt_dlp/extractor/expressen.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 0cb686304..9c5a5f482 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1,5 +1,4 @@ -# flake8: noqa -from __future__ import unicode_literals +# flake8: noqa: F401 from .abc import ( ABCIE, @@ -169,6 +168,7 @@ from .bilibili import ( BilibiliChannelIE, BiliIntlIE, BiliIntlSeriesIE, + BiliLiveIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( @@ -501,6 +501,7 @@ from .fc2 import ( FC2LiveIE, ) from .fczenit import FczenitIE +from .fifa import FifaIE from .filmmodu import FilmmoduIE from .filmon import ( FilmOnIE, @@ -590,6 +591,7 @@ from .go import GoIE from .godtube import GodTubeIE from .gofile import GofileIE from .golem import GolemIE +from .goodgame import GoodGameIE from .googledrive import GoogleDriveIE from .googlepodcasts import ( GooglePodcastsIE, @@ -600,7 +602,11 @@ from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE -from .gronkh import GronkhIE +from .gronkh import ( + GronkhIE, + 
GronkhFeedIE, + GronkhVodsIE +) from .groupon import GrouponIE from .hbo import HBOIE from .hearthisat import HearThisAtIE @@ -617,6 +623,7 @@ from .hitrecord import HitRecordIE from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, + HotStarPrefixIE, HotStarPlaylistIE, HotStarSeriesIE, ) @@ -640,6 +647,7 @@ from .hungama import ( HungamaAlbumPlaylistIE, ) from .hypem import HypemIE +from .icareus import IcareusIE from .ichinanalive import ( IchinanaLiveIE, IchinanaLiveClipIE, @@ -699,7 +707,11 @@ from .ivi import ( IviCompilationIE ) from .ivideon import IvideonIE -from .iwara import IwaraIE +from .iwara import ( + IwaraIE, + IwaraPlaylistIE, + IwaraUserIE, +) from .izlesene import IzleseneIE from .jable import ( JableIE, @@ -785,6 +797,10 @@ from .lifenews import ( LifeNewsIE, LifeEmbedIE, ) +from .likee import ( + LikeeIE, + LikeeUserIE +) from .limelight import ( LimelightMediaIE, LimelightChannelIE, @@ -813,7 +829,10 @@ from .lnkgo import ( ) from .localnews8 import LocalNews8IE from .lovehomeporn import LoveHomePornIE -from .lrt import LRTIE +from .lrt import ( + LRTVODIE, + LRTStreamIE +) from .lynda import ( LyndaIE, LyndaCourseIE @@ -843,6 +862,7 @@ from .markiza import ( MarkizaPageIE, ) from .massengeschmacktv import MassengeschmackTVIE +from .masters import MastersIE from .matchtv import MatchTVIE from .mdr import MDRIE from .medaltv import MedalTVIE @@ -999,7 +1019,8 @@ from .ndr import ( from .ndtv import NDTVIE from .nebula import ( NebulaIE, - NebulaCollectionIE, + NebulaSubscriptionsIE, + NebulaChannelIE, ) from .nerdcubed import NerdCubedFeedIE from .netzkino import NetzkinoIE @@ -1248,6 +1269,7 @@ from .pluralsight import ( PluralsightIE, PluralsightCourseIE, ) +from .podchaser import PodchaserIE from .podomatic import PodomaticIE from .pokemon import ( PokemonIE, @@ -1383,6 +1405,7 @@ from .rokfin import ( RokfinIE, RokfinStackIE, RokfinChannelIE, + RokfinSearchIE, ) from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE from .rottentomatoes import RottenTomatoesIE @@ -1884,10 +1907,7 @@ from .vice import ( from .vidbit import VidbitIE from .viddler import ViddlerIE from .videa import VideaIE -from .videocampus_sachsen import ( - VideocampusSachsenIE, - VideocampusSachsenEmbedIE, -) +from .videocampus_sachsen import VideocampusSachsenIE from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videomore import ( @@ -2111,6 +2131,7 @@ from .youtube import ( YoutubeSearchURLIE, YoutubeMusicSearchURLIE, YoutubeSubscriptionsIE, + YoutubeStoriesIE, YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeYtBeIE, @@ -2145,6 +2166,10 @@ from .zhihu import ZhihuIE from .zingmp3 import ( ZingMp3IE, ZingMp3AlbumIE, + ZingMp3ChartHomeIE, + ZingMp3WeekChartIE, + ZingMp3ChartMusicVideoIE, + ZingMp3UserIE, ) from .zoom import ZoomIE from .zype import ZypeIE diff --git a/yt_dlp/extractor/extremetube.py b/yt_dlp/extractor/extremetube.py index acd4090fa..99520b6a0 100644 --- a/yt_dlp/extractor/extremetube.py +++ b/yt_dlp/extractor/extremetube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from ..utils import str_to_int from .keezmovies import KeezMoviesIE diff --git a/yt_dlp/extractor/eyedotv.py b/yt_dlp/extractor/eyedotv.py index f62ddebae..d8b068e9c 100644 --- a/yt_dlp/extractor/eyedotv.py +++ b/yt_dlp/extractor/eyedotv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( xpath_text, diff --git a/yt_dlp/extractor/facebook.py 
b/yt_dlp/extractor/facebook.py index 5e0e2facf..de45f9298 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re @@ -397,10 +394,8 @@ class FacebookIE(InfoExtractor): r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)] post = traverse_obj(post_data, ( ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] - media = traverse_obj( - post, - (..., 'attachments', ..., 'media', lambda _, m: str(m['id']) == video_id and m['__typename'] == 'Video'), - expected_type=dict) + media = traverse_obj(post, (..., 'attachments', ..., lambda k, v: ( + k == 'media' and str(v['id']) == video_id and v['__typename'] == 'Video')), expected_type=dict) title = get_first(media, ('title', 'text')) description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {} @@ -528,7 +523,8 @@ class FacebookIE(InfoExtractor): info = { 'id': v_id, 'formats': formats, - 'thumbnail': try_get(video, lambda x: x['thumbnailImage']['uri']), + 'thumbnail': traverse_obj( + video, ('thumbnailImage', 'uri'), ('preferred_thumbnail', 'image', 'uri')), 'uploader_id': try_get(video, lambda x: x['owner']['id']), 'timestamp': int_or_none(video.get('publish_time')), 'duration': float_or_none(video.get('playable_duration_in_ms'), 1000), diff --git a/yt_dlp/extractor/fancode.py b/yt_dlp/extractor/fancode.py index 7ea16c61d..9716e581a 100644 --- a/yt_dlp/extractor/fancode.py +++ b/yt_dlp/extractor/fancode.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str diff --git a/yt_dlp/extractor/faz.py b/yt_dlp/extractor/faz.py index 312ee2aee..cc12fda2b 100644 --- a/yt_dlp/extractor/faz.py +++ b/yt_dlp/extractor/faz.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py index 54a83aa16..225677b00 100644 --- a/yt_dlp/extractor/fc2.py +++ b/yt_dlp/extractor/fc2.py @@ -1,16 +1,13 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..compat import ( compat_parse_qs, ) +from ..dependencies import websockets from ..utils import ( ExtractorError, WebSocketsWrapper, - has_websockets, js_to_json, sanitized_Request, std_headers, @@ -173,7 +170,7 @@ class FC2LiveIE(InfoExtractor): }] def _real_extract(self, url): - if not has_websockets: + if not websockets: raise ExtractorError('websockets library is not available. 
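# Illustrative sketch (not part of the patch): the fc2.py hunk above replaces
# the has_websockets flag with yt_dlp.dependencies.websockets, which holds the
# imported module itself, or None when the import failed. Reduced to plain
# Python, the optional-dependency pattern is:
try:
    import websockets
except ImportError:
    websockets = None  # optional dependency; checked at use time, not import time

def require_websockets():
    if not websockets:  # a module object is truthy, None is not
        raise RuntimeError('websockets library is not available. Please install it.')
    return websockets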
Please install it.', expected=True) video_id = self._match_id(url) webpage = self._download_webpage('https://live.fc2.com/%s/' % video_id, video_id) diff --git a/yt_dlp/extractor/fczenit.py b/yt_dlp/extractor/fczenit.py index 8db7c5963..df40888e1 100644 --- a/yt_dlp/extractor/fczenit.py +++ b/yt_dlp/extractor/fczenit.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/fifa.py b/yt_dlp/extractor/fifa.py new file mode 100644 index 000000000..bdc8d7fbf --- /dev/null +++ b/yt_dlp/extractor/fifa.py @@ -0,0 +1,109 @@ +from .common import InfoExtractor + +from ..utils import ( + int_or_none, + traverse_obj, + unified_timestamp, +) + + +class FifaIE(InfoExtractor): + _VALID_URL = r'https?://www.fifa.com/fifaplus/(?P<locale>\w{2})/watch/([^#?]+/)?(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://www.fifa.com/fifaplus/en/watch/7on10qPcnyLajDDU3ntg6y', + 'info_dict': { + 'id': '7on10qPcnyLajDDU3ntg6y', + 'title': 'Italy v France | Final | 2006 FIFA World Cup Germany™ | Full Match Replay', + 'description': 'md5:f4520d0ee80529c8ba4134a7d692ff8b', + 'ext': 'mp4', + 'categories': ['FIFA Tournaments', 'Replay'], + 'thumbnail': 'https://digitalhub.fifa.com/transform/fa6f0b3e-a2e9-4cf7-9f32-53c57bcb7360/2006_Final_ITA_FRA', + 'duration': 8164, + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.fifa.com/fifaplus/pt/watch/1cg5r5Qt6Qt12ilkDgb1sV', + 'info_dict': { + 'id': '1cg5r5Qt6Qt12ilkDgb1sV', + 'title': 'Brasil x Alemanha | Semifinais | Copa do Mundo FIFA Brasil 2014 | Compacto', + 'description': 'md5:ba4ffcc084802b062beffc3b4c4b19d6', + 'ext': 'mp4', + 'categories': ['FIFA Tournaments', 'Highlights'], + 'thumbnail': 'https://digitalhub.fifa.com/transform/d8fe6f61-276d-4a73-a7fe-6878a35fd082/FIFAPLS_100EXTHL_2014BRAvGER_TMB', + 'duration': 901, + 'release_timestamp': 1404777600, + 'release_date': '20140708', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.fifa.com/fifaplus/fr/watch/3C6gQH9C2DLwzNx7BMRQdp', + 'info_dict': { + 'id': '3C6gQH9C2DLwzNx7BMRQdp', + 'title': 'Le but de Josimar contre le Irlande du Nord | Buts classiques', + 'description': 'md5:16f9f789f09960bfe7220fe67af31f34', + 'ext': 'mp4', + 'categories': ['FIFA Tournaments', 'Goal'], + 'duration': 28, + 'thumbnail': 'https://digitalhub.fifa.com/transform/f9301391-f8d9-48b5-823e-c093ac5e3e11/CG_MEN_1986_JOSIMAR', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + video_id, locale = self._match_valid_url(url).group('id', 'locale') + webpage = self._download_webpage(url, video_id) + + preconnect_link = self._search_regex( + r'<link[^>]+rel\s*=\s*"preconnect"[^>]+href\s*=\s*"([^"]+)"', webpage, 'Preconnect Link') + + json_data = self._download_json( + f'{preconnect_link}/video/GetVideoPlayerData/{video_id}', video_id, + 'Downloading Video Player Data', query={'includeIdents': True, 'locale': locale}) + + video_details = self._download_json( + f'{preconnect_link}/sections/videoDetails/{video_id}', video_id, 'Downloading Video Details', fatal=False) + + preplay_parameters = self._download_json( + f'{preconnect_link}/video/GetVerizonPreplayParameters', video_id, 'Downloading Preplay Parameters', query={ + 'entryId': video_id, + 'assetId': json_data['verizonAssetId'], + 'useExternalId': False, + 'requiresToken': json_data['requiresToken'], + 'adConfig': 'fifaplusvideo', + 'prerollAds': True, + 'adVideoId': json_data['externalVerizonAssetId'], +
'preIdentId': json_data['preIdentId'], + 'postIdentId': json_data['postIdentId'], + }) + + cid = f'{json_data["preIdentId"]},{json_data["verizonAssetId"]},{json_data["postIdentId"]}' + content_data = self._download_json( + f'https://content.uplynk.com/preplay/{cid}/multiple.json', video_id, 'Downloading Content Data', query={ + 'v': preplay_parameters['preplayAPIVersion'], + 'tc': preplay_parameters['tokenCheckAlgorithmVersion'], + 'rn': preplay_parameters['randomNumber'], + 'exp': preplay_parameters['tokenExpirationDate'], + 'ct': preplay_parameters['contentType'], + 'cid': cid, + 'mbtracks': preplay_parameters['tracksAssetNumber'], + 'ad': preplay_parameters['adConfiguration'], + 'ad.preroll': int(preplay_parameters['adPreroll']), + 'ad.cmsid': preplay_parameters['adCMSSourceId'], + 'ad.vid': preplay_parameters['adSourceVideoID'], + 'sig': preplay_parameters['signature'], + }) + + formats, subtitles = self._extract_m3u8_formats_and_subtitles(content_data['playURL'], video_id) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': json_data.get('title'), + 'description': json_data.get('description'), + 'duration': int_or_none(json_data.get('duration')), + 'release_timestamp': unified_timestamp(video_details.get('dateOfRelease')), + 'categories': traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)), + 'thumbnail': traverse_obj(video_details, ('backgroundImage', 'src')), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/filmmodu.py b/yt_dlp/extractor/filmmodu.py index 2746876d5..d74131192 100644 --- a/yt_dlp/extractor/filmmodu.py +++ b/yt_dlp/extractor/filmmodu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py index 7b43ecc0f..7040231be 100644 --- a/yt_dlp/extractor/filmon.py +++ b/yt_dlp/extractor/filmon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/filmweb.py b/yt_dlp/extractor/filmweb.py index 5e323b4f8..cfea1f2fb 100644 --- a/yt_dlp/extractor/filmweb.py +++ b/yt_dlp/extractor/filmweb.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/firsttv.py b/yt_dlp/extractor/firsttv.py index ccad173b7..99c27e0c3 100644 --- a/yt_dlp/extractor/firsttv.py +++ b/yt_dlp/extractor/firsttv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/fivetv.py b/yt_dlp/extractor/fivetv.py index d6bebd19b..448c332b3 100644 --- a/yt_dlp/extractor/fivetv.py +++ b/yt_dlp/extractor/fivetv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/flickr.py b/yt_dlp/extractor/flickr.py index 2ed6c2bdc..552ecd43a 100644 --- a/yt_dlp/extractor/flickr.py +++ b/yt_dlp/extractor/flickr.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/folketinget.py b/yt_dlp/extractor/folketinget.py index b3df93f28..0e69fa32f 100644 --- a/yt_dlp/extractor/folketinget.py +++ b/yt_dlp/extractor/folketinget.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import 
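# Illustrative sketch (not part of the patch): in the new fifa.py above,
# traverse_obj(video_details, (('videoCategory', 'videoSubcategory'),)) uses a
# tuple path element to branch over both keys, which is how 'categories' comes
# out as a list such as ['FIFA Tournaments', 'Replay']. A much-simplified,
# single-level stand-in for that behaviour:
def collect_keys(obj, keys):
    values = (obj.get(k) for k in keys)
    return [v for v in values if v is not None] or None

details = {'videoCategory': 'FIFA Tournaments', 'videoSubcategory': 'Replay'}
assert collect_keys(details, ('videoCategory', 'videoSubcategory')) == [
    'FIFA Tournaments', 'Replay']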
unicode_literals - from .common import InfoExtractor from ..compat import compat_parse_qs from ..utils import ( diff --git a/yt_dlp/extractor/footyroom.py b/yt_dlp/extractor/footyroom.py index 118325b6d..4a1316b50 100644 --- a/yt_dlp/extractor/footyroom.py +++ b/yt_dlp/extractor/footyroom.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .streamable import StreamableIE diff --git a/yt_dlp/extractor/formula1.py b/yt_dlp/extractor/formula1.py index 67662e6de..0a8ef850e 100644 --- a/yt_dlp/extractor/formula1.py +++ b/yt_dlp/extractor/formula1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/fourtube.py b/yt_dlp/extractor/fourtube.py index d4d955b6b..c6af100f3 100644 --- a/yt_dlp/extractor/fourtube.py +++ b/yt_dlp/extractor/fourtube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/fox.py b/yt_dlp/extractor/fox.py index 4c52b9ac6..5996e86bb 100644 --- a/yt_dlp/extractor/fox.py +++ b/yt_dlp/extractor/fox.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import uuid diff --git a/yt_dlp/extractor/fox9.py b/yt_dlp/extractor/fox9.py index 91f8f7b8a..dfbafa7dd 100644 --- a/yt_dlp/extractor/fox9.py +++ b/yt_dlp/extractor/fox9.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/foxgay.py b/yt_dlp/extractor/foxgay.py index 1c53e0642..4abc2cfd0 100644 --- a/yt_dlp/extractor/foxgay.py +++ b/yt_dlp/extractor/foxgay.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/foxnews.py b/yt_dlp/extractor/foxnews.py index 18fa0a5ef..cee4d6b49 100644 --- a/yt_dlp/extractor/foxnews.py +++ b/yt_dlp/extractor/foxnews.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .amp import AMPIE diff --git a/yt_dlp/extractor/foxsports.py b/yt_dlp/extractor/foxsports.py index 2b2cb6c6f..f9d7fe52a 100644 --- a/yt_dlp/extractor/foxsports.py +++ b/yt_dlp/extractor/foxsports.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/fptplay.py b/yt_dlp/extractor/fptplay.py index c23fe6c53..1872d8a1c 100644 --- a/yt_dlp/extractor/fptplay.py +++ b/yt_dlp/extractor/fptplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import time import urllib.parse diff --git a/yt_dlp/extractor/franceculture.py b/yt_dlp/extractor/franceculture.py index 9dc28d801..6bd9912f3 100644 --- a/yt_dlp/extractor/franceculture.py +++ b/yt_dlp/extractor/franceculture.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/franceinter.py b/yt_dlp/extractor/franceinter.py index ae822a50e..779249b84 100644 --- a/yt_dlp/extractor/franceinter.py +++ b/yt_dlp/extractor/franceinter.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import month_by_name diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 347a766d8..5902eaca0 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -1,8 +1,3 @@ -# coding: utf-8 - -from __future__ 
import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/freesound.py b/yt_dlp/extractor/freesound.py index 138b6bc58..9724dbdf0 100644 --- a/yt_dlp/extractor/freesound.py +++ b/yt_dlp/extractor/freesound.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/freespeech.py b/yt_dlp/extractor/freespeech.py index ea9c3e317..aea551379 100644 --- a/yt_dlp/extractor/freespeech.py +++ b/yt_dlp/extractor/freespeech.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/frontendmasters.py b/yt_dlp/extractor/frontendmasters.py index fc67a8437..e0529b7ba 100644 --- a/yt_dlp/extractor/frontendmasters.py +++ b/yt_dlp/extractor/frontendmasters.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/fujitv.py b/yt_dlp/extractor/fujitv.py index 4fdfe12ab..d7f49accd 100644 --- a/yt_dlp/extractor/fujitv.py +++ b/yt_dlp/extractor/fujitv.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals from ..utils import HEADRequest from .common import InfoExtractor @@ -19,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor): 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40110076', 'info_dict': { 'id': '5d40110076', - 'ext': 'mp4', + 'ext': 'ts', 'title': '#1318 ใใพใๅญใใพใผใใใฎๆด้คจใ่ฆใใใฎๅทป', 'series': 'ちびまる子ちゃん', 'series_id': '5d40', @@ -30,7 +28,7 @@ 'url': 'https://fod.fujitv.co.jp/title/5d40/5d40810083', 'info_dict': { 'id': '5d40810083', - 'ext': 'mp4', + 'ext': 'ts', 'title': '#1324 ใใพใๅญใจใชใใฎๅญใใฎๅทป๏ผใ็ตๆ๏ผ2ๆใใ ใใซใใชใไผใใฎๅทป', 'description': 'md5:3972d900b896adc8ab1849e310507efa', 'series': 'ちびまる子ちゃん', @@ -47,13 +45,13 @@ if token: json_info = self._download_json('https://fod-sp.fujitv.co.jp/apps/api/episode/detail/?ep_id=%s&is_premium=false' % video_id, video_id, headers={'x-authorization': f'Bearer {token.value}'}, fatal=False) else: - self.report_warning(f'The token cookie is needed to extract video metadata. {self._LOGIN_HINTS["cookies"]}') + self.report_warning(f'The token cookie is needed to extract video metadata.
{self._login_hint("cookies")}') formats, subtitles = [], {} src_json = self._download_json(f'{self._BASE_URL}abrjson_v2/tv_android/{video_id}', video_id) for src in src_json['video_selector']: if not src.get('url'): continue - fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'mp4') + fmt, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, 'ts') for f in fmt: f.update(dict(zip(('height', 'width'), self._BITRATE_MAP.get(f.get('tbr'), ())))) diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 6aa9bc9ce..12cacd3b4 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random import re import string @@ -245,6 +242,9 @@ class FunimationIE(FunimationBaseIE): 'language_preference': language_preference(lang.lower()), }) formats.extend(current_formats) + if not formats and (requested_languages or requested_versions): + self.raise_no_formats( + 'There are no video formats matching the requested languages/versions', expected=True, video_id=display_id) self._remove_duplicate_formats(formats) self._sort_formats(formats, ('lang', 'source')) diff --git a/yt_dlp/extractor/funk.py b/yt_dlp/extractor/funk.py index 2c5cfe864..539d719c5 100644 --- a/yt_dlp/extractor/funk.py +++ b/yt_dlp/extractor/funk.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from .nexx import NexxIE from ..utils import ( diff --git a/yt_dlp/extractor/fusion.py b/yt_dlp/extractor/fusion.py index a3f44b812..46bda49ea 100644 --- a/yt_dlp/extractor/fusion.py +++ b/yt_dlp/extractor/fusion.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/fxnetworks.py b/yt_dlp/extractor/fxnetworks.py deleted file mode 100644 index 00e67426b..000000000 --- a/yt_dlp/extractor/fxnetworks.py +++ /dev/null @@ -1,77 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .adobepass import AdobePassIE -from ..utils import ( - extract_attributes, - int_or_none, - parse_age_limit, - smuggle_url, - update_url_query, -) - - -class FXNetworksIE(AdobePassIE): - _VALID_URL = r'https?://(?:www\.)?(?:fxnetworks|simpsonsworld)\.com/video/(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://www.fxnetworks.com/video/1032565827847', - 'md5': '8d99b97b4aa7a202f55b6ed47ea7e703', - 'info_dict': { - 'id': 'dRzwHC_MMqIv', - 'ext': 'mp4', - 'title': 'First Look: Better Things - Season 2', - 'description': 'Because real life is like a fart. Watch this FIRST LOOK to see what inspired the new season of Better Things.', - 'age_limit': 14, - 'uploader': 'NEWA-FNG-FX', - 'upload_date': '20170825', - 'timestamp': 1503686274, - 'episode_number': 0, - 'season_number': 2, - 'series': 'Better Things', - }, - 'add_ie': ['ThePlatform'], - }, { - 'url': 'http://www.simpsonsworld.com/video/716094019682', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - if 'The content you are trying to access is not available in your region.' 
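# Illustrative sketch (not part of the patch): fujitv.py above derives height
# and width from a bitrate table via dict(zip(...)). Since zip stops at the
# shorter iterable, the empty-tuple default makes an unknown bitrate a no-op
# update. (The table values below are invented for illustration.)
BITRATE_MAP = {4000: (1080, 1920), 1200: (720, 1280)}  # tbr -> (height, width)

formats = [{'tbr': 4000}, {'tbr': 555}]
for f in formats:
    f.update(dict(zip(('height', 'width'), BITRATE_MAP.get(f.get('tbr'), ()))))
assert formats == [{'tbr': 4000, 'height': 1080, 'width': 1920}, {'tbr': 555}]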
in webpage: - self.raise_geo_restricted() - video_data = extract_attributes(self._search_regex( - r'(<a.+?rel="https?://link\.theplatform\.com/s/.+?</a>)', webpage, 'video data')) - player_type = self._search_regex(r'playerType\s*=\s*[\'"]([^\'"]+)', webpage, 'player type', default=None) - release_url = video_data['rel'] - title = video_data['data-title'] - rating = video_data.get('data-rating') - query = { - 'mbr': 'true', - } - if player_type == 'movies': - query.update({ - 'manifest': 'm3u', - }) - else: - query.update({ - 'switch': 'http', - }) - if video_data.get('data-req-auth') == '1': - resource = self._get_mvpd_resource( - video_data['data-channel'], title, - video_data.get('data-guid'), rating) - query['auth'] = self._extract_mvpd_auth(url, video_id, 'fx', resource) - - return { - '_type': 'url_transparent', - 'id': video_id, - 'title': title, - 'url': smuggle_url(update_url_query(release_url, query), {'force_smil_url': True}), - 'series': video_data.get('data-show-title'), - 'episode_number': int_or_none(video_data.get('data-episode')), - 'season_number': int_or_none(video_data.get('data-season')), - 'thumbnail': video_data.get('data-large-thumb'), - 'age_limit': parse_age_limit(rating), - 'ie_key': 'ThePlatform', - } diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py index 9ba0b1ca1..7ed81f761 100644 --- a/yt_dlp/extractor/gab.py +++ b/yt_dlp/extractor/gab.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/gaia.py b/yt_dlp/extractor/gaia.py index 5b0195c63..4ace0544a 100644 --- a/yt_dlp/extractor/gaia.py +++ b/yt_dlp/extractor/gaia.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/gameinformer.py b/yt_dlp/extractor/gameinformer.py index f1b96c172..2664edb81 100644 --- a/yt_dlp/extractor/gameinformer.py +++ b/yt_dlp/extractor/gameinformer.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/gamejolt.py b/yt_dlp/extractor/gamejolt.py index a13e528f5..440b832fc 100644 --- a/yt_dlp/extractor/gamejolt.py +++ b/yt_dlp/extractor/gamejolt.py @@ -1,4 +1,3 @@ -# coding: utf-8 import itertools import json import math diff --git a/yt_dlp/extractor/gamespot.py b/yt_dlp/extractor/gamespot.py index 7a1beae3c..e1d317377 100644 --- a/yt_dlp/extractor/gamespot.py +++ b/yt_dlp/extractor/gamespot.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .once import OnceIE from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/gamestar.py b/yt_dlp/extractor/gamestar.py index e882fa671..e9966f532 100644 --- a/yt_dlp/extractor/gamestar.py +++ b/yt_dlp/extractor/gamestar.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/gaskrank.py b/yt_dlp/extractor/gaskrank.py index 03acd2a73..76ddcc40e 100644 --- a/yt_dlp/extractor/gaskrank.py +++ b/yt_dlp/extractor/gaskrank.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/gazeta.py b/yt_dlp/extractor/gazeta.py index 367187080..c6868a672 100644 --- a/yt_dlp/extractor/gazeta.py +++ 
b/yt_dlp/extractor/gazeta.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/gdcvault.py b/yt_dlp/extractor/gdcvault.py index c3ad6b4ce..2878bbd88 100644 --- a/yt_dlp/extractor/gdcvault.py +++ b/yt_dlp/extractor/gdcvault.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/gedidigital.py b/yt_dlp/extractor/gedidigital.py index ec386c218..4ae5362b4 100644 --- a/yt_dlp/extractor/gedidigital.py +++ b/yt_dlp/extractor/gedidigital.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -14,7 +11,7 @@ from ..utils import ( class GediDigitalIE(InfoExtractor): - _VALID_URL = r'''(?x)(?P<url>(?:https?:)//video\. + _VALID_URL = r'''(?x:(?P<url>(?:https?:)//video\. (?: (?: (?:espresso\.)?repubblica @@ -36,7 +33,7 @@ class GediDigitalIE(InfoExtractor): |corrierealpi |lasentinella )\.gelocal - )\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*)''' + )\.it(?:/[^/]+){2,4}/(?P<id>\d+))(?:$|[?&].*))''' _TESTS = [{ 'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683', 'md5': '84658d7fb9e55a6e57ecc77b73137494', diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index bd56ad289..f594d02c2 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -1,30 +1,126 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import os import re -import sys +import xml.etree.ElementTree +from .ant1newsgr import Ant1NewsGrEmbedIE +from .anvato import AnvatoIE +from .apa import APAIE +from .arcpublishing import ArcPublishingIE +from .arkena import ArkenaIE +from .arte import ArteTVEmbedIE +from .bitchute import BitChuteIE +from .blogger import BloggerIE +from .brightcove import BrightcoveLegacyIE, BrightcoveNewIE +from .channel9 import Channel9IE +from .cloudflarestream import CloudflareStreamIE from .common import InfoExtractor +from .commonprotocols import RtmpIE +from .condenast import CondeNastIE +from .dailymail import DailyMailIE +from .dailymotion import DailymotionIE +from .dbtv import DBTVIE +from .digiteka import DigitekaIE +from .drtuber import DrTuberIE +from .eagleplatform import EaglePlatformIE +from .ertgr import ERTWebtvEmbedIE +from .expressen import ExpressenIE +from .facebook import FacebookIE +from .foxnews import FoxNewsIE +from .gedidigital import GediDigitalIE +from .gfycat import GfycatIE +from .glomex import GlomexEmbedIE +from .googledrive import GoogleDriveIE +from .indavideo import IndavideoEmbedIE +from .instagram import InstagramIE +from .joj import JojIE +from .jwplatform import JWPlatformIE +from .kaltura import KalturaIE +from .kinja import KinjaEmbedIE +from .limelight import LimelightBaseIE +from .mainstreaming import MainStreamingIE +from .medialaan import MedialaanIE +from .mediaset import MediasetIE +from .mediasite import MediasiteIE +from .megaphone import MegaphoneIE +from .megatvcom import MegaTVComEmbedIE +from .mofosex import MofosexEmbedIE +from .mtv import MTVServicesEmbeddedIE +from .myvi import MyviIE +from .nbc import NBCSportsVPlayerIE +from .nexx import NexxEmbedIE, NexxIE +from .odnoklassniki import OdnoklassnikiIE +from .onionstudios import OnionStudiosIE +from .ooyala import OoyalaIE +from .panopto import PanoptoBaseIE +from .peertube import PeerTubeIE +from .piksel import PikselIE +from .pladform import PladformIE 
+from .pornhub import PornHubIE +from .rcs import RCSEmbedsIE +from .redtube import RedTubeIE +from .rumble import RumbleEmbedIE +from .rutube import RutubeIE +from .rutv import RUTVIE +from .ruutu import RuutuIE +from .senategov import SenateISVPIE +from .simplecast import SimplecastIE +from .soundcloud import SoundcloudEmbedIE +from .spankwire import SpankwireIE +from .sportbox import SportBoxIE +from .spotify import SpotifyBaseIE +from .springboardplatform import SpringboardPlatformIE +from .svt import SVTIE +from .teachable import TeachableIE +from .ted import TedEmbedIE +from .theplatform import ThePlatformIE +from .threeqsdn import ThreeQSDNIE +from .tnaflix import TNAFlixNetworkEmbedIE +from .tube8 import Tube8IE +from .tunein import TuneInBaseIE +from .tvc import TVCIE +from .tvopengr import TVOpenGrEmbedIE +from .tvp import TVPEmbedIE +from .twentymin import TwentyMinutenIE +from .udn import UDNEmbedIE +from .ustream import UstreamIE +from .vbox7 import Vbox7IE +from .vice import ViceIE +from .videa import VideaIE +from .videomore import VideomoreIE +from .videopress import VideoPressIE +from .viewlift import ViewLiftEmbedIE +from .vimeo import VHXEmbedIE, VimeoIE +from .viqeo import ViqeoIE +from .vk import VKIE +from .vshare import VShareIE +from .vzaar import VzaarIE +from .washingtonpost import WashingtonPostIE +from .webcaster import WebcasterFeedIE +from .wimtv import WimTVIE +from .wistia import WistiaIE +from .xfileshare import XFileShareIE +from .xhamster import XHamsterEmbedIE +from .yapfiles import YapFilesIE +from .youporn import YouPornIE from .youtube import YoutubeIE +from .zype import ZypeIE from ..compat import ( compat_etree_fromstring, compat_str, compat_urllib_parse_unquote, compat_urlparse, - compat_xml_parse_error, ) from ..utils import ( + KNOWN_EXTENSIONS, + ExtractorError, + HEADRequest, + UnsupportedError, determine_ext, dict_get, - ExtractorError, float_or_none, - HEADRequest, int_or_none, is_html, js_to_json, - KNOWN_EXTENSIONS, merge_dicts, mimetype2ext, orderedSet, @@ -36,120 +132,11 @@ from ..utils import ( unescapeHTML, unified_timestamp, unsmuggle_url, - UnsupportedError, url_or_none, xpath_attr, xpath_text, xpath_with_ns, ) -from .commonprotocols import RtmpIE -from .brightcove import ( - BrightcoveLegacyIE, - BrightcoveNewIE, -) -from .nexx import ( - NexxIE, - NexxEmbedIE, -) -from .nbc import NBCSportsVPlayerIE -from .ooyala import OoyalaIE -from .rutv import RUTVIE -from .tvc import TVCIE -from .sportbox import SportBoxIE -from .myvi import MyviIE -from .condenast import CondeNastIE -from .udn import UDNEmbedIE -from .senategov import SenateISVPIE -from .svt import SVTIE -from .pornhub import PornHubIE -from .xhamster import XHamsterEmbedIE -from .tnaflix import TNAFlixNetworkEmbedIE -from .drtuber import DrTuberIE -from .redtube import RedTubeIE -from .tube8 import Tube8IE -from .mofosex import MofosexEmbedIE -from .spankwire import SpankwireIE -from .youporn import YouPornIE -from .vimeo import ( - VimeoIE, - VHXEmbedIE, -) -from .dailymotion import DailymotionIE -from .dailymail import DailyMailIE -from .onionstudios import OnionStudiosIE -from .viewlift import ViewLiftEmbedIE -from .mtv import MTVServicesEmbeddedIE -from .pladform import PladformIE -from .videomore import VideomoreIE -from .webcaster import WebcasterFeedIE -from .googledrive import GoogleDriveIE -from .jwplatform import JWPlatformIE -from .digiteka import DigitekaIE -from .arkena import ArkenaIE -from .instagram import InstagramIE -from .threeqsdn import ThreeQSDNIE -from 
.theplatform import ThePlatformIE -from .kaltura import KalturaIE -from .eagleplatform import EaglePlatformIE -from .facebook import FacebookIE -from .soundcloud import SoundcloudEmbedIE -from .tunein import TuneInBaseIE -from .vbox7 import Vbox7IE -from .dbtv import DBTVIE -from .piksel import PikselIE -from .videa import VideaIE -from .twentymin import TwentyMinutenIE -from .ustream import UstreamIE -from .arte import ArteTVEmbedIE -from .videopress import VideoPressIE -from .rutube import RutubeIE -from .glomex import GlomexEmbedIE -from .megatvcom import MegaTVComEmbedIE -from .ant1newsgr import Ant1NewsGrEmbedIE -from .limelight import LimelightBaseIE -from .anvato import AnvatoIE -from .washingtonpost import WashingtonPostIE -from .wistia import WistiaIE -from .mediaset import MediasetIE -from .joj import JojIE -from .megaphone import MegaphoneIE -from .vzaar import VzaarIE -from .channel9 import Channel9IE -from .vshare import VShareIE -from .mediasite import MediasiteIE -from .springboardplatform import SpringboardPlatformIE -from .ted import TedEmbedIE -from .yapfiles import YapFilesIE -from .vice import ViceIE -from .xfileshare import XFileShareIE -from .cloudflarestream import CloudflareStreamIE -from .peertube import PeerTubeIE -from .teachable import TeachableIE -from .indavideo import IndavideoEmbedIE -from .apa import APAIE -from .foxnews import FoxNewsIE -from .viqeo import ViqeoIE -from .expressen import ExpressenIE -from .zype import ZypeIE -from .odnoklassniki import OdnoklassnikiIE -from .vk import VKIE -from .kinja import KinjaEmbedIE -from .gedidigital import GediDigitalIE -from .rcs import RCSEmbedsIE -from .bitchute import BitChuteIE -from .rumble import RumbleEmbedIE -from .arcpublishing import ArcPublishingIE -from .medialaan import MedialaanIE -from .simplecast import SimplecastIE -from .wimtv import WimTVIE -from .tvopengr import TVOpenGrEmbedIE -from .ertgr import ERTWebtvEmbedIE -from .tvp import TVPEmbedIE -from .blogger import BloggerIE -from .mainstreaming import MainStreamingIE -from .gfycat import GfycatIE -from .panopto import PanoptoBaseIE -from .ruutu import RuutuIE class GenericIE(InfoExtractor): @@ -1043,20 +1030,6 @@ class GenericIE(InfoExtractor): 'filesize': 24687186, }, }, - { - 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz', - 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4', - 'info_dict': { - 'id': 'uxjb0lwrcz', - 'ext': 'mp4', - 'title': 'Conversation about Hexagonal Rails Part 1', - 'description': 'a Martin Fowler video from ThoughtWorks', - 'duration': 1715.0, - 'uploader': 'thoughtworks.wistia.com', - 'timestamp': 1401832161, - 'upload_date': '20140603', - }, - }, # Wistia standard embed (async) { 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/', @@ -2530,6 +2503,29 @@ class GenericIE(InfoExtractor): 'upload_date': '20220308', }, }, + { + # Multiple Ruutu embeds + 'url': 'https://www.hs.fi/kotimaa/art-2000008762560.html', + 'info_dict': { + 'title': 'Koronavirus | Epidemiahuippu voi olla Suomessa ohi, mutta koronaviruksen poistamista yleisvaarallisten tautien joukosta harkitaan vasta syksyllä', + 'id': 'art-2000008762560' + }, + 'playlist_count': 3 + }, + { + # Ruutu embed in hs.fi with a single video + 'url': 'https://www.hs.fi/kotimaa/art-2000008793421.html', + 'md5': 'f8964e65d8fada6e8a562389bf366bb4', + 'info_dict': { + 'id': '4081841', + 'ext': 'mp4', + 'title': 'Puolustusvoimat siirsi panssariajoneuvoja harjoituksiin Niinisaloon 2.5.2022', + 'thumbnail': r're:^https?://.+\.jpg$', + 'duration': 138,
'age_limit': 0, + 'upload_date': '20220504', + }, + }, ] def report_following_redirect(self, new_url): @@ -2629,7 +2625,7 @@ class GenericIE(InfoExtractor): entries.append({ 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0], - 'title': '%s - %s' % (title, n.tag), + 'title': f'{title} - {n.tag}', 'url': compat_urlparse.urljoin(url, url_n.text), 'duration': float_or_none(n.find('./duration').text), }) @@ -2651,7 +2647,7 @@ class GenericIE(InfoExtractor): for o in range(len(newmagic) - 1, -1, -1): new = '' - l = (o + sum([int(n) for n in license[o:]])) % 32 + l = (o + sum(int(n) for n in license[o:])) % 32 for i in range(0, len(newmagic)): if i == o: @@ -2828,7 +2824,7 @@ class GenericIE(InfoExtractor): try: try: doc = compat_etree_fromstring(webpage) - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': self.report_detected('RSS feed') @@ -2863,7 +2859,7 @@ class GenericIE(InfoExtractor): self.report_detected('F4M manifest') self._sort_formats(info_dict['formats']) return info_dict - except compat_xml_parse_error: + except xml.etree.ElementTree.ParseError: pass # Is it a Camtasia project? @@ -3178,6 +3174,11 @@ class GenericIE(InfoExtractor): if sportbox_urls: return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key()) + # Look for embedded Spotify player + spotify_urls = SpotifyBaseIE._extract_embed_urls(webpage) + if spotify_urls: + return self.playlist_from_matches(spotify_urls, video_id, video_title) + # Look for embedded XHamster player xhamster_urls = XHamsterEmbedIE._extract_urls(webpage) if xhamster_urls: @@ -3757,9 +3758,9 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches(panopto_urls, video_id, video_title) # Look for Ruutu embeds - ruutu_url = RuutuIE._extract_url(webpage) - if ruutu_url: - return self.url_result(ruutu_url, RuutuIE) + ruutu_urls = RuutuIE._extract_urls(webpage) + if ruutu_urls: + return self.playlist_from_matches(ruutu_urls, video_id, video_title) # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') @@ -3773,7 +3774,7 @@ class GenericIE(InfoExtractor): else: for num, entry in enumerate(entries, start=1): entry.update({ - 'id': '%s-%s' % (video_id, num), + 'id': f'{video_id}-{num}', 'title': '%s (%d)' % (video_title, num), }) for entry in entries: @@ -4011,9 +4012,6 @@ class GenericIE(InfoExtractor): # Look also in Refresh HTTP header refresh_header = head_response.headers.get('Refresh') if refresh_header: - # In python 2 response HTTP headers are bytestrings - if sys.version_info < (3, 0) and isinstance(refresh_header, str): - refresh_header = refresh_header.decode('iso-8859-1') found = re.search(REDIRECT_REGEX, refresh_header) if found: new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1))) @@ -4108,7 +4106,7 @@ class GenericIE(InfoExtractor): entries.append(entry_info_dict) if len(entries) == 1: - return entries[0] + return merge_dicts(entries[0], info_dict) else: for num, e in enumerate(entries, start=1): # 'url' results don't have a title diff --git a/yt_dlp/extractor/gettr.py b/yt_dlp/extractor/gettr.py index 327a4d0b8..9bd6200b6 100644 --- a/yt_dlp/extractor/gettr.py +++ b/yt_dlp/extractor/gettr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( bool_or_none, diff --git a/yt_dlp/extractor/gfycat.py b/yt_dlp/extractor/gfycat.py index 2ad03e2b2..60f06ccd7 
100644 --- a/yt_dlp/extractor/gfycat.py +++ b/yt_dlp/extractor/gfycat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -13,7 +10,7 @@ from ..utils import ( class GfycatIE(InfoExtractor): - _VALID_URL = r'(?i)https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)' + _VALID_URL = r'https?://(?:(?:www|giant|thumbs)\.)?gfycat\.com/(?i:ru/|ifr/|gifs/detail/)?(?P<id>[^-/?#\."\']+)' _TESTS = [{ 'url': 'http://gfycat.com/DeadlyDecisiveGermanpinscher', 'info_dict': { diff --git a/yt_dlp/extractor/giantbomb.py b/yt_dlp/extractor/giantbomb.py index 1920923fc..5d6b208aa 100644 --- a/yt_dlp/extractor/giantbomb.py +++ b/yt_dlp/extractor/giantbomb.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/giga.py b/yt_dlp/extractor/giga.py index 5a9992a27..9e835a6da 100644 --- a/yt_dlp/extractor/giga.py +++ b/yt_dlp/extractor/giga.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/gigya.py b/yt_dlp/extractor/gigya.py index 412178492..c5bc86bb4 100644 --- a/yt_dlp/extractor/gigya.py +++ b/yt_dlp/extractor/gigya.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/glide.py b/yt_dlp/extractor/glide.py index 12af859be..2bffb26dc 100644 --- a/yt_dlp/extractor/glide.py +++ b/yt_dlp/extractor/glide.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/globo.py b/yt_dlp/extractor/globo.py index f6aaae1e9..8915ebf48 100644 --- a/yt_dlp/extractor/globo.py +++ b/yt_dlp/extractor/globo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import hashlib import json diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py index d9ef4338f..85ffa4c05 100644 --- a/yt_dlp/extractor/glomex.py +++ b/yt_dlp/extractor/glomex.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import urllib.parse diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py index f92e16600..07d13d1c3 100644 --- a/yt_dlp/extractor/go.py +++ b/yt_dlp/extractor/go.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .adobepass import AdobePassIE diff --git a/yt_dlp/extractor/godtube.py b/yt_dlp/extractor/godtube.py index 96e68b4d2..697540155 100644 --- a/yt_dlp/extractor/godtube.py +++ b/yt_dlp/extractor/godtube.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/gofile.py b/yt_dlp/extractor/gofile.py index 858bac52c..ddbce2ee8 100644 --- a/yt_dlp/extractor/gofile.py +++ b/yt_dlp/extractor/gofile.py @@ -1,4 +1,5 @@ -# coding: utf-8 +import hashlib + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -38,6 +39,15 @@ class GofileIE(InfoExtractor): 'id': 'TMjXd9', }, 'playlist_count': 1, + }, { + 'url': 'https://gofile.io/d/gqOtRf', + 'info_dict': { + 'id': 'gqOtRf', + }, + 'playlist_mincount': 1, + 'params': { + 'videopassword': 'password', + }, }] _TOKEN = None @@ -53,14 +63,22 @@ class GofileIE(InfoExtractor): self._set_cookie('gofile.io', 'accountToken', self._TOKEN) def _entries(self, file_id): - files 
= self._download_json('https://api.gofile.io/getContent', 'Gofile', note='Getting filelist', query={ + query_params = { 'contentId': file_id, 'token': self._TOKEN, 'websiteToken': 12345, - }) + } + password = self.get_param('videopassword') + if password: + query_params['password'] = hashlib.sha256(password.encode('utf-8')).hexdigest() + files = self._download_json( + 'https://api.gofile.io/getContent', file_id, note='Getting filelist', query=query_params) status = files['status'] - if status != 'ok': + if status == 'error-passwordRequired': + raise ExtractorError( + 'This video is protected by a password, use the --video-password option', expected=True) + elif status != 'ok': raise ExtractorError(f'{self.IE_NAME} said: status {status}', expected=True) found_files = False diff --git a/yt_dlp/extractor/golem.py b/yt_dlp/extractor/golem.py index 47a068e74..8416b5aa4 100644 --- a/yt_dlp/extractor/golem.py +++ b/yt_dlp/extractor/golem.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/goodgame.py b/yt_dlp/extractor/goodgame.py new file mode 100644 index 000000000..0866647e6 --- /dev/null +++ b/yt_dlp/extractor/goodgame.py @@ -0,0 +1,58 @@ +from .common import InfoExtractor +from ..utils import ( + clean_html, + int_or_none, + str_or_none, + traverse_obj, +) + + +class GoodGameIE(InfoExtractor): + IE_NAME = 'goodgame:stream' + _VALID_URL = r'https?://goodgame\.ru/channel/(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://goodgame.ru/channel/Pomi/#autoplay', + 'info_dict': { + 'id': 'pomi', + 'ext': 'mp4', + 'title': r're:Reynor vs Special \(1/2,bo3\) Wardi Spring EU \- playoff \(финальный день\) \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'channel_id': '1644', + 'channel': 'Pomi', + 'channel_url': 'https://goodgame.ru/channel/Pomi/', + 'description': 'md5:4a87b775ee7b2b57bdccebe285bbe171', + 'thumbnail': r're:^https?://.*\.jpg$', + 'live_status': 'is_live', + 'view_count': int, + }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'May not be online', + }] + + def _real_extract(self, url): + channel_name = self._match_id(url) + response = self._download_json(f'https://api2.goodgame.ru/v2/streams/{channel_name}', channel_name) + player_id = response['channel']['gg_player_src'] + + formats, subtitles = [], {} + if response.get('status') == 'Live': + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://hls.goodgame.ru/manifest/{player_id}_master.m3u8', + channel_name, 'mp4', live=True) + else: + self.raise_no_formats('User is offline', expected=True, video_id=channel_name) + + self._sort_formats(formats) + return { + 'id': player_id, + 'formats': formats, + 'subtitles': subtitles, + 'title': traverse_obj(response, ('channel', 'title')), + 'channel': channel_name, + 'channel_id': str_or_none(traverse_obj(response, ('channel', 'id'))), + 'channel_url': response.get('url'), + 'description': clean_html(traverse_obj(response, ('channel', 'description'))), + 'thumbnail': traverse_obj(response, ('channel', 'thumb')), + 'is_live': bool(formats), + 'view_count': int_or_none(response.get('viewers')), + 'age_limit': 18 if traverse_obj(response, ('channel', 'adult')) else None, + } diff --git a/yt_dlp/extractor/googledrive.py b/yt_dlp/extractor/googledrive.py index 7b5bf280f..c0905f86a 100644 --- a/yt_dlp/extractor/googledrive.py +++ b/yt_dlp/extractor/googledrive.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor 
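
[Aside on the gofile change above: the new code sends --video-password as a SHA-256 hex digest in the `password` query parameter rather than as plaintext. A minimal illustrative sketch of just that hashing step; the content id and password values come from the added test case, and the API behaviour is assumed from the diff, not verified:

    import hashlib

    # gofile's getContent call (per the diff above) takes the video
    # password as a hex-encoded SHA-256 digest, not as plaintext.
    password = 'password'  # value used by the new test case
    query_params = {
        'contentId': 'gqOtRf',  # file id from the added test URL
        'websiteToken': 12345,
        'password': hashlib.sha256(password.encode('utf-8')).hexdigest(),
    }
    assert len(query_params['password']) == 64  # 32 bytes -> 64 hex chars
]
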
diff --git a/yt_dlp/extractor/googlepodcasts.py b/yt_dlp/extractor/googlepodcasts.py index 25631e213..8b2351ba8 100644 --- a/yt_dlp/extractor/googlepodcasts.py +++ b/yt_dlp/extractor/googlepodcasts.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/googlesearch.py b/yt_dlp/extractor/googlesearch.py index 4b8b1bcbb..67ca0e5e0 100644 --- a/yt_dlp/extractor/googlesearch.py +++ b/yt_dlp/extractor/googlesearch.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/gopro.py b/yt_dlp/extractor/gopro.py index 10cc1aec1..14d6b2187 100644 --- a/yt_dlp/extractor/gopro.py +++ b/yt_dlp/extractor/gopro.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/goshgay.py b/yt_dlp/extractor/goshgay.py index 377981d3e..9a1f32b7e 100644 --- a/yt_dlp/extractor/goshgay.py +++ b/yt_dlp/extractor/goshgay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_parse_qs, diff --git a/yt_dlp/extractor/gotostage.py b/yt_dlp/extractor/gotostage.py index 6aa96106a..112293bef 100644 --- a/yt_dlp/extractor/gotostage.py +++ b/yt_dlp/extractor/gotostage.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/gputechconf.py b/yt_dlp/extractor/gputechconf.py index 73dc62c49..2d13bf491 100644 --- a/yt_dlp/extractor/gputechconf.py +++ b/yt_dlp/extractor/gputechconf.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py index c9f1dd256..c112c7857 100644 --- a/yt_dlp/extractor/gronkh.py +++ b/yt_dlp/extractor/gronkh.py @@ -1,8 +1,11 @@ -# coding: utf-8 -from __future__ import unicode_literals +import functools from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + OnDemandPagedList, + traverse_obj, + unified_strdate, +) class GronkhIE(InfoExtractor): @@ -44,3 +47,54 @@ class GronkhIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class GronkhFeedIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv(?:/feed)?/?(?:#|$)' + IE_NAME = 'gronkh:feed' + + _TESTS = [{ + 'url': 'https://gronkh.tv/feed', + 'info_dict': { + 'id': 'feed', + }, + 'playlist_count': 16, + }, { + 'url': 'https://gronkh.tv', + 'only_matching': True, + }] + + def _entries(self): + for type_ in ('recent', 'views'): + info = self._download_json( + f'https://api.gronkh.tv/v1/video/discovery/{type_}', 'feed', note=f'Downloading {type_} API JSON') + for item in traverse_obj(info, ('discovery', ...)) or []: + yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item.get('title')) + + def _real_extract(self, url): + return self.playlist_result(self._entries(), 'feed') + + +class GronkhVodsIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/vods/streams/?(?:#|$)' + IE_NAME = 'gronkh:vods' + + _TESTS = [{ + 'url': 'https://gronkh.tv/vods/streams', + 'info_dict': { + 'id': 'vods', + }, + 'playlist_mincount': 150, + }] + _PER_PAGE = 25 + + def _fetch_page(self, page): + items = traverse_obj(self._download_json( + 
'https://api.gronkh.tv/v1/search', 'vods', query={'offset': self._PER_PAGE * page, 'first': self._PER_PAGE}, + note=f'Downloading stream video page {page + 1}'), ('results', 'videos', ...)) + for item in items or []: + yield self.url_result(f'https://gronkh.tv/watch/stream/{item["episode"]}', GronkhIE, item['episode'], item.get('title')) + + def _real_extract(self, url): + entries = OnDemandPagedList(functools.partial(self._fetch_page), self._PER_PAGE) + return self.playlist_result(entries, 'vods') diff --git a/yt_dlp/extractor/groupon.py b/yt_dlp/extractor/groupon.py index a6da90931..362d3ff83 100644 --- a/yt_dlp/extractor/groupon.py +++ b/yt_dlp/extractor/groupon.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/hbo.py b/yt_dlp/extractor/hbo.py index 68df748f5..f54628665 100644 --- a/yt_dlp/extractor/hbo.py +++ b/yt_dlp/extractor/hbo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/hearthisat.py b/yt_dlp/extractor/hearthisat.py index a3d6a055f..9aa1325af 100644 --- a/yt_dlp/extractor/hearthisat.py +++ b/yt_dlp/extractor/hearthisat.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/heise.py b/yt_dlp/extractor/heise.py index cbe564a3c..84e5d3023 100644 --- a/yt_dlp/extractor/heise.py +++ b/yt_dlp/extractor/heise.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .kaltura import KalturaIE from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/hellporno.py b/yt_dlp/extractor/hellporno.py index 92d32cdcc..fd0327228 100644 --- a/yt_dlp/extractor/hellporno.py +++ b/yt_dlp/extractor/hellporno.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/helsinki.py b/yt_dlp/extractor/helsinki.py index 575fb332a..b7c826055 100644 --- a/yt_dlp/extractor/helsinki.py +++ b/yt_dlp/extractor/helsinki.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/hentaistigma.py b/yt_dlp/extractor/hentaistigma.py index 86a93de4d..ca5ffc2ae 100644 --- a/yt_dlp/extractor/hentaistigma.py +++ b/yt_dlp/extractor/hentaistigma.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/hgtv.py b/yt_dlp/extractor/hgtv.py index a4f332565..c40017db1 100644 --- a/yt_dlp/extractor/hgtv.py +++ b/yt_dlp/extractor/hgtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index 46d7d62ab..a6a71d630 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -1,4 +1,3 @@ -# coding: utf-8 import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/historicfilms.py b/yt_dlp/extractor/historicfilms.py index 56343e98f..c428feede 100644 --- a/yt_dlp/extractor/historicfilms.py +++ b/yt_dlp/extractor/historicfilms.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import parse_duration diff --git a/yt_dlp/extractor/hitbox.py b/yt_dlp/extractor/hitbox.py index 
0470d0a99..a7e4424b6 100644 --- a/yt_dlp/extractor/hitbox.py +++ b/yt_dlp/extractor/hitbox.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/hitrecord.py b/yt_dlp/extractor/hitrecord.py index fd5dc2935..902af44fa 100644 --- a/yt_dlp/extractor/hitrecord.py +++ b/yt_dlp/extractor/hitrecord.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/hketv.py b/yt_dlp/extractor/hketv.py index 1f3502b90..4c616d1dd 100644 --- a/yt_dlp/extractor/hketv.py +++ b/yt_dlp/extractor/hketv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/hotnewhiphop.py b/yt_dlp/extractor/hotnewhiphop.py index 4703e1894..f8570cb86 100644 --- a/yt_dlp/extractor/hotnewhiphop.py +++ b/yt_dlp/extractor/hotnewhiphop.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_b64decode from ..utils import ( diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index a0ce1f10a..d9223a416 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import hmac import re @@ -17,6 +14,7 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, + join_nonempty, str_or_none, try_get, url_or_none, @@ -24,6 +22,8 @@ from ..utils import ( class HotStarBaseIE(InfoExtractor): + _BASE_URL = 'https://www.hotstar.com' + _API_URL = 'https://api.hotstar.com' _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee' def _call_api_impl(self, path, video_id, query, st=None, cookies=None): @@ -36,7 +36,7 @@ class HotStarBaseIE(InfoExtractor): token = cookies.get('userUP').value else: token = self._download_json( - 'https://api.hotstar.com/um/v3/users', + f'{self._API_URL}/um/v3/users', video_id, note='Downloading token', data=json.dumps({"device_ids": [{"id": compat_str(uuid.uuid4()), "type": "device_id"}]}).encode('utf-8'), headers={ @@ -46,12 +46,13 @@ class HotStarBaseIE(InfoExtractor): })['user_identity'] response = self._download_json( - 'https://api.hotstar.com/' + path, video_id, headers={ + f'{self._API_URL}/{path}', video_id, query=query, + headers={ 'hotstarauth': auth, 'x-hs-appversion': '6.72.2', 'x-hs-platform': 'web', 'x-hs-usertoken': token, - }, query=query) + }) if response['message'] != "Playback URL's fetched successfully": raise ExtractorError( @@ -59,17 +60,19 @@ class HotStarBaseIE(InfoExtractor): return response['data'] def _call_api(self, path, video_id, query_name='contentId'): - return self._download_json('https://api.hotstar.com/' + path, video_id=video_id, query={ - query_name: video_id, - 'tas': 10000, - }, headers={ - 'x-country-code': 'IN', - 'x-platform-code': 'PCTV', - }) + return self._download_json( + f'{self._API_URL}/{path}', video_id=video_id, + query={ + query_name: video_id, + 'tas': 10000, + }, headers={ + 'x-country-code': 'IN', + 'x-platform-code': 'PCTV', + }) def _call_api_v2(self, path, video_id, st=None, cookies=None): return self._call_api_impl( - '%s/content/%s' % (path, video_id), video_id, st=st, cookies=cookies, query={ + f'{path}/content/{video_id}', video_id, st=st, cookies=cookies, query={ 
'desired-config': 'audio_channel:stereo|container:fmp4|dynamic_range:hdr|encryption:plain|ladder:tv|package:dash|resolution:fhd|subs-tag:HotstarVIP|video_codec:h265', 'device-id': cookies.get('device_id').value if cookies.get('device_id') else compat_str(uuid.uuid4()), 'os-name': 'Windows', @@ -80,24 +83,15 @@ class HotStarBaseIE(InfoExtractor): class HotStarIE(HotStarBaseIE): IE_NAME = 'hotstar' _VALID_URL = r'''(?x) - (?: - hotstar\:| - https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/) - ) - (?: - (?P<type>movies|sports|episode|(?P<tv>tv)) - (?: - \:| - /[^/?#]+/ - (?(tv) - (?:[^/?#]+/){2}| - (?:[^/?#]+/)* - ) - )| - [^/?#]+/ - )? - (?P<id>\d{10}) - ''' + https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/) + (?: + (?P<type>movies|sports|episode|(?P<tv>tv))/ + (?(tv)(?:[^/?#]+/){2}|[^?#]*) + )? + [^/?#]+/ + (?P<id>\d{10}) + ''' + _TESTS = [{ 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273', 'info_dict': { @@ -108,39 +102,9 @@ class HotStarIE(HotStarBaseIE): 'timestamp': 1447248600, 'upload_date': '20151111', 'duration': 381, + 'episode': 'Can You Not Spread Rumours?', }, }, { - 'url': 'hotstar:1000076273', - 'only_matching': True, - }, { - 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157', - 'info_dict': { - 'id': '1000057157', - 'ext': 'mp4', - 'title': 'Radha Gopalam', - 'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22', - 'timestamp': 1140805800, - 'upload_date': '20060224', - 'duration': 9182, - }, - }, { - 'url': 'hotstar:movies:1000057157', - 'only_matching': True, - }, { - 'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104', - 'only_matching': True, - }, { - 'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956', - 'only_matching': True, - }, { - # contentData - 'url': 'hotstar:sports:1260065956', - 'only_matching': True, - }, { - # contentData - 'url': 'hotstar:sports:1260066104', - 'only_matching': True, - }, { 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847', 'info_dict': { 'id': '1000234847', @@ -158,12 +122,19 @@ class HotStarIE(HotStarBaseIE): 'season_id': 6771, 'episode': 'Janhvi Targets Suman', 'episode_number': 8, - }, + } }, { - 'url': 'hotstar:episode:1000234847', + 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157', + 'only_matching': True, + }, { + 'url': 'https://www.hotstar.com/in/sports/cricket/follow-the-blues-2021/recap-eng-fight-back-on-day-2/1260066104', + 'only_matching': True, + }, { + 'url': 'https://www.hotstar.com/in/sports/football/most-costly-pl-transfers-ft-grealish/1260065956', 'only_matching': True, }] _GEO_BYPASS = False + _TYPE = { 'movies': 'movie', 'sports': 'match', @@ -172,41 +143,53 @@ class HotStarIE(HotStarBaseIE): None: 'content', } + _IGNORE_MAP = { + 'res': 'resolution', + 'vcodec': 'video_codec', + 'dr': 'dynamic_range', + } + + @classmethod + def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None): + assert None in (video_type, root) + if not root: + root = join_nonempty(cls._BASE_URL, video_type, delim='/') + return f'{root}/{slug}/{video_id}' + def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - video_type = mobj.group('type') - cookies = self._get_cookies(url) + video_id, video_type = self._match_valid_url(url).group('id', 'type') video_type = self._TYPE.get(video_type, video_type) - video_data = self._call_api(f'o/v1/{video_type}/detail', 
video_id)['body']['results']['item'] - title = video_data['title'] + cookies = self._get_cookies(url) # Cookies before any request + video_data = self._call_api(f'o/v1/{video_type}/detail', video_id)['body']['results']['item'] if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'): self.report_drm(video_id) - headers = {'Referer': 'https://www.hotstar.com/in'} - formats = [] - subs = {} + # See https://github.com/yt-dlp/yt-dlp/issues/396 + st = self._download_webpage_handle(f'{self._BASE_URL}/in', video_id)[1].headers.get('x-origin-date') + geo_restricted = False - _, urlh = self._download_webpage_handle('https://www.hotstar.com/in', video_id) - # Required to fix https://github.com/yt-dlp/yt-dlp/issues/396 - st = urlh.headers.get('x-origin-date') + formats, subs = [], {} + headers = {'Referer': f'{self._BASE_URL}/in'} + # change to v2 in the future playback_sets = self._call_api_v2('play/v1/playback', video_id, st=st, cookies=cookies)['playBackSets'] for playback_set in playback_sets: if not isinstance(playback_set, dict): continue - dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr') + tags = str_or_none(playback_set.get('tagsCombination')) or '' + if any(f'{prefix}:{ignore}' in tags + for key, prefix in self._IGNORE_MAP.items() + for ignore in self._configuration_arg(key)): + continue + format_url = url_or_none(playback_set.get('playbackUrl')) if not format_url: continue - format_url = re.sub( - r'(?<=//staragvod)(\d)', r'web\1', format_url) - tags = str_or_none(playback_set.get('tagsCombination')) or '' - ingored_res, ignored_vcodec, ignored_dr = self._configuration_arg('res'), self._configuration_arg('vcodec'), self._configuration_arg('dr') - if any(f'resolution:{ig_res}' in tags for ig_res in ingored_res) or any(f'video_codec:{ig_vc}' in tags for ig_vc in ignored_vcodec) or any(f'dynamic_range:{ig_dr}' in tags for ig_dr in ignored_dr): - continue + format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url) + dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr') ext = determine_ext(format_url) + current_formats, current_subs = [], {} try: if 'package:hls' in tags or ext == 'm3u8': @@ -218,8 +201,7 @@ class HotStarIE(HotStarBaseIE): current_formats, current_subs = self._extract_mpd_formats_and_subtitles( format_url, video_id, mpd_id=f'{dr}-dash', headers=headers) elif ext == 'f4m': - # produce broken files - pass + pass # XXX: produce broken files else: current_formats = [{ 'url': format_url, @@ -230,6 +212,7 @@ class HotStarIE(HotStarBaseIE): if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: geo_restricted = True continue + if tags and 'encryption:plain' not in tags: for f in current_formats: f['has_drm'] = True @@ -238,18 +221,19 @@ class HotStarIE(HotStarBaseIE): for f in current_formats: if not f.get('langauge'): f['language'] = lang + formats.extend(current_formats) subs = self._merge_subtitles(subs, current_subs) + if not formats and geo_restricted: self.raise_geo_restricted(countries=['IN'], metadata_available=True) self._sort_formats(formats) - for f in formats: f.setdefault('http_headers', {}).update(headers) return { 'id': video_id, - 'title': title, + 'title': video_data.get('title'), 'description': video_data.get('description'), 'duration': int_or_none(video_data.get('duration')), 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), @@ -261,14 +245,48 @@ class HotStarIE(HotStarBaseIE): 'season': 
video_data.get('seasonName'), 'season_number': int_or_none(video_data.get('seasonNo')), 'season_id': video_data.get('seasonId'), - 'episode': title, + 'episode': video_data.get('title'), 'episode_number': int_or_none(video_data.get('episodeNo')), - 'http_headers': { - 'Referer': 'https://www.hotstar.com/in', - } } +class HotStarPrefixIE(InfoExtractor): + """ The "hotstar:" prefix is no longer in use, but this is kept for backward compatibility """ + IE_DESC = False + _VALID_URL = r'hotstar:(?:(?P<type>\w+):)?(?P<id>\d+)$' + _TESTS = [{ + 'url': 'hotstar:1000076273', + 'only_matching': True, + }, { + 'url': 'hotstar:movies:1000057157', + 'info_dict': { + 'id': '1000057157', + 'ext': 'mp4', + 'title': 'Radha Gopalam', + 'description': 'md5:be3bc342cc120bbc95b3b0960e2b0d22', + 'timestamp': 1140805800, + 'upload_date': '20060224', + 'duration': 9182, + 'episode': 'Radha Gopalam', + }, + }, { + 'url': 'hotstar:episode:1000234847', + 'only_matching': True, + }, { + # contentData + 'url': 'hotstar:sports:1260065956', + 'only_matching': True, + }, { + # contentData + 'url': 'hotstar:sports:1260066104', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id, video_type = self._match_valid_url(url).group('id', 'type') + return self.url_result(HotStarIE._video_url(video_id, video_type), HotStarIE, video_id) + + class HotStarPlaylistIE(HotStarBaseIE): IE_NAME = 'hotstar:playlist' _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)' @@ -288,11 +306,8 @@ class HotStarPlaylistIE(HotStarBaseIE): collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')['body']['results'] entries = [ - self.url_result( - 'https://www.hotstar.com/%s' % video['contentId'], - ie=HotStarIE.ie_key(), video_id=video['contentId']) - for video in collection['assets']['items'] - if video.get('contentId')] + self.url_result(HotStarIE._video_url(video['contentId']), HotStarIE, video['contentId']) + for video in collection['assets']['items'] if video.get('contentId')] return self.playlist_result(entries, playlist_id) @@ -326,16 +341,13 @@ class HotStarSeriesIE(HotStarBaseIE): 'x-country-code': 'IN', 'x-platform-code': 'PCTV', } - detail_json = self._download_json('https://api.hotstar.com/o/v1/show/detail?contentId=' + series_id, - video_id=series_id, headers=headers) - id = compat_str(try_get(detail_json, lambda x: x['body']['results']['item']['id'], int)) - item_json = self._download_json('https://api.hotstar.com/o/v1/tray/g/1/items?etid=0&tao=0&tas=10000&eid=' + id, - video_id=series_id, headers=headers) - entries = [ - self.url_result( - '%s/ignoreme/%d' % (url, video['contentId']), - ie=HotStarIE.ie_key(), video_id=video['contentId']) - for video in item_json['body']['results']['items'] - if video.get('contentId')] + detail_json = self._download_json( + f'{self._API_URL}/o/v1/show/detail?contentId={series_id}', series_id, headers=headers) + id = try_get(detail_json, lambda x: x['body']['results']['item']['id'], int) + item_json = self._download_json( + f'{self._API_URL}/o/v1/tray/g/1/items?etid=0&tao=0&tas=10000&eid={id}', series_id, headers=headers) - return self.playlist_result(entries, series_id) + return self.playlist_result([ + self.url_result(HotStarIE._video_url(video['contentId'], root=url), HotStarIE, video['contentId']) + for video in item_json['body']['results']['items'] if video.get('contentId') + ], series_id) diff --git a/yt_dlp/extractor/howcast.py b/yt_dlp/extractor/howcast.py index 7e36b85ad..59cf80f1a 100644 --- a/yt_dlp/extractor/howcast.py +++ 
b/yt_dlp/extractor/howcast.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import parse_iso8601 diff --git a/yt_dlp/extractor/howstuffworks.py b/yt_dlp/extractor/howstuffworks.py index cf90ab3c9..c49c0899e 100644 --- a/yt_dlp/extractor/howstuffworks.py +++ b/yt_dlp/extractor/howstuffworks.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( find_xpath_attr, diff --git a/yt_dlp/extractor/hrfensehen.py b/yt_dlp/extractor/hrfensehen.py index e39ded254..6f7ed9b4b 100644 --- a/yt_dlp/extractor/hrfensehen.py +++ b/yt_dlp/extractor/hrfensehen.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/hrti.py b/yt_dlp/extractor/hrti.py index 36d600773..773ae0c9a 100644 --- a/yt_dlp/extractor/hrti.py +++ b/yt_dlp/extractor/hrti.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/hse.py b/yt_dlp/extractor/hse.py index 9144ff8dc..9faf46a5d 100644 --- a/yt_dlp/extractor/hse.py +++ b/yt_dlp/extractor/hse.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/huajiao.py b/yt_dlp/extractor/huajiao.py index 4ca275dda..c498fa330 100644 --- a/yt_dlp/extractor/huajiao.py +++ b/yt_dlp/extractor/huajiao.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/huffpost.py b/yt_dlp/extractor/huffpost.py index 54385bafa..7286dbcd7 100644 --- a/yt_dlp/extractor/huffpost.py +++ b/yt_dlp/extractor/huffpost.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/hungama.py b/yt_dlp/extractor/hungama.py index 821b16e5d..938a24296 100644 --- a/yt_dlp/extractor/hungama.py +++ b/yt_dlp/extractor/hungama.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/huya.py b/yt_dlp/extractor/huya.py index 4e96f22fa..9dd5e41b3 100644 --- a/yt_dlp/extractor/huya.py +++ b/yt_dlp/extractor/huya.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import random diff --git a/yt_dlp/extractor/hypem.py b/yt_dlp/extractor/hypem.py index 9ca28d632..54db7b3eb 100644 --- a/yt_dlp/extractor/hypem.py +++ b/yt_dlp/extractor/hypem.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/icareus.py b/yt_dlp/extractor/icareus.py new file mode 100644 index 000000000..dc7a2f0ba --- /dev/null +++ b/yt_dlp/extractor/icareus.py @@ -0,0 +1,180 @@ +import re + +from .common import InfoExtractor +from ..utils import ( + clean_html, + determine_ext, + get_element_by_class, + int_or_none, + merge_dicts, + parse_bitrate, + parse_resolution, + remove_end, + str_or_none, + url_or_none, + urlencode_postdata, +) + + +class IcareusIE(InfoExtractor): + _DOMAINS = '|'.join(map(re.escape, ( + 'asahitv.fi', + 'helsinkikanava.fi', + 'hyvinvointitv.fi', + 'inez.fi', + 'permanto.fi', + 'suite.icareus.com', + 'videos.minifiddlers.org', + ))) + _VALID_URL = 
rf'(?P<base_url>https?://(?:www\.)?(?:{_DOMAINS}))/[^?#]+/player/[^?#]+\?(?:[^#]+&)?(?:assetId|eventId)=(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.helsinkikanava.fi/fi_FI/web/helsinkikanava/player/vod?assetId=68021894', + 'md5': 'ca0b62ffc814a5411dfa6349cf5adb8a', + 'info_dict': { + 'id': '68021894', + 'ext': 'mp4', + 'title': 'Perheiden parhaaksi', + 'description': 'md5:295785ea408e5ac00708766465cc1325', + 'thumbnail': 'https://www.helsinkikanava.fi/image/image_gallery?img_id=68022501', + 'upload_date': '20200924', + 'timestamp': 1600938300, + }, + }, { # Recorded livestream + 'url': 'https://www.helsinkikanava.fi/fi/web/helsinkikanava/player/event/view?eventId=76241489', + 'md5': '014327e69dfa7b949fcc861f6d162d6d', + 'info_dict': { + 'id': '76258304', + 'ext': 'mp4', + 'title': 'Helsingin kaupungin ja HUSin tiedotustilaisuus koronaepidemiatilanteesta 24.11.2020', + 'description': 'md5:3129d041c6fbbcdc7fe68d9a938fef1c', + 'thumbnail': 'https://icareus-suite.secure2.footprint.net/image/image_gallery?img_id=76288630', + 'upload_date': '20201124', + 'timestamp': 1606206600, + }, + }, { # Non-m3u8 stream + 'url': 'https://suite.icareus.com/fi/web/westend-indians/player/vod?assetId=47567389', + 'md5': '72fc04ee971bbedc44405cdf16c990b6', + 'info_dict': { + 'id': '47567389', + 'ext': 'mp4', + 'title': 'Omatoiminen harjoittelu - Laukominen', + 'description': '', + 'thumbnail': 'https://suite.icareus.com/image/image_gallery?img_id=47568162', + 'upload_date': '20200319', + 'timestamp': 1584658080, + }, + }, { + 'url': 'https://asahitv.fi/fi/web/asahi/player/vod?assetId=89415818', + 'only_matching': True + }, { + 'url': 'https://hyvinvointitv.fi/fi/web/hyvinvointitv/player/vod?assetId=89149730', + 'only_matching': True + }, { + 'url': 'https://inez.fi/fi/web/inez-media/player/vod?assetId=71328822', + 'only_matching': True + }, { + 'url': 'https://www.permanto.fi/fi/web/alfatv/player/vod?assetId=135497515', + 'only_matching': True + }, { + 'url': 'https://videos.minifiddlers.org/web/international-minifiddlers/player/vod?assetId=1982759', + 'only_matching': True + }] + + def _real_extract(self, url): + base_url, temp_id = self._match_valid_url(url).groups() + webpage = self._download_webpage(url, temp_id) + + video_id = self._search_regex(r"_icareus\['itemId'\]\s*=\s*'(\d+)'", webpage, 'video_id') + organization_id = self._search_regex(r"_icareus\['organizationId'\]\s*=\s*'(\d+)'", webpage, 'organization_id') + + assets = self._download_json( + self._search_regex(r'var\s+publishingServiceURL\s*=\s*"(http[^"]+)";', webpage, 'api_base'), + video_id, data=urlencode_postdata({ + 'version': '03', + 'action': 'getAssetPlaybackUrls', + 'organizationId': organization_id, + 'assetId': video_id, + 'token': self._search_regex(r"_icareus\['token'\]\s*=\s*'([a-f0-9]+)'", webpage, 'icareus_token'), + })) + + subtitles = { + remove_end(sdesc.split(' ')[0], ':'): [{'url': url_or_none(surl)}] + for _, sdesc, surl in assets.get('subtitles') or [] + } + + formats = [{ + 'format': item.get('name'), + 'format_id': 'audio', + 'vcodec': 'none', + 'url': url_or_none(item['url']), + 'tbr': int_or_none(self._search_regex( + r'\((\d+)\s*k\)', item.get('name') or '', 'audio bitrate', default=None)), + } for item in assets.get('audio_urls') or [] if url_or_none(item.get('url'))] + + for item in assets.get('urls') or []: + video_url = url_or_none(item.get('url')) + if video_url is None: + continue + ext = determine_ext(video_url) + if ext == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + video_url, 
video_id, 'mp4', m3u8_id='hls', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + fmt = item.get('name') + formats.append({ + 'url': video_url, + 'format': fmt, + 'tbr': parse_bitrate(fmt), + 'format_id': str_or_none(item.get('id')), + **parse_resolution(fmt), + }) + + info, token, live_title = self._search_json_ld(webpage, video_id, default={}), None, None + if not info: + token = self._search_regex( + r'data\s*:\s*{action:"getAsset".*?token:\'([a-f0-9]+)\'}', webpage, 'token', default=None) + if not token: + live_title = get_element_by_class('unpublished-info-item future-event-title', webpage) + + if token: + metadata = self._download_json( + f'{base_url}/icareus-suite-api-portlet/publishing', + video_id, fatal=False, data=urlencode_postdata({ + 'version': '03', + 'action': 'getAsset', + 'organizationId': organization_id, + 'assetId': video_id, + 'languageId': 'en_US', + 'userId': '0', + 'token': token, + })) or {} + info = { + 'title': metadata.get('name'), + 'description': metadata.get('description'), + 'timestamp': int_or_none(metadata.get('date'), scale=1000), + 'duration': int_or_none(metadata.get('duration')), + 'thumbnail': url_or_none(metadata.get('thumbnailMedium')), + } + elif live_title: # Recorded livestream + info = { + 'title': live_title, + 'description': get_element_by_class('unpublished-info-item future-event-description', webpage), + 'timestamp': int_or_none(self._search_regex( + r'var startEvent\s*=\s*(\d+);', webpage, 'uploadDate', fatal=False), scale=1000), + } + + thumbnails = info.get('thumbnails') or [{ + 'url': url_or_none(info.get('thumbnail') or assets.get('thumbnail')) + }] + + self._sort_formats(formats) + return merge_dicts({ + 'id': video_id, + 'title': None, + 'formats': formats, + 'subtitles': subtitles, + 'description': clean_html(info.get('description')), + 'thumbnails': thumbnails if thumbnails[0]['url'] else None, + }, info) diff --git a/yt_dlp/extractor/ichinanalive.py b/yt_dlp/extractor/ichinanalive.py index cb39f821c..ffff36cc1 100644 --- a/yt_dlp/extractor/ichinanalive.py +++ b/yt_dlp/extractor/ichinanalive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError, str_or_none, traverse_obj, unified_strdate from ..compat import compat_str diff --git a/yt_dlp/extractor/ign.py b/yt_dlp/extractor/ign.py index c826eb3ba..bfb1e9d64 100644 --- a/yt_dlp/extractor/ign.py +++ b/yt_dlp/extractor/ign.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/iheart.py b/yt_dlp/extractor/iheart.py index b54c05eeb..2c6a5b6a1 100644 --- a/yt_dlp/extractor/iheart.py +++ b/yt_dlp/extractor/iheart.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/imdb.py b/yt_dlp/extractor/imdb.py index 96cee2e2f..74cab7dc1 100644 --- a/yt_dlp/extractor/imdb.py +++ b/yt_dlp/extractor/imdb.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import base64 import json import re diff --git a/yt_dlp/extractor/imggaming.py b/yt_dlp/extractor/imggaming.py index ce7b21ab2..5b8bfda96 100644 --- a/yt_dlp/extractor/imggaming.py +++ b/yt_dlp/extractor/imggaming.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/imgur.py b/yt_dlp/extractor/imgur.py 
index dfa473752..a3bb47615 100644 --- a/yt_dlp/extractor/imgur.py +++ b/yt_dlp/extractor/imgur.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ina.py b/yt_dlp/extractor/ina.py index b3b2683cb..56038f1ca 100644 --- a/yt_dlp/extractor/ina.py +++ b/yt_dlp/extractor/ina.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/inc.py b/yt_dlp/extractor/inc.py index d5b258a0f..9b3fe9ac1 100644 --- a/yt_dlp/extractor/inc.py +++ b/yt_dlp/extractor/inc.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .kaltura import KalturaIE diff --git a/yt_dlp/extractor/indavideo.py b/yt_dlp/extractor/indavideo.py index 4c16243ec..fb041a182 100644 --- a/yt_dlp/extractor/indavideo.py +++ b/yt_dlp/extractor/indavideo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/infoq.py b/yt_dlp/extractor/infoq.py index 347cc5154..6b31701eb 100644 --- a/yt_dlp/extractor/infoq.py +++ b/yt_dlp/extractor/infoq.py @@ -1,15 +1,13 @@ -# coding: utf-8 - -from __future__ import unicode_literals - from ..compat import ( compat_b64decode, compat_urllib_parse_unquote, compat_urlparse, ) from ..utils import ( + ExtractorError, determine_ext, update_url_query, + traverse_obj, ) from .bokecc import BokeCCBaseIE @@ -38,6 +36,7 @@ class InfoQIE(BokeCCBaseIE): 'ext': 'flv', 'description': 'md5:308d981fb28fa42f49f9568322c683ff', }, + 'skip': 'Sorry, the page you visited does not exist', }, { 'url': 'https://www.infoq.com/presentations/Simple-Made-Easy', 'md5': '0e34642d4d9ef44bf86f66f6399672db', @@ -90,8 +89,10 @@ class InfoQIE(BokeCCBaseIE): }] def _extract_http_audio(self, webpage, video_id): - fields = self._form_hidden_inputs('mp3Form', webpage) - http_audio_url = fields.get('filename') + try: + http_audio_url = traverse_obj(self._form_hidden_inputs('mp3Form', webpage), 'filename') + except ExtractorError: + http_audio_url = None if not http_audio_url: return [] diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 970f2c8ab..05000e2fb 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -1,5 +1,3 @@ -# coding: utf-8 - import itertools import hashlib import json diff --git a/yt_dlp/extractor/internazionale.py b/yt_dlp/extractor/internazionale.py index 45e2af690..c8f70785f 100644 --- a/yt_dlp/extractor/internazionale.py +++ b/yt_dlp/extractor/internazionale.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import unified_timestamp diff --git a/yt_dlp/extractor/internetvideoarchive.py b/yt_dlp/extractor/internetvideoarchive.py index 880918cd7..6a8e30d73 100644 --- a/yt_dlp/extractor/internetvideoarchive.py +++ b/yt_dlp/extractor/internetvideoarchive.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/iprima.py b/yt_dlp/extractor/iprima.py index 1a2038453..5e0b523dc 100644 --- a/yt_dlp/extractor/iprima.py +++ b/yt_dlp/extractor/iprima.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import time diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index d07b39d48..a0298f1a1 100644 --- a/yt_dlp/extractor/iqiyi.py +++ 
b/yt_dlp/extractor/iqiyi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import itertools import re diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py index 64cb4e69a..4ac12603a 100644 --- a/yt_dlp/extractor/itprotv.py +++ b/yt_dlp/extractor/itprotv.py @@ -1,5 +1,3 @@ -# coding: utf-8 - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index f1591403f..26d77a469 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py index 098ab6665..f469a6adf 100644 --- a/yt_dlp/extractor/ivi.py +++ b/yt_dlp/extractor/ivi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/ivideon.py b/yt_dlp/extractor/ivideon.py index 44b220846..538a961b7 100644 --- a/yt_dlp/extractor/ivideon.py +++ b/yt_dlp/extractor/ivideon.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import ( compat_urllib_parse_urlencode, diff --git a/yt_dlp/extractor/iwara.py b/yt_dlp/extractor/iwara.py index c0e01e352..4b88da35f 100644 --- a/yt_dlp/extractor/iwara.py +++ b/yt_dlp/extractor/iwara.py @@ -1,21 +1,28 @@ -# coding: utf-8 -from __future__ import unicode_literals import re +import urllib from .common import InfoExtractor -from ..compat import compat_urllib_parse_urlparse from ..utils import ( int_or_none, mimetype2ext, remove_end, url_or_none, + urljoin, unified_strdate, strip_or_none, ) -class IwaraIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.|ecchi\.)?iwara\.tv/videos/(?P<id>[a-zA-Z0-9]+)' +class IwaraBaseIE(InfoExtractor): + _BASE_REGEX = r'(?P<base_url>https?://(?:www\.|ecchi\.)?iwara\.tv)' + + def _extract_playlist(self, base_url, webpage): + for path in re.findall(r'class="title">\s*<a[^<]+href="([^"]+)', webpage): + yield self.url_result(urljoin(base_url, path)) + + +class IwaraIE(IwaraBaseIE): + _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/videos/(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'url': 'http://iwara.tv/videos/amVwUl1EHpAD9RD', # md5 is unstable @@ -60,7 +67,7 @@ class IwaraIE(InfoExtractor): webpage, urlh = self._download_webpage_handle(url, video_id) - hostname = compat_urllib_parse_urlparse(urlh.geturl()).hostname + hostname = urllib.parse.urlparse(urlh.geturl()).hostname # ecchi is 'sexy' in Japanese age_limit = 18 if hostname.split('.')[0] == 'ecchi' else 0 @@ -120,3 +127,81 @@ class IwaraIE(InfoExtractor): 'upload_date': upload_date, 'description': description, } + + +class IwaraPlaylistIE(IwaraBaseIE): + _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/playlist/(?P<id>[^/?#&]+)' + IE_NAME = 'iwara:playlist' + + _TESTS = [{ + 'url': 'https://ecchi.iwara.tv/playlist/best-enf', + 'info_dict': { + 'title': 'Best enf', + 'uploader': 'Jared98112', + 'id': 'best-enf', + }, + 'playlist_mincount': 1097, + }, { + # urlencoded + 'url': 'https://ecchi.iwara.tv/playlist/%E3%83%97%E3%83%AC%E3%82%A4%E3%83%AA%E3%82%B9%E3%83%88-2', + 'info_dict': { + 'id': 'プレイリスト-2', + 'title': 'プレイリスト', + 'uploader': 'mainyu', + }, + 'playlist_mincount': 91, + }] + + def _real_extract(self, url): + playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url') + playlist_id = urllib.parse.unquote(playlist_id) + webpage = self._download_webpage(url, playlist_id) + 
return { + '_type': 'playlist', + 'id': playlist_id, + 'title': self._html_search_regex(r'class="title"[^>]*>([^<]+)', webpage, 'title', fatal=False), + 'uploader': self._html_search_regex(r'<h2>([^<]+)', webpage, 'uploader', fatal=False), + 'entries': self._extract_playlist(base_url, webpage), + } + + +class IwaraUserIE(IwaraBaseIE): + _VALID_URL = fr'{IwaraBaseIE._BASE_REGEX}/users/(?P<id>[^/?#&]+)' + IE_NAME = 'iwara:user' + + _TESTS = [{ + 'url': 'https://ecchi.iwara.tv/users/CuteMMD', + 'info_dict': { + 'id': 'CuteMMD', + }, + 'playlist_mincount': 198, + }, { + # urlencoded + 'url': 'https://ecchi.iwara.tv/users/%E5%92%95%E5%98%BF%E5%98%BF', + 'info_dict': { + 'id': '咕嘿嘿', + }, + 'playlist_mincount': 141, + }] + + def _entries(self, playlist_id, base_url, webpage): + yield from self._extract_playlist(base_url, webpage) + + page_urls = re.findall( + r'class="pager-item"[^>]*>\s*<a[^<]+href="([^"]+)', webpage) + + for n, path in enumerate(page_urls, 2): + yield from self._extract_playlist( + base_url, self._download_webpage( + urljoin(base_url, path), playlist_id, note=f'Downloading playlist page {n}')) + + def _real_extract(self, url): + playlist_id, base_url = self._match_valid_url(url).group('id', 'base_url') + playlist_id = urllib.parse.unquote(playlist_id) + + webpage = self._download_webpage( + f'{base_url}/users/{playlist_id}/videos', playlist_id) + + return self.playlist_result( + self._entries(playlist_id, base_url, webpage), playlist_id) diff --git a/yt_dlp/extractor/izlesene.py b/yt_dlp/extractor/izlesene.py index f8fca6c8f..6520ecf6d 100644 --- a/yt_dlp/extractor/izlesene.py +++ b/yt_dlp/extractor/izlesene.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/jable.py b/yt_dlp/extractor/jable.py index b294aee70..6840654cc 100644 --- a/yt_dlp/extractor/jable.py +++ b/yt_dlp/extractor/jable.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/jamendo.py b/yt_dlp/extractor/jamendo.py index 755d9703b..d960ee51c 100644 --- a/yt_dlp/extractor/jamendo.py +++ b/yt_dlp/extractor/jamendo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import random @@ -31,10 +28,11 @@ class JamendoIE(InfoExtractor): 'ext': 'flac', # 'title': 'Maya Filipič - Stories from Emona I', 'title': 'Stories from Emona I', - # 'artist': 'Maya Filipič', + 'artist': 'Maya Filipič', + 'album': 'Between two worlds', 'track': 'Stories from Emona I', 'duration': 210, - 'thumbnail': r're:^https?://.*\.jpg', + 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=29279&width=300&trackid=196219', 'timestamp': 1217438117, 'upload_date': '20080730', 'license': 'by-nc-nd', @@ -48,11 +46,11 @@ class JamendoIE(InfoExtractor): 'only_matching': True, }] - def _call_api(self, resource, resource_id): + def _call_api(self, resource, resource_id, fatal=True): path = '/api/%ss' % resource rand = compat_str(random.random()) return self._download_json( - 'https://www.jamendo.com' + path, resource_id, query={ + 'https://www.jamendo.com' + path, resource_id, fatal=fatal, query={ 'id[]': resource_id, }, headers={ 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand) @@ -74,6 +72,8 @@ class JamendoIE(InfoExtractor): # if artist_name: # title = '%s - %s' % (artist_name, title) # album = get_model('album') + artist = 
self._call_api("artist", track.get('artistId'), fatal=False) + album = self._call_api("album", track.get('albumId'), fatal=False) formats = [{ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' @@ -121,9 +121,9 @@ class JamendoIE(InfoExtractor): 'title': title, 'description': track.get('description'), 'duration': int_or_none(track.get('duration')), - # 'artist': artist_name, + 'artist': artist.get('name'), 'track': track_name, - # 'album': album.get('name'), + 'album': album.get('name'), 'formats': formats, 'license': '-'.join(license) if license else None, 'timestamp': int_or_none(track.get('dateCreated')), @@ -148,22 +148,38 @@ class JamendoAlbumIE(JamendoIE): 'info_dict': { 'id': '1032333', 'ext': 'flac', - 'title': 'Shearer - Warmachine', + 'title': 'Warmachine', 'artist': 'Shearer', 'track': 'Warmachine', 'timestamp': 1368089771, 'upload_date': '20130509', + 'view_count': int, + 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032333', + 'duration': 190, + 'license': 'by', + 'album': 'Duck On Cover', + 'average_rating': 4, + 'tags': ['rock', 'drums', 'bass', 'world', 'punk', 'neutral'], + 'like_count': int, } }, { 'md5': '1f358d7b2f98edfe90fd55dac0799d50', 'info_dict': { 'id': '1032330', 'ext': 'flac', - 'title': 'Shearer - Without Your Ghost', + 'title': 'Without Your Ghost', 'artist': 'Shearer', 'track': 'Without Your Ghost', 'timestamp': 1368089771, 'upload_date': '20130509', + 'duration': 192, + 'tags': ['rock', 'drums', 'bass', 'world', 'punk'], + 'album': 'Duck On Cover', + 'thumbnail': 'https://usercontent.jamendo.com?type=album&id=121486&width=300&trackid=1032330', + 'view_count': int, + 'average_rating': 4, + 'license': 'by', + 'like_count': int, } }], 'params': { diff --git a/yt_dlp/extractor/jeuxvideo.py b/yt_dlp/extractor/jeuxvideo.py index 77c0f520c..56ea15cf9 100644 --- a/yt_dlp/extractor/jeuxvideo.py +++ b/yt_dlp/extractor/jeuxvideo.py @@ -1,8 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/joj.py b/yt_dlp/extractor/joj.py index 7350f537c..a01411be1 100644 --- a/yt_dlp/extractor/joj.py +++ b/yt_dlp/extractor/joj.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/jove.py b/yt_dlp/extractor/jove.py index 4b7dfc526..245fe73d4 100644 --- a/yt_dlp/extractor/jove.py +++ b/yt_dlp/extractor/jove.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/jwplatform.py b/yt_dlp/extractor/jwplatform.py index 5aa508bf9..8dbbb2926 100644 --- a/yt_dlp/extractor/jwplatform.py +++ b/yt_dlp/extractor/jwplatform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py index 483ab7128..a5014d931 100644 --- a/yt_dlp/extractor/kakao.py +++ b/yt_dlp/extractor/kakao.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( @@ -109,6 +105,7 @@ class KakaoIE(InfoExtractor): resp = self._parse_json(e.cause.read().decode(), video_id) if resp.get('code') == 'GeoBlocked': self.raise_geo_restricted() + raise fmt_url = traverse_obj(fmt_url_json, ('videoLocation', 'url')) if not fmt_url: diff --git 
a/yt_dlp/extractor/kaltura.py b/yt_dlp/extractor/kaltura.py index f6dfc9caa..afad279bd 100644 --- a/yt_dlp/extractor/kaltura.py +++ b/yt_dlp/extractor/kaltura.py @@ -1,8 +1,6 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re import base64 +import json +import re from .common import InfoExtractor from ..compat import ( @@ -16,6 +14,7 @@ from ..utils import ( int_or_none, unsmuggle_url, smuggle_url, + traverse_obj, ) @@ -36,7 +35,7 @@ class KalturaIE(InfoExtractor): ) ''' _SERVICE_URL = 'http://cdnapi.kaltura.com' - _SERVICE_BASE = '/api_v3/index.php' + _SERVICE_BASE = '/api_v3/service/multirequest' # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php _CAPTION_TYPES = { 1: 'srt', @@ -172,30 +171,35 @@ class KalturaIE(InfoExtractor): def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs): params = actions[0] - if len(actions) > 1: - for i, a in enumerate(actions[1:], start=1): - for k, v in a.items(): - params['%d:%s' % (i, k)] = v + params.update({i: a for i, a in enumerate(actions[1:], start=1)}) data = self._download_json( (service_url or self._SERVICE_URL) + self._SERVICE_BASE, - video_id, query=params, *args, **kwargs) + video_id, data=json.dumps(params).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + 'Accept-Encoding': 'gzip, deflate, br', + }, *args, **kwargs) + + for idx, status in enumerate(data): + if not isinstance(status, dict): + continue + if status.get('objectType') == 'KalturaAPIException': + raise ExtractorError( + '%s said: %s (%d)' % (self.IE_NAME, status['message'], idx)) - status = data if len(actions) == 1 else data[0] - if status.get('objectType') == 'KalturaAPIException': - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, status['message'])) + data[1] = traverse_obj(data, (1, 'objects', 0)) return data def _get_video_info(self, video_id, partner_id, service_url=None): actions = [ { - 'action': 'null', - 'apiVersion': '3.1.5', - 'clientTag': 'kdp:v3.8.5', + 'apiVersion': '3.3.0', + 'clientTag': 'html5:v3.1.0', 'format': 1, # JSON, 2 = XML, 3 = PHP - 'service': 'multirequest', + 'ks': '', + 'partnerId': partner_id, }, { 'expiry': 86400, @@ -204,12 +208,14 @@ class KalturaIE(InfoExtractor): 'widgetId': '_%s' % partner_id, }, { - 'action': 'get', - 'entryId': video_id, + 'action': 'list', + 'filter': {'redirectFromEntryId': video_id}, 'service': 'baseentry', 'ks': '{1:result:ks}', - 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId', - 'responseProfile:type': 1, + 'responseProfile': { + 'type': 1, + 'fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId', + }, }, { 'action': 'getbyentryid', diff --git a/yt_dlp/extractor/kanalplay.py b/yt_dlp/extractor/kanalplay.py deleted file mode 100644 index 5e24f7e21..000000000 --- a/yt_dlp/extractor/kanalplay.py +++ /dev/null @@ -1,96 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - - -from .common import InfoExtractor -from ..utils import ( - ExtractorError, - float_or_none, - srt_subtitles_timecode, -) - - -class KanalPlayIE(InfoExtractor): - IE_DESC = 'Kanal 5/9/11 Play' - _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)' - _TESTS = [{ - 'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277', - 'info_dict': { - 'id': '3270012277', - 'ext': 'flv', - 'title': 'Saknar både dusch och avlopp', - 'description': 
'md5:6023a95832a06059832ae93bc3c7efb7', - 'duration': 2636.36, - }, - 'params': { - # rtmp download - 'skip_download': True, - } - }, { - 'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042', - 'only_matching': True, - }, { - 'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199', - 'only_matching': True, - }] - - def _fix_subtitles(self, subs): - return '\r\n\r\n'.join( - '%s\r\n%s --> %s\r\n%s' - % ( - num, - srt_subtitles_timecode(item['startMillis'] / 1000.0), - srt_subtitles_timecode(item['endMillis'] / 1000.0), - item['text'], - ) for num, item in enumerate(subs, 1)) - - def _get_subtitles(self, channel_id, video_id): - subs = self._download_json( - 'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id), - video_id, 'Downloading subtitles JSON', fatal=False) - return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} if subs else {} - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - channel_id = mobj.group('channel_id') - - video = self._download_json( - 'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id), - video_id) - - reasons_for_no_streams = video.get('reasonsForNoStreams') - if reasons_for_no_streams: - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, '\n'.join(reasons_for_no_streams)), - expected=True) - - title = video['title'] - description = video.get('description') - duration = float_or_none(video.get('length'), 1000) - thumbnail = video.get('posterUrl') - - stream_base_url = video['streamBaseUrl'] - - formats = [{ - 'url': stream_base_url, - 'play_path': stream['source'], - 'ext': 'flv', - 'tbr': float_or_none(stream.get('bitrate'), 1000), - 'rtmp_real_time': True, - } for stream in video['streams']] - self._sort_formats(formats) - - subtitles = {} - if video.get('hasSubtitle'): - subtitles = self.extract_subtitles(channel_id, video_id) - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - 'duration': duration, - 'formats': formats, - 'subtitles': subtitles, - } diff --git a/yt_dlp/extractor/karaoketv.py b/yt_dlp/extractor/karaoketv.py index bfccf89b0..381dc00ad 100644 --- a/yt_dlp/extractor/karaoketv.py +++ b/yt_dlp/extractor/karaoketv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/karrierevideos.py b/yt_dlp/extractor/karrierevideos.py index 7b291e0a0..28d4841aa 100644 --- a/yt_dlp/extractor/karrierevideos.py +++ b/yt_dlp/extractor/karrierevideos.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/keezmovies.py b/yt_dlp/extractor/keezmovies.py index 06dbcbb40..79f9c7fa7 100644 --- a/yt_dlp/extractor/keezmovies.py +++ b/yt_dlp/extractor/keezmovies.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/kelbyone.py b/yt_dlp/extractor/kelbyone.py index 20c26cf48..dea056c12 100644 --- a/yt_dlp/extractor/kelbyone.py +++ b/yt_dlp/extractor/kelbyone.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/ketnet.py b/yt_dlp/extractor/ketnet.py index e0599d02f..ab6276727 100644 --- a/yt_dlp/extractor/ketnet.py +++ 
b/yt_dlp/extractor/ketnet.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .canvas import CanvasIE from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/khanacademy.py b/yt_dlp/extractor/khanacademy.py index 87e520378..5333036a8 100644 --- a/yt_dlp/extractor/khanacademy.py +++ b/yt_dlp/extractor/khanacademy.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor @@ -27,16 +25,21 @@ class KhanAcademyBaseIE(InfoExtractor): def _real_extract(self, url): display_id = self._match_id(url) - component_props = self._parse_json(self._download_json( - 'https://www.khanacademy.org/api/internal/graphql', + content = self._download_json( + 'https://www.khanacademy.org/api/internal/graphql/FetchContentData', display_id, query={ - 'hash': 1604303425, + 'fastly_cacheable': 'persist_until_publish', + 'hash': '4134764944', + 'lang': 'en', 'variables': json.dumps({ 'path': display_id, - 'queryParams': '', + 'queryParams': 'lang=en', + 'isModal': False, + 'followRedirects': True, + 'countryCode': 'US', }), - })['data']['contentJson'], display_id)['componentProps'] - return self._parse_component_props(component_props) + })['data']['contentJson'] + return self._parse_component_props(self._parse_json(content, display_id)['componentProps']) class KhanAcademyIE(KhanAcademyBaseIE): diff --git a/yt_dlp/extractor/kickstarter.py b/yt_dlp/extractor/kickstarter.py index d4da8f484..c0d851d96 100644 --- a/yt_dlp/extractor/kickstarter.py +++ b/yt_dlp/extractor/kickstarter.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import smuggle_url diff --git a/yt_dlp/extractor/kinja.py b/yt_dlp/extractor/kinja.py index 1be8b4809..c00abfbc1 100644 --- a/yt_dlp/extractor/kinja.py +++ b/yt_dlp/extractor/kinja.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/kinopoisk.py b/yt_dlp/extractor/kinopoisk.py index cdbb642e2..84a2489a3 100644 --- a/yt_dlp/extractor/kinopoisk.py +++ b/yt_dlp/extractor/kinopoisk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/konserthusetplay.py b/yt_dlp/extractor/konserthusetplay.py index dd42bb2f2..1e177c363 100644 --- a/yt_dlp/extractor/konserthusetplay.py +++ b/yt_dlp/extractor/konserthusetplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py index 088db1cb0..892d355ba 100644 --- a/yt_dlp/extractor/koo.py +++ b/yt_dlp/extractor/koo.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/krasview.py b/yt_dlp/extractor/krasview.py index d27d052ff..4323aa429 100644 --- a/yt_dlp/extractor/krasview.py +++ b/yt_dlp/extractor/krasview.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/ku6.py b/yt_dlp/extractor/ku6.py index a574408e5..31b4ea0c6 100644 --- a/yt_dlp/extractor/ku6.py +++ b/yt_dlp/extractor/ku6.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff 
--git a/yt_dlp/extractor/kusi.py b/yt_dlp/extractor/kusi.py index 707fe1821..f1221ef1b 100644 --- a/yt_dlp/extractor/kusi.py +++ b/yt_dlp/extractor/kusi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random from .common import InfoExtractor diff --git a/yt_dlp/extractor/kuwo.py b/yt_dlp/extractor/kuwo.py index 460a4252f..0c9518e66 100644 --- a/yt_dlp/extractor/kuwo.py +++ b/yt_dlp/extractor/kuwo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/la7.py b/yt_dlp/extractor/la7.py index de985e450..5d52decdb 100644 --- a/yt_dlp/extractor/la7.py +++ b/yt_dlp/extractor/la7.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/laola1tv.py b/yt_dlp/extractor/laola1tv.py index b5d27c2f0..4014a9256 100644 --- a/yt_dlp/extractor/laola1tv.py +++ b/yt_dlp/extractor/laola1tv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/lastfm.py b/yt_dlp/extractor/lastfm.py index 5215717e8..7ba666d06 100644 --- a/yt_dlp/extractor/lastfm.py +++ b/yt_dlp/extractor/lastfm.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/lbry.py b/yt_dlp/extractor/lbry.py index 5d5457c53..953ce2e18 100644 --- a/yt_dlp/extractor/lbry.py +++ b/yt_dlp/extractor/lbry.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import json diff --git a/yt_dlp/extractor/lci.py b/yt_dlp/extractor/lci.py index 920872f5c..e7d2f8a24 100644 --- a/yt_dlp/extractor/lci.py +++ b/yt_dlp/extractor/lci.py @@ -1,26 +1,28 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor class LCIIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?lci\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html' - _TEST = { - 'url': 'http://www.lci.fr/international/etats-unis-a-j-62-hillary-clinton-reste-sans-voix-2001679.html', - 'md5': '2fdb2538b884d4d695f9bd2bde137e6c', + _VALID_URL = r'https?://(?:www\.)?(?:lci|tf1info)\.fr/[^/]+/[\w-]+-(?P<id>\d+)\.html' + _TESTS = [{ + 'url': 'https://www.tf1info.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html', 'info_dict': { - 'id': '13244802', + 'id': '13875948', 'ext': 'mp4', - 'title': 'Hillary Clinton et sa quinte de toux, en plein meeting', - 'description': 'md5:a4363e3a960860132f8124b62f4a01c9', - } - } + 'title': 'md5:660df5481fd418bc3bbb0d070e6fdb5a', + 'thumbnail': 'https://photos.tf1.fr/1280/720/presidentielle-2022-marine-le-pen-et-emmanuel-macron-invites-de-lci-ce-vendredi-9c0e73-e1a036-0@1x.jpg', + 'upload_date': '20220422', + 'duration': 33, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.lci.fr/politique/election-presidentielle-2022-second-tour-j-2-marine-le-pen-et-emmanuel-macron-en-interview-de-lci-vendredi-soir-2217486.html', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - wat_id = self._search_regex( - (r'data-watid=[\'"](\d+)', r'idwat["\']?\s*:\s*["\']?(\d+)'), - webpage, 'wat id') + wat_id = self._search_regex(r'watId["\']?\s*:\s*["\']?(\d+)', webpage, 'wat id') return self.url_result('wat:' + wat_id, 'Wat', wat_id) diff 
--git a/yt_dlp/extractor/lcp.py b/yt_dlp/extractor/lcp.py index ade27a99e..87543d56f 100644 --- a/yt_dlp/extractor/lcp.py +++ b/yt_dlp/extractor/lcp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .arkena import ArkenaIE diff --git a/yt_dlp/extractor/lecture2go.py b/yt_dlp/extractor/lecture2go.py index 81b5d41be..bee4e7587 100644 --- a/yt_dlp/extractor/lecture2go.py +++ b/yt_dlp/extractor/lecture2go.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/lecturio.py b/yt_dlp/extractor/lecturio.py index 0ee1eeb4d..c3d0cb193 100644 --- a/yt_dlp/extractor/lecturio.py +++ b/yt_dlp/extractor/lecturio.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/leeco.py b/yt_dlp/extractor/leeco.py index d5e11423c..258e396cb 100644 --- a/yt_dlp/extractor/leeco.py +++ b/yt_dlp/extractor/leeco.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime import hashlib import re diff --git a/yt_dlp/extractor/lego.py b/yt_dlp/extractor/lego.py index 901f43bcf..7d0238a1f 100644 --- a/yt_dlp/extractor/lego.py +++ b/yt_dlp/extractor/lego.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import uuid from .common import InfoExtractor diff --git a/yt_dlp/extractor/lemonde.py b/yt_dlp/extractor/lemonde.py index 3306892e8..c916791af 100644 --- a/yt_dlp/extractor/lemonde.py +++ b/yt_dlp/extractor/lemonde.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/lenta.py b/yt_dlp/extractor/lenta.py index 2ebd4e577..10aac984e 100644 --- a/yt_dlp/extractor/lenta.py +++ b/yt_dlp/extractor/lenta.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/libraryofcongress.py b/yt_dlp/extractor/libraryofcongress.py index 03f205144..afe3c98a1 100644 --- a/yt_dlp/extractor/libraryofcongress.py +++ b/yt_dlp/extractor/libraryofcongress.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/libsyn.py b/yt_dlp/extractor/libsyn.py index d1fcda4ef..8245a3481 100644 --- a/yt_dlp/extractor/libsyn.py +++ b/yt_dlp/extractor/libsyn.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/lifenews.py b/yt_dlp/extractor/lifenews.py index 49a0a5989..8c7d2064d 100644 --- a/yt_dlp/extractor/lifenews.py +++ b/yt_dlp/extractor/lifenews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/likee.py b/yt_dlp/extractor/likee.py new file mode 100644 index 000000000..b53e7a5ca --- /dev/null +++ b/yt_dlp/extractor/likee.py @@ -0,0 +1,193 @@ +import json + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + js_to_json, + parse_iso8601, + str_or_none, + traverse_obj, +) + + +class LikeeIE(InfoExtractor): + IE_NAME = 'likee' + _VALID_URL = r'(?x)https?://(www\.)?likee\.video/(?:(?P<channel_name>[^/]+)/video/|v/)(?P<id>\w+)' + _TESTS = [{ + 'url': 'https://likee.video/@huynh_hong_quan_/video/7093444807096327263', + 'info_dict': { + 'id': 
'7093444807096327263', + 'ext': 'mp4', + 'title': '🤴🤴🤴', + 'description': 'md5:9a7ebe816f0e78722ee5ed76f75983b4', + 'thumbnail': r're:^https?://.+\.jpg', + 'uploader': 'Huỳnh Hồng Quân ', + 'play_count': int, + 'download_count': int, + 'artist': 'Huỳnh Hồng Quân ', + 'timestamp': 1651571320, + 'upload_date': '20220503', + 'view_count': int, + 'uploader_id': 'huynh_hong_quan_', + 'duration': 12374, + 'comment_count': int, + 'like_count': int, + }, + }, { + 'url': 'https://likee.video/@649222262/video/7093167848050058862', + 'info_dict': { + 'id': '7093167848050058862', + 'ext': 'mp4', + 'title': 'likee video #7093167848050058862', + 'description': 'md5:3f971c8c6ee8a216f2b1a9094c5de99f', + 'thumbnail': r're:^https?://.+\.jpg', + 'comment_count': int, + 'like_count': int, + 'uploader': 'Vương Phước Nhi', + 'download_count': int, + 'timestamp': 1651506835, + 'upload_date': '20220502', + 'duration': 60024, + 'play_count': int, + 'artist': 'Vương Phước Nhi', + 'uploader_id': '649222262', + 'view_count': int, + }, + }, { + 'url': 'https://likee.video/@fernanda_rivasg/video/6932224568407629502', + 'info_dict': { + 'id': '6932224568407629502', + 'ext': 'mp4', + 'title': 'Un trend viejito🔥 #LIKEE #Ferlovers #trend ', + 'description': 'md5:c42b903a72a99d6d8b73e3d1126fbcef', + 'thumbnail': r're:^https?://.+\.jpg', + 'comment_count': int, + 'duration': 9684, + 'uploader_id': 'fernanda_rivasg', + 'view_count': int, + 'play_count': int, + 'artist': 'La Cami La✨', + 'download_count': int, + 'like_count': int, + 'uploader': 'Fernanda Rivas🎶', + 'timestamp': 1614034308, + 'upload_date': '20210222', + }, + }, { + 'url': 'https://likee.video/v/k6QcOp', + 'info_dict': { + 'id': 'k6QcOp', + 'ext': 'mp4', + 'title': '#AguaChallenge tú ya lo intentaste?😱🤩', + 'description': 'md5:b0cc462689d4ff2b624daa4dba7640d9', + 'thumbnail': r're:^https?://.+\.jpg', + 'comment_count': int, + 'duration': 18014, + 'play_count': int, + 'view_count': int, + 'timestamp': 1611694774, + 'like_count': int, + 'uploader': 'Fernanda Rivas🎶', + 'uploader_id': 'fernanda_rivasg', + 'download_count': int, + 'artist': 'ʟᴇʀɪᴋ_ᴜɴɪᴄᴏʀɴ♡︎', + 'upload_date': '20210126', + }, + }, { + 'url': 'https://www.likee.video/@649222262/video/7093167848050058862', + 'only_matching': True, + }, { + 'url': 'https://www.likee.video/v/k6QcOp', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + info = self._parse_json( + self._search_regex(r'window\.data\s=\s({.+?});', webpage, 'video info'), + video_id, transform_source=js_to_json) + video_url = traverse_obj(info, 'video_url', ('originVideoInfo', 'video_url')) + if not video_url: + self.raise_no_formats('Video was deleted', expected=True) + formats = [{ + 'format_id': 'mp4-with-watermark', + 'url': video_url, + 'height': info.get('video_height'), + 'width': info.get('video_width'), + }, { + 'format_id': 'mp4-without-watermark', + 'url': video_url.replace('_4', ''), + 'height': info.get('video_height'), + 'width': info.get('video_width'), + 'quality': 1, + }] + self._sort_formats(formats) + return { + 'id': video_id, + 'title': info.get('msgText'), + 'description': info.get('share_desc'), + 'view_count': int_or_none(info.get('video_count')), + 'like_count': int_or_none(info.get('likeCount')), + 'play_count': int_or_none(info.get('play_count')), + 'download_count': int_or_none(info.get('download_count')), + 'comment_count': int_or_none(info.get('comment_count')), + 'uploader':
str_or_none(info.get('nick_name')), + 'uploader_id': str_or_none(info.get('likeeId')), + 'artist': str_or_none(traverse_obj(info, ('sound', 'owner_name'))), + 'timestamp': parse_iso8601(info.get('uploadDate')), + 'thumbnail': info.get('coverUrl'), + 'duration': int_or_none(traverse_obj(info, ('option_data', 'dur'))), + 'formats': formats, + } + + +class LikeeUserIE(InfoExtractor): + IE_NAME = 'likee:user' + _VALID_URL = r'https?://(www\.)?likee\.video/(?P<id>[^/]+)/?$' + _TESTS = [{ + 'url': 'https://likee.video/@fernanda_rivasg', + 'info_dict': { + 'id': '925638334', + 'title': 'fernanda_rivasg', + }, + 'playlist_mincount': 500, + }, { + 'url': 'https://likee.video/@may_hmoob', + 'info_dict': { + 'id': '2943949041', + 'title': 'may_hmoob', + }, + 'playlist_mincount': 80, + }] + _PAGE_SIZE = 50 + _API_GET_USER_VIDEO = 'https://api.like-video.com/likee-activity-flow-micro/videoApi/getUserVideo' + + def _entries(self, user_name, user_id): + last_post_id = '' + while True: + user_videos = self._download_json( + self._API_GET_USER_VIDEO, user_name, + data=json.dumps({ + 'uid': user_id, + 'count': self._PAGE_SIZE, + 'lastPostId': last_post_id, + 'tabType': 0, + }).encode('utf-8'), + headers={'content-type': 'application/json'}, + note=f'Get user info with lastPostId #{last_post_id}') + items = traverse_obj(user_videos, ('data', 'videoList')) + if not items: + break + for item in items: + last_post_id = item['postId'] + yield self.url_result(f'https://likee.video/{user_name}/video/{last_post_id}') + + def _real_extract(self, url): + user_name = self._match_id(url) + webpage = self._download_webpage(url, user_name) + info = self._parse_json( + self._search_regex(r'window\.data\s*=\s*({.+?});', webpage, 'user info'), + user_name, transform_source=js_to_json) + user_id = traverse_obj(info, ('userinfo', 'uid')) + return self.playlist_result(self._entries(user_name, user_id), user_id, traverse_obj(info, ('userinfo', 'user_name'))) diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py index b20681ad1..25667fc07 100644 --- a/yt_dlp/extractor/limelight.py +++ b/yt_dlp/extractor/limelight.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/line.py b/yt_dlp/extractor/line.py index 987c43430..63b6c002a 100644 --- a/yt_dlp/extractor/line.py +++ b/yt_dlp/extractor/line.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index 0f57bfa06..27f1080b4 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from itertools import zip_longest import re diff --git a/yt_dlp/extractor/linuxacademy.py b/yt_dlp/extractor/linuxacademy.py index 6aff88e13..bf22855a9 100644 --- a/yt_dlp/extractor/linuxacademy.py +++ b/yt_dlp/extractor/linuxacademy.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json import random diff --git a/yt_dlp/extractor/litv.py b/yt_dlp/extractor/litv.py index 16b475a44..31826ac99 100644 --- a/yt_dlp/extractor/litv.py +++ b/yt_dlp/extractor/litv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/livejournal.py b/yt_dlp/extractor/livejournal.py index 3a9f4553f..96bd8b233 100644 
--- a/yt_dlp/extractor/livejournal.py +++ b/yt_dlp/extractor/livejournal.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import int_or_none diff --git a/yt_dlp/extractor/livestream.py b/yt_dlp/extractor/livestream.py index 45bf26d26..4b90c22c5 100644 --- a/yt_dlp/extractor/livestream.py +++ b/yt_dlp/extractor/livestream.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import itertools diff --git a/yt_dlp/extractor/lnkgo.py b/yt_dlp/extractor/lnkgo.py index bd2dffac0..3bb52777f 100644 --- a/yt_dlp/extractor/lnkgo.py +++ b/yt_dlp/extractor/lnkgo.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/localnews8.py b/yt_dlp/extractor/localnews8.py index c3e9d10fa..6f3f02c70 100644 --- a/yt_dlp/extractor/localnews8.py +++ b/yt_dlp/extractor/localnews8.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/lovehomeporn.py b/yt_dlp/extractor/lovehomeporn.py index ca4b5f375..ba5a13acd 100644 --- a/yt_dlp/extractor/lovehomeporn.py +++ b/yt_dlp/extractor/lovehomeporn.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .nuevo import NuevoBaseIE diff --git a/yt_dlp/extractor/lrt.py b/yt_dlp/extractor/lrt.py index 4024aef73..a49fd592f 100644 --- a/yt_dlp/extractor/lrt.py +++ b/yt_dlp/extractor/lrt.py @@ -1,21 +1,59 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, merge_dicts, + traverse_obj, + url_or_none, ) -class LRTIE(InfoExtractor): - IE_NAME = 'lrt.lt' +class LRTBaseIE(InfoExtractor): + def _extract_js_var(self, webpage, var_name, default=None): + return self._search_regex( + fr'{var_name}\s*=\s*(["\'])((?:(?!\1).)+)\1', + webpage, var_name.replace('_', ' '), default, group=2) + + +class LRTStreamIE(LRTBaseIE): + _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/tiesiogiai/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://www.lrt.lt/mediateka/tiesiogiai/lrt-opus', + 'info_dict': { + 'id': 'lrt-opus', + 'live_status': 'is_live', + 'title': 're:^LRT Opus.+$', + 'ext': 'mp4' + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + streams_data = self._download_json(self._extract_js_var(webpage, 'tokenURL'), video_id) + + formats, subtitles = [], {} + for stream_url in traverse_obj(streams_data, ( + 'response', 'data', lambda k, _: k.startswith('content')), expected_type=url_or_none): + fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, 'mp4', m3u8_id='hls', live=True) + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) + self._sort_formats(formats) + + stream_title = self._extract_js_var(webpage, 'video_title', 'LRT') + return { + 'id': video_id, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': True, + 'title': f'{self._og_search_title(webpage)} - {stream_title}' + } + + +class LRTVODIE(LRTBaseIE): _VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))' _TESTS = [{ # m3u8 download 'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene', - 'md5': '85cb2bb530f31d91a9c65b479516ade4', 'info_dict': { 'id': '2000127261', 'ext': 
'mp4', @@ -24,6 +62,8 @@ class LRTIE(InfoExtractor): 'duration': 3035, 'timestamp': 1604079000, 'upload_date': '20201030', + 'tags': ['LRT TELEVIZIJA', 'Beatos virtuvė', 'Beata Nicholson', 'Makaronai', 'Baklažanai', 'Vakarienė', 'Receptas'], + 'thumbnail': 'https://www.lrt.lt/img/2020/10/30/764041-126478-1287x836.jpg' }, }, { # direct mp3 download @@ -40,11 +80,6 @@ class LRTIE(InfoExtractor): }, }] - def _extract_js_var(self, webpage, var_name, default): - return self._search_regex( - r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name, - webpage, var_name.replace('_', ' '), default, group=2) - def _real_extract(self, url): path, video_id = self._match_valid_url(url).groups() webpage = self._download_webpage(url, video_id) diff --git a/yt_dlp/extractor/lynda.py b/yt_dlp/extractor/lynda.py index ce304743f..1ae7f9d4f 100644 --- a/yt_dlp/extractor/lynda.py +++ b/yt_dlp/extractor/lynda.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/m6.py b/yt_dlp/extractor/m6.py index 9806875e8..9dcc60164 100644 --- a/yt_dlp/extractor/m6.py +++ b/yt_dlp/extractor/m6.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/magentamusik360.py b/yt_dlp/extractor/magentamusik360.py index 5c274902f..5d0cb3bfb 100644 --- a/yt_dlp/extractor/magentamusik360.py +++ b/yt_dlp/extractor/magentamusik360.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/mailru.py b/yt_dlp/extractor/mailru.py index 5d9f80bb3..5f30d0eaa 100644 --- a/yt_dlp/extractor/mailru.py +++ b/yt_dlp/extractor/mailru.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json import re diff --git a/yt_dlp/extractor/mainstreaming.py b/yt_dlp/extractor/mainstreaming.py index 0f349a7a3..c144c7592 100644 --- a/yt_dlp/extractor/mainstreaming.py +++ b/yt_dlp/extractor/mainstreaming.py @@ -1,4 +1,3 @@ -# coding: utf-8 import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/malltv.py b/yt_dlp/extractor/malltv.py index fadfd9338..bfd6008b3 100644 --- a/yt_dlp/extractor/malltv.py +++ b/yt_dlp/extractor/malltv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/mangomolo.py b/yt_dlp/extractor/mangomolo.py index 68ce138b3..a392e9b54 100644 --- a/yt_dlp/extractor/mangomolo.py +++ b/yt_dlp/extractor/mangomolo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_b64decode, diff --git a/yt_dlp/extractor/manoto.py b/yt_dlp/extractor/manoto.py index d12aa5f60..dc8653f5d 100644 --- a/yt_dlp/extractor/manoto.py +++ b/yt_dlp/extractor/manoto.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/manyvids.py b/yt_dlp/extractor/manyvids.py index bd24f8853..1f537d267 100644 --- a/yt_dlp/extractor/manyvids.py +++ b/yt_dlp/extractor/manyvids.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/maoritv.py b/yt_dlp/extractor/maoritv.py index 0d23fec75..67780eafc 100644 --- a/yt_dlp/extractor/maoritv.py
+++ b/yt_dlp/extractor/maoritv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/markiza.py b/yt_dlp/extractor/markiza.py index def960a0c..53ed79158 100644 --- a/yt_dlp/extractor/markiza.py +++ b/yt_dlp/extractor/markiza.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/massengeschmacktv.py b/yt_dlp/extractor/massengeschmacktv.py index b381d31b4..4508e4391 100644 --- a/yt_dlp/extractor/massengeschmacktv.py +++ b/yt_dlp/extractor/massengeschmacktv.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/masters.py b/yt_dlp/extractor/masters.py new file mode 100644 index 000000000..d1ce07f10 --- /dev/null +++ b/yt_dlp/extractor/masters.py @@ -0,0 +1,39 @@ +from __future__ import unicode_literals +from .common import InfoExtractor +from ..utils import ( + traverse_obj, + unified_strdate, +) + + +class MastersIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?masters\.com/en_US/watch/(?P<date>\d{4}-\d{2}-\d{2})/(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.masters.com/en_US/watch/2022-04-07/16493755593805191/sungjae_im_thursday_interview_2022.html', + 'info_dict': { + 'id': '16493755593805191', + 'ext': 'mp4', + 'title': 'Sungjae Im: Thursday Interview 2022', + 'upload_date': '20220407', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }] + + def _real_extract(self, url): + video_id, upload_date = self._match_valid_url(url).group('id', 'date') + content_resp = self._download_json( + f'https://www.masters.com/relatedcontent/rest/v2/masters_v1/en/content/masters_v1_{video_id}_en', + video_id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles(traverse_obj(content_resp, ('media', 'm3u8')), video_id, 'mp4') + self._sort_formats(formats) + + thumbnails = [{'id': name, 'url': url} for name, url in traverse_obj(content_resp, ('images', 0), default={}).items()] + + return { + 'id': video_id, + 'title': content_resp.get('title'), + 'formats': formats, + 'subtitles': subtitles, + 'upload_date': unified_strdate(upload_date), + 'thumbnails': thumbnails, + } diff --git a/yt_dlp/extractor/matchtv.py b/yt_dlp/extractor/matchtv.py index e003b8d25..94ae20b26 100644 --- a/yt_dlp/extractor/matchtv.py +++ b/yt_dlp/extractor/matchtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random from .common import InfoExtractor diff --git a/yt_dlp/extractor/mdr.py b/yt_dlp/extractor/mdr.py index 3ca174c2b..b44cf809a 100644 --- a/yt_dlp/extractor/mdr.py +++ b/yt_dlp/extractor/mdr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/medaltv.py b/yt_dlp/extractor/medaltv.py index 59cc30736..527b50cb0 100644 --- a/yt_dlp/extractor/medaltv.py +++ b/yt_dlp/extractor/medaltv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mediaite.py b/yt_dlp/extractor/mediaite.py index b670f0d61..0f9079b11 100644 --- a/yt_dlp/extractor/mediaite.py +++ b/yt_dlp/extractor/mediaite.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index 
18ff3befa..f9a449377 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from ..utils import ( unified_strdate ) diff --git a/yt_dlp/extractor/medialaan.py b/yt_dlp/extractor/medialaan.py index 788acf7fb..297f8c4b2 100644 --- a/yt_dlp/extractor/medialaan.py +++ b/yt_dlp/extractor/medialaan.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mediaset.py b/yt_dlp/extractor/mediaset.py index d6b456c5d..60c454dda 100644 --- a/yt_dlp/extractor/mediaset.py +++ b/yt_dlp/extractor/mediaset.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/mediasite.py b/yt_dlp/extractor/mediasite.py index fbf9223b2..30464bad0 100644 --- a/yt_dlp/extractor/mediasite.py +++ b/yt_dlp/extractor/mediasite.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/medici.py b/yt_dlp/extractor/medici.py index cd910238e..328ccd2c9 100644 --- a/yt_dlp/extractor/medici.py +++ b/yt_dlp/extractor/medici.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( unified_strdate, diff --git a/yt_dlp/extractor/megaphone.py b/yt_dlp/extractor/megaphone.py index 5bafa6cf4..0c150ef45 100644 --- a/yt_dlp/extractor/megaphone.py +++ b/yt_dlp/extractor/megaphone.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/megatvcom.py b/yt_dlp/extractor/megatvcom.py index 0d6793acd..ec481d016 100644 --- a/yt_dlp/extractor/megatvcom.py +++ b/yt_dlp/extractor/megatvcom.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/meipai.py b/yt_dlp/extractor/meipai.py index 2445b8b39..95b6dfe52 100644 --- a/yt_dlp/extractor/meipai.py +++ b/yt_dlp/extractor/meipai.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/melonvod.py b/yt_dlp/extractor/melonvod.py index bd8cf13ab..0cbc961c4 100644 --- a/yt_dlp/extractor/melonvod.py +++ b/yt_dlp/extractor/melonvod.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/meta.py b/yt_dlp/extractor/meta.py index cdb46e163..7c11e6017 100644 --- a/yt_dlp/extractor/meta.py +++ b/yt_dlp/extractor/meta.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .pladform import PladformIE from ..utils import ( diff --git a/yt_dlp/extractor/metacafe.py b/yt_dlp/extractor/metacafe.py index 7b2d4a003..31fec86d2 100644 --- a/yt_dlp/extractor/metacafe.py +++ b/yt_dlp/extractor/metacafe.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/metacritic.py b/yt_dlp/extractor/metacritic.py index 1424288e7..543bdffad 100644 --- a/yt_dlp/extractor/metacritic.py +++ b/yt_dlp/extractor/metacritic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mgoon.py 
b/yt_dlp/extractor/mgoon.py index 184c311be..c41c51384 100644 --- a/yt_dlp/extractor/mgoon.py +++ b/yt_dlp/extractor/mgoon.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/mgtv.py b/yt_dlp/extractor/mgtv.py index 4ac70ea57..96f3fb982 100644 --- a/yt_dlp/extractor/mgtv.py +++ b/yt_dlp/extractor/mgtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import time import uuid diff --git a/yt_dlp/extractor/miaopai.py b/yt_dlp/extractor/miaopai.py index cf0610bdf..329ce3658 100644 --- a/yt_dlp/extractor/miaopai.py +++ b/yt_dlp/extractor/miaopai.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/microsoftstream.py b/yt_dlp/extractor/microsoftstream.py index 4d5a9df1f..2dde82a75 100644 --- a/yt_dlp/extractor/microsoftstream.py +++ b/yt_dlp/extractor/microsoftstream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from base64 import b64decode from .common import InfoExtractor diff --git a/yt_dlp/extractor/microsoftvirtualacademy.py b/yt_dlp/extractor/microsoftvirtualacademy.py index 46abd2a6d..f15f00ee5 100644 --- a/yt_dlp/extractor/microsoftvirtualacademy.py +++ b/yt_dlp/extractor/microsoftvirtualacademy.py @@ -1,11 +1,6 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor -from ..compat import ( - compat_xpath, -) from ..utils import ( int_or_none, parse_duration, @@ -70,9 +65,9 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): formats = [] - for sources in settings.findall(compat_xpath('.//MediaSources')): + for sources in settings.findall('.//MediaSources'): sources_type = sources.get('videoType') - for source in sources.findall(compat_xpath('./MediaSource')): + for source in sources.findall('./MediaSource'): video_url = source.text if not video_url or not video_url.startswith('http'): continue @@ -101,7 +96,7 @@ class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE): self._sort_formats(formats) subtitles = {} - for source in settings.findall(compat_xpath('.//MarkerResourceSource')): + for source in settings.findall('.//MarkerResourceSource'): subtitle_url = source.text if not subtitle_url: continue diff --git a/yt_dlp/extractor/mildom.py b/yt_dlp/extractor/mildom.py index 5f2df29c6..c7a61dfa0 100644 --- a/yt_dlp/extractor/mildom.py +++ b/yt_dlp/extractor/mildom.py @@ -1,8 +1,6 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import json +import uuid from .common import InfoExtractor from ..utils import ( @@ -11,7 +9,6 @@ from ..utils import ( ExtractorError, float_or_none, OnDemandPagedList, - random_uuidv4, traverse_obj, ) @@ -21,7 +18,7 @@ class MildomBaseIE(InfoExtractor): def _call_api(self, url, video_id, query=None, note='Downloading JSON metadata', body=None): if not self._GUEST_ID: - self._GUEST_ID = f'pc-gp-{random_uuidv4()}' + self._GUEST_ID = f'pc-gp-{str(uuid.uuid4())}' content = self._download_json( url, video_id, note=note, data=json.dumps(body).encode() if body else None, diff --git a/yt_dlp/extractor/minds.py b/yt_dlp/extractor/minds.py index 9da07207b..393d20604 100644 --- a/yt_dlp/extractor/minds.py +++ b/yt_dlp/extractor/minds.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( 
diff --git a/yt_dlp/extractor/ministrygrid.py b/yt_dlp/extractor/ministrygrid.py index 8ad9239c5..053c6726c 100644 --- a/yt_dlp/extractor/ministrygrid.py +++ b/yt_dlp/extractor/ministrygrid.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/minoto.py b/yt_dlp/extractor/minoto.py index 603ce940b..e799cd3bc 100644 --- a/yt_dlp/extractor/minoto.py +++ b/yt_dlp/extractor/minoto.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/miomio.py b/yt_dlp/extractor/miomio.py index 40f72d66f..a0a041ea5 100644 --- a/yt_dlp/extractor/miomio.py +++ b/yt_dlp/extractor/miomio.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random from .common import InfoExtractor diff --git a/yt_dlp/extractor/mirrativ.py b/yt_dlp/extractor/mirrativ.py index 2111de615..8192f2b46 100644 --- a/yt_dlp/extractor/mirrativ.py +++ b/yt_dlp/extractor/mirrativ.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/mit.py b/yt_dlp/extractor/mit.py index 60e456978..38cc0c274 100644 --- a/yt_dlp/extractor/mit.py +++ b/yt_dlp/extractor/mit.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/mitele.py b/yt_dlp/extractor/mitele.py index b5937233b..12b2b2432 100644 --- a/yt_dlp/extractor/mitele.py +++ b/yt_dlp/extractor/mitele.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .telecinco import TelecincoIE from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py index 31f450dfa..3f430a717 100644 --- a/yt_dlp/extractor/mixch.py +++ b/yt_dlp/extractor/mixch.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/mixcloud.py b/yt_dlp/extractor/mixcloud.py index c2dd078ac..796f268f4 100644 --- a/yt_dlp/extractor/mixcloud.py +++ b/yt_dlp/extractor/mixcloud.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools from .common import InfoExtractor @@ -9,7 +7,6 @@ from ..compat import ( compat_ord, compat_str, compat_urllib_parse_unquote, - compat_zip ) from ..utils import ( ExtractorError, @@ -76,7 +73,7 @@ class MixcloudIE(MixcloudBaseIE): """Encrypt/Decrypt XOR cipher. 
Both ways are possible because it's XOR.""" return ''.join([ compat_chr(compat_ord(ch) ^ compat_ord(k)) - for ch, k in compat_zip(ciphertext, itertools.cycle(key))]) + for ch, k in zip(ciphertext, itertools.cycle(key))]) def _real_extract(self, url): username, slug = self._match_valid_url(url).groups() diff --git a/yt_dlp/extractor/mlb.py b/yt_dlp/extractor/mlb.py index b69301d97..5fb97083a 100644 --- a/yt_dlp/extractor/mlb.py +++ b/yt_dlp/extractor/mlb.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py index 1d6d4b804..9383f1358 100644 --- a/yt_dlp/extractor/mlssoccer.py +++ b/yt_dlp/extractor/mlssoccer.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/mnet.py b/yt_dlp/extractor/mnet.py index 0e26ca1b3..65e3d476a 100644 --- a/yt_dlp/extractor/mnet.py +++ b/yt_dlp/extractor/mnet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/moevideo.py b/yt_dlp/extractor/moevideo.py index a3f1b3866..fda08cae9 100644 --- a/yt_dlp/extractor/moevideo.py +++ b/yt_dlp/extractor/moevideo.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/mofosex.py b/yt_dlp/extractor/mofosex.py index 5234cac02..66a098c97 100644 --- a/yt_dlp/extractor/mofosex.py +++ b/yt_dlp/extractor/mofosex.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mojvideo.py b/yt_dlp/extractor/mojvideo.py index 16d94052b..d47ad0742 100644 --- a/yt_dlp/extractor/mojvideo.py +++ b/yt_dlp/extractor/mojvideo.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/morningstar.py b/yt_dlp/extractor/morningstar.py index 71a22a614..e9fcfe3e2 100644 --- a/yt_dlp/extractor/morningstar.py +++ b/yt_dlp/extractor/morningstar.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/motherless.py b/yt_dlp/extractor/motherless.py index 111c7c544..9e53a8a97 100644 --- a/yt_dlp/extractor/motherless.py +++ b/yt_dlp/extractor/motherless.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import datetime import re diff --git a/yt_dlp/extractor/motorsport.py b/yt_dlp/extractor/motorsport.py index c9d1ab64d..b292aeb9a 100644 --- a/yt_dlp/extractor/motorsport.py +++ b/yt_dlp/extractor/motorsport.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urlparse, diff --git a/yt_dlp/extractor/movieclips.py b/yt_dlp/extractor/movieclips.py index 5453da1ac..4777f440e 100644 --- a/yt_dlp/extractor/movieclips.py +++ b/yt_dlp/extractor/movieclips.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( smuggle_url, diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py index 4605d3481..ca541567a 100644 --- a/yt_dlp/extractor/moviepilot.py +++ b/yt_dlp/extractor/moviepilot.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from 
__future__ import unicode_literals - from .dailymotion import DailymotionIE from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/moviezine.py b/yt_dlp/extractor/moviezine.py index 730da4bd7..5757322d6 100644 --- a/yt_dlp/extractor/moviezine.py +++ b/yt_dlp/extractor/moviezine.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/movingimage.py b/yt_dlp/extractor/movingimage.py index 4f62d628a..cdd8ba4dc 100644 --- a/yt_dlp/extractor/movingimage.py +++ b/yt_dlp/extractor/movingimage.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( unescapeHTML, diff --git a/yt_dlp/extractor/msn.py b/yt_dlp/extractor/msn.py index f34e2102c..6f4935e51 100644 --- a/yt_dlp/extractor/msn.py +++ b/yt_dlp/extractor/msn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index be5de0a70..d161c33c1 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -1,13 +1,7 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_xpath, -) +from ..compat import compat_str from ..utils import ( ExtractorError, find_xpath_attr, @@ -167,9 +161,9 @@ class MTVServicesInfoExtractor(InfoExtractor): itemdoc, './/{http://search.yahoo.com/mrss/}category', 'scheme', 'urn:mtvn:video_title') if title_el is None: - title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title')) + title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title') if title_el is None: - title_el = itemdoc.find(compat_xpath('.//title')) + title_el = itemdoc.find('.//title') if title_el.text is None: title_el = None diff --git a/yt_dlp/extractor/muenchentv.py b/yt_dlp/extractor/muenchentv.py index a53929e1b..b9681d1bd 100644 --- a/yt_dlp/extractor/muenchentv.py +++ b/yt_dlp/extractor/muenchentv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/murrtube.py b/yt_dlp/extractor/murrtube.py index 1eb5de660..508d51247 100644 --- a/yt_dlp/extractor/murrtube.py +++ b/yt_dlp/extractor/murrtube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import json diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py index 09fadf8d9..289ae5733 100644 --- a/yt_dlp/extractor/musescore.py +++ b/yt_dlp/extractor/musescore.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/musicdex.py b/yt_dlp/extractor/musicdex.py index 05f722091..4d8e74f6b 100644 --- a/yt_dlp/extractor/musicdex.py +++ b/yt_dlp/extractor/musicdex.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( date_from_str, diff --git a/yt_dlp/extractor/mwave.py b/yt_dlp/extractor/mwave.py index a67276596..0cbb16736 100644 --- a/yt_dlp/extractor/mwave.py +++ b/yt_dlp/extractor/mwave.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py index 3c2afd838..cdc340a80 100644 --- 
a/yt_dlp/extractor/mxplayer.py +++ b/yt_dlp/extractor/mxplayer.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import try_get diff --git a/yt_dlp/extractor/mychannels.py b/yt_dlp/extractor/mychannels.py index d820d4eb8..8a70c1f7b 100644 --- a/yt_dlp/extractor/mychannels.py +++ b/yt_dlp/extractor/mychannels.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor diff --git a/yt_dlp/extractor/myspace.py b/yt_dlp/extractor/myspace.py index 4227d4248..63d36c30a 100644 --- a/yt_dlp/extractor/myspace.py +++ b/yt_dlp/extractor/myspace.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/myspass.py b/yt_dlp/extractor/myspass.py index 1775d5f0b..28ac982d6 100644 --- a/yt_dlp/extractor/myspass.py +++ b/yt_dlp/extractor/myspass.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/myvi.py b/yt_dlp/extractor/myvi.py index 75d286365..b31cf4493 100644 --- a/yt_dlp/extractor/myvi.py +++ b/yt_dlp/extractor/myvi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/myvideoge.py b/yt_dlp/extractor/myvideoge.py index 0a1d7d0cb..513d4cb77 100644 --- a/yt_dlp/extractor/myvideoge.py +++ b/yt_dlp/extractor/myvideoge.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/myvidster.py b/yt_dlp/extractor/myvidster.py index 2117d302d..c91f294bf 100644 --- a/yt_dlp/extractor/myvidster.py +++ b/yt_dlp/extractor/myvidster.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/n1.py b/yt_dlp/extractor/n1.py index fdb7f32db..cc0ff533e 100644 --- a/yt_dlp/extractor/n1.py +++ b/yt_dlp/extractor/n1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nate.py b/yt_dlp/extractor/nate.py index 072faf6ea..c83b2acbd 100644 --- a/yt_dlp/extractor/nate.py +++ b/yt_dlp/extractor/nate.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/nationalgeographic.py b/yt_dlp/extractor/nationalgeographic.py index ee12e2b47..f22317d56 100644 --- a/yt_dlp/extractor/nationalgeographic.py +++ b/yt_dlp/extractor/nationalgeographic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .fox import FOXIE from ..utils import ( diff --git a/yt_dlp/extractor/naver.py b/yt_dlp/extractor/naver.py index a6821ba86..a230d9cdd 100644 --- a/yt_dlp/extractor/naver.py +++ b/yt_dlp/extractor/naver.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nba.py b/yt_dlp/extractor/nba.py index 359cc52b7..e95c1b795 100644 --- a/yt_dlp/extractor/nba.py +++ b/yt_dlp/extractor/nba.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index 
109403440..365c2e60d 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import base64 import json import re @@ -581,7 +579,7 @@ class NBCOlympicsStreamIE(AdobePassIE): for f in formats: # -http_seekable requires ffmpeg 4.3+ but it doesnt seem possible to # download with ffmpeg without this option - f['_ffmpeg_args'] = ['-seekable', '0', '-http_seekable', '0', '-icy', '0'] + f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0', '-http_seekable', '0', '-icy', '0']} self._sort_formats(formats) return { diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py index 1917254b8..de0142ccf 100644 --- a/yt_dlp/extractor/ndr.py +++ b/yt_dlp/extractor/ndr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/ndtv.py b/yt_dlp/extractor/ndtv.py index bc3eb9160..fbb033169 100644 --- a/yt_dlp/extractor/ndtv.py +++ b/yt_dlp/extractor/ndtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urllib_parse_unquote_plus diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index 77f253519..ff9a2adf0 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json import time @@ -21,9 +18,8 @@ class NebulaBaseIE(InfoExtractor): _nebula_bearer_token = None _zype_access_token = None - def _perform_nebula_auth(self): - username, password = self._get_login_info() - if not (username and password): + def _perform_nebula_auth(self, username, password): + if not username or not password: self.raise_login_required() data = json.dumps({'email': username, 'password': password}).encode('utf8') @@ -54,7 +50,7 @@ class NebulaBaseIE(InfoExtractor): return response['key'] - def _retrieve_nebula_api_token(self): + def _retrieve_nebula_api_token(self, username=None, password=None): """ Check cookie jar for valid token. Try to authenticate using credentials if no valid token can be found in the cookie jar. 
@@ -68,7 +64,7 @@ class NebulaBaseIE(InfoExtractor): if nebula_api_token: return nebula_api_token - return self._perform_nebula_auth() + return self._perform_nebula_auth(username, password) def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''): assert method in ('GET', 'POST',) @@ -149,8 +145,7 @@ class NebulaBaseIE(InfoExtractor): } def _perform_login(self, username=None, password=None): - # FIXME: username should be passed from here to inner functions - self._nebula_api_token = self._retrieve_nebula_api_token() + self._nebula_api_token = self._retrieve_nebula_api_token(username, password) self._nebula_bearer_token = self._fetch_nebula_bearer_token() self._zype_access_token = self._fetch_zype_access_token() @@ -160,7 +155,7 @@ class NebulaIE(NebulaBaseIE): _TESTS = [ { 'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast', - 'md5': 'fe79c4df8b3aa2fea98a93d027465c7e', + 'md5': '14944cfee8c7beeea106320c47560efc', 'info_dict': { 'id': '5c271b40b13fd613090034fd', 'ext': 'mp4', @@ -172,14 +167,21 @@ class NebulaIE(NebulaBaseIE): 'channel_id': 'lindsayellis', 'uploader': 'Lindsay Ellis', 'uploader_id': 'lindsayellis', - }, - 'params': { - 'usenetrc': True, + 'timestamp': 1533009600, + 'uploader_url': 'https://nebula.app/lindsayellis', + 'series': 'Lindsay Ellis', + 'average_rating': int, + 'display_id': 'that-time-disney-remade-beauty-and-the-beast', + 'channel_url': 'https://nebula.app/lindsayellis', + 'creator': 'Lindsay Ellis', + 'duration': 2212, + 'view_count': int, + 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*', }, }, { 'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', - 'md5': '6d4edd14ce65720fa63aba5c583fb328', + 'md5': 'd05739cf6c38c09322422f696b569c23', 'info_dict': { 'id': '5e7e78171aaf320001fbd6be', 'ext': 'mp4', @@ -191,14 +193,20 @@ class NebulaIE(NebulaBaseIE): 'channel_id': 'realengineering', 'uploader': 'Real Engineering', 'uploader_id': 'realengineering', - }, - 'params': { - 'usenetrc': True, + 'view_count': int, + 'series': 'Real Engineering', + 'average_rating': int, + 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', + 'creator': 'Real Engineering', + 'duration': 841, + 'channel_url': 'https://nebula.app/realengineering', + 'uploader_url': 'https://nebula.app/realengineering', + 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*', }, }, { 'url': 'https://nebula.app/videos/money-episode-1-the-draw', - 'md5': '8c7d272910eea320f6f8e6d3084eecf5', + 'md5': 'ebe28a7ad822b9ee172387d860487868', 'info_dict': { 'id': '5e779ebdd157bc0001d1c75a', 'ext': 'mp4', @@ -210,9 +218,15 @@ class NebulaIE(NebulaBaseIE): 'channel_id': 'tom-scott-presents-money', 'uploader': 'Tom Scott Presents: Money', 'uploader_id': 'tom-scott-presents-money', - }, - 'params': { - 'usenetrc': True, + 'uploader_url': 'https://nebula.app/tom-scott-presents-money', + 'duration': 825, + 'channel_url': 'https://nebula.app/tom-scott-presents-money', + 'view_count': int, + 'series': 'Tom Scott Presents: Money', + 'display_id': 'money-episode-1-the-draw', + 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*', + 'average_rating': int, + 'creator': 'Tom Scott Presents: Money', }, }, { @@ -233,9 +247,37 @@ class NebulaIE(NebulaBaseIE): return self._build_video_info(video) -class NebulaCollectionIE(NebulaBaseIE): - IE_NAME = 'nebula:collection' - _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P<id>[-\w]+)' +class 
NebulaSubscriptionsIE(NebulaBaseIE): + IE_NAME = 'nebula:subscriptions' + _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/myshows' + _TESTS = [ + { + 'url': 'https://nebula.app/myshows', + 'playlist_mincount': 1, + 'info_dict': { + 'id': 'myshows', + }, + }, + ] + + def _generate_playlist_entries(self): + next_url = 'https://content.watchnebula.com/library/video/?page_size=100' + page_num = 1 + while next_url: + channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer', + note=f'Retrieving subscriptions page {page_num}') + for episode in channel['results']: + yield self._build_video_info(episode) + next_url = channel['next'] + page_num += 1 + + def _real_extract(self, url): + return self.playlist_result(self._generate_playlist_entries(), 'myshows') + + +class NebulaChannelIE(NebulaBaseIE): + IE_NAME = 'nebula:channel' + _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!myshows|videos/)(?P<id>[-\w]+)' _TESTS = [ { 'url': 'https://nebula.app/tom-scott-presents-money', @@ -245,9 +287,6 @@ class NebulaCollectionIE(NebulaBaseIE): 'description': 'Tom Scott hosts a series all about trust, negotiation and money.', }, 'playlist_count': 5, - 'params': { - 'usenetrc': True, - }, }, { 'url': 'https://nebula.app/lindsayellis', 'info_dict': { @@ -256,9 +295,6 @@ class NebulaCollectionIE(NebulaBaseIE): 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.', }, 'playlist_mincount': 100, - 'params': { - 'usenetrc': True, - }, }, ] diff --git a/yt_dlp/extractor/nerdcubed.py b/yt_dlp/extractor/nerdcubed.py index 9feccc672..7c801b5d3 100644 --- a/yt_dlp/extractor/nerdcubed.py +++ b/yt_dlp/extractor/nerdcubed.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime from .common import InfoExtractor diff --git a/yt_dlp/extractor/neteasemusic.py b/yt_dlp/extractor/neteasemusic.py index 57b4774b6..4def7e76b 100644 --- a/yt_dlp/extractor/neteasemusic.py +++ b/yt_dlp/extractor/neteasemusic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from hashlib import md5 from base64 import b64encode from datetime import datetime diff --git a/yt_dlp/extractor/netzkino.py b/yt_dlp/extractor/netzkino.py index 4ad0d8e96..49b29b67c 100644 --- a/yt_dlp/extractor/netzkino.py +++ b/yt_dlp/extractor/netzkino.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/newgrounds.py b/yt_dlp/extractor/newgrounds.py index 6525a6d8a..ba24720e3 100644 --- a/yt_dlp/extractor/newgrounds.py +++ b/yt_dlp/extractor/newgrounds.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/newstube.py b/yt_dlp/extractor/newstube.py index 479141ae0..20db46057 100644 --- a/yt_dlp/extractor/newstube.py +++ b/yt_dlp/extractor/newstube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import hashlib diff --git a/yt_dlp/extractor/newsy.py b/yt_dlp/extractor/newsy.py index cf3164100..9fde6c079 100644 --- a/yt_dlp/extractor/newsy.py +++ b/yt_dlp/extractor/newsy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( js_to_json, diff --git a/yt_dlp/extractor/nextmedia.py b/yt_dlp/extractor/nextmedia.py index 7bd1290bf..1f83089fc 100644 --- a/yt_dlp/extractor/nextmedia.py +++ 
b/yt_dlp/extractor/nextmedia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py index a521bb6e4..01376be3d 100644 --- a/yt_dlp/extractor/nexx.py +++ b/yt_dlp/extractor/nexx.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import random import re diff --git a/yt_dlp/extractor/nfb.py b/yt_dlp/extractor/nfb.py index a12e503de..79c6aaf0c 100644 --- a/yt_dlp/extractor/nfb.py +++ b/yt_dlp/extractor/nfb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/nfhsnetwork.py b/yt_dlp/extractor/nfhsnetwork.py index 802f6caf0..e6f98b036 100644 --- a/yt_dlp/extractor/nfhsnetwork.py +++ b/yt_dlp/extractor/nfhsnetwork.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/nfl.py b/yt_dlp/extractor/nfl.py index 821276a31..e5810b346 100644 --- a/yt_dlp/extractor/nfl.py +++ b/yt_dlp/extractor/nfl.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index 3b8efc3e6..cf2ec7b79 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nhl.py b/yt_dlp/extractor/nhl.py index d3a5e17e9..884f9e2ae 100644 --- a/yt_dlp/extractor/nhl.py +++ b/yt_dlp/extractor/nhl.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/nick.py b/yt_dlp/extractor/nick.py index ba7da7602..2a228d8de 100644 --- a/yt_dlp/extractor/nick.py +++ b/yt_dlp/extractor/nick.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .mtv import MTVServicesInfoExtractor from ..utils import update_url_query diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 4eb6ed070..a80b544f8 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime import functools import itertools @@ -10,8 +7,6 @@ import time from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, compat_HTTPError, ) from ..utils import ( @@ -35,6 +30,7 @@ from ..utils import ( update_url_query, url_or_none, urlencode_postdata, + urljoin, ) @@ -195,7 +191,7 @@ class NiconicoIE(InfoExtractor): self._request_webpage( 'https://account.nicovideo.jp/login', None, note='Acquiring Login session') - urlh = self._request_webpage( + page = self._download_webpage( 'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None, note='Logging in', errnote='Unable to log in', data=urlencode_postdata(login_form_strs), @@ -203,19 +199,32 @@ class NiconicoIE(InfoExtractor): 'Referer': 'https://account.nicovideo.jp/login', 'Content-Type': 'application/x-www-form-urlencoded', }) - if urlh is False: - login_ok = False - else: - parts = compat_urllib_parse_urlparse(urlh.geturl()) - if 
compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login': - login_ok = False + if 'oneTimePw' in page: + post_url = self._search_regex( + r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url') + page = self._download_webpage( + urljoin('https://account.nicovideo.jp', post_url), None, + note='Performing MFA', errnote='Unable to complete MFA', + data=urlencode_postdata({ + 'otp': self._get_tfa_info('6 digits code') + }), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + if 'oneTimePw' in page or 'formError' in page: + err_msg = self._html_search_regex( + r'formError["\']+>(.*?)</div>', page, 'form_error', + default='There\'s an error but the message can\'t be parsed.', + flags=re.DOTALL) + self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"') + return False + login_ok = 'class="notice error"' not in page if not login_ok: - self.report_warning('unable to log in: bad username or password') + self.report_warning('Unable to log in: bad username or password') return login_ok def _get_heartbeat_info(self, info_dict): video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/') - dmc_protocol = info_dict['_expected_protocol'] + dmc_protocol = info_dict['expected_protocol'] api_data = ( info_dict.get('_api_data') @@ -369,7 +378,7 @@ class NiconicoIE(InfoExtractor): 'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')), 'quality': -2 if 'low' in video_quality['id'] else None, 'protocol': 'niconico_dmc', - '_expected_protocol': dmc_protocol, + 'expected_protocol': dmc_protocol, # XXX: This is not a documented field 'http_headers': { 'Origin': 'https://www.nicovideo.jp', 'Referer': 'https://www.nicovideo.jp/watch/' + video_id, diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py index 781842721..462caf466 100644 --- a/yt_dlp/extractor/ninecninemedia.py +++ b/yt_dlp/extractor/ninecninemedia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/ninegag.py b/yt_dlp/extractor/ninegag.py index 14390823b..00ca95ea2 100644 --- a/yt_dlp/extractor/ninegag.py +++ b/yt_dlp/extractor/ninegag.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/ninenow.py b/yt_dlp/extractor/ninenow.py index 6043674ba..b970f8ccb 100644 --- a/yt_dlp/extractor/ninenow.py +++ b/yt_dlp/extractor/ninenow.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/nintendo.py b/yt_dlp/extractor/nintendo.py index ff8f70ba6..ed839af25 100644 --- a/yt_dlp/extractor/nintendo.py +++ b/yt_dlp/extractor/nintendo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nitter.py b/yt_dlp/extractor/nitter.py index 8bb709cd7..251bf444f 100644 --- a/yt_dlp/extractor/nitter.py +++ b/yt_dlp/extractor/nitter.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/njpwworld.py b/yt_dlp/extractor/njpwworld.py index 68c8c8e52..e761cf257 100644 --- a/yt_dlp/extractor/njpwworld.py +++ 
b/yt_dlp/extractor/njpwworld.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nobelprize.py b/yt_dlp/extractor/nobelprize.py index 4dfdb09d6..35b64530f 100644 --- a/yt_dlp/extractor/nobelprize.py +++ b/yt_dlp/extractor/nobelprize.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( js_to_json, diff --git a/yt_dlp/extractor/noco.py b/yt_dlp/extractor/noco.py deleted file mode 100644 index 28af909d5..000000000 --- a/yt_dlp/extractor/noco.py +++ /dev/null @@ -1,228 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re -import time -import hashlib - -from .common import InfoExtractor -from ..compat import ( - compat_str, -) -from ..utils import ( - clean_html, - ExtractorError, - int_or_none, - float_or_none, - parse_iso8601, - parse_qs, - sanitized_Request, - urlencode_postdata, -) - - -class NocoIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www\.)?noco\.tv/emission/|player\.noco\.tv/\?idvideo=)(?P<id>\d+)' - _LOGIN_URL = 'https://noco.tv/do.php' - _API_URL_TEMPLATE = 'https://api.noco.tv/1.1/%s?ts=%s&tk=%s' - _SUB_LANG_TEMPLATE = '&sub_lang=%s' - _NETRC_MACHINE = 'noco' - - _TESTS = [ - { - 'url': 'http://noco.tv/emission/11538/nolife/ami-ami-idol-hello-france/', - 'md5': '0a993f0058ddbcd902630b2047ef710e', - 'info_dict': { - 'id': '11538', - 'ext': 'mp4', - 'title': 'Ami Ami Idol - Hello! France', - 'description': 'md5:4eaab46ab68fa4197a317a88a53d3b86', - 'upload_date': '20140412', - 'uploader': 'Nolife', - 'uploader_id': 'NOL', - 'duration': 2851.2, - }, - 'skip': 'Requires noco account', - }, - { - 'url': 'http://noco.tv/emission/12610/lbl42/the-guild/s01e01-wake-up-call', - 'md5': 'c190f1f48e313c55838f1f412225934d', - 'info_dict': { - 'id': '12610', - 'ext': 'mp4', - 'title': 'The Guild #1 - Wake-Up Call', - 'timestamp': 1403863200, - 'upload_date': '20140627', - 'uploader': 'LBL42', - 'uploader_id': 'LBL', - 'duration': 233.023, - }, - 'skip': 'Requires noco account', - } - ] - - def _perform_login(self, username, password): - login = self._download_json( - self._LOGIN_URL, None, 'Logging in', - data=urlencode_postdata({ - 'a': 'login', - 'cookie': '1', - 'username': username, - 'password': password, - }), - headers={ - 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', - }) - - if 'erreur' in login: - raise ExtractorError('Unable to login: %s' % clean_html(login['erreur']), expected=True) - - @staticmethod - def _ts(): - return int(time.time() * 1000) - - def _call_api(self, path, video_id, note, sub_lang=None): - ts = compat_str(self._ts() + self._ts_offset) - tk = hashlib.md5((hashlib.md5(ts.encode('ascii')).hexdigest() + '#8S?uCraTedap6a').encode('ascii')).hexdigest() - url = self._API_URL_TEMPLATE % (path, ts, tk) - if sub_lang: - url += self._SUB_LANG_TEMPLATE % sub_lang - - request = sanitized_Request(url) - request.add_header('Referer', self._referer) - - resp = self._download_json(request, video_id, note) - - if isinstance(resp, dict) and resp.get('error'): - self._raise_error(resp['error'], resp['description']) - - return resp - - def _raise_error(self, error, description): - raise ExtractorError( - '%s returned error: %s - %s' % (self.IE_NAME, error, description), - expected=True) - - def _real_extract(self, url): - video_id = self._match_id(url) - - # Timestamp adjustment offset between server time and local time - # must be calculated in order 
to use timestamps closest to server's - # in all API requests (see https://github.com/ytdl-org/youtube-dl/issues/7864) - webpage = self._download_webpage(url, video_id) - - player_url = self._search_regex( - r'(["\'])(?P<player>https?://noco\.tv/(?:[^/]+/)+NocoPlayer.+?\.swf.*?)\1', - webpage, 'noco player', group='player', - default='http://noco.tv/cdata/js/player/NocoPlayer-v1.2.40.swf') - - qs = parse_qs(player_url) - ts = int_or_none(qs.get('ts', [None])[0]) - self._ts_offset = ts - self._ts() if ts else 0 - self._referer = player_url - - medias = self._call_api( - 'shows/%s/medias' % video_id, - video_id, 'Downloading video JSON') - - show = self._call_api( - 'shows/by_id/%s' % video_id, - video_id, 'Downloading show JSON')[0] - - options = self._call_api( - 'users/init', video_id, - 'Downloading user options JSON')['options'] - audio_lang_pref = options.get('audio_language') or options.get('language', 'fr') - - if audio_lang_pref == 'original': - audio_lang_pref = show['original_lang'] - if len(medias) == 1: - audio_lang_pref = list(medias.keys())[0] - elif audio_lang_pref not in medias: - audio_lang_pref = 'fr' - - qualities = self._call_api( - 'qualities', - video_id, 'Downloading qualities JSON') - - formats = [] - - for audio_lang, audio_lang_dict in medias.items(): - preference = 1 if audio_lang == audio_lang_pref else 0 - for sub_lang, lang_dict in audio_lang_dict['video_list'].items(): - for format_id, fmt in lang_dict['quality_list'].items(): - format_id_extended = 'audio-%s_sub-%s_%s' % (audio_lang, sub_lang, format_id) - - video = self._call_api( - 'shows/%s/video/%s/%s' % (video_id, format_id.lower(), audio_lang), - video_id, 'Downloading %s video JSON' % format_id_extended, - sub_lang if sub_lang != 'none' else None) - - file_url = video['file'] - if not file_url: - continue - - if file_url in ['forbidden', 'not found']: - popmessage = video['popmessage'] - self._raise_error(popmessage['title'], popmessage['message']) - - formats.append({ - 'url': file_url, - 'format_id': format_id_extended, - 'width': int_or_none(fmt.get('res_width')), - 'height': int_or_none(fmt.get('res_lines')), - 'abr': int_or_none(fmt.get('audiobitrate'), 1000), - 'vbr': int_or_none(fmt.get('videobitrate'), 1000), - 'filesize': int_or_none(fmt.get('filesize')), - 'format_note': qualities[format_id].get('quality_name'), - 'quality': qualities[format_id].get('priority'), - 'language_preference': preference, - }) - - self._sort_formats(formats) - - timestamp = parse_iso8601(show.get('online_date_start_utc'), ' ') - - if timestamp is not None and timestamp < 0: - timestamp = None - - uploader = show.get('partner_name') - uploader_id = show.get('partner_key') - duration = float_or_none(show.get('duration_ms'), 1000) - - thumbnails = [] - for thumbnail_key, thumbnail_url in show.items(): - m = re.search(r'^screenshot_(?P<width>\d+)x(?P<height>\d+)$', thumbnail_key) - if not m: - continue - thumbnails.append({ - 'url': thumbnail_url, - 'width': int(m.group('width')), - 'height': int(m.group('height')), - }) - - episode = show.get('show_TT') or show.get('show_OT') - family = show.get('family_TT') or show.get('family_OT') - episode_number = show.get('episode_number') - - title = '' - if family: - title += family - if episode_number: - title += ' #' + compat_str(episode_number) - if episode: - title += ' - ' + compat_str(episode) - - description = show.get('show_resume') or show.get('family_resume') - - return { - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnails': thumbnails, - 
'timestamp': timestamp, - 'uploader': uploader, - 'uploader_id': uploader_id, - 'duration': duration, - 'formats': formats, - } diff --git a/yt_dlp/extractor/nonktube.py b/yt_dlp/extractor/nonktube.py index ca1424e06..f191be33b 100644 --- a/yt_dlp/extractor/nonktube.py +++ b/yt_dlp/extractor/nonktube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .nuevo import NuevoBaseIE diff --git a/yt_dlp/extractor/noodlemagazine.py b/yt_dlp/extractor/noodlemagazine.py index 2f170bbfe..3e04da67e 100644 --- a/yt_dlp/extractor/noodlemagazine.py +++ b/yt_dlp/extractor/noodlemagazine.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/noovo.py b/yt_dlp/extractor/noovo.py index b40770d07..acbb74c6e 100644 --- a/yt_dlp/extractor/noovo.py +++ b/yt_dlp/extractor/noovo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from .common import InfoExtractor from ..compat import compat_str diff --git a/yt_dlp/extractor/normalboots.py b/yt_dlp/extractor/normalboots.py index 61fe571df..07babcd2c 100644 --- a/yt_dlp/extractor/normalboots.py +++ b/yt_dlp/extractor/normalboots.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .jwplatform import JWPlatformIE diff --git a/yt_dlp/extractor/nosvideo.py b/yt_dlp/extractor/nosvideo.py index 53c500c35..b6d3ea40c 100644 --- a/yt_dlp/extractor/nosvideo.py +++ b/yt_dlp/extractor/nosvideo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py index 00a64f88d..6875d26ba 100644 --- a/yt_dlp/extractor/nova.py +++ b/yt_dlp/extractor/nova.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/novaplay.py b/yt_dlp/extractor/novaplay.py index bfb2c8751..4f1a84651 100644 --- a/yt_dlp/extractor/novaplay.py +++ b/yt_dlp/extractor/novaplay.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import int_or_none, parse_duration, parse_iso8601 diff --git a/yt_dlp/extractor/nowness.py b/yt_dlp/extractor/nowness.py index b2c715f41..fc9043bce 100644 --- a/yt_dlp/extractor/nowness.py +++ b/yt_dlp/extractor/nowness.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, diff --git a/yt_dlp/extractor/noz.py b/yt_dlp/extractor/noz.py index ccafd7723..b42a56f7e 100644 --- a/yt_dlp/extractor/noz.py +++ b/yt_dlp/extractor/noz.py @@ -1,17 +1,11 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse_unquote, - compat_xpath, -) from ..utils import ( int_or_none, find_xpath_attr, xpath_text, update_url_query, ) +from ..compat import compat_urllib_parse_unquote class NozIE(InfoExtractor): @@ -50,7 +44,7 @@ class NozIE(InfoExtractor): duration = int_or_none(xpath_text( doc, './/article/movie/file/duration')) formats = [] - for qnode in doc.findall(compat_xpath('.//article/movie/file/qualities/qual')): + for qnode in doc.findall('.//article/movie/file/qualities/qual'): http_url_ele = find_xpath_attr( qnode, './html_urls/video_url', 'format', 'video/mp4') http_url = http_url_ele.text if http_url_ele is not 
None else None diff --git a/yt_dlp/extractor/npo.py b/yt_dlp/extractor/npo.py index a8aaef6f1..0b5f32c2e 100644 --- a/yt_dlp/extractor/npo.py +++ b/yt_dlp/extractor/npo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/npr.py b/yt_dlp/extractor/npr.py index 49f062d7a..6d93f154c 100644 --- a/yt_dlp/extractor/npr.py +++ b/yt_dlp/extractor/npr.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py index 0cf26d598..553c55132 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import random import re diff --git a/yt_dlp/extractor/nrl.py b/yt_dlp/extractor/nrl.py index 0bd5086ae..798d03417 100644 --- a/yt_dlp/extractor/nrl.py +++ b/yt_dlp/extractor/nrl.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/ntvcojp.py b/yt_dlp/extractor/ntvcojp.py index c9af91188..422ec6eb0 100644 --- a/yt_dlp/extractor/ntvcojp.py +++ b/yt_dlp/extractor/ntvcojp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/ntvde.py b/yt_dlp/extractor/ntvde.py index 035582ee8..d252ced86 100644 --- a/yt_dlp/extractor/ntvde.py +++ b/yt_dlp/extractor/ntvde.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ntvru.py b/yt_dlp/extractor/ntvru.py index c47d1dfa4..c8df110e8 100644 --- a/yt_dlp/extractor/ntvru.py +++ b/yt_dlp/extractor/ntvru.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/nuevo.py b/yt_dlp/extractor/nuevo.py index be1e09d37..ec54041f1 100644 --- a/yt_dlp/extractor/nuevo.py +++ b/yt_dlp/extractor/nuevo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/nuvid.py b/yt_dlp/extractor/nuvid.py index 84fb97d6a..fafcc8f4b 100644 --- a/yt_dlp/extractor/nuvid.py +++ b/yt_dlp/extractor/nuvid.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/nytimes.py b/yt_dlp/extractor/nytimes.py index 99964737d..f388688c4 100644 --- a/yt_dlp/extractor/nytimes.py +++ b/yt_dlp/extractor/nytimes.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hmac import hashlib import base64 diff --git a/yt_dlp/extractor/nzherald.py b/yt_dlp/extractor/nzherald.py index e5601b495..7c9efd922 100644 --- a/yt_dlp/extractor/nzherald.py +++ b/yt_dlp/extractor/nzherald.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .brightcove import BrightcoveNewIE from .common import InfoExtractor diff --git a/yt_dlp/extractor/nzz.py b/yt_dlp/extractor/nzz.py index 61ee77adb..ac3b73156 100644 --- a/yt_dlp/extractor/nzz.py +++ b/yt_dlp/extractor/nzz.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/odatv.py 
b/yt_dlp/extractor/odatv.py index 314527f98..24ab93942 100644 --- a/yt_dlp/extractor/odatv.py +++ b/yt_dlp/extractor/odatv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index 293f1aa60..36a7f5f4e 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/oktoberfesttv.py b/yt_dlp/extractor/oktoberfesttv.py index 276567436..e0ac8563a 100644 --- a/yt_dlp/extractor/oktoberfesttv.py +++ b/yt_dlp/extractor/oktoberfesttv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py index 784f282c7..42ea94905 100644 --- a/yt_dlp/extractor/olympics.py +++ b/yt_dlp/extractor/olympics.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, @@ -56,7 +53,7 @@ class OlympicsReplayIE(InfoExtractor): }) m3u8_url = self._download_json( f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url') - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, m3u8_id='hls') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, 'mp4', m3u8_id='hls') self._sort_formats(formats) return { diff --git a/yt_dlp/extractor/on24.py b/yt_dlp/extractor/on24.py index d4d824430..779becc70 100644 --- a/yt_dlp/extractor/on24.py +++ b/yt_dlp/extractor/on24.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/once.py b/yt_dlp/extractor/once.py index 3e44b7829..460b82d02 100644 --- a/yt_dlp/extractor/once.py +++ b/yt_dlp/extractor/once.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ondemandkorea.py b/yt_dlp/extractor/ondemandkorea.py index e933ea2cc..84687ef47 100644 --- a/yt_dlp/extractor/ondemandkorea.py +++ b/yt_dlp/extractor/ondemandkorea.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py index 826faadd2..41815bef1 100644 --- a/yt_dlp/extractor/onefootball.py +++ b/yt_dlp/extractor/onefootball.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/onet.py b/yt_dlp/extractor/onet.py index 95177a213..ea46d7def 100644 --- a/yt_dlp/extractor/onet.py +++ b/yt_dlp/extractor/onet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/onionstudios.py b/yt_dlp/extractor/onionstudios.py index cf5c39e66..9776b4d97 100644 --- a/yt_dlp/extractor/onionstudios.py +++ b/yt_dlp/extractor/onionstudios.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ooyala.py b/yt_dlp/extractor/ooyala.py index 20cfa0a87..77017f08b 100644 --- 
a/yt_dlp/extractor/ooyala.py +++ b/yt_dlp/extractor/ooyala.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import base64 import re diff --git a/yt_dlp/extractor/opencast.py b/yt_dlp/extractor/opencast.py index cf8d91717..c640224dd 100644 --- a/yt_dlp/extractor/opencast.py +++ b/yt_dlp/extractor/opencast.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/openload.py b/yt_dlp/extractor/openload.py index fe4740aae..61e3a8b86 100644 --- a/yt_dlp/extractor/openload.py +++ b/yt_dlp/extractor/openload.py @@ -1,22 +1,17 @@ -# coding: utf-8 -from __future__ import unicode_literals - +import contextlib import json import os import subprocess import tempfile -from ..compat import ( - compat_urlparse, - compat_kwargs, -) +from ..compat import compat_urlparse from ..utils import ( + ExtractorError, + Popen, check_executable, encodeArgument, - ExtractorError, get_exe_version, is_outdated_version, - Popen, ) @@ -37,13 +32,11 @@ def cookie_to_dict(cookie): cookie_dict['secure'] = cookie.secure if cookie.discard is not None: cookie_dict['discard'] = cookie.discard - try: + with contextlib.suppress(TypeError): if (cookie.has_nonstandard_attr('httpOnly') or cookie.has_nonstandard_attr('httponly') or cookie.has_nonstandard_attr('HttpOnly')): cookie_dict['httponly'] = True - except TypeError: - pass return cookie_dict @@ -51,7 +44,7 @@ def cookie_jar_to_list(cookie_jar): return [cookie_to_dict(cookie) for cookie in cookie_jar] -class PhantomJSwrapper(object): +class PhantomJSwrapper: """PhantomJS wrapper class This class is experimental. @@ -135,10 +128,8 @@ class PhantomJSwrapper(object): def __del__(self): for name in self._TMP_FILE_NAMES: - try: + with contextlib.suppress(OSError, KeyError): os.remove(self._TMP_FILES[name].name) - except (IOError, OSError, KeyError): - pass def _save_cookies(self, url): cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar) @@ -158,7 +149,7 @@ class PhantomJSwrapper(object): cookie['rest'] = {'httpOnly': None} if 'expiry' in cookie: cookie['expire_time'] = cookie['expiry'] - self.extractor._set_cookie(**compat_kwargs(cookie)) + self.extractor._set_cookie(**cookie) def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'): """ @@ -218,9 +209,9 @@ class PhantomJSwrapper(object): f.write(self._TEMPLATE.format(**replaces).encode('utf-8')) if video_id is None: - self.extractor.to_screen('%s' % (note2,)) + self.extractor.to_screen(f'{note2}') else: - self.extractor.to_screen('%s: %s' % (video_id, note2)) + self.extractor.to_screen(f'{video_id}: {note2}') p = Popen( [self.exe, '--ssl-protocol=any', self._TMP_FILES['script'].name], diff --git a/yt_dlp/extractor/openrec.py b/yt_dlp/extractor/openrec.py index 5eb1cdbad..6c1eb8f3a 100644 --- a/yt_dlp/extractor/openrec.py +++ b/yt_dlp/extractor/openrec.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -38,8 +35,8 @@ class OpenRecBaseIE(InfoExtractor): raise ExtractorError(f'Failed to extract {name} info') formats = list(self._expand_media(video_id, get_first(movie_stores, 'media'))) - if not formats and is_live: - # archived livestreams + if not formats: + # archived livestreams or subscriber-only videos cookies = self._get_cookies('https://www.openrec.tv/') detail = self._download_json( 
f'https://apiv5.openrec.tv/api/v5/movies/{video_id}/detail', video_id, diff --git a/yt_dlp/extractor/ora.py b/yt_dlp/extractor/ora.py index 422d0b330..09b121422 100644 --- a/yt_dlp/extractor/ora.py +++ b/yt_dlp/extractor/ora.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..compat import compat_urlparse diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py index 0628977a0..56309ffcb 100644 --- a/yt_dlp/extractor/orf.py +++ b/yt_dlp/extractor/orf.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/outsidetv.py b/yt_dlp/extractor/outsidetv.py index c5333b08c..b1fcbd6a7 100644 --- a/yt_dlp/extractor/outsidetv.py +++ b/yt_dlp/extractor/outsidetv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/packtpub.py b/yt_dlp/extractor/packtpub.py index 62c52cd6e..51778d8a2 100644 --- a/yt_dlp/extractor/packtpub.py +++ b/yt_dlp/extractor/packtpub.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/palcomp3.py b/yt_dlp/extractor/palcomp3.py index d0a62fb17..4b0801c1a 100644 --- a/yt_dlp/extractor/palcomp3.py +++ b/yt_dlp/extractor/palcomp3.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/pandoratv.py b/yt_dlp/extractor/pandoratv.py index 623005338..3747f31d2 100644 --- a/yt_dlp/extractor/pandoratv.py +++ b/yt_dlp/extractor/pandoratv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/paramountplus.py b/yt_dlp/extractor/paramountplus.py index 94a9319ea..7987d77c6 100644 --- a/yt_dlp/extractor/paramountplus.py +++ b/yt_dlp/extractor/paramountplus.py @@ -1,4 +1,3 @@ -from __future__ import unicode_literals import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/parliamentliveuk.py b/yt_dlp/extractor/parliamentliveuk.py index 974d65482..38cb03164 100644 --- a/yt_dlp/extractor/parliamentliveuk.py +++ b/yt_dlp/extractor/parliamentliveuk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import uuid diff --git a/yt_dlp/extractor/parlview.py b/yt_dlp/extractor/parlview.py index c85eaa7dc..f31ae576c 100644 --- a/yt_dlp/extractor/parlview.py +++ b/yt_dlp/extractor/parlview.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index 963a0d6fb..cce9843d4 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index e48a2b8e0..4e6674e85 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pearvideo.py b/yt_dlp/extractor/pearvideo.py index 1d777221c..e76305acd 100644 --- 
a/yt_dlp/extractor/pearvideo.py +++ b/yt_dlp/extractor/pearvideo.py @@ -1,12 +1,10 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..utils import ( qualities, unified_timestamp, + traverse_obj, ) @@ -39,6 +37,14 @@ class PearVideoIE(InfoExtractor): } for mobj in re.finditer( r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2', webpage)] + if not formats: + info = self._download_json( + 'https://www.pearvideo.com/videoStatus.jsp', video_id=video_id, + query={'contId': video_id}, headers={'Referer': url}) + formats = [{ + 'format_id': k, + 'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v + } for k, v in traverse_obj(info, ('videoInfo', 'videos'), default={}).items() if v] self._sort_formats(formats) title = self._search_regex(
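The PearVideo hunk above adds a fallback for pages that no longer embed direct format URLs: when the regex scan finds nothing, the extractor queries the site's `videoStatus.jsp` endpoint and swaps the anti-hotlinking `systemTime` token in the returned `srcUrl` for `cont-<video_id>`. Below is a minimal self-contained sketch of that request flow; the endpoint, the `contId` query parameter, the `Referer` requirement and the JSON field names are taken from the diff, while the plain-`urllib` plumbing and the function name are illustrative assumptions.

```python
# Sketch of the videoStatus.jsp fallback added above (not the extractor itself).
import json
import urllib.request


def fetch_pearvideo_formats(video_id, page_url):
    # The endpoint rejects requests that lack a Referer header.
    req = urllib.request.Request(
        f'https://www.pearvideo.com/videoStatus.jsp?contId={video_id}',
        headers={'Referer': page_url})
    with urllib.request.urlopen(req) as resp:
        info = json.load(resp)

    formats = []
    for format_id, fmt_url in (info.get('videoInfo', {}).get('videos') or {}).items():
        if not fmt_url:
            continue
        if format_id == 'srcUrl':
            # The URL embeds a server timestamp that must be replaced with
            # 'cont-<id>' before the CDN will serve the file.
            fmt_url = fmt_url.replace(info['systemTime'], f'cont-{video_id}')
        formats.append({'format_id': format_id, 'url': fmt_url})
    return formats
```

In the extractor itself the same dance is done through `_download_json` and `traverse_obj`, so the fallback degrades gracefully when fields are missing.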
diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py index 4bf68559a..f1c4469d6 100644 --- a/yt_dlp/extractor/peekvids.py +++ b/yt_dlp/extractor/peekvids.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/peertube.py b/yt_dlp/extractor/peertube.py index 9d6b82178..0d3bc18a8 100644 --- a/yt_dlp/extractor/peertube.py +++ b/yt_dlp/extractor/peertube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/peertv.py b/yt_dlp/extractor/peertv.py index 002d33a88..821abe496 100644 --- a/yt_dlp/extractor/peertv.py +++ b/yt_dlp/extractor/peertv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import js_to_json diff --git a/yt_dlp/extractor/peloton.py b/yt_dlp/extractor/peloton.py index 7d832253f..8e50ffc7f 100644 --- a/yt_dlp/extractor/peloton.py +++ b/yt_dlp/extractor/peloton.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/people.py b/yt_dlp/extractor/people.py index 6ca95715e..c5143c3ed 100644 --- a/yt_dlp/extractor/people.py +++ b/yt_dlp/extractor/people.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/performgroup.py b/yt_dlp/extractor/performgroup.py index c00d39375..824495f40 100644 --- a/yt_dlp/extractor/performgroup.py +++ b/yt_dlp/extractor/performgroup.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/periscope.py b/yt_dlp/extractor/periscope.py index 1a292b8ac..fc8591a2c 100644 --- a/yt_dlp/extractor/periscope.py +++ b/yt_dlp/extractor/periscope.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/philharmoniedeparis.py b/yt_dlp/extractor/philharmoniedeparis.py index 9f4899c09..22164caaa 100644 --- a/yt_dlp/extractor/philharmoniedeparis.py +++ b/yt_dlp/extractor/philharmoniedeparis.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/phoenix.py b/yt_dlp/extractor/phoenix.py index e3ea01443..5fa133afe 100644 --- a/yt_dlp/extractor/phoenix.py +++ b/yt_dlp/extractor/phoenix.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re
from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/photobucket.py b/yt_dlp/extractor/photobucket.py index 53aebe2d9..71e9a4805 100644 --- a/yt_dlp/extractor/photobucket.py +++ b/yt_dlp/extractor/photobucket.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index ae160623b..d8d9c7801 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py index adf21fda8..54999a832 100644 --- a/yt_dlp/extractor/picarto.py +++ b/yt_dlp/extractor/picarto.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/piksel.py b/yt_dlp/extractor/piksel.py index 84c3de2f0..14a540859 100644 --- a/yt_dlp/extractor/piksel.py +++ b/yt_dlp/extractor/piksel.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pinkbike.py b/yt_dlp/extractor/pinkbike.py index 9f3501f77..313b5cce0 100644 --- a/yt_dlp/extractor/pinkbike.py +++ b/yt_dlp/extractor/pinkbike.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pinterest.py b/yt_dlp/extractor/pinterest.py index 80e9cd00e..171f9e4eb 100644 --- a/yt_dlp/extractor/pinterest.py +++ b/yt_dlp/extractor/pinterest.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/pixivsketch.py b/yt_dlp/extractor/pixivsketch.py index f0ad0b24a..bfdb8b24e 100644 --- a/yt_dlp/extractor/pixivsketch.py +++ b/yt_dlp/extractor/pixivsketch.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/pladform.py b/yt_dlp/extractor/pladform.py index 99ade85ec..301f5c838 100644 --- a/yt_dlp/extractor/pladform.py +++ b/yt_dlp/extractor/pladform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py index 07ac15b54..03b9d6aaa 100644 --- a/yt_dlp/extractor/planetmarathi.py +++ b/yt_dlp/extractor/planetmarathi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/platzi.py b/yt_dlp/extractor/platzi.py index 17f52e7f4..29d3210ac 100644 --- a/yt_dlp/extractor/platzi.py +++ b/yt_dlp/extractor/platzi.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_b64decode, diff --git a/yt_dlp/extractor/playfm.py b/yt_dlp/extractor/playfm.py index 4298cbe30..e895ba480 100644 --- a/yt_dlp/extractor/playfm.py +++ b/yt_dlp/extractor/playfm.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/playplustv.py 
b/yt_dlp/extractor/playplustv.py index cad2c3a0f..05dbaf066 100644 --- a/yt_dlp/extractor/playplustv.py +++ b/yt_dlp/extractor/playplustv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/plays.py b/yt_dlp/extractor/plays.py index ddfc6f148..700dfe407 100644 --- a/yt_dlp/extractor/plays.py +++ b/yt_dlp/extractor/plays.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/playstuff.py b/yt_dlp/extractor/playstuff.py index 5a329957f..b424ba187 100644 --- a/yt_dlp/extractor/playstuff.py +++ b/yt_dlp/extractor/playstuff.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py index 30c8a599e..f7e5ddbe7 100644 --- a/yt_dlp/extractor/playtvak.py +++ b/yt_dlp/extractor/playtvak.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urlparse, diff --git a/yt_dlp/extractor/playvid.py b/yt_dlp/extractor/playvid.py index e1c406b6c..5ffefc934 100644 --- a/yt_dlp/extractor/playvid.py +++ b/yt_dlp/extractor/playvid.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/playwire.py b/yt_dlp/extractor/playwire.py index 9c9e597b5..ab7f71493 100644 --- a/yt_dlp/extractor/playwire.py +++ b/yt_dlp/extractor/playwire.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/pluralsight.py b/yt_dlp/extractor/pluralsight.py index 2a5e0e488..b50152ad8 100644 --- a/yt_dlp/extractor/pluralsight.py +++ b/yt_dlp/extractor/pluralsight.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import collections import json import os diff --git a/yt_dlp/extractor/plutotv.py b/yt_dlp/extractor/plutotv.py index 26aff1af5..6e8f46fa3 100644 --- a/yt_dlp/extractor/plutotv.py +++ b/yt_dlp/extractor/plutotv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import uuid diff --git a/yt_dlp/extractor/podchaser.py b/yt_dlp/extractor/podchaser.py new file mode 100644 index 000000000..290c48817 --- /dev/null +++ b/yt_dlp/extractor/podchaser.py @@ -0,0 +1,97 @@ +import functools +import json + +from .common import InfoExtractor +from ..utils import ( + OnDemandPagedList, + float_or_none, + str_or_none, + str_to_int, + traverse_obj, + unified_timestamp, +) + + +class PodchaserIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?podchaser\.com/podcasts/[\w-]+-(?P<podcast_id>\d+)(?:/episodes/[\w-]+-(?P<id>\d+))?' + _PAGE_SIZE = 100 + _TESTS = [{ + 'url': 'https://www.podchaser.com/podcasts/cum-town-36924/episodes/ep-285-freeze-me-off-104365585', + 'info_dict': { + 'id': '104365585', + 'title': 'Ep. 
285 – freeze me off', + 'description': 'cam ahn', + 'thumbnail': r're:^https?://.*\.jpg$', + 'ext': 'mp3', + 'categories': ['Comedy'], + 'tags': ['comedy', 'dark humor'], + 'series': 'Cum Town', + 'duration': 3708, + 'timestamp': 1636531259, + 'upload_date': '20211110', + 'rating': 4.0 + } + }, { + 'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853', + 'info_dict': { + 'id': '28853', + 'title': 'The Bone Zone', + 'description': 'Podcast by The Bone Zone', + }, + 'playlist_count': 275 + }, { + 'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes', + 'info_dict': { + 'id': '699349', + 'title': 'Sean Carroll\'s Mindscape: Science, Society, Philosophy, Culture, Arts, and Ideas', + 'description': 'md5:2cbd8f4749891a84dc8235342e0b5ff1' + }, + 'playlist_mincount': 225 + }] + + @staticmethod + def _parse_episode(episode, podcast): + return { + 'id': str(episode.get('id')), + 'title': episode.get('title'), + 'description': episode.get('description'), + 'url': episode.get('audio_url'), + 'thumbnail': episode.get('image_url'), + 'duration': str_to_int(episode.get('length')), + 'timestamp': unified_timestamp(episode.get('air_date')), + 'rating': float_or_none(episode.get('rating')), + 'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))), + 'tags': traverse_obj(podcast, ('tags', ..., 'text')), + 'series': podcast.get('title'), + } + + def _call_api(self, path, *args, **kwargs): + return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs) + + def _fetch_page(self, podcast_id, podcast, page): + json_response = self._call_api( + 'list/episode', podcast_id, + headers={'Content-Type': 'application/json;charset=utf-8'}, + data=json.dumps({ + 'start': page * self._PAGE_SIZE, + 'count': self._PAGE_SIZE, + 'sort_order': 'SORT_ORDER_RECENT', + 'filters': { + 'podcast_id': podcast_id + }, + 'options': {} + }).encode()) + + for episode in json_response['entities']: + yield self._parse_episode(episode, podcast) + + def _real_extract(self, url): + podcast_id, episode_id = self._match_valid_url(url).group('podcast_id', 'id') + podcast = self._call_api(f'podcasts/{podcast_id}', episode_id or podcast_id) + if not episode_id: + return self.playlist_result( + OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE), + str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description')) + + episode = self._call_api(f'episodes/{episode_id}', episode_id) + return self._parse_episode(episode, podcast)
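The new `PodchaserIE` above pages through a podcast's episode list with `OnDemandPagedList`, which invokes the supplied page callback lazily, so only the pages a download actually touches hit the API. Here is a rough self-contained sketch of that pagination pattern; the page size and the `functools.partial` callback shape mirror the extractor, while the stub page fetcher and its data are invented stand-ins for the real `list/episode` call.

```python
# Sketch of the OnDemandPagedList pattern used by PodchaserIE above.
import functools

from yt_dlp.utils import OnDemandPagedList

PAGE_SIZE = 100  # same page size the extractor requests


def fake_fetch_page(podcast_id, page):
    # A real page function would POST start=page * PAGE_SIZE, count=PAGE_SIZE
    # to the list/episode endpoint and yield one parsed entry per episode.
    start = page * PAGE_SIZE
    for index in range(start, start + PAGE_SIZE):
        yield {'id': f'{podcast_id}-episode-{index}'}


entries = OnDemandPagedList(
    functools.partial(fake_fetch_page, '28853'), PAGE_SIZE)
# Pages are fetched on demand: this slice only ever evaluates page 0.
print(entries.getslice(0, 5))
```

This is also what keeps options like `--playlist-items` cheap on large podcasts: requested slices map onto page numbers, so pages nobody asked for are never fetched.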
diff --git a/yt_dlp/extractor/podomatic.py b/yt_dlp/extractor/podomatic.py index 673a3ab94..985bfae9d 100644 --- a/yt_dlp/extractor/podomatic.py +++ b/yt_dlp/extractor/podomatic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/pokemon.py b/yt_dlp/extractor/pokemon.py index b411390e2..eef0d02ca 100644 --- a/yt_dlp/extractor/pokemon.py +++ b/yt_dlp/extractor/pokemon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pokergo.py b/yt_dlp/extractor/pokergo.py index c9e2fed12..5c7baadf2 100644 --- a/yt_dlp/extractor/pokergo.py +++ b/yt_dlp/extractor/pokergo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 from .common import InfoExtractor diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py index 1e3f46c07..e44d951e6 100644
--- a/yt_dlp/extractor/polsatgo.py +++ b/yt_dlp/extractor/polsatgo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from uuid import uuid4 import json diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py index b2b3eb29c..514753b64 100644 --- a/yt_dlp/extractor/polskieradio.py +++ b/yt_dlp/extractor/polskieradio.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json import math diff --git a/yt_dlp/extractor/popcorntimes.py b/yt_dlp/extractor/popcorntimes.py index 5f9d0e720..ed741a07b 100644 --- a/yt_dlp/extractor/popcorntimes.py +++ b/yt_dlp/extractor/popcorntimes.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import ( compat_b64decode, diff --git a/yt_dlp/extractor/popcorntv.py b/yt_dlp/extractor/popcorntv.py index 66d2e5094..77984626f 100644 --- a/yt_dlp/extractor/popcorntv.py +++ b/yt_dlp/extractor/popcorntv.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( extract_attributes, diff --git a/yt_dlp/extractor/porn91.py b/yt_dlp/extractor/porn91.py index 20eac647a..af4a0dc9c 100644 --- a/yt_dlp/extractor/porn91.py +++ b/yt_dlp/extractor/porn91.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/porncom.py b/yt_dlp/extractor/porncom.py index 83df22141..2ebd3fa09 100644 --- a/yt_dlp/extractor/porncom.py +++ b/yt_dlp/extractor/porncom.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/pornez.py b/yt_dlp/extractor/pornez.py index 713dc0080..df0e44a69 100644 --- a/yt_dlp/extractor/pornez.py +++ b/yt_dlp/extractor/pornez.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/pornflip.py b/yt_dlp/extractor/pornflip.py index accf45269..26536bc65 100644 --- a/yt_dlp/extractor/pornflip.py +++ b/yt_dlp/extractor/pornflip.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/pornhd.py b/yt_dlp/extractor/pornhd.py index 9dbd72f1d..06a44ddd1 100644 --- a/yt_dlp/extractor/pornhd.py +++ b/yt_dlp/extractor/pornhd.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py index 17c8c9100..d296ccacb 100644 --- a/yt_dlp/extractor/pornhub.py +++ b/yt_dlp/extractor/pornhub.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import itertools import math diff --git a/yt_dlp/extractor/pornotube.py b/yt_dlp/extractor/pornotube.py index 1b5b9a320..e0960f4c6 100644 --- a/yt_dlp/extractor/pornotube.py +++ b/yt_dlp/extractor/pornotube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/pornovoisines.py b/yt_dlp/extractor/pornovoisines.py index 18459fc94..96d2da7c7 100644 --- a/yt_dlp/extractor/pornovoisines.py +++ b/yt_dlp/extractor/pornovoisines.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common 
import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/pornoxo.py b/yt_dlp/extractor/pornoxo.py index 489dc2b25..5104d8a49 100644 --- a/yt_dlp/extractor/pornoxo.py +++ b/yt_dlp/extractor/pornoxo.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( str_to_int, diff --git a/yt_dlp/extractor/presstv.py b/yt_dlp/extractor/presstv.py index bfb2eb71e..26ce74a59 100644 --- a/yt_dlp/extractor/presstv.py +++ b/yt_dlp/extractor/presstv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import remove_start diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py index 9e9867ba5..e4aa4bd35 100644 --- a/yt_dlp/extractor/projectveritas.py +++ b/yt_dlp/extractor/projectveritas.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/prosiebensat1.py b/yt_dlp/extractor/prosiebensat1.py index e89bbfd27..cb5ada1b9 100644 --- a/yt_dlp/extractor/prosiebensat1.py +++ b/yt_dlp/extractor/prosiebensat1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from hashlib import sha1 diff --git a/yt_dlp/extractor/prx.py b/yt_dlp/extractor/prx.py index 80561b80a..5bb183270 100644 --- a/yt_dlp/extractor/prx.py +++ b/yt_dlp/extractor/prx.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/puhutv.py b/yt_dlp/extractor/puhutv.py index ca71665e0..a5dac1dff 100644 --- a/yt_dlp/extractor/puhutv.py +++ b/yt_dlp/extractor/puhutv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_HTTPError, diff --git a/yt_dlp/extractor/puls4.py b/yt_dlp/extractor/puls4.py index 80091b85f..3c13d1f56 100644 --- a/yt_dlp/extractor/puls4.py +++ b/yt_dlp/extractor/puls4.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .prosiebensat1 import ProSiebenSat1BaseIE from ..utils import ( unified_strdate, diff --git a/yt_dlp/extractor/pyvideo.py b/yt_dlp/extractor/pyvideo.py index 869619723..7b25166b2 100644 --- a/yt_dlp/extractor/pyvideo.py +++ b/yt_dlp/extractor/pyvideo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/qqmusic.py b/yt_dlp/extractor/qqmusic.py index 0106d166f..fa2454df4 100644 --- a/yt_dlp/extractor/qqmusic.py +++ b/yt_dlp/extractor/qqmusic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random import re import time diff --git a/yt_dlp/extractor/r7.py b/yt_dlp/extractor/r7.py index e2202d603..b459efceb 100644 --- a/yt_dlp/extractor/r7.py +++ b/yt_dlp/extractor/r7.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/radiko.py b/yt_dlp/extractor/radiko.py index 1e60de153..dbb748715 100644 --- a/yt_dlp/extractor/radiko.py +++ b/yt_dlp/extractor/radiko.py @@ -1,29 +1,22 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re import base64 -import calendar -import datetime +import re +import urllib.parse from .common import InfoExtractor from ..utils import 
( ExtractorError, - update_url_query, clean_html, + time_seconds, + try_call, unified_timestamp, + update_url_query, ) -from ..compat import compat_urllib_parse class RadikoBaseIE(InfoExtractor): _FULL_KEY = None def _auth_client(self): - auth_cache = self._downloader.cache.load('radiko', 'auth_data') - if auth_cache: - return auth_cache - _, auth1_handle = self._download_webpage_handle( 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page', headers={ @@ -92,8 +85,8 @@ class RadikoBaseIE(InfoExtractor): def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query): m3u8_playlist_data = self._download_xml( - 'https://radiko.jp/v3/station/stream/pc_html5/%s.xml' % station, video_id, - note='Downloading m3u8 information') + f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id, + note='Downloading stream information') m3u8_urls = m3u8_playlist_data.findall('.//url') formats = [] @@ -105,7 +98,7 @@ class RadikoBaseIE(InfoExtractor): 'station_id': station, **query, 'l': '15', - 'lsid': '77d0678df93a1034659c14d6fc89f018', + 'lsid': '88ecea37e968c1f17d5413312d9f8003', 'type': 'b', }) if playlist_url in found: @@ -115,20 +108,21 @@ class RadikoBaseIE(InfoExtractor): time_to_skip = None if is_onair else cursor - ft + domain = urllib.parse.urlparse(playlist_url).netloc subformats = self._extract_m3u8_formats( playlist_url, video_id, ext='m4a', - live=True, fatal=False, m3u8_id=None, + live=True, fatal=False, m3u8_id=domain, + note=f'Downloading m3u8 information from {domain}', headers={ 'X-Radiko-AreaId': area_id, 'X-Radiko-AuthToken': auth_token, }) for sf in subformats: - domain = sf['format_id'] = compat_urllib_parse.urlparse(sf['url']).netloc - if re.match(r'^[cf]-radiko\.smartstream\.ne\.jp$', domain): + if re.fullmatch(r'[cf]-radiko\.smartstream\.ne\.jp', domain): # Prioritize live radio vs playback based on extractor sf['preference'] = 100 if is_onair else -100 if not is_onair and url_attrib['timefree'] == '1' and time_to_skip: - sf['_ffmpeg_args'] = ['-ss', time_to_skip] + sf['downloader_options'] = {'ffmpeg_args': ['-ss', time_to_skip]} formats.extend(subformats) self._sort_formats(formats) @@ -154,31 +148,29 @@ class RadikoIE(RadikoBaseIE): def _real_extract(self, url): station, video_id = self._match_valid_url(url).groups() vid_int = unified_timestamp(video_id, False) - - auth_token, area_id = self._auth_client() - prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) - title = prog.find('title').text - description = clean_html(prog.find('info').text) - station_name = station_program.find('.//name').text - - formats = self._extract_formats( - video_id=video_id, station=station, is_onair=False, - ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, - query={ - 'start_at': radio_begin, - 'ft': radio_begin, - 'end_at': radio_end, - 'to': radio_end, - 'seek': video_id, - }) + auth_cache = self._downloader.cache.load('radiko', 'auth_data') + for attempt in range(2): + auth_token, area_id = (not attempt and auth_cache) or self._auth_client() + formats = self._extract_formats( + video_id=video_id, station=station, is_onair=False, + ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, + query={ + 'start_at': radio_begin, + 'ft': radio_begin, + 'end_at': radio_end, + 'to': radio_end, + 'seek': video_id, + }) + if formats: + break return { 'id': video_id, - 'title': title, - 'description': description, - 'uploader': station_name, + 'title': try_call(lambda: 
prog.find('title').text), + 'description': clean_html(try_call(lambda: prog.find('info').text)), + 'uploader': try_call(lambda: station_program.find('.//name').text), 'uploader_id': station, 'timestamp': vid_int, 'formats': formats, @@ -208,8 +200,7 @@ class RadikoRadioIE(RadikoBaseIE): auth_token, area_id = self._auth_client() # get current time in JST (GMT+9:00 w/o DST) - vid_now = datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9))) - vid_now = calendar.timegm(vid_now.timetuple()) + vid_now = time_seconds(hours=9) prog, station_program, ft, _, _ = self._find_program(station, station, vid_now) diff --git a/yt_dlp/extractor/radiobremen.py b/yt_dlp/extractor/radiobremen.py index 2c35f9845..99ba050d0 100644 --- a/yt_dlp/extractor/radiobremen.py +++ b/yt_dlp/extractor/radiobremen.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/radiocanada.py b/yt_dlp/extractor/radiocanada.py index 4b4445c30..dd6f899a4 100644 --- a/yt_dlp/extractor/radiocanada.py +++ b/yt_dlp/extractor/radiocanada.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/radiode.py b/yt_dlp/extractor/radiode.py index 038287363..befb0b72b 100644 --- a/yt_dlp/extractor/radiode.py +++ b/yt_dlp/extractor/radiode.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py index 082238bbc..8fef54dab 100644 --- a/yt_dlp/extractor/radiofrance.py +++ b/yt_dlp/extractor/radiofrance.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/radiojavan.py b/yt_dlp/extractor/radiojavan.py index 3f74f0c01..6a6118899 100644 --- a/yt_dlp/extractor/radiojavan.py +++ b/yt_dlp/extractor/radiojavan.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/radiokapital.py b/yt_dlp/extractor/radiokapital.py index 2e93e034f..8f9737ac3 100644 --- a/yt_dlp/extractor/radiokapital.py +++ b/yt_dlp/extractor/radiokapital.py @@ -1,5 +1,3 @@ -# coding: utf-8 - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py index 2e1ff36c2..67520172e 100644 --- a/yt_dlp/extractor/radiozet.py +++ b/yt_dlp/extractor/radiozet.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import ( traverse_obj, diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index 7c72d60c6..31199e32e 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/raywenderlich.py b/yt_dlp/extractor/raywenderlich.py index f04d51f7b..e0e3c3ead 100644 --- a/yt_dlp/extractor/raywenderlich.py +++ b/yt_dlp/extractor/raywenderlich.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/rbmaradio.py b/yt_dlp/extractor/rbmaradio.py index 9642fbbe1..86c63dbb7 100644 --- a/yt_dlp/extractor/rbmaradio.py +++ b/yt_dlp/extractor/rbmaradio.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from 
.common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/rcs.py b/yt_dlp/extractor/rcs.py index ace611bc9..abbc167c0 100644 --- a/yt_dlp/extractor/rcs.py +++ b/yt_dlp/extractor/rcs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/rcti.py b/yt_dlp/extractor/rcti.py index ac42e58d9..0cfecbc9a 100644 --- a/yt_dlp/extractor/rcti.py +++ b/yt_dlp/extractor/rcti.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import random import time diff --git a/yt_dlp/extractor/rds.py b/yt_dlp/extractor/rds.py index 0c497856e..9a2e0d985 100644 --- a/yt_dlp/extractor/rds.py +++ b/yt_dlp/extractor/rds.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/redbulltv.py b/yt_dlp/extractor/redbulltv.py index 756a3666b..2f0e41c5b 100644 --- a/yt_dlp/extractor/redbulltv.py +++ b/yt_dlp/extractor/redbulltv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index a042a59cc..aabc8dba9 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -1,4 +1,5 @@ import random +from urllib.parse import urlparse from .common import InfoExtractor from ..utils import ( @@ -19,6 +20,7 @@ class RedditIE(InfoExtractor): 'info_dict': { 'id': 'zv89llsvexdz', 'ext': 'mp4', + 'display_id': '6rrwyj', 'title': 'That small heart attack.', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnails': 'count:4', @@ -158,6 +160,15 @@ class RedditIE(InfoExtractor): 'duration': int_or_none(reddit_video.get('duration')), } + parsed_url = urlparse(video_url) + if parsed_url.netloc == 'v.redd.it': + self.raise_no_formats('This video is processing', expected=True, video_id=video_id) + return { + **info, + 'id': parsed_url.path.split('/')[1], + 'display_id': video_id, + } + # Not hosted on reddit, must continue extraction return { **info, diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index 55196b768..e3712a1d6 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -1,4 +1,3 @@ -# coding: utf-8 import functools from .common import InfoExtractor diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py index 7fee54fee..ab7c505da 100644 --- a/yt_dlp/extractor/redtube.py +++ b/yt_dlp/extractor/redtube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/regiotv.py b/yt_dlp/extractor/regiotv.py index e250a52f0..6114841fb 100644 --- a/yt_dlp/extractor/regiotv.py +++ b/yt_dlp/extractor/regiotv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/rentv.py b/yt_dlp/extractor/rentv.py index 7c8909d95..ab47ee552 100644 --- a/yt_dlp/extractor/rentv.py +++ b/yt_dlp/extractor/rentv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/restudy.py b/yt_dlp/extractor/restudy.py index d47fb45ca..cd3c20d7a 100644 --- a/yt_dlp/extractor/restudy.py +++ 
b/yt_dlp/extractor/restudy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/reuters.py b/yt_dlp/extractor/reuters.py index 9dc482d21..1428b7cc9 100644 --- a/yt_dlp/extractor/reuters.py +++ b/yt_dlp/extractor/reuters.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/reverbnation.py b/yt_dlp/extractor/reverbnation.py index 4cb99c244..06b6c3c2f 100644 --- a/yt_dlp/extractor/reverbnation.py +++ b/yt_dlp/extractor/reverbnation.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( qualities, diff --git a/yt_dlp/extractor/rice.py b/yt_dlp/extractor/rice.py index cf2bb1b51..9ca47f3d4 100644 --- a/yt_dlp/extractor/rice.py +++ b/yt_dlp/extractor/rice.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/rmcdecouverte.py b/yt_dlp/extractor/rmcdecouverte.py index 8bfce3416..8d29b302b 100644 --- a/yt_dlp/extractor/rmcdecouverte.py +++ b/yt_dlp/extractor/rmcdecouverte.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from .brightcove import BrightcoveLegacyIE from ..compat import ( diff --git a/yt_dlp/extractor/rockstargames.py b/yt_dlp/extractor/rockstargames.py index cd6904bc9..5f1db0f05 100644 --- a/yt_dlp/extractor/rockstargames.py +++ b/yt_dlp/extractor/rockstargames.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py index 0fd65db4b..ad53d697e 100644 --- a/yt_dlp/extractor/rokfin.py +++ b/yt_dlp/extractor/rokfin.py @@ -1,26 +1,33 @@ -# coding: utf-8 import itertools +import json +import re +import urllib.parse from datetime import datetime -from .common import InfoExtractor +from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( - determine_ext, ExtractorError, + determine_ext, float_or_none, format_field, int_or_none, str_or_none, traverse_obj, + try_get, + unescapeHTML, unified_timestamp, url_or_none, + urlencode_postdata, ) - _API_BASE_URL = 'https://prod-api-v2.production.rokfin.com/api/v2/public/' class RokfinIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)' + _NETRC_MACHINE = 'rokfin' + _AUTH_BASE = 'https://secure.rokfin.com/auth/realms/rokfin-web/protocol/openid-connect' + _access_mgmt_tokens = {} # OAuth 2.0: RFC 6749, Sec. 
1.4-5 _TESTS = [{ 'url': 'https://www.rokfin.com/post/57548/Mitt-Romneys-Crazy-Solution-To-Climate-Change', 'info_dict': { @@ -84,8 +91,7 @@ class RokfinIE(InfoExtractor): def _real_extract(self, url): video_id, video_type = self._match_valid_url(url).group('id', 'type') - - metadata = self._download_json(f'{_API_BASE_URL}{video_id}', video_id) + metadata = self._download_json_using_access_token(f'{_API_BASE_URL}{video_id}', video_id) scheduled = unified_timestamp(metadata.get('scheduledAt')) live_status = ('was_live' if metadata.get('stoppedAt') @@ -160,6 +166,79 @@ class RokfinIE(InfoExtractor): if not raw_comments.get('content') or is_last or (page_n > pages_total if pages_total else is_last is not False): return + def _perform_login(self, username, password): + # https://openid.net/specs/openid-connect-core-1_0.html#CodeFlowAuth (Sec. 3.1) + login_page = self._download_webpage( + f'{self._AUTH_BASE}/auth?client_id=web&redirect_uri=https%3A%2F%2Frokfin.com%2Ffeed&response_mode=fragment&response_type=code&scope=openid', + None, note='loading login page', errnote='error loading login page') + authentication_point_url = unescapeHTML(self._search_regex( + r'<form\s+[^>]+action\s*=\s*"(https://secure\.rokfin\.com/auth/realms/rokfin-web/login-actions/authenticate\?[^"]+)"', + login_page, name='Authentication URL')) + + resp_body = self._download_webpage( + authentication_point_url, None, note='logging in', fatal=False, expected_status=404, + data=urlencode_postdata({'username': username, 'password': password, 'rememberMe': 'off', 'credentialId': ''})) + if not self._authentication_active(): + if re.search(r'(?i)(invalid\s+username\s+or\s+password)', resp_body or ''): + raise ExtractorError('invalid username/password', expected=True) + raise ExtractorError('Login failed') + + urlh = self._request_webpage( + f'{self._AUTH_BASE}/auth', None, + note='granting user authorization', errnote='user authorization rejected by Rokfin', + query={ + 'client_id': 'web', + 'prompt': 'none', + 'redirect_uri': 'https://rokfin.com/silent-check-sso.html', + 'response_mode': 'fragment', + 'response_type': 'code', + 'scope': 'openid', + }) + self._access_mgmt_tokens = self._download_json( + f'{self._AUTH_BASE}/token', None, + note='getting access credentials', errnote='error getting access credentials', + data=urlencode_postdata({ + 'code': urllib.parse.parse_qs(urllib.parse.urldefrag(urlh.geturl()).fragment).get('code')[0], + 'client_id': 'web', + 'grant_type': 'authorization_code', + 'redirect_uri': 'https://rokfin.com/silent-check-sso.html' + })) + + def _authentication_active(self): + return not ( + {'KEYCLOAK_IDENTITY', 'KEYCLOAK_IDENTITY_LEGACY', 'KEYCLOAK_SESSION', 'KEYCLOAK_SESSION_LEGACY'} + - set(self._get_cookies(self._AUTH_BASE))) + + def _get_auth_token(self): + return try_get(self._access_mgmt_tokens, lambda x: ' '.join([x['token_type'], x['access_token']])) + + def _download_json_using_access_token(self, url_or_request, video_id, headers={}, query={}): + assert 'authorization' not in headers + headers = headers.copy() + auth_token = self._get_auth_token() + refresh_token = self._access_mgmt_tokens.get('refresh_token') + if auth_token: + headers['authorization'] = auth_token + + json_string, urlh = self._download_webpage_handle( + url_or_request, video_id, headers=headers, query=query, expected_status=401) + if not auth_token or urlh.code != 401 or refresh_token is None: + return self._parse_json(json_string, video_id) + + self._access_mgmt_tokens = self._download_json( + f'{self._AUTH_BASE}/token', 
video_id, + note='User authorization expired or canceled by Rokfin. Re-authorizing ...', errnote='Failed to re-authorize', + data=urlencode_postdata({ + 'grant_type': 'refresh_token', + 'refresh_token': refresh_token, + 'client_id': 'web' + })) + headers['authorization'] = self._get_auth_token() + if headers['authorization'] is None: + raise ExtractorError('User authorization lost', expected=True) + + return self._download_json(url_or_request, video_id, headers=headers, query=query) + class RokfinPlaylistBaseIE(InfoExtractor): _TYPES = { @@ -183,6 +262,7 @@ class RokfinPlaylistBaseIE(InfoExtractor): class RokfinStackIE(RokfinPlaylistBaseIE): IE_NAME = 'rokfin:stack' + IE_DESC = 'Rokfin Stacks' _VALID_URL = r'https?://(?:www\.)?rokfin\.com/stack/(?P<id>[^/]+)' _TESTS = [{ 'url': 'https://www.rokfin.com/stack/271/Tulsi-Gabbard-Portsmouth-Townhall-FULL--Feb-9-2020', @@ -200,6 +280,7 @@ class RokfinStackIE(RokfinPlaylistBaseIE): class RokfinChannelIE(RokfinPlaylistBaseIE): IE_NAME = 'rokfin:channel' + IE_DESC = 'Rokfin Channels' _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?!((feed/?)|(discover/?)|(channels/?))$)(?P<id>[^/]+)/?$' _TESTS = [{ 'url': 'https://rokfin.com/TheConvoCouch', @@ -254,3 +335,76 @@ class RokfinChannelIE(RokfinPlaylistBaseIE): return self.playlist_result( self._entries(channel_id, channel_name, self._TABS[tab]), f'{channel_id}-{tab}', f'{channel_name} - {tab.title()}', str_or_none(channel_info.get('description'))) + + +class RokfinSearchIE(SearchInfoExtractor): + IE_NAME = 'rokfin:search' + IE_DESC = 'Rokfin Search' + _SEARCH_KEY = 'rkfnsearch' + _TYPES = { + 'video': (('id', 'raw'), 'post'), + 'audio': (('id', 'raw'), 'post'), + 'stream': (('content_id', 'raw'), 'stream'), + 'dead_stream': (('content_id', 'raw'), 'stream'), + 'stack': (('content_id', 'raw'), 'stack'), + } + _TESTS = [{ + 'url': 'rkfnsearch5:"zelenko"', + 'playlist_count': 5, + 'info_dict': { + 'id': '"zelenko"', + 'title': '"zelenko"', + } + }] + _db_url = None + _db_access_key = None + + def _real_initialize(self): + self._db_url, self._db_access_key = self._downloader.cache.load(self.ie_key(), 'auth', default=(None, None)) + if not self._db_url: + self._get_db_access_credentials() + + def _search_results(self, query): + total_pages = None + for page_number in itertools.count(1): + search_results = self._run_search_query( + query, data={'query': query, 'page': {'size': 100, 'current': page_number}}, + note=f'Downloading page {page_number}{format_field(total_pages, template=" of ~%s")}') + total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none) + + for result in search_results.get('results') or []: + video_id_key, video_type = self._TYPES.get(traverse_obj(result, ('content_type', 'raw')), (None, None)) + video_id = traverse_obj(result, video_id_key, expected_type=int_or_none) + if video_id and video_type: + yield self.url_result(url=f'https://rokfin.com/{video_type}/{video_id}') + if not search_results.get('results'): + return + + def _run_search_query(self, video_id, data, **kwargs): + data = json.dumps(data).encode() + for attempt in range(2): + search_results = self._download_json( + self._db_url, video_id, data=data, fatal=(attempt == 1), + headers={'authorization': self._db_access_key}, **kwargs) + if search_results: + return search_results + self.write_debug('Updating access credentials') + self._get_db_access_credentials(video_id) + + def _get_db_access_credentials(self, video_id=None): + auth_data = {'SEARCH_KEY': None, 'ENDPOINT_BASE': None} + 
notfound_err_page = self._download_webpage( + 'https://rokfin.com/discover', video_id, expected_status=404, note='Downloading home page') + for js_file_path in re.findall(r'<script\b[^>]*\ssrc\s*=\s*"(/static/js/[^">]+)"', notfound_err_page): + js_content = self._download_webpage( + f'https://rokfin.com{js_file_path}', video_id, note='Downloading JavaScript file', fatal=False) + auth_data.update(re.findall( + rf'REACT_APP_({"|".join(auth_data.keys())})\s*:\s*"([^"]+)"', js_content or '')) + if not all(auth_data.values()): + continue + + self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json') + self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}' + self._downloader.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key)) + return + raise ExtractorError('Unable to extract access credentials') diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py index a55dd4f8b..011dadfaa 100644 --- a/yt_dlp/extractor/roosterteeth.py +++ b/yt_dlp/extractor/roosterteeth.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/rottentomatoes.py b/yt_dlp/extractor/rottentomatoes.py index 14c8e8236..f133c851b 100644 --- a/yt_dlp/extractor/rottentomatoes.py +++ b/yt_dlp/extractor/rottentomatoes.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .internetvideoarchive import InternetVideoArchiveIE diff --git a/yt_dlp/extractor/rozhlas.py b/yt_dlp/extractor/rozhlas.py index fccf69401..a8189676f 100644 --- a/yt_dlp/extractor/rozhlas.py +++ b/yt_dlp/extractor/rozhlas.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/rtbf.py b/yt_dlp/extractor/rtbf.py index 4b61fdb17..a300a2482 100644 --- a/yt_dlp/extractor/rtbf.py +++ b/yt_dlp/extractor/rtbf.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/rte.py b/yt_dlp/extractor/rte.py index 1fbc72915..93faf1b32 100644 --- a/yt_dlp/extractor/rte.py +++ b/yt_dlp/extractor/rte.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/rtl2.py b/yt_dlp/extractor/rtl2.py index e29171474..afa0d33cf 100644 --- a/yt_dlp/extractor/rtl2.py +++ b/yt_dlp/extractor/rtl2.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/rtlnl.py b/yt_dlp/extractor/rtlnl.py index 9eaa06f25..ed89554ab 100644 --- a/yt_dlp/extractor/rtlnl.py +++ b/yt_dlp/extractor/rtlnl.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/rtnews.py b/yt_dlp/extractor/rtnews.py index 68b6044b6..6be9945f7 100644 --- a/yt_dlp/extractor/rtnews.py +++ b/yt_dlp/extractor/rtnews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/rtp.py b/yt_dlp/extractor/rtp.py index c165ade78..5928a207a 100644 --- a/yt_dlp/extractor/rtp.py +++ b/yt_dlp/extractor/rtp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import 
InfoExtractor from ..utils import js_to_json import re diff --git a/yt_dlp/extractor/rtrfm.py b/yt_dlp/extractor/rtrfm.py index 93d51e8ed..7381d8202 100644 --- a/yt_dlp/extractor/rtrfm.py +++ b/yt_dlp/extractor/rtrfm.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/rts.py b/yt_dlp/extractor/rts.py index 865a73024..e5ba1a26b 100644 --- a/yt_dlp/extractor/rts.py +++ b/yt_dlp/extractor/rts.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .srgssr import SRGSSRIE diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py index 7a1dc6f32..42a602968 100644 --- a/yt_dlp/extractor/rtve.py +++ b/yt_dlp/extractor/rtve.py @@ -1,9 +1,5 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import io -import sys from .common import InfoExtractor from ..compat import ( @@ -20,8 +16,6 @@ from ..utils import ( try_get, ) -_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x)) - class RTVEALaCartaIE(InfoExtractor): IE_NAME = 'rtve.es:alacarta' @@ -90,7 +84,7 @@ class RTVEALaCartaIE(InfoExtractor): alphabet = [] e = 0 d = 0 - for l in _bytes_to_chr(alphabet_data): + for l in alphabet_data.decode('iso-8859-1'): if d == 0: alphabet.append(l) d = e = (e + 1) % 4 @@ -100,7 +94,7 @@ class RTVEALaCartaIE(InfoExtractor): f = 0 e = 3 b = 1 - for letter in _bytes_to_chr(url_data): + for letter in url_data.decode('iso-8859-1'): if f == 0: l = int(letter) * 10 f = 1 diff --git a/yt_dlp/extractor/rtvnh.py b/yt_dlp/extractor/rtvnh.py index 6a00f7007..58af3dda2 100644 --- a/yt_dlp/extractor/rtvnh.py +++ b/yt_dlp/extractor/rtvnh.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py index 3ea0f1883..fb06efa4b 100644 --- a/yt_dlp/extractor/rtvs.py +++ b/yt_dlp/extractor/rtvs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ruhd.py b/yt_dlp/extractor/ruhd.py index 3c8053a26..abaa3f9ea 100644 --- a/yt_dlp/extractor/ruhd.py +++ b/yt_dlp/extractor/ruhd.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/rule34video.py b/yt_dlp/extractor/rule34video.py index a602a9f33..bb113d822 100644 --- a/yt_dlp/extractor/rule34video.py +++ b/yt_dlp/extractor/rule34video.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals import re from ..utils import parse_duration diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py index a0d5f88d9..50c383d79 100644 --- a/yt_dlp/extractor/rumble.py +++ b/yt_dlp/extractor/rumble.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/rutube.py b/yt_dlp/extractor/rutube.py index 2f753b41f..ecfcea939 100644 --- a/yt_dlp/extractor/rutube.py +++ b/yt_dlp/extractor/rutube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import itertools diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py index 0ea8253fa..adf78ddb0 100644 --- a/yt_dlp/extractor/rutv.py +++ b/yt_dlp/extractor/rutv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git 
a/yt_dlp/extractor/ruutu.py b/yt_dlp/extractor/ruutu.py index 5a30e3360..c6d94c100 100644 --- a/yt_dlp/extractor/ruutu.py +++ b/yt_dlp/extractor/ruutu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re @@ -41,6 +38,7 @@ class RuutuIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 114, 'age_limit': 0, + 'upload_date': '20150508', }, }, { @@ -54,6 +52,9 @@ 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 40, 'age_limit': 0, + 'upload_date': '20150507', + 'series': 'Superpesis', + 'categories': ['Urheilu'], }, }, { @@ -66,6 +67,8 @@ 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe', 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 0, + 'upload_date': '20151012', + 'series': 'Läpivalaisu', }, }, # Episode where <SourceFile> is "NOT-USED", but has other @@ -85,6 +88,9 @@ 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 0, + 'upload_date': '20190320', + 'series': 'Mysteeritarinat', + 'duration': 1324, }, 'expected_warnings': [ 'HTTP Error 502: Bad Gateway', @@ -129,14 +135,30 @@ class RuutuIE(InfoExtractor): _API_BASE = 'https://gatling.nelonenmedia.fi' @classmethod - def _extract_url(cls, webpage): + def _extract_urls(cls, webpage): + # nelonen.fi settings = try_call( lambda: json.loads(re.search( r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False)) - video_id = traverse_obj(settings, ( - 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value')) - if video_id: - return f'http://www.ruutu.fi/video/{video_id}' + if settings: + video_id = traverse_obj(settings, ( + 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value')) + if video_id: + return [f'http://www.ruutu.fi/video/{video_id}'] + # hs.fi and is.fi + settings = try_call( + lambda: json.loads(re.search( + '(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>', + webpage).group(1), strict=False)) + if settings: + video_ids = set(traverse_obj(settings, ( + 'props', 'pageProps', 'page', 'assetData', 'splitBody', ..., 'video', 'sourceId')) or []) + if video_ids: + return [f'http://www.ruutu.fi/video/{v}' for v in video_ids] + video_id = traverse_obj(settings, ( + 'props', 'pageProps', 'page', 'assetData', 'mainVideo', 'sourceId')) + if video_id: + return [f'http://www.ruutu.fi/video/{video_id}'] def _real_extract(self, url): video_id = self._match_id(url) @@ -209,10 +231,10 @@ class RuutuIE(InfoExtractor): extract_formats(video_xml.find('./Clip')) def pv(name): - node = find_xpath_attr( - video_xml, './Clip/PassthroughVariables/variable', 'name', name) - if node is not None: - return node.get('value') + value = try_call(lambda: find_xpath_attr( + video_xml, './Clip/PassthroughVariables/variable', 'name', name).get('value')) + if value != 'NA': + return value or None if not formats: if (not self.get_param('allow_unplayable_formats') @@ -237,6 +259,6 @@ 'series': pv('series_name'), 'season_number': int_or_none(pv('season_number')), 'episode_number': int_or_none(pv('episode_number')), - 'categories': themes.split(',') if themes else [], + 'categories': themes.split(',') if themes else None, 'formats': formats, }
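The ruutu.py hunk above turns `_extract_url` into `_extract_urls` and adds a second discovery path: hs.fi and is.fi pages embed their Next.js state in a `__NEXT_DATA__` script tag, and the extractor walks that JSON for Nelonen `sourceId` values. A minimal standalone sketch of that pattern, assuming an invented sample page (the JSON key path is the one used in the hunk; everything else here is illustrative):

```python
import json
import re

# Minimal sketch of the __NEXT_DATA__ discovery path added above. The key
# path ('props' -> 'pageProps' -> 'page' -> 'assetData' -> ...) comes from
# the hunk; SAMPLE_HTML is invented and is not a real hs.fi/is.fi page.
SAMPLE_HTML = '''
<script id="__NEXT_DATA__" type="application/json">
{"props": {"pageProps": {"page": {"assetData": {"mainVideo": {"sourceId": 1234567}}}}}}
</script>
'''

def extract_ruutu_urls(webpage):
    m = re.search(
        r'(?s)<script[^>]+id=["\']__NEXT_DATA__["\'][^>]*>([^<]+)</script>', webpage)
    if not m:
        return []
    asset = json.loads(m.group(1)).get('props', {}).get(
        'pageProps', {}).get('page', {}).get('assetData', {})
    # Multi-part articles list their clips under splitBody; single videos
    # hang off mainVideo. Collect both, like the extractor does.
    video_ids = {part['video']['sourceId']
                 for part in asset.get('splitBody', []) if part.get('video')}
    main_id = (asset.get('mainVideo') or {}).get('sourceId')
    if main_id:
        video_ids.add(main_id)
    return [f'http://www.ruutu.fi/video/{v}' for v in video_ids]

print(extract_ruutu_urls(SAMPLE_HTML))  # ['http://www.ruutu.fi/video/1234567']
```

In the real extractor the same walk goes through `try_call` and `traverse_obj`, so a malformed page degrades to "no embeds found" instead of raising.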
diff --git a/yt_dlp/extractor/ruv.py b/yt_dlp/extractor/ruv.py index d806ed068..12499d6ca 100644 --- a/yt_dlp/extractor/ruv.py +++ b/yt_dlp/extractor/ruv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/safari.py b/yt_dlp/extractor/safari.py index 7b4571daa..450a661e9 100644 --- a/yt_dlp/extractor/safari.py +++ b/yt_dlp/extractor/safari.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/saitosan.py b/yt_dlp/extractor/saitosan.py index 621335ca0..d2f60e92f 100644 --- a/yt_dlp/extractor/saitosan.py +++ b/yt_dlp/extractor/saitosan.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError, try_get diff --git a/yt_dlp/extractor/samplefocus.py b/yt_dlp/extractor/samplefocus.py index 806c3c354..e9f5c227b 100644 --- a/yt_dlp/extractor/samplefocus.py +++ b/yt_dlp/extractor/samplefocus.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sapo.py b/yt_dlp/extractor/sapo.py index df202a3a4..9a601a01c 100644 --- a/yt_dlp/extractor/sapo.py +++ b/yt_dlp/extractor/sapo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/savefrom.py b/yt_dlp/extractor/savefrom.py index 98efdc2a4..9c9e74b6d 100644 --- a/yt_dlp/extractor/savefrom.py +++ b/yt_dlp/extractor/savefrom.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import os.path from .common import InfoExtractor diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py index 4090f6385..711524406 100644 --- a/yt_dlp/extractor/sbs.py +++ b/yt_dlp/extractor/sbs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( smuggle_url, diff --git a/yt_dlp/extractor/screencast.py b/yt_dlp/extractor/screencast.py index 69a0d01f3..e3dbaab69 100644 --- a/yt_dlp/extractor/screencast.py +++ b/yt_dlp/extractor/screencast.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_parse_qs, diff --git a/yt_dlp/extractor/screencastomatic.py b/yt_dlp/extractor/screencastomatic.py index 0afdc1715..f2f281f47 100644 --- a/yt_dlp/extractor/screencastomatic.py +++ b/yt_dlp/extractor/screencastomatic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( get_element_by_class, diff --git a/yt_dlp/extractor/scrippsnetworks.py b/yt_dlp/extractor/scrippsnetworks.py index 84918b67f..c3cee6e4a 100644 --- a/yt_dlp/extractor/scrippsnetworks.py +++ b/yt_dlp/extractor/scrippsnetworks.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import hashlib diff --git a/yt_dlp/extractor/scte.py b/yt_dlp/extractor/scte.py index 7215cf5d1..d839ffcde 100644 --- a/yt_dlp/extractor/scte.py +++ b/yt_dlp/extractor/scte.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/seeker.py b/yt_dlp/extractor/seeker.py index e5c18c7a5..65eb16a09 100644 --- a/yt_dlp/extractor/seeker.py +++ b/yt_dlp/extractor/seeker.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/senategov.py b/yt_dlp/extractor/senategov.py index 
b295184a1..bced14328 100644 --- a/yt_dlp/extractor/senategov.py +++ b/yt_dlp/extractor/senategov.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py index 858547b54..cf4b93d45 100644 --- a/yt_dlp/extractor/sendtonews.py +++ b/yt_dlp/extractor/sendtonews.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/servus.py b/yt_dlp/extractor/servus.py index 1610ddc2c..ac030ea41 100644 --- a/yt_dlp/extractor/servus.py +++ b/yt_dlp/extractor/servus.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/sevenplus.py b/yt_dlp/extractor/sevenplus.py index 9867961f0..8e95bc230 100644 --- a/yt_dlp/extractor/sevenplus.py +++ b/yt_dlp/extractor/sevenplus.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/sexu.py b/yt_dlp/extractor/sexu.py index 3df51520b..000f7e166 100644 --- a/yt_dlp/extractor/sexu.py +++ b/yt_dlp/extractor/sexu.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/seznamzpravy.py b/yt_dlp/extractor/seznamzpravy.py index eef4975cb..891bfcfee 100644 --- a/yt_dlp/extractor/seznamzpravy.py +++ b/yt_dlp/extractor/seznamzpravy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/shahid.py b/yt_dlp/extractor/shahid.py index ab45d9ce4..53ca86b73 100644 --- a/yt_dlp/extractor/shahid.py +++ b/yt_dlp/extractor/shahid.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import math import re diff --git a/yt_dlp/extractor/shared.py b/yt_dlp/extractor/shared.py index 93ab2a167..5bc097b0d 100644 --- a/yt_dlp/extractor/shared.py +++ b/yt_dlp/extractor/shared.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_b64decode, diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py index 45c12915a..c0780abe2 100644 --- a/yt_dlp/extractor/shemaroome.py +++ b/yt_dlp/extractor/shemaroome.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..aes import aes_cbc_decrypt, unpad_pkcs7 from ..compat import ( diff --git a/yt_dlp/extractor/showroomlive.py b/yt_dlp/extractor/showroomlive.py index 1aada69ac..cd681a035 100644 --- a/yt_dlp/extractor/showroomlive.py +++ b/yt_dlp/extractor/showroomlive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/simplecast.py b/yt_dlp/extractor/simplecast.py index 857e9414f..ecbb6123b 100644 --- a/yt_dlp/extractor/simplecast.py +++ b/yt_dlp/extractor/simplecast.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sina.py b/yt_dlp/extractor/sina.py index b62b0c3e5..d30d57d85 100644 --- a/yt_dlp/extractor/sina.py +++ b/yt_dlp/extractor/sina.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import 
InfoExtractor from ..utils import ( HEADRequest, diff --git a/yt_dlp/extractor/sixplay.py b/yt_dlp/extractor/sixplay.py index fd747f59b..b7b7d7d7f 100644 --- a/yt_dlp/extractor/sixplay.py +++ b/yt_dlp/extractor/sixplay.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/skeb.py b/yt_dlp/extractor/skeb.py index 81aecb311..e02f8cef0 100644 --- a/yt_dlp/extractor/skeb.py +++ b/yt_dlp/extractor/skeb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj diff --git a/yt_dlp/extractor/sky.py b/yt_dlp/extractor/sky.py index ad1e62d88..0a8b6cc76 100644 --- a/yt_dlp/extractor/sky.py +++ b/yt_dlp/extractor/sky.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py index ddb43c075..438fb60e3 100644 --- a/yt_dlp/extractor/skyit.py +++ b/yt_dlp/extractor/skyit.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_parse_qs, diff --git a/yt_dlp/extractor/skylinewebcams.py b/yt_dlp/extractor/skylinewebcams.py index 47bbb7632..4292bb2ae 100644 --- a/yt_dlp/extractor/skylinewebcams.py +++ b/yt_dlp/extractor/skylinewebcams.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/skynewsarabia.py b/yt_dlp/extractor/skynewsarabia.py index fffc9aa22..6264b04bb 100644 --- a/yt_dlp/extractor/skynewsarabia.py +++ b/yt_dlp/extractor/skynewsarabia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/skynewsau.py b/yt_dlp/extractor/skynewsau.py index 8e079ee31..43a9c82cf 100644 --- a/yt_dlp/extractor/skynewsau.py +++ b/yt_dlp/extractor/skynewsau.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/slideshare.py b/yt_dlp/extractor/slideshare.py index 9b3ad0ad4..ab9dad0ec 100644 --- a/yt_dlp/extractor/slideshare.py +++ b/yt_dlp/extractor/slideshare.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/slideslive.py b/yt_dlp/extractor/slideslive.py index df6084647..72ca56057 100644 --- a/yt_dlp/extractor/slideslive.py +++ b/yt_dlp/extractor/slideslive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( bool_or_none, diff --git a/yt_dlp/extractor/slutload.py b/yt_dlp/extractor/slutload.py index 661f9e59d..8e6e89c9a 100644 --- a/yt_dlp/extractor/slutload.py +++ b/yt_dlp/extractor/slutload.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/snotr.py b/yt_dlp/extractor/snotr.py index 0bb548255..6889f1929 100644 --- a/yt_dlp/extractor/snotr.py +++ b/yt_dlp/extractor/snotr.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/sohu.py 
b/yt_dlp/extractor/sohu.py index 3bff5c595..c3a135955 100644 --- a/yt_dlp/extractor/sohu.py +++ b/yt_dlp/extractor/sohu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py index 5b6849fc9..17d28478f 100644 --- a/yt_dlp/extractor/sonyliv.py +++ b/yt_dlp/extractor/sonyliv.py @@ -1,7 +1,5 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime +import json import math import random import time @@ -85,21 +83,32 @@ class SonyLIVIE(InfoExtractor): raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}') self.report_login() - data = '''{"mobileNumber":"%s","channelPartnerID":"MSMIND","country":"IN","timestamp":"%s", - "otpSize":6,"loginType":"REGISTERORSIGNIN","isMobileMandatory":true} - ''' % (username, datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%MZ")) otp_request_json = self._download_json( 'https://apiv2.sonyliv.com/AGL/1.6/A/ENG/WEB/IN/HR/CREATEOTP-V2', - None, note='Sending OTP', data=data.encode(), headers=self._HEADERS) + None, note='Sending OTP', headers=self._HEADERS, data=json.dumps({ + 'mobileNumber': username, + 'channelPartnerID': 'MSMIND', + 'country': 'IN', + 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'), + 'otpSize': 6, + 'loginType': 'REGISTERORSIGNIN', + 'isMobileMandatory': True, + }).encode()) if otp_request_json['resultCode'] == 'KO': raise ExtractorError(otp_request_json['message'], expected=True) - otp_code = self._get_tfa_info('OTP') - data = '''{"channelPartnerID":"MSMIND","mobileNumber":"%s","country":"IN","otp":"%s", - "dmaId":"IN","ageConfirmation":true,"timestamp":"%s","isMobileMandatory":true} - ''' % (username, otp_code, datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%MZ")) + otp_verify_json = self._download_json( 'https://apiv2.sonyliv.com/AGL/2.0/A/ENG/WEB/IN/HR/CONFIRMOTP-V2', - None, note='Verifying OTP', data=data.encode(), headers=self._HEADERS) + None, note='Verifying OTP', headers=self._HEADERS, data=json.dumps({ + 'channelPartnerID': 'MSMIND', + 'mobileNumber': username, + 'country': 'IN', + 'otp': self._get_tfa_info('OTP'), + 'dmaId': 'IN', + 'ageConfirmation': True, + 'timestamp': datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S.%MZ'), + 'isMobileMandatory': True, + }).encode()) if otp_verify_json['resultCode'] == 'KO': raise ExtractorError(otp_request_json['message'], expected=True) self._HEADERS['authorization'] = otp_verify_json['resultObj']['accessToken'] diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index bbc79c2be..6dfa50c60 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re import json @@ -12,7 +9,6 @@ from .common import ( ) from ..compat import ( compat_HTTPError, - compat_kwargs, compat_str, ) from ..utils import ( @@ -96,7 +92,7 @@ class SoundcloudBaseIE(InfoExtractor): query['client_id'] = self._CLIENT_ID kwargs['query'] = query try: - return super()._download_json(*args, **compat_kwargs(kwargs)) + return super()._download_json(*args, **kwargs) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): self._store_client_id(None) diff --git a/yt_dlp/extractor/soundgasm.py b/yt_dlp/extractor/soundgasm.py index d608eb7a7..9e59c7c0e 100644 --- a/yt_dlp/extractor/soundgasm.py +++ b/yt_dlp/extractor/soundgasm.py @@ -1,6 +1,3 @@ 
-# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index 942a52dcf..855f1d6d3 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index 4bc2263f0..fc5a492a6 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/spankbang.py b/yt_dlp/extractor/spankbang.py index dd849ae13..1aa8eaba1 100644 --- a/yt_dlp/extractor/spankbang.py +++ b/yt_dlp/extractor/spankbang.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/spankwire.py b/yt_dlp/extractor/spankwire.py index e97c1d23e..603f17e9d 100644 --- a/yt_dlp/extractor/spankwire.py +++ b/yt_dlp/extractor/spankwire.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/spiegel.py b/yt_dlp/extractor/spiegel.py index 58f2ed353..3701e295a 100644 --- a/yt_dlp/extractor/spiegel.py +++ b/yt_dlp/extractor/spiegel.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .jwplatform import JWPlatformIE diff --git a/yt_dlp/extractor/spiegeltv.py b/yt_dlp/extractor/spiegeltv.py deleted file mode 100644 index 6ccf4c342..000000000 --- a/yt_dlp/extractor/spiegeltv.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from .nexx import NexxIE - - -class SpiegeltvIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?spiegel\.tv/videos/(?P<id>\d+)' - _TEST = { - 'url': 'http://www.spiegel.tv/videos/161681-flug-mh370/', - 'only_matching': True, - } - - def _real_extract(self, url): - return self.url_result( - 'https://api.nexx.cloud/v3/748/videos/byid/%s' - % self._match_id(url), ie=NexxIE.ie_key()) diff --git a/yt_dlp/extractor/spike.py b/yt_dlp/extractor/spike.py index 5805f3d44..5c1c78d8f 100644 --- a/yt_dlp/extractor/spike.py +++ b/yt_dlp/extractor/spike.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor diff --git a/yt_dlp/extractor/sport5.py b/yt_dlp/extractor/sport5.py index 35c57d62a..f4ac98b6e 100644 --- a/yt_dlp/extractor/sport5.py +++ b/yt_dlp/extractor/sport5.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ExtractorError diff --git a/yt_dlp/extractor/sportbox.py b/yt_dlp/extractor/sportbox.py index b9017fd2a..1041cc7d1 100644 --- a/yt_dlp/extractor/sportbox.py +++ b/yt_dlp/extractor/sportbox.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sportdeutschland.py b/yt_dlp/extractor/sportdeutschland.py index 15b488ab7..75074b310 100644 --- a/yt_dlp/extractor/sportdeutschland.py +++ b/yt_dlp/extractor/sportdeutschland.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git 
a/yt_dlp/extractor/spotify.py b/yt_dlp/extractor/spotify.py index 826f98cff..a2068a1b6 100644 --- a/yt_dlp/extractor/spotify.py +++ b/yt_dlp/extractor/spotify.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re @@ -22,7 +19,7 @@ class SpotifyBaseIE(InfoExtractor): 'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0', 'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d', } - _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)' + _VALID_URL_TEMPL = r'https?://open\.spotify\.com/(?:embed-podcast/|embed/|)%s/(?P<id>[^/?&#]+)' def _real_initialize(self): self._ACCESS_TOKEN = self._download_json( @@ -96,11 +93,18 @@ class SpotifyBaseIE(InfoExtractor): 'series': series, } + @classmethod + def _extract_embed_urls(cls, webpage): + return re.findall( + r'<iframe[^>]+src="(https?://open\.spotify.com/embed/[^"]+)"', + webpage) + class SpotifyIE(SpotifyBaseIE): IE_NAME = 'spotify' + IE_DESC = 'Spotify episodes' _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode' - _TEST = { + _TESTS = [{ 'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo', 'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b', 'info_dict': { @@ -112,7 +116,10 @@ class SpotifyIE(SpotifyBaseIE): 'release_date': '20201217', 'series': "The Guardian's Audio Long Reads", } - } + }, { + 'url': 'https://open.spotify.com/embed/episode/4TvCsKKs2thXmarHigWvXE?si=7eatS8AbQb6RxqO2raIuWA', + 'only_matching': True, + }] def _real_extract(self, url): episode_id = self._match_id(url) @@ -125,6 +132,7 @@ class SpotifyIE(SpotifyBaseIE): class SpotifyShowIE(SpotifyBaseIE): IE_NAME = 'spotify:show' + IE_DESC = 'Spotify shows' _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show' _TEST = { 'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M', diff --git a/yt_dlp/extractor/spreaker.py b/yt_dlp/extractor/spreaker.py index 6c7e40ae4..36a9bd291 100644 --- a/yt_dlp/extractor/spreaker.py +++ b/yt_dlp/extractor/spreaker.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/springboardplatform.py b/yt_dlp/extractor/springboardplatform.py index 49ac1f559..8e156bf1a 100644 --- a/yt_dlp/extractor/springboardplatform.py +++ b/yt_dlp/extractor/springboardplatform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sprout.py b/yt_dlp/extractor/sprout.py index e243732f2..444a6c270 100644 --- a/yt_dlp/extractor/sprout.py +++ b/yt_dlp/extractor/sprout.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .adobepass import AdobePassIE from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/srgssr.py b/yt_dlp/extractor/srgssr.py index f9919816d..6dd312985 100644 --- a/yt_dlp/extractor/srgssr.py +++ b/yt_dlp/extractor/srgssr.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/srmediathek.py b/yt_dlp/extractor/srmediathek.py index 359dadaa3..3cc39870f 100644 --- a/yt_dlp/extractor/srmediathek.py +++ b/yt_dlp/extractor/srmediathek.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .ard import ARDMediathekBaseIE from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/stanfordoc.py b/yt_dlp/extractor/stanfordoc.py index 
0003075ac..be0f4afc1 100644 --- a/yt_dlp/extractor/stanfordoc.py +++ b/yt_dlp/extractor/stanfordoc.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/startv.py b/yt_dlp/extractor/startv.py index 411320ede..bb6e8f1ea 100644 --- a/yt_dlp/extractor/startv.py +++ b/yt_dlp/extractor/startv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/steam.py b/yt_dlp/extractor/steam.py index 4ed0fb592..ab22fdbc6 100644 --- a/yt_dlp/extractor/steam.py +++ b/yt_dlp/extractor/steam.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/stitcher.py b/yt_dlp/extractor/stitcher.py index 822782507..2fd200f87 100644 --- a/yt_dlp/extractor/stitcher.py +++ b/yt_dlp/extractor/stitcher.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/storyfire.py b/yt_dlp/extractor/storyfire.py index e18a59a49..716190220 100644 --- a/yt_dlp/extractor/storyfire.py +++ b/yt_dlp/extractor/storyfire.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools from .common import InfoExtractor diff --git a/yt_dlp/extractor/streamable.py b/yt_dlp/extractor/streamable.py index 808129649..a2935b04b 100644 --- a/yt_dlp/extractor/streamable.py +++ b/yt_dlp/extractor/streamable.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/streamanity.py b/yt_dlp/extractor/streamanity.py index 2e2d5eedf..f8c37c0dd 100644 --- a/yt_dlp/extractor/streamanity.py +++ b/yt_dlp/extractor/streamanity.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/streamcloud.py b/yt_dlp/extractor/streamcloud.py index b97bb4374..728980921 100644 --- a/yt_dlp/extractor/streamcloud.py +++ b/yt_dlp/extractor/streamcloud.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/streamcz.py b/yt_dlp/extractor/streamcz.py index 4cb9923e2..85fc3a3c3 100644 --- a/yt_dlp/extractor/streamcz.py +++ b/yt_dlp/extractor/streamcz.py @@ -1,4 +1,3 @@ -# coding: utf-8 import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/streamff.py b/yt_dlp/extractor/streamff.py index 6b190bb3b..93c42942c 100644 --- a/yt_dlp/extractor/streamff.py +++ b/yt_dlp/extractor/streamff.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor from ..utils import int_or_none, parse_iso8601 diff --git a/yt_dlp/extractor/streetvoice.py b/yt_dlp/extractor/streetvoice.py index f21681ae7..a32c8bc37 100644 --- a/yt_dlp/extractor/streetvoice.py +++ b/yt_dlp/extractor/streetvoice.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/stretchinternet.py b/yt_dlp/extractor/stretchinternet.py index ec08eae55..e438dee11 100644 --- a/yt_dlp/extractor/stretchinternet.py +++ b/yt_dlp/extractor/stretchinternet.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/stripchat.py 
b/yt_dlp/extractor/stripchat.py index 0d4a0ce4c..a7c7b0649 100644 --- a/yt_dlp/extractor/stripchat.py +++ b/yt_dlp/extractor/stripchat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/stv.py b/yt_dlp/extractor/stv.py index ba5661d74..618dc4329 100644 --- a/yt_dlp/extractor/stv.py +++ b/yt_dlp/extractor/stv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( compat_str, diff --git a/yt_dlp/extractor/sunporno.py b/yt_dlp/extractor/sunporno.py index 59b77bf92..19498701c 100644 --- a/yt_dlp/extractor/sunporno.py +++ b/yt_dlp/extractor/sunporno.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/sverigesradio.py b/yt_dlp/extractor/sverigesradio.py index aa0691f0d..4a4b5cf7e 100644 --- a/yt_dlp/extractor/sverigesradio.py +++ b/yt_dlp/extractor/sverigesradio.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/svt.py b/yt_dlp/extractor/svt.py index 8ca62e370..e0c436b67 100644 --- a/yt_dlp/extractor/svt.py +++ b/yt_dlp/extractor/svt.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/swrmediathek.py b/yt_dlp/extractor/swrmediathek.py index 0f615979e..deebdd1a4 100644 --- a/yt_dlp/extractor/swrmediathek.py +++ b/yt_dlp/extractor/swrmediathek.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/syfy.py b/yt_dlp/extractor/syfy.py index def7e5a2c..c79d27a0d 100644 --- a/yt_dlp/extractor/syfy.py +++ b/yt_dlp/extractor/syfy.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .adobepass import AdobePassIE from ..utils import ( update_url_query, diff --git a/yt_dlp/extractor/sztvhu.py b/yt_dlp/extractor/sztvhu.py index cfad33146..1cbc2a3cf 100644 --- a/yt_dlp/extractor/sztvhu.py +++ b/yt_dlp/extractor/sztvhu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/tagesschau.py b/yt_dlp/extractor/tagesschau.py index 6e03d0a7d..9b9513f07 100644 --- a/yt_dlp/extractor/tagesschau.py +++ b/yt_dlp/extractor/tagesschau.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tass.py b/yt_dlp/extractor/tass.py index 6d336da78..d20dacfc1 100644 --- a/yt_dlp/extractor/tass.py +++ b/yt_dlp/extractor/tass.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/tastytrade.py b/yt_dlp/extractor/tastytrade.py deleted file mode 100644 index 7fe96bd5f..000000000 --- a/yt_dlp/extractor/tastytrade.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import unicode_literals - -from .common import InfoExtractor -from .ooyala import OoyalaIE - - -class TastyTradeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tastytrade\.com/tt/shows/[^/]+/episodes/(?P<id>[^/?#&]+)' - - _TESTS = [{ - 'url': 
'https://www.tastytrade.com/tt/shows/market-measures/episodes/correlation-in-short-volatility-06-28-2017', - 'info_dict': { - 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM', - 'ext': 'mp4', - 'title': 'A History of Teaming', - 'description': 'md5:2a9033db8da81f2edffa4c99888140b3', - 'duration': 422.255, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Ooyala'], - }, { - 'url': 'https://www.tastytrade.com/tt/shows/daily-dose/episodes/daily-dose-06-30-2017', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - ooyala_code = self._search_regex( - r'data-media-id=(["\'])(?P<code>(?:(?!\1).)+)\1', - webpage, 'ooyala code', group='code') - - info = self._search_json_ld(webpage, display_id, fatal=False) - info.update({ - '_type': 'url_transparent', - 'ie_key': OoyalaIE.ie_key(), - 'url': 'ooyala:%s' % ooyala_code, - 'display_id': display_id, - }) - return info diff --git a/yt_dlp/extractor/tbs.py b/yt_dlp/extractor/tbs.py index c7d62ff4e..808c6c73d 100644 --- a/yt_dlp/extractor/tbs.py +++ b/yt_dlp/extractor/tbs.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .turner import TurnerBaseIE diff --git a/yt_dlp/extractor/tdslifeway.py b/yt_dlp/extractor/tdslifeway.py index 101c6ee31..3623a68c8 100644 --- a/yt_dlp/extractor/tdslifeway.py +++ b/yt_dlp/extractor/tdslifeway.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/teachable.py b/yt_dlp/extractor/teachable.py index 232eaa521..e480d7610 100644 --- a/yt_dlp/extractor/teachable.py +++ b/yt_dlp/extractor/teachable.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/teachertube.py b/yt_dlp/extractor/teachertube.py index e22f0114c..2bf836abd 100644 --- a/yt_dlp/extractor/teachertube.py +++ b/yt_dlp/extractor/teachertube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/teachingchannel.py b/yt_dlp/extractor/teachingchannel.py index 624cdb3ad..275f6d1f9 100644 --- a/yt_dlp/extractor/teachingchannel.py +++ b/yt_dlp/extractor/teachingchannel.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/teamcoco.py b/yt_dlp/extractor/teamcoco.py index 5793b711f..840702ed9 100644 --- a/yt_dlp/extractor/teamcoco.py +++ b/yt_dlp/extractor/teamcoco.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .turner import TurnerBaseIE diff --git a/yt_dlp/extractor/teamtreehouse.py b/yt_dlp/extractor/teamtreehouse.py index 64522ec4c..dd802db5b 100644 --- a/yt_dlp/extractor/teamtreehouse.py +++ b/yt_dlp/extractor/teamtreehouse.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/techtalks.py b/yt_dlp/extractor/techtalks.py index 78f07319b..d37de360b 100644 --- a/yt_dlp/extractor/techtalks.py +++ b/yt_dlp/extractor/techtalks.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tele13.py b/yt_dlp/extractor/tele13.py index f8a27550e..8e35bc85f 100644 --- a/yt_dlp/extractor/tele13.py +++ b/yt_dlp/extractor/tele13.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import 
unicode_literals - from .common import InfoExtractor from .youtube import YoutubeIE from ..utils import ( diff --git a/yt_dlp/extractor/tele5.py b/yt_dlp/extractor/tele5.py index c7beee153..58d343b44 100644 --- a/yt_dlp/extractor/tele5.py +++ b/yt_dlp/extractor/tele5.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .dplay import DPlayIE from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/telebruxelles.py b/yt_dlp/extractor/telebruxelles.py index 9e8c89bd6..8d87b6ec1 100644 --- a/yt_dlp/extractor/telebruxelles.py +++ b/yt_dlp/extractor/telebruxelles.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/telecinco.py b/yt_dlp/extractor/telecinco.py index eecd6a5c9..a9c0755f4 100644 --- a/yt_dlp/extractor/telecinco.py +++ b/yt_dlp/extractor/telecinco.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/telegraaf.py b/yt_dlp/extractor/telegraaf.py index 2dc020537..bc9a8d608 100644 --- a/yt_dlp/extractor/telegraaf.py +++ b/yt_dlp/extractor/telegraaf.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/telegram.py b/yt_dlp/extractor/telegram.py index 2dfa261e9..bb9ca8c45 100644 --- a/yt_dlp/extractor/telegram.py +++ b/yt_dlp/extractor/telegram.py @@ -1,4 +1,5 @@ from .common import InfoExtractor +from ..utils import clean_html, get_element_by_class class TelegramEmbedIE(InfoExtractor): @@ -17,8 +18,8 @@ class TelegramEmbedIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - webpage_embed = self._download_webpage(f'{url}?embed=1', video_id) + webpage = self._download_webpage(url, video_id, query={'embed': 0}) + webpage_embed = self._download_webpage(url, video_id, query={'embed': 1}, note='Downloading embed page') formats = [{ 'url': self._proto_relative_url(self._search_regex( @@ -29,9 +30,12 @@ class TelegramEmbedIE(InfoExtractor): return { 'id': video_id, - 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, fatal=True), - 'description': self._html_search_meta(['og:description', 'twitter:description'], webpage, fatal=True), - 'thumbnail': self._search_regex(r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', - webpage_embed, 'thumbnail'), + 'title': self._html_search_meta(['og:title', 'twitter:title'], webpage, default=None), + 'description': self._html_search_meta( + ['og:description', 'twitter:description'], webpage, + default=clean_html(get_element_by_class('tgme_widget_message_text', webpage_embed))), + 'thumbnail': self._search_regex( + r'tgme_widget_message_video_thumb"[^>]+background-image:url\(\'([^\']+)\'\)', + webpage_embed, 'thumbnail'), 'formats': formats, } diff --git a/yt_dlp/extractor/telemb.py b/yt_dlp/extractor/telemb.py index ac2d603b6..7e444c0d0 100644 --- a/yt_dlp/extractor/telemb.py +++ b/yt_dlp/extractor/telemb.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/telemundo.py b/yt_dlp/extractor/telemundo.py index ebcecf55f..64954b8f1 100644 --- a/yt_dlp/extractor/telemundo.py +++ b/yt_dlp/extractor/telemundo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import
unicode_literals - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/telequebec.py b/yt_dlp/extractor/telequebec.py index 4bef2fe76..e89137269 100644 --- a/yt_dlp/extractor/telequebec.py +++ b/yt_dlp/extractor/telequebec.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/teletask.py b/yt_dlp/extractor/teletask.py index b9e2ef8ca..a73dd68fb 100644 --- a/yt_dlp/extractor/teletask.py +++ b/yt_dlp/extractor/teletask.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/telewebion.py b/yt_dlp/extractor/telewebion.py index 1207b1a1b..550549f05 100644 --- a/yt_dlp/extractor/telewebion.py +++ b/yt_dlp/extractor/telewebion.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py index 58fdecebe..80acaf190 100644 --- a/yt_dlp/extractor/tennistv.py +++ b/yt_dlp/extractor/tennistv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py index 5c7b54531..fc4781447 100644 --- a/yt_dlp/extractor/tenplay.py +++ b/yt_dlp/extractor/tenplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from datetime import datetime import base64 diff --git a/yt_dlp/extractor/testurl.py b/yt_dlp/extractor/testurl.py index 8bc512a9c..32cae429e 100644 --- a/yt_dlp/extractor/testurl.py +++ b/yt_dlp/extractor/testurl.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -10,55 +8,36 @@ class TestURLIE(InfoExtractor): """ Allows addressing of the test cases as test:yout.*be_1 """ IE_DESC = False # Do not list - _VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$' + _VALID_URL = r'test(?:url)?:(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?$' def _real_extract(self, url): - from ..extractor import gen_extractors + from ..extractor import gen_extractor_classes - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - extractor_id = mobj.group('extractor') - all_extractors = gen_extractors() + extractor_id, num = self._match_valid_url(url).group('extractor', 'num') rex = re.compile(extractor_id, flags=re.IGNORECASE) - matching_extractors = [ - e for e in all_extractors if rex.search(e.IE_NAME)] + matching_extractors = [e for e in gen_extractor_classes() if rex.search(e.IE_NAME)] if len(matching_extractors) == 0: - raise ExtractorError( - 'No extractors matching %r found' % extractor_id, - expected=True) + raise ExtractorError(f'No extractors matching {extractor_id!r} found', expected=True) elif len(matching_extractors) > 1: - # Is it obvious which one to pick? 
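The selection logic this hunk introduces is compact: the regex from the `test:` URL is searched against every extractor's IE_NAME; a unique match wins, several matches fall back to an exact case-insensitive name comparison, and zero matches raise an expected error. A minimal standalone sketch of that selection over plain name strings (`pick_extractor` is a hypothetical helper for illustration; the real code iterates classes from `gen_extractor_classes()`):

```python
import re

def pick_extractor(extractor_id, ie_names):
    # Case-insensitive regex search over all known extractor names,
    # mirroring the matching done by the rewritten TestURLIE
    rex = re.compile(extractor_id, flags=re.IGNORECASE)
    matching = [name for name in ie_names if rex.search(name)]
    if not matching:
        raise ValueError(f'No extractors matching {extractor_id!r} found')
    if len(matching) > 1:
        # Prefer an exact (case-insensitive) name match before giving up
        exact = [name for name in matching if name.lower() == extractor_id.lower()]
        if not exact:
            raise ValueError('Found multiple matching extractors: %s' % ' '.join(matching))
        return exact[0]
    return matching[0]

assert pick_extractor('yout.*be', ['youtube', 'vimeo']) == 'youtube'
assert pick_extractor('youtube', ['youtube', 'youtube:tab']) == 'youtube'
```

The hunk resumes with the exact-match tiebreak the sketch condenses: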
- try: + try: # Check for exact match extractor = next( ie for ie in matching_extractors if ie.IE_NAME.lower() == extractor_id.lower()) except StopIteration: raise ExtractorError( - ('Found multiple matching extractors: %s' % - ' '.join(ie.IE_NAME for ie in matching_extractors)), + 'Found multiple matching extractors: %s' % ' '.join(ie.IE_NAME for ie in matching_extractors), expected=True) else: extractor = matching_extractors[0] - num_str = mobj.group('num') - num = int(num_str) if num_str else 0 - - testcases = [] - t = getattr(extractor, '_TEST', None) - if t: - testcases.append(t) - testcases.extend(getattr(extractor, '_TESTS', [])) - + testcases = tuple(extractor.get_testcases(True)) try: - tc = testcases[num] + tc = testcases[int(num or 0)] except IndexError: raise ExtractorError( - ('Test case %d not found, got only %d tests' % - (num, len(testcases))), - expected=True) - - self.to_screen('Test URL: %s' % tc['url']) + f'Test case {num or 0} not found, got only {len(testcases)} tests', expected=True) - return self.url_result(tc['url'], video_id=video_id) + self.to_screen(f'Test URL: {tc["url"]}') + return self.url_result(tc['url']) diff --git a/yt_dlp/extractor/tf1.py b/yt_dlp/extractor/tf1.py index 44785bc65..4cf0322b3 100644 --- a/yt_dlp/extractor/tf1.py +++ b/yt_dlp/extractor/tf1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/tfo.py b/yt_dlp/extractor/tfo.py index 0631cb7ab..a24789cb3 100644 --- a/yt_dlp/extractor/tfo.py +++ b/yt_dlp/extractor/tfo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/theintercept.py b/yt_dlp/extractor/theintercept.py index f23b58713..a991a4dfd 100644 --- a/yt_dlp/extractor/theintercept.py +++ b/yt_dlp/extractor/theintercept.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py index c2729f12d..bf7efc013 100644 --- a/yt_dlp/extractor/theplatform.py +++ b/yt_dlp/extractor/theplatform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import time import hmac diff --git a/yt_dlp/extractor/thestar.py b/yt_dlp/extractor/thestar.py index c3f118894..293c34c06 100644 --- a/yt_dlp/extractor/thestar.py +++ b/yt_dlp/extractor/thestar.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/thesun.py b/yt_dlp/extractor/thesun.py index 15d4a6932..ba5848283 100644 --- a/yt_dlp/extractor/thesun.py +++ b/yt_dlp/extractor/thesun.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/theta.py b/yt_dlp/extractor/theta.py index 8b6d70a9f..3ec6b9711 100644 --- a/yt_dlp/extractor/theta.py +++ b/yt_dlp/extractor/theta.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import try_get diff --git a/yt_dlp/extractor/theweatherchannel.py b/yt_dlp/extractor/theweatherchannel.py index 9e506c9e0..9e94cd1ea 100644 --- a/yt_dlp/extractor/theweatherchannel.py +++ b/yt_dlp/extractor/theweatherchannel.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .theplatform import 
ThePlatformIE diff --git a/yt_dlp/extractor/thisamericanlife.py b/yt_dlp/extractor/thisamericanlife.py index 91e45f2c3..9a3d79840 100644 --- a/yt_dlp/extractor/thisamericanlife.py +++ b/yt_dlp/extractor/thisamericanlife.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/thisav.py b/yt_dlp/extractor/thisav.py index 6bb00b3ab..b1cd57d1f 100644 --- a/yt_dlp/extractor/thisav.py +++ b/yt_dlp/extractor/thisav.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import remove_end diff --git a/yt_dlp/extractor/thisoldhouse.py b/yt_dlp/extractor/thisoldhouse.py index 8a1d17311..55b6413ae 100644 --- a/yt_dlp/extractor/thisoldhouse.py +++ b/yt_dlp/extractor/thisoldhouse.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import HEADRequest diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py index 00a51dccd..1c0baf5ed 100644 --- a/yt_dlp/extractor/threeqsdn.py +++ b/yt_dlp/extractor/threeqsdn.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py index fe6a9554a..ce28a37c0 100644 --- a/yt_dlp/extractor/threespeak.py +++ b/yt_dlp/extractor/threespeak.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 987b0c43b..4ba993582 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import random import string diff --git a/yt_dlp/extractor/tinypic.py b/yt_dlp/extractor/tinypic.py index 39056e52e..216208cbd 100644 --- a/yt_dlp/extractor/tinypic.py +++ b/yt_dlp/extractor/tinypic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tmz.py b/yt_dlp/extractor/tmz.py index aee2273b8..ffb30c6b8 100644 --- a/yt_dlp/extractor/tmz.py +++ b/yt_dlp/extractor/tmz.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -21,8 +18,10 @@ class TMZIE(InfoExtractor): "title": "No Charges Against Hillary Clinton? Harvey Says It Ain't Over Yet", "description": "Harvey talks about Director Comey’s decision not to prosecute Hillary Clinton.", "timestamp": 1467831837, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20160706", + "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2016/07/06/5eea7dc01baa5c2e83eb06930c170e46_xl.jpg", + "duration": 772.0, }, }, { @@ -33,8 +32,10 @@ "title": "Angry Bagel Shop Guy Says He Doesn't Trust Women", "description": "The enraged man who went viral for ranting about women on dating sites before getting ragdolled in a bagel shop is defending his misogyny ... 
he says it's women's fault in the first place.", "timestamp": 1562889485, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20190711", + "thumbnail": "https://imagez.tmz.com/image/a8/4by3/2019/07/12/a85480d27b2f50a7bfea2322151d67a5_xl.jpg", + "duration": 123.0, }, }, { @@ -46,8 +47,10 @@ class TMZIE(InfoExtractor): "title": "Bobby Brown Tells Crowd ... Bobbi Kristina is Awake", "description": 'Bobby Brown stunned his audience during a concert Saturday night, when he told the crowd, "Bobbi is awake. She\'s watching me."', "timestamp": 1429467813, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20150419", + "duration": 29.0, + "thumbnail": "https://imagez.tmz.com/image/15/4by3/2015/04/20/1539c7ae136359fc979236fa6a9449dd_xl.jpg", }, }, { @@ -59,8 +62,10 @@ class TMZIE(InfoExtractor): "description": "Patti LaBelle made it known loud and clear last night ... NO " "ONE gets on her stage and strips down.", "timestamp": 1442683746, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20150919", + "duration": 104.0, + "thumbnail": "https://imagez.tmz.com/image/5e/4by3/2015/09/20/5e57d7575062528082994e18ac3f0f48_xl.jpg", }, }, { @@ -71,8 +76,10 @@ class TMZIE(InfoExtractor): "title": "NBA's Adam Silver -- Blake Griffin's a Great Guy ... He'll Learn from This", "description": "Two pretty parts of this video with NBA Commish Adam Silver.", "timestamp": 1454010989, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20160128", + "duration": 59.0, + "thumbnail": "https://imagez.tmz.com/image/38/4by3/2016/01/29/3856e83e0beb57059ec412122b842fb1_xl.jpg", }, }, { @@ -83,8 +90,10 @@ class TMZIE(InfoExtractor): "title": "Trump Star Vandal -- I'm Not Afraid of Donald or the Cops!", "description": "James Otis is the the guy who took a pickaxe to Donald Trump's star on the Walk of Fame, and he tells TMZ .. 
he's ready and willing to go to jail for the crime.", "timestamp": 1477500095, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20161026", + "thumbnail": "https://imagez.tmz.com/image/0d/4by3/2016/10/27/0d904814d4a75dcf9cc3b8cfd1edc1a3_xl.jpg", + "duration": 128.0, }, }, { @@ -99,8 +108,10 @@ class TMZIE(InfoExtractor): "swinging their billy clubs at both Anti-Fascist and Pro-Trump " "demonstrators.", "timestamp": 1604182772, - "uploader": "{'@type': 'Person', 'name': 'TMZ Staff'}", + "uploader": "TMZ Staff", "upload_date": "20201031", + "duration": 96.0, + "thumbnail": "https://imagez.tmz.com/image/f3/4by3/2020/10/31/f37bd5a8aef84497866f425130c58be3_xl.jpg", }, }, { @@ -111,8 +122,23 @@ class TMZIE(InfoExtractor): "title": "SICK LAMBO GERVONTA DAVIS IN HIS NEW RIDE RIGHT AFTER KO AFTER LEO EsNews Boxing", "uploader": "ESNEWS", "description": "md5:49675bc58883ccf80474b8aa701e1064", - "upload_date": "20201101", + "upload_date": "20201102", "uploader_id": "ESNEWS", + "uploader_url": "http://www.youtube.com/user/ESNEWS", + "like_count": int, + "channel_id": "UCI-Oq7oFGakzSzHFlTtsUsQ", + "channel": "ESNEWS", + "view_count": int, + "duration": 225, + "live_status": "not_live", + "thumbnail": "https://i.ytimg.com/vi_webp/Dddb6IGe-ws/maxresdefault.webp", + "channel_url": "https://www.youtube.com/channel/UCI-Oq7oFGakzSzHFlTtsUsQ", + "channel_follower_count": int, + "playable_in_embed": True, + "categories": ["Sports"], + "age_limit": 0, + "tags": "count:10", + "availability": "public", }, }, { @@ -120,12 +146,20 @@ class TMZIE(InfoExtractor): "info_dict": { "id": "1329450007125225473", "ext": "mp4", - "title": "TheMacLife - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.", - "uploader": "TheMacLife", + "title": "The Mac Life - BREAKING: Conor McGregor (@thenotoriousmma) has signed his bout agreement for his rematch with Dustin Poirier for January 23.", + "uploader": "The Mac Life", "description": "md5:56e6009bbc3d12498e10d08a8e1f1c69", "upload_date": "20201119", - "uploader_id": "Maclifeofficial", + "uploader_id": "TheMacLife", "timestamp": 1605800556, + "thumbnail": "https://pbs.twimg.com/media/EnMmfT8XYAExgxJ.jpg?name=small", + "like_count": int, + "duration": 11.812, + "uploader_url": "https://twitter.com/TheMacLife", + "age_limit": 0, + "repost_count": int, + "tags": [], + "comment_count": int, }, }, ] diff --git a/yt_dlp/extractor/tnaflix.py b/yt_dlp/extractor/tnaflix.py index d7617f708..6b766f3cc 100644 --- a/yt_dlp/extractor/tnaflix.py +++ b/yt_dlp/extractor/tnaflix.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/toggle.py b/yt_dlp/extractor/toggle.py index eb873495f..51a51d84b 100644 --- a/yt_dlp/extractor/toggle.py +++ b/yt_dlp/extractor/toggle.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/toggo.py b/yt_dlp/extractor/toggo.py index da5f0c4d1..9f98cfaf0 100644 --- a/yt_dlp/extractor/toggo.py +++ b/yt_dlp/extractor/toggo.py @@ -4,7 +4,7 @@ from ..utils import int_or_none, parse_qs class ToggoIE(InfoExtractor): IE_NAME = 'toggo' - _VALID_URL = r'https?://(?:www\.)?toggo\.de/[\w-]+/folge/(?P<id>[\w-]+)' + _VALID_URL = r'https?://(?:www\.)?toggo\.de/(?:toggolino/)?[^/?#]+/folge/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.toggo.de/weihnachtsmann--co-kg/folge/ein-geschenk-fuer-zwei', 
'info_dict': { @@ -27,6 +27,12 @@ class ToggoIE(InfoExtractor): 'upload_date': '20200217', }, 'params': {'skip_download': True}, + }, { + 'url': 'https://www.toggo.de/grizzy--die-lemminge/folge/ab-durch-die-wand-vogelfrei-rock\'n\'lemming', + 'only_matching': True, + }, { + 'url': 'https://www.toggo.de/toggolino/paw-patrol/folge/der-wetter-zeppelin-der-chili-kochwettbewerb', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/tokentube.py b/yt_dlp/extractor/tokentube.py index 579623fed..a30cabb3c 100644 --- a/yt_dlp/extractor/tokentube.py +++ b/yt_dlp/extractor/tokentube.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import re diff --git a/yt_dlp/extractor/tonline.py b/yt_dlp/extractor/tonline.py index 9b6a40db5..720282663 100644 --- a/yt_dlp/extractor/tonline.py +++ b/yt_dlp/extractor/tonline.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none, join_nonempty diff --git a/yt_dlp/extractor/toongoggles.py b/yt_dlp/extractor/toongoggles.py index df13d64c0..1b8fc3acd 100644 --- a/yt_dlp/extractor/toongoggles.py +++ b/yt_dlp/extractor/toongoggles.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/toutv.py b/yt_dlp/extractor/toutv.py index 1d5da1040..349c0bded 100644 --- a/yt_dlp/extractor/toutv.py +++ b/yt_dlp/extractor/toutv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .radiocanada import RadioCanadaIE diff --git a/yt_dlp/extractor/toypics.py b/yt_dlp/extractor/toypics.py index f705a06c9..bc7336186 100644 --- a/yt_dlp/extractor/toypics.py +++ b/yt_dlp/extractor/toypics.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor import re diff --git a/yt_dlp/extractor/traileraddict.py b/yt_dlp/extractor/traileraddict.py index 514f4793e..5c4a138c4 100644 --- a/yt_dlp/extractor/traileraddict.py +++ b/yt_dlp/extractor/traileraddict.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/trilulilu.py b/yt_dlp/extractor/trilulilu.py index a800449e9..fb97be737 100644 --- a/yt_dlp/extractor/trilulilu.py +++ b/yt_dlp/extractor/trilulilu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index 65ea13ddb..c049025a3 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -1,8 +1,7 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json +import random +import string from .common import InfoExtractor from ..utils import ( @@ -18,10 +17,20 @@ class TrovoBaseIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/' _HEADERS = {'Origin': 'https://trovo.live'} - def _call_api(self, video_id, query=None, data=None): - return self._download_json( - 'https://gql.trovo.live/', video_id, query=query, data=data, - headers={'Accept': 'application/json'}) + def _call_api(self, video_id, data): + if 'persistedQuery' in data.get('extensions', {}): + url = 'https://gql.trovo.live' + else: + url = 'https://api-web.trovo.live/graphql' + + resp = self._download_json( + url, video_id, 
data=json.dumps([data]).encode(), headers={'Accept': 'application/json'}, + query={ + 'qid': ''.join(random.choices(string.ascii_uppercase + string.digits, k=10)), + })[0] + if 'errors' in resp: + raise ExtractorError(f'Trovo said: {resp["errors"][0]["message"]}') + return resp['data'][data['operationName']] def _extract_streamer_info(self, data): streamer_info = data.get('streamerInfo') or {} @@ -38,27 +47,14 @@ class TrovoIE(TrovoBaseIE): def _real_extract(self, url): username = self._match_id(url) - live_info = self._call_api(username, query={ - 'query': '''{ - getLiveInfo(params: {userName: "%s"}) { - isLive - programInfo { - coverUrl - id - streamInfo { - desc - playUrl - } - title - } - streamerInfo { - nickName - uid - userName - } - } -}''' % username, - })['data']['getLiveInfo'] + live_info = self._call_api(username, data={ + 'operationName': 'live_LiveReaderService_GetLiveInfo', + 'variables': { + 'params': { + 'userName': username, + }, + }, + }) if live_info.get('isLive') == 0: raise ExtractorError('%s is offline' % username, expected=True) program_info = live_info['programInfo'] @@ -93,56 +89,61 @@ class TrovoIE(TrovoBaseIE): class TrovoVodIE(TrovoBaseIE): _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)' _TESTS = [{ - 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043', + 'url': 'https://trovo.live/clip/lc-5285890818705062210?ltab=videos', + 'params': {'getcomments': True}, 'info_dict': { - 'id': 'ltv-100095501_100095501_1609596043', + 'id': 'lc-5285890818705062210', 'ext': 'mp4', - 'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!', - 'uploader': 'Exsl', - 'timestamp': 1609640305, - 'upload_date': '20210103', - 'uploader_id': '100095501', - 'duration': 43977, + 'title': 'fatal moaning for a super good๐คฃ๐คฃ', + 'uploader': 'OneTappedYou', + 'timestamp': 1621628019, + 'upload_date': '20210521', + 'uploader_id': '100719456', + 'duration': 31, 'view_count': int, 'like_count': int, 'comment_count': int, - 'comments': 'mincount:8', - 'categories': ['Grand Theft Auto V'], + 'comments': 'mincount:1', + 'categories': ['Call of Duty: Mobile'], + 'uploader_url': 'https://trovo.live/OneTappedYou', + 'thumbnail': r're:^https?://.*\.jpg', }, - 'skip': '404' }, { - 'url': 'https://trovo.live/clip/lc-5285890810184026005', + 'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043', 'only_matching': True, }] def _real_extract(self, url): vid = self._match_id(url) - resp = self._call_api(vid, data=json.dumps([{ - 'query': '''{ - batchGetVodDetailInfo(params: {vids: ["%s"]}) { - VodDetailInfos - } -}''' % vid, - }, { - 'query': '''{ - getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) { - commentList { - author { - nickName - uid - } - commentID - content - createdAt - parentID - } - } -}''' % vid, - }]).encode()) - vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid] + + # NOTE: It is also possible to extract this info from the Nuxt data on the website, + # however that seems unreliable - sometimes it randomly doesn't return the data, + # at least when using a non-residential IP. 
+ resp = self._call_api(vid, data={ + 'operationName': 'batchGetVodDetailInfo', + 'variables': { + 'params': { + 'vids': [vid], + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'ceae0355d66476e21a1dd8e8af9f68de95b4019da2cda8b177c9a2255dad31d0', + }, + }, + }) + vod_detail_info = resp['VodDetailInfos'][vid] vod_info = vod_detail_info['vodInfo'] title = vod_info['title'] + if try_get(vod_info, lambda x: x['playbackRights']['playbackRights'] != 'Normal'): + playback_rights_setting = vod_info['playbackRights']['playbackRightsSetting'] + if playback_rights_setting == 'SubscriberOnly': + raise ExtractorError('This video is only available for subscribers', expected=True) + else: + raise ExtractorError(f'This video is not available ({playback_rights_setting})', expected=True) + language = vod_info.get('languageName') formats = [] for play_info in (vod_info.get('playInfos') or []): @@ -166,23 +167,6 @@ class TrovoVodIE(TrovoBaseIE): category = vod_info.get('categoryName') get_count = lambda x: int_or_none(vod_info.get(x + 'Num')) - comment_list = try_get(resp, lambda x: x[1]['data']['getCommentList']['commentList'], list) or [] - comments = [] - for comment in comment_list: - content = comment.get('content') - if not content: - continue - author = comment.get('author') or {} - parent = comment.get('parentID') - comments.append({ - 'author': author.get('nickName'), - 'author_id': str_or_none(author.get('uid')), - 'id': str_or_none(comment.get('commentID')), - 'text': content, - 'timestamp': int_or_none(comment.get('createdAt')), - 'parent': 'root' if parent == 0 else str_or_none(parent), - }) - info = { 'id': vid, 'title': title, @@ -193,12 +177,51 @@ class TrovoVodIE(TrovoBaseIE): 'view_count': get_count('watch'), 'like_count': get_count('like'), 'comment_count': get_count('comment'), - 'comments': comments, 'categories': [category] if category else None, + '__post_extractor': self.extract_comments(vid), } info.update(self._extract_streamer_info(vod_detail_info)) return info + def _get_comments(self, vid): + for page in itertools.count(1): + comments_json = self._call_api(vid, data={ + 'operationName': 'getCommentList', + 'variables': { + 'params': { + 'appInfo': { + 'postID': vid, + }, + 'preview': {}, + 'pageSize': 99, + 'page': page, + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'be8e5f9522ddac7f7c604c0d284fd22481813263580849926c4c66fb767eed25', + }, + }, + }) + for comment in comments_json['commentList']: + content = comment.get('content') + if not content: + continue + author = comment.get('author') or {} + parent = comment.get('parentID') + yield { + 'author': author.get('nickName'), + 'author_id': str_or_none(author.get('uid')), + 'id': str_or_none(comment.get('commentID')), + 'text': content, + 'timestamp': int_or_none(comment.get('createdAt')), + 'parent': 'root' if parent == 0 else str_or_none(parent), + } + + if comments_json['lastPage']: + break + class TrovoChannelBaseIE(TrovoBaseIE): def _get_vod_json(self, page, uid): @@ -218,9 +241,15 @@ class TrovoChannelBaseIE(TrovoBaseIE): def _real_extract(self, url): id = self._match_id(url) - uid = str(self._call_api(id, query={ - 'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id - })['data']['getLiveInfo']['streamerInfo']['uid']) + live_info = self._call_api(id, data={ + 'operationName': 'live_LiveReaderService_GetLiveInfo', + 'variables': { + 'params': { + 'userName': id, + }, + }, + }) + uid = str(live_info['streamerInfo']['uid']) return 
self.playlist_result(self._entries(uid), playlist_id=uid) @@ -236,13 +265,25 @@ class TrovoChannelVodIE(TrovoChannelBaseIE): }, }] - _QUERY = '{getChannelLtvVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s}){hasMore,vodInfos{vid}}}' _TYPE = 'video' def _get_vod_json(self, page, uid): - return self._call_api(uid, query={ - 'query': self._QUERY % (page, uid) - })['data']['getChannelLtvVideoInfos'] + return self._call_api(uid, data={ + 'operationName': 'getChannelLtvVideoInfos', + 'variables': { + 'params': { + 'channelID': int(uid), + 'pageSize': 99, + 'currPage': page, + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': '78fe32792005eab7e922cafcdad9c56bed8bbc5f5df3c7cd24fcb84a744f5f78', + }, + }, + }) class TrovoChannelClipIE(TrovoChannelBaseIE): @@ -257,10 +298,22 @@ class TrovoChannelClipIE(TrovoChannelBaseIE): }, }] - _QUERY = '{getChannelClipVideoInfos(params:{pageSize:99,currPage:%d,channelID:%s,albumType:VOD_CLIP_ALBUM_TYPE_LATEST}){hasMore,vodInfos{vid}}}' _TYPE = 'clip' def _get_vod_json(self, page, uid): - return self._call_api(uid, query={ - 'query': self._QUERY % (page, uid) - })['data']['getChannelClipVideoInfos'] + return self._call_api(uid, data={ + 'operationName': 'getChannelClipVideoInfos', + 'variables': { + 'params': { + 'channelID': int(uid), + 'pageSize': 99, + 'currPage': page, + }, + }, + 'extensions': { + 'persistedQuery': { + 'version': 1, + 'sha256Hash': 'e7924bfe20059b5c75fc8ff9e7929f43635681a7bdf3befa01072ed22c8eff31', + }, + }, + }) diff --git a/yt_dlp/extractor/trueid.py b/yt_dlp/extractor/trueid.py index fc98303ab..696343627 100644 --- a/yt_dlp/extractor/trueid.py +++ b/yt_dlp/extractor/trueid.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_HTTPError from ..utils import ( diff --git a/yt_dlp/extractor/trunews.py b/yt_dlp/extractor/trunews.py index cca5b5ceb..d5ce86ece 100644 --- a/yt_dlp/extractor/trunews.py +++ b/yt_dlp/extractor/trunews.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/trutv.py b/yt_dlp/extractor/trutv.py index c09ff897c..ea0f2f40e 100644 --- a/yt_dlp/extractor/trutv.py +++ b/yt_dlp/extractor/trutv.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .turner import TurnerBaseIE from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/tube8.py b/yt_dlp/extractor/tube8.py index db93b0182..32e80d9d2 100644 --- a/yt_dlp/extractor/tube8.py +++ b/yt_dlp/extractor/tube8.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from ..utils import ( diff --git a/yt_dlp/extractor/tubitv.py b/yt_dlp/extractor/tubitv.py index 31feb9a70..9c8e1ac87 100644 --- a/yt_dlp/extractor/tubitv.py +++ b/yt_dlp/extractor/tubitv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tudou.py b/yt_dlp/extractor/tudou.py deleted file mode 100644 index 7421378a8..000000000 --- a/yt_dlp/extractor/tudou.py +++ /dev/null @@ -1,49 +0,0 @@ -# coding: utf-8 - -from __future__ import unicode_literals - -from .common import InfoExtractor - - -class TudouPlaylistIE(InfoExtractor): - IE_NAME = 'tudou:playlist' - _VALID_URL = r'https?://(?:www\.)?tudou\.com/listplay/(?P<id>[\w-]{11})\.html' - _TESTS = [{ - 'url': 'http://www.tudou.com/listplay/zzdE77v6Mmo.html', - 'info_dict': { - 'id': 'zzdE77v6Mmo', - }, - 
'playlist_mincount': 209, - }] - - def _real_extract(self, url): - playlist_id = self._match_id(url) - playlist_data = self._download_json( - 'http://www.tudou.com/tvp/plist.action?lcode=%s' % playlist_id, playlist_id) - entries = [self.url_result( - 'http://www.tudou.com/programs/view/%s' % item['icode'], - 'Tudou', item['icode'], - item['kw']) for item in playlist_data['items']] - return self.playlist_result(entries, playlist_id) - - -class TudouAlbumIE(InfoExtractor): - IE_NAME = 'tudou:album' - _VALID_URL = r'https?://(?:www\.)?tudou\.com/album(?:cover|play)/(?P<id>[\w-]{11})' - _TESTS = [{ - 'url': 'http://www.tudou.com/albumplay/v5qckFJvNJg.html', - 'info_dict': { - 'id': 'v5qckFJvNJg', - }, - 'playlist_mincount': 45, - }] - - def _real_extract(self, url): - album_id = self._match_id(url) - album_data = self._download_json( - 'http://www.tudou.com/tvp/alist.action?acode=%s' % album_id, album_id) - entries = [self.url_result( - 'http://www.tudou.com/programs/view/%s' % item['icode'], - 'Tudou', item['icode'], - item['kw']) for item in album_data['items']] - return self.playlist_result(entries, album_id) diff --git a/yt_dlp/extractor/tumblr.py b/yt_dlp/extractor/tumblr.py index 8086f613d..5d6615100 100644 --- a/yt_dlp/extractor/tumblr.py +++ b/yt_dlp/extractor/tumblr.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/tunein.py b/yt_dlp/extractor/tunein.py index 7e51de89e..e3d3f2a96 100644 --- a/yt_dlp/extractor/tunein.py +++ b/yt_dlp/extractor/tunein.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tunepk.py b/yt_dlp/extractor/tunepk.py index 9d42651ce..2973d15ec 100644 --- a/yt_dlp/extractor/tunepk.py +++ b/yt_dlp/extractor/tunepk.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/turbo.py b/yt_dlp/extractor/turbo.py index f6bbf2529..e3f8941c4 100644 --- a/yt_dlp/extractor/turbo.py +++ b/yt_dlp/extractor/turbo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index 519dc323c..fae8b51e7 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .adobepass import AdobePassIE @@ -144,7 +141,7 @@ class TurnerBaseIE(AdobePassIE): m3u8_id=format_id or 'hls', fatal=False) if '/secure/' in video_url and '?hdnea=' in video_url: for f in m3u8_formats: - f['_ffmpeg_args'] = ['-seekable', '0'] + f['downloader_options'] = {'ffmpeg_args': ['-seekable', '0']} formats.extend(m3u8_formats) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index 977da30fe..391baa6c5 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tv2dk.py b/yt_dlp/extractor/tv2dk.py index ec5cbdf03..0af286312 100644 --- a/yt_dlp/extractor/tv2dk.py +++ b/yt_dlp/extractor/tv2dk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json import re diff --git 
a/yt_dlp/extractor/tv2hu.py b/yt_dlp/extractor/tv2hu.py index f2104358b..6ac07716b 100644 --- a/yt_dlp/extractor/tv2hu.py +++ b/yt_dlp/extractor/tv2hu.py @@ -1,6 +1,4 @@ # encoding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( traverse_obj, diff --git a/yt_dlp/extractor/tv4.py b/yt_dlp/extractor/tv4.py index 4043e6366..e8cdd5c8c 100644 --- a/yt_dlp/extractor/tv4.py +++ b/yt_dlp/extractor/tv4.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tv5mondeplus.py b/yt_dlp/extractor/tv5mondeplus.py index a0832d28f..d449cdc04 100644 --- a/yt_dlp/extractor/tv5mondeplus.py +++ b/yt_dlp/extractor/tv5mondeplus.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/tv5unis.py b/yt_dlp/extractor/tv5unis.py index 398b85db5..978255b17 100644 --- a/yt_dlp/extractor/tv5unis.py +++ b/yt_dlp/extractor/tv5unis.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/tva.py b/yt_dlp/extractor/tva.py index 52a4ddf32..9afe23328 100644 --- a/yt_dlp/extractor/tva.py +++ b/yt_dlp/extractor/tva.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/tvanouvelles.py b/yt_dlp/extractor/tvanouvelles.py index 1086176a2..b9f5e110e 100644 --- a/yt_dlp/extractor/tvanouvelles.py +++ b/yt_dlp/extractor/tvanouvelles.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tvc.py b/yt_dlp/extractor/tvc.py index 008f64cc2..4ccc8f522 100644 --- a/yt_dlp/extractor/tvc.py +++ b/yt_dlp/extractor/tvc.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index f23af1f14..b04575bd5 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -1,11 +1,10 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, + join_nonempty, smuggle_url, str_or_none, + strip_or_none, traverse_obj, ) @@ -14,19 +13,16 @@ class TVerIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ 'skip': 'videos are only available for 7 days', - 'url': 'https://tver.jp/episodes/ephss8yveb', + 'url': 'https://tver.jp/episodes/ep83nf3w4p', 'info_dict': { - 'title': '#44　料理と値段と店主にびっくり　オモてなしすぎウマすぎの店　2時間SP', - 'description': 'md5:66985373a66fed8ad3cd595a3cfebb13', - }, - 'add_ie': ['BrightcoveNew'], - }, { - 'skip': 'videos are only available for 7 days', - 'url': 'https://tver.jp/lp/episodes/ep6f16g26p', - 'info_dict': { - # sorry but this is "correct" - 'title': '4月11日(月)23時06分 ~ 放送予定', - 'description': 'md5:4029cc5f4b1e8090dfc5b7bd2bc5cd0b', + 'title': '家事ヤロウ!!! 売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！', + 'description': 'md5:dc2c06b6acc23f1e7c730c513737719b', + 'series': '家事ヤロウ!!!', + 'episode': '売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！', + 'alt_title': '売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！', + 'channel': 'テレビ朝日', + 'onair_label': '5月3日(火)放送分', + 'ext_title': '家事ヤロウ!!! 売り場席巻のチーズSP＆財前直見×森泉親子の脱東京暮らし密着！ テレビ朝日 5月3日(火)放送分', }, 'add_ie': ['BrightcoveNew'], }, { @@ -81,14 +77,26 @@ class TVerIE(InfoExtractor): 'x-tver-platform-type': 'web' }) + additional_content_info = traverse_obj( + additional_info, ('result', 'episode', 'content'), get_all=False) or {} + episode = strip_or_none(additional_content_info.get('title')) + series = str_or_none(additional_content_info.get('seriesTitle')) + title = ( + join_nonempty(series, episode, delim=' ') + or str_or_none(video_info.get('title'))) + provider = str_or_none(additional_content_info.get('productionProviderName')) + onair_label = str_or_none(additional_content_info.get('broadcastDateLabel')) + return { '_type': 'url_transparent', - 'title': str_or_none(video_info.get('title')), + 'title': title, + 'series': series, + 'episode': episode, + # another title which is considered the "full title" by some viewers + 'alt_title': join_nonempty(title, provider, onair_label, delim=' '), + 'channel': provider, 'description': str_or_none(video_info.get('description')), 'url': smuggle_url( self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}), - 'series': traverse_obj( - additional_info, ('result', ('episode', 'series'), 'content', ('seriesTitle', 'title')), - get_all=False), 'ie_key': 'BrightcoveNew', } diff --git a/yt_dlp/extractor/tvigle.py b/yt_dlp/extractor/tvigle.py index aa25ba0dc..cc1d35dc2 100644 --- a/yt_dlp/extractor/tvigle.py +++ b/yt_dlp/extractor/tvigle.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/tvland.py b/yt_dlp/extractor/tvland.py index 9ebf57f74..481d5eb19 100644 --- a/yt_dlp/extractor/tvland.py +++ b/yt_dlp/extractor/tvland.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor # TODO: Remove - Reason not used anymore - Service moved to youtube diff --git a/yt_dlp/extractor/tvn24.py b/yt_dlp/extractor/tvn24.py index de0fb5063..22b605823 100644 --- a/yt_dlp/extractor/tvn24.py +++ b/yt_dlp/extractor/tvn24.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/tvnet.py b/yt_dlp/extractor/tvnet.py index aa1e9d923..5820bb4a7 100644 --- a/yt_dlp/extractor/tvnet.py +++ b/yt_dlp/extractor/tvnet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tvnoe.py b/yt_dlp/extractor/tvnoe.py index 26a5aeae4..712fbb275 100644 --- a/yt_dlp/extractor/tvnoe.py +++ b/yt_dlp/extractor/tvnoe.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/tvnow.py b/yt_dlp/extractor/tvnow.py index b31818477..4aa558d83 100644 --- a/yt_dlp/extractor/tvnow.py +++ b/yt_dlp/extractor/tvnow.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals
- import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tvopengr.py b/yt_dlp/extractor/tvopengr.py index a11cdc6b0..aded261f3 100644 --- a/yt_dlp/extractor/tvopengr.py +++ b/yt_dlp/extractor/tvopengr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index 48e2c6e76..69168f655 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import random import re diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index b5dbc5526..f815b5137 100644 --- a/yt_dlp/extractor/tvplay.py +++ b/yt_dlp/extractor/tvplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py index 5970596b2..31d70b6b8 100644 --- a/yt_dlp/extractor/tvplayer.py +++ b/yt_dlp/extractor/tvplayer.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_HTTPError, diff --git a/yt_dlp/extractor/tweakers.py b/yt_dlp/extractor/tweakers.py index 2b10d9bca..6d1f92bbb 100644 --- a/yt_dlp/extractor/tweakers.py +++ b/yt_dlp/extractor/tweakers.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/twentyfourvideo.py b/yt_dlp/extractor/twentyfourvideo.py index ae19e11e1..baeb85d47 100644 --- a/yt_dlp/extractor/twentyfourvideo.py +++ b/yt_dlp/extractor/twentyfourvideo.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( parse_iso8601, diff --git a/yt_dlp/extractor/twentymin.py b/yt_dlp/extractor/twentymin.py index a42977f39..616c3c36e 100644 --- a/yt_dlp/extractor/twentymin.py +++ b/yt_dlp/extractor/twentymin.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/twentythreevideo.py b/yt_dlp/extractor/twentythreevideo.py index e8cf5a1e9..290c3761e 100644 --- a/yt_dlp/extractor/twentythreevideo.py +++ b/yt_dlp/extractor/twentythreevideo.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index 7f3fa0735..0dbb97a36 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -1,11 +1,8 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re from .common import InfoExtractor -from ..downloader.websocket import has_websockets +from ..dependencies import websockets from ..utils import ( clean_html, ExtractorError, @@ -164,7 +161,7 @@ class TwitCastingIE(InfoExtractor): note='Downloading source quality m3u8', headers=self._M3U8_HEADERS, fatal=False)) - if has_websockets: + if websockets: qq = qualities(['base', 'mobilesource', 'main']) streams = traverse_obj(stream_server_data, ('llfmp4', 'streams')) or {} for mode, ws_url in streams.items(): @@ -190,6 +187,7 @@ class TwitCastingIE(InfoExtractor): infodict = { # No problem here since there's only one manifest 'formats': formats, + 'http_headers': self._M3U8_HEADERS, } else: infodict = { diff --git 
a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 10de74c8e..834350d12 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import collections import itertools import json diff --git a/yt_dlp/extractor/twitter.py b/yt_dlp/extractor/twitter.py index 8ccc38e24..af6750333 100644 --- a/yt_dlp/extractor/twitter.py +++ b/yt_dlp/extractor/twitter.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/udemy.py b/yt_dlp/extractor/udemy.py index 235f89713..d35cd0d43 100644 --- a/yt_dlp/extractor/udemy.py +++ b/yt_dlp/extractor/udemy.py @@ -1,11 +1,8 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_kwargs, compat_str, compat_urllib_request, compat_urlparse, @@ -132,7 +129,7 @@ class UdemyIE(InfoExtractor): headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36' kwargs['headers'] = headers ret = super(UdemyIE, self)._download_webpage_handle( - *args, **compat_kwargs(kwargs)) + *args, **kwargs) if not ret: return ret webpage, _ = ret diff --git a/yt_dlp/extractor/udn.py b/yt_dlp/extractor/udn.py index 2c8e5c7b4..4fa74b9e8 100644 --- a/yt_dlp/extractor/udn.py +++ b/yt_dlp/extractor/udn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/ufctv.py b/yt_dlp/extractor/ufctv.py index 3d74ba071..2c1c5e0ff 100644 --- a/yt_dlp/extractor/ufctv.py +++ b/yt_dlp/extractor/ufctv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .imggaming import ImgGamingBaseIE diff --git a/yt_dlp/extractor/ukcolumn.py b/yt_dlp/extractor/ukcolumn.py index d2626f0d3..aade79f20 100644 --- a/yt_dlp/extractor/ukcolumn.py +++ b/yt_dlp/extractor/ukcolumn.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from ..utils import ( unescapeHTML, urljoin, diff --git a/yt_dlp/extractor/uktvplay.py b/yt_dlp/extractor/uktvplay.py index f28fd514d..abea07ab5 100644 --- a/yt_dlp/extractor/uktvplay.py +++ b/yt_dlp/extractor/uktvplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/umg.py b/yt_dlp/extractor/umg.py index c1b65d189..e6ed656b9 100644 --- a/yt_dlp/extractor/umg.py +++ b/yt_dlp/extractor/umg.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/unistra.py b/yt_dlp/extractor/unistra.py index 685d74f35..083c87209 100644 --- a/yt_dlp/extractor/unistra.py +++ b/yt_dlp/extractor/unistra.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/unity.py b/yt_dlp/extractor/unity.py index 73daacf29..d1b0ecbf3 100644 --- a/yt_dlp/extractor/unity.py +++ b/yt_dlp/extractor/unity.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .youtube import YoutubeIE diff --git a/yt_dlp/extractor/uol.py b/yt_dlp/extractor/uol.py index 1baee0b10..e3d9127d8 100644 --- a/yt_dlp/extractor/uol.py +++ b/yt_dlp/extractor/uol.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from 
.common import InfoExtractor from ..compat import ( compat_str, diff --git a/yt_dlp/extractor/uplynk.py b/yt_dlp/extractor/uplynk.py index 9adb96943..04c96f388 100644 --- a/yt_dlp/extractor/uplynk.py +++ b/yt_dlp/extractor/uplynk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/urort.py b/yt_dlp/extractor/urort.py index 020425fc7..296799d38 100644 --- a/yt_dlp/extractor/urort.py +++ b/yt_dlp/extractor/urort.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import ( compat_urllib_parse, diff --git a/yt_dlp/extractor/urplay.py b/yt_dlp/extractor/urplay.py index eb2ab26e1..30bd3dcbf 100644 --- a/yt_dlp/extractor/urplay.py +++ b/yt_dlp/extractor/urplay.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/usanetwork.py b/yt_dlp/extractor/usanetwork.py index d953e460b..d6b58a51c 100644 --- a/yt_dlp/extractor/usanetwork.py +++ b/yt_dlp/extractor/usanetwork.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .nbc import NBCIE diff --git a/yt_dlp/extractor/usatoday.py b/yt_dlp/extractor/usatoday.py index b2103448d..3243f3e3b 100644 --- a/yt_dlp/extractor/usatoday.py +++ b/yt_dlp/extractor/usatoday.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/ustream.py b/yt_dlp/extractor/ustream.py index 4a7a8f879..fff21667a 100644 --- a/yt_dlp/extractor/ustream.py +++ b/yt_dlp/extractor/ustream.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import random import re diff --git a/yt_dlp/extractor/ustudio.py b/yt_dlp/extractor/ustudio.py index 92509d1bf..fd5dad0fc 100644 --- a/yt_dlp/extractor/ustudio.py +++ b/yt_dlp/extractor/ustudio.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py index 4986635f2..1213ae1bf 100644 --- a/yt_dlp/extractor/utreon.py +++ b/yt_dlp/extractor/utreon.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( dict_get, diff --git a/yt_dlp/extractor/varzesh3.py b/yt_dlp/extractor/varzesh3.py index 32655b96d..2c13cbdc0 100644 --- a/yt_dlp/extractor/varzesh3.py +++ b/yt_dlp/extractor/varzesh3.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/vbox7.py b/yt_dlp/extractor/vbox7.py index 8152acefd..76c844cb8 100644 --- a/yt_dlp/extractor/vbox7.py +++ b/yt_dlp/extractor/vbox7.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/veehd.py b/yt_dlp/extractor/veehd.py index a6dc3c8d8..5ecd88726 100644 --- a/yt_dlp/extractor/veehd.py +++ b/yt_dlp/extractor/veehd.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/veo.py b/yt_dlp/extractor/veo.py index d87bb5b47..25d462a7d 100644 --- a/yt_dlp/extractor/veo.py +++ b/yt_dlp/extractor/veo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from 
.common import InfoExtractor from ..utils import ( diff --git a/yt_dlp/extractor/veoh.py b/yt_dlp/extractor/veoh.py index d9afb5617..70280ae85 100644 --- a/yt_dlp/extractor/veoh.py +++ b/yt_dlp/extractor/veoh.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/vesti.py b/yt_dlp/extractor/vesti.py index 002047dbf..e9731a941 100644 --- a/yt_dlp/extractor/vesti.py +++ b/yt_dlp/extractor/vesti.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vevo.py b/yt_dlp/extractor/vevo.py index 8a0f29259..bc0187511 100644 --- a/yt_dlp/extractor/vevo.py +++ b/yt_dlp/extractor/vevo.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/vgtv.py b/yt_dlp/extractor/vgtv.py index 9d6090b08..6564b7b0b 100644 --- a/yt_dlp/extractor/vgtv.py +++ b/yt_dlp/extractor/vgtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vh1.py b/yt_dlp/extractor/vh1.py index 862c5c7dc..41b8a4607 100644 --- a/yt_dlp/extractor/vh1.py +++ b/yt_dlp/extractor/vh1.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .mtv import MTVServicesInfoExtractor # TODO Remove - Reason: Outdated Site diff --git a/yt_dlp/extractor/vice.py b/yt_dlp/extractor/vice.py index c8c30559e..abb4a6fa0 100644 --- a/yt_dlp/extractor/vice.py +++ b/yt_dlp/extractor/vice.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import functools import hashlib import json diff --git a/yt_dlp/extractor/vidbit.py b/yt_dlp/extractor/vidbit.py index 91f45b7cc..2813032db 100644 --- a/yt_dlp/extractor/vidbit.py +++ b/yt_dlp/extractor/vidbit.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urlparse from ..utils import ( diff --git a/yt_dlp/extractor/viddler.py b/yt_dlp/extractor/viddler.py index ecc48246f..f491b67ef 100644 --- a/yt_dlp/extractor/viddler.py +++ b/yt_dlp/extractor/viddler.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( float_or_none, diff --git a/yt_dlp/extractor/videa.py b/yt_dlp/extractor/videa.py index 90d705092..251eb78fe 100644 --- a/yt_dlp/extractor/videa.py +++ b/yt_dlp/extractor/videa.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random import re import string diff --git a/yt_dlp/extractor/videocampus_sachsen.py b/yt_dlp/extractor/videocampus_sachsen.py index 96e98573f..906412f08 100644 --- a/yt_dlp/extractor/videocampus_sachsen.py +++ b/yt_dlp/extractor/videocampus_sachsen.py @@ -1,12 +1,70 @@ -# coding: utf-8 +import re + from .common import InfoExtractor +from ..compat import compat_HTTPError +from ..utils import ExtractorError class VideocampusSachsenIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://videocampus\.sachsen\.de/(?: + IE_NAME = 'Vimp' + _INSTANCES = ( + 'campus.demo.vimp.com', + 'corporate.demo.vimp.com', + 'dancehalldatabase.com', + 'educhannel.hs-gesundheit.de', + 'emedia.ls.haw-hamburg.de', + 'globale-evolution.net', + 'k210039.vimp.mivitec.net', + 'media.cmslegal.com', + 'media.hs-furtwangen.de', + 'media.hwr-berlin.de', + 'mediathek.dkfz.de', + 'mediathek.htw-berlin.de', + 'mediathek.polizei-bw.de', + 'medien.hs-merseburg.de', + 
'mportal.europa-uni.de', + 'pacific.demo.vimp.com', + 'slctv.com', + 'tube.isbonline.cn', + 'univideo.uni-kassel.de', + 'ursula2.genetics.emory.edu', + 'ursulablicklevideoarchiv.com', + 'v.agrarumweltpaedagogik.at', + 'video.eplay-tv.de', + 'video.fh-dortmund.de', + 'video.hs-offenburg.de', + 'video.hs-pforzheim.de', + 'video.hspv.nrw.de', + 'video.irtshdf.fr', + 'video.pareygo.de', + 'video.tu-freiberg.de', + 'videocampus.sachsen.de', + 'videoportal.uni-freiburg.de', + 'videoportal.vm.uni-freiburg.de', + 'videos.duoc.cl', + 'videos.uni-paderborn.de', + 'vimp-bemus.udk-berlin.de', + 'vimp.aekwl.de', + 'vimp.hs-mittweida.de', + 'vimp.oth-regensburg.de', + 'vimp.ph-heidelberg.de', + 'vimp.sma-events.com', + 'vimp.weka-fachmedien.de', + 'webtv.univ-montp3.fr', + 'www.b-tu.de/media', + 'www.bigcitytv.de', + 'www.cad-videos.de', + 'www.fh-bielefeld.de/medienportal', + 'www.orvovideo.com', + 'www.rwe.tv', + 'www.wenglor-media.com', + 'www2.univ-sba.dz', + ) + _VALID_URL = r'''(?x)https?://(?P<host>%s)/(?: m/(?P<tmp_id>[0-9a-f]+)| - (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32}) - )''' + (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{32})| + media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{32}&?) + )''' % ('|'.join(map(re.escape, _INSTANCES))) _TESTS = [ { @@ -14,6 +72,7 @@ class VideocampusSachsenIE(InfoExtractor): 'info_dict': { 'id': 'e6b9349905c1628631f175712250f2a1', 'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7', + 'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7', 'ext': 'mp4', }, }, @@ -22,6 +81,7 @@ class VideocampusSachsenIE(InfoExtractor): 'info_dict': { 'id': 'fc99c527e4205b121cb7c74433469262', 'title': 'Was ist selbstgesteuertes Lernen?', + 'description': 'md5:196aa3b0509a526db62f84679522a2f5', 'display_id': 'Was-ist-selbstgesteuertes-Lernen', 'ext': 'mp4', }, @@ -31,43 +91,32 @@ class VideocampusSachsenIE(InfoExtractor): 'info_dict': { 'id': '09d4ed029002eb1bdda610f1103dd54c', 'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht', + 'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58', 'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht', 'ext': 'mp4', }, }, - ] - - def _real_extract(self, url): - video_id, tmp_id, display_id = self._match_valid_url(url).group('id', 'tmp_id', 'display_id') - webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or '' - - if not tmp_id: - video_id = self._html_search_regex( - r'src="https?://videocampus\.sachsen\.de/media/embed\?key=([0-9a-f]+)&', - webpage, 'video_id') - - title = self._html_search_regex( - (r'<h1>(?P<content>[^<]+)</h1>', *self._meta_regex('title')), - webpage, 'title', group='content', fatal=False) - - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8', - video_id, 'mp4', 'm3u8_native', m3u8_id='hls') - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': title, - 'display_id': display_id, - 'formats': formats, - 'subtitles': subtitles - } - - -class VideocampusSachsenEmbedIE(InfoExtractor): - _VALID_URL = r'https?://videocampus.sachsen.de/media/embed\?key=(?P<id>[0-9a-f]+)' - - _TESTS = [ + { + 'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3', + 'info_dict': { + 'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4', + 'id': 
'0183356e41af7bfb83d7667b20d9b6a3', + 'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22', + 'description': 'md5:508958bd93e0ca002ac731d94182a54f', + 'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122', + 'ext': 'mp4', + } + }, + { + 'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c', + 'info_dict': { + 'id': 'c8816f1cc942c12b6cce57c835cffd7c', + 'title': 'Preisverleihung »Produkte des Jahres 2022«', + 'description': 'md5:60c347568ca89aa25b772c4ea564ebd3', + 'display_id': 'Preisverleihung-Produkte-des-Jahres-2022', + 'ext': 'mp4', + }, + }, { 'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262', 'info_dict': { @@ -79,18 +128,41 @@ class VideocampusSachsenEmbedIE(InfoExtractor): ] def _real_extract(self, url): - video_id = self._match_id(url) + host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group( + 'host', 'id', 'tmp_id', 'display_id', 'embed_id') + webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or '' + + if not video_id: + video_id = embed_id or self._html_search_regex( + rf'src="https?://{host}/media/embed.*(?:\?|&)key=([0-9a-f]+)&?', + webpage, 'video_id') - webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<img[^>]*title="([^"<]+)"', webpage, 'title', fatal=False) - formats, subtitles = self._extract_m3u8_formats_and_subtitles( - f'https://videocampus.sachsen.de/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8', - video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + if not (display_id or tmp_id): + # Title, description from embedded page's meta wouldn't be correct + title = self._html_search_regex(r'<img[^>]* title="([^"<]+)"', webpage, 'title', fatal=False) + description = None + else: + title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False) + description = self._html_search_meta( + ('og:description', 'twitter:description', 'description'), webpage, default=None) + + formats, subtitles = [], {} + try: + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8', + video_id, 'mp4', m3u8_id='hls', fatal=True) + except ExtractorError as e: + if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (404, 500): + raise + + formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'}) self._sort_formats(formats) return { 'id': video_id, 'title': title, + 'description': description, + 'display_id': display_id, 'formats': formats, - 'subtitles': subtitles, + 'subtitles': subtitles } diff --git a/yt_dlp/extractor/videodetective.py b/yt_dlp/extractor/videodetective.py index fe70db713..7928a41c2 100644 --- a/yt_dlp/extractor/videodetective.py +++ b/yt_dlp/extractor/videodetective.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from .internetvideoarchive import InternetVideoArchiveIE diff --git a/yt_dlp/extractor/videofyme.py b/yt_dlp/extractor/videofyme.py index cd3f50a63..1d1c8f7b7 100644 --- a/yt_dlp/extractor/videofyme.py +++ b/yt_dlp/extractor/videofyme.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/videomore.py b/yt_dlp/extractor/videomore.py index 
17ef3b1b9..09d12d192 100644 --- a/yt_dlp/extractor/videomore.py +++ b/yt_dlp/extractor/videomore.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/videopress.py b/yt_dlp/extractor/videopress.py index 6376ff096..3c5e27a9d 100644 --- a/yt_dlp/extractor/videopress.py +++ b/yt_dlp/extractor/videopress.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vidio.py b/yt_dlp/extractor/vidio.py index 6bfb8d442..599996bf9 100644 --- a/yt_dlp/extractor/vidio.py +++ b/yt_dlp/extractor/vidio.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( clean_html, diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index a63919ff2..b9845affd 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vidzi.py b/yt_dlp/extractor/vidzi.py deleted file mode 100644 index 42ea4952c..000000000 --- a/yt_dlp/extractor/vidzi.py +++ /dev/null @@ -1,68 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor -from ..utils import ( - decode_packed_codes, - js_to_json, - NO_DEFAULT, - PACKED_CODES_RE, -) - - -class VidziIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' - _TESTS = [{ - 'url': 'http://vidzi.tv/cghql9yq6emu.html', - 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660', - 'info_dict': { - 'id': 'cghql9yq6emu', - 'ext': 'mp4', - 'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html', - 'only_matching': True, - }, { - 'url': 'http://vidzi.cc/cghql9yq6emu.html', - 'only_matching': True, - }, { - 'url': 'https://vidzi.si/rph9gztxj1et.html', - 'only_matching': True, - }, { - 'url': 'http://vidzi.nu/cghql9yq6emu.html', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage( - 'http://vidzi.tv/%s' % video_id, video_id) - title = self._html_search_regex( - r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title') - - codes = [webpage] - codes.extend([ - decode_packed_codes(mobj.group(0)).replace('\\\'', '\'') - for mobj in re.finditer(PACKED_CODES_RE, webpage)]) - for num, code in enumerate(codes, 1): - jwplayer_data = self._parse_json( - self._search_regex( - r'setup\(([^)]+)\)', code, 'jwplayer data', - default=NO_DEFAULT if num == len(codes) else '{}'), - video_id, transform_source=lambda s: js_to_json( - re.sub(r'\s*\+\s*window\[.+?\]', '', s))) - if jwplayer_data: - break - - info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False) - info_dict['title'] = title - - return info_dict diff --git a/yt_dlp/extractor/vier.py b/yt_dlp/extractor/vier.py index 94aa350e7..eab894ab6 100644 --- a/yt_dlp/extractor/vier.py +++ b/yt_dlp/extractor/vier.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import itertools diff --git a/yt_dlp/extractor/viewlift.py b/yt_dlp/extractor/viewlift.py index 4627f66fd..d081a2f12 100644 --- a/yt_dlp/extractor/viewlift.py +++ 
b/yt_dlp/extractor/viewlift.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import json import re diff --git a/yt_dlp/extractor/viidea.py b/yt_dlp/extractor/viidea.py index 0da06818b..157ce4d8f 100644 --- a/yt_dlp/extractor/viidea.py +++ b/yt_dlp/extractor/viidea.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/viki.py b/yt_dlp/extractor/viki.py index 8a930798d..a922b195c 100644 --- a/yt_dlp/extractor/viki.py +++ b/yt_dlp/extractor/viki.py @@ -1,5 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals import hashlib import hmac import json diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 972fb480b..59c5353ab 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import functools import re @@ -8,7 +5,6 @@ import itertools from .common import InfoExtractor from ..compat import ( - compat_kwargs, compat_HTTPError, compat_str, compat_urlparse, @@ -109,7 +105,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs): vimeo_config = self._search_regex( r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', - webpage, 'vimeo config', *args, **compat_kwargs(kwargs)) + webpage, 'vimeo config', *args, **kwargs) if vimeo_config: return self._parse_json(vimeo_config, video_id) @@ -123,7 +119,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): def _parse_config(self, config, video_id): video_data = config['video'] - video_title = video_data['title'] + video_title = video_data.get('title') live_event = video_data.get('live_event') or {} is_live = live_event.get('status') == 'started' request = config.get('request') or {} @@ -1337,7 +1333,7 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): class VimeoWatchLaterIE(VimeoChannelIE): IE_NAME = 'vimeo:watchlater' - IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)' + IE_DESC = 'Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication)' _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater' _TITLE = 'Watch Later' _LOGIN_REQUIRED = True diff --git a/yt_dlp/extractor/vimm.py b/yt_dlp/extractor/vimm.py index 060b92ba6..3522b8e33 100644 --- a/yt_dlp/extractor/vimm.py +++ b/yt_dlp/extractor/vimm.py @@ -1,4 +1,3 @@ -# coding: utf-8 from .common import InfoExtractor diff --git a/yt_dlp/extractor/vimple.py b/yt_dlp/extractor/vimple.py index c74b43766..a8b16dd29 100644 --- a/yt_dlp/extractor/vimple.py +++ b/yt_dlp/extractor/vimple.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import int_or_none diff --git a/yt_dlp/extractor/vine.py b/yt_dlp/extractor/vine.py index e59b1037b..bbf43a83f 100644 --- a/yt_dlp/extractor/vine.py +++ b/yt_dlp/extractor/vine.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/viqeo.py b/yt_dlp/extractor/viqeo.py index be7dfa814..d214223e9 100644 --- a/yt_dlp/extractor/viqeo.py +++ b/yt_dlp/extractor/viqeo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py index ba627ca5b..63b6fd3a1 100644 --- a/yt_dlp/extractor/viu.py +++ 
b/yt_dlp/extractor/viu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json import uuid diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index cbc315961..3b105e6c0 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import collections import re @@ -593,7 +590,6 @@ class VKWallPostIE(VKBaseIE): }], 'params': { 'skip_download': True, - 'usenetrc': True, }, 'skip': 'Requires vk account credentials', }, { @@ -604,9 +600,6 @@ 'title': 'Сергей Горбунов - Wall post 85155021_6319', }, 'playlist_count': 1, - 'params': { - 'usenetrc': True, - }, 'skip': 'Requires vk account credentials', }, { # wall page URL diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py index ae35c976c..c60801417 100644 --- a/yt_dlp/extractor/vlive.py +++ b/yt_dlp/extractor/vlive.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import json diff --git a/yt_dlp/extractor/vodlocker.py b/yt_dlp/extractor/vodlocker.py index 02c9617d2..1c7236ed3 100644 --- a/yt_dlp/extractor/vodlocker.py +++ b/yt_dlp/extractor/vodlocker.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/vodpl.py b/yt_dlp/extractor/vodpl.py index 9e919708e..8af1572d0 100644 --- a/yt_dlp/extractor/vodpl.py +++ b/yt_dlp/extractor/vodpl.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .onet import OnetBaseIE diff --git a/yt_dlp/extractor/vodplatform.py b/yt_dlp/extractor/vodplatform.py index 74d2257e7..2b45dcd86 100644 --- a/yt_dlp/extractor/vodplatform.py +++ b/yt_dlp/extractor/vodplatform.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import unescapeHTML diff --git a/yt_dlp/extractor/voicerepublic.py b/yt_dlp/extractor/voicerepublic.py index a52e40afa..e8cbd0e32 100644 --- a/yt_dlp/extractor/voicerepublic.py +++ b/yt_dlp/extractor/voicerepublic.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py index 37c7d5685..e4570a03a 100644 --- a/yt_dlp/extractor/voicy.py +++ b/yt_dlp/extractor/voicy.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py index a9b66b95c..7ac38a813 100644 --- a/yt_dlp/extractor/voot.py +++ b/yt_dlp/extractor/voot.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/voxmedia.py b/yt_dlp/extractor/voxmedia.py index 661208125..a7bf298aa 100644 --- a/yt_dlp/extractor/voxmedia.py +++ b/yt_dlp/extractor/voxmedia.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from .once import OnceIE from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/vrak.py b/yt_dlp/extractor/vrak.py index daa247cce..198c0a294 100644 --- a/yt_dlp/extractor/vrak.py +++ b/yt_dlp/extractor/vrak.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from 
__future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vrt.py b/yt_dlp/extractor/vrt.py index 10dc94abc..26f48bf67 100644 --- a/yt_dlp/extractor/vrt.py +++ b/yt_dlp/extractor/vrt.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( extract_attributes, diff --git a/yt_dlp/extractor/vrv.py b/yt_dlp/extractor/vrv.py index 00e1006c4..35662753e 100644 --- a/yt_dlp/extractor/vrv.py +++ b/yt_dlp/extractor/vrv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import json import hashlib diff --git a/yt_dlp/extractor/vshare.py b/yt_dlp/extractor/vshare.py index b4874ac39..8ef75d30e 100644 --- a/yt_dlp/extractor/vshare.py +++ b/yt_dlp/extractor/vshare.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vtm.py b/yt_dlp/extractor/vtm.py index 093f1aa69..6381fd311 100644 --- a/yt_dlp/extractor/vtm.py +++ b/yt_dlp/extractor/vtm.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/vuclip.py b/yt_dlp/extractor/vuclip.py index 55e087bdb..0e562983d 100644 --- a/yt_dlp/extractor/vuclip.py +++ b/yt_dlp/extractor/vuclip.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vupload.py b/yt_dlp/extractor/vupload.py index b561f63f7..23ea70c77 100644 --- a/yt_dlp/extractor/vupload.py +++ b/yt_dlp/extractor/vupload.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/vvvvid.py b/yt_dlp/extractor/vvvvid.py index 3faa90fbd..ccc44d08a 100644 --- a/yt_dlp/extractor/vvvvid.py +++ b/yt_dlp/extractor/vvvvid.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/vyborymos.py b/yt_dlp/extractor/vyborymos.py index 4d93666c5..386518795 100644 --- a/yt_dlp/extractor/vyborymos.py +++ b/yt_dlp/extractor/vyborymos.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str diff --git a/yt_dlp/extractor/vzaar.py b/yt_dlp/extractor/vzaar.py index 54f88bba8..7ce0ba9f5 100644 --- a/yt_dlp/extractor/vzaar.py +++ b/yt_dlp/extractor/vzaar.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wakanim.py b/yt_dlp/extractor/wakanim.py index a70a71961..155008f8c 100644 --- a/yt_dlp/extractor/wakanim.py +++ b/yt_dlp/extractor/wakanim.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from urllib.parse import unquote from .common import InfoExtractor diff --git a/yt_dlp/extractor/walla.py b/yt_dlp/extractor/walla.py index 00f081bca..6b954c5cc 100644 --- a/yt_dlp/extractor/walla.py +++ b/yt_dlp/extractor/walla.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wasdtv.py b/yt_dlp/extractor/wasdtv.py index 38c10dc62..bf1ad65b2 100644 --- a/yt_dlp/extractor/wasdtv.py +++ b/yt_dlp/extractor/wasdtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 
-from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py index 9d6ae2870..7274eaa39 100644 --- a/yt_dlp/extractor/washingtonpost.py +++ b/yt_dlp/extractor/washingtonpost.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wat.py b/yt_dlp/extractor/wat.py index 9ff4523db..e6a89adf6 100644 --- a/yt_dlp/extractor/wat.py +++ b/yt_dlp/extractor/wat.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -57,7 +54,7 @@ class WatIE(InfoExtractor): # 'http://www.wat.tv/interface/contentv4s/' + video_id, video_id) video_data = self._download_json( 'https://mediainfo.tf1.fr/mediainfocombo/' + video_id, - video_id, query={'context': 'MYTF1'}) + video_id, query={'context': 'MYTF1', 'pver': '4020003'}) video_info = video_data['media'] error_desc = video_info.get('error_desc') diff --git a/yt_dlp/extractor/watchbox.py b/yt_dlp/extractor/watchbox.py index d19d80102..e41148d4a 100644 --- a/yt_dlp/extractor/watchbox.py +++ b/yt_dlp/extractor/watchbox.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/watchindianporn.py b/yt_dlp/extractor/watchindianporn.py index a86819173..3ded2d1d4 100644 --- a/yt_dlp/extractor/watchindianporn.py +++ b/yt_dlp/extractor/watchindianporn.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index ef58a66c3..d0ad69477 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/webcaster.py b/yt_dlp/extractor/webcaster.py index a858e992c..374fe35cd 100644 --- a/yt_dlp/extractor/webcaster.py +++ b/yt_dlp/extractor/webcaster.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/webofstories.py b/yt_dlp/extractor/webofstories.py index f2b8d19b4..fde9300b0 100644 --- a/yt_dlp/extractor/webofstories.py +++ b/yt_dlp/extractor/webofstories.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py index dafa2af3b..d5a52ce20 100644 --- a/yt_dlp/extractor/weibo.py +++ b/yt_dlp/extractor/weibo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor import json diff --git a/yt_dlp/extractor/weiqitv.py b/yt_dlp/extractor/weiqitv.py index 7e0befd39..c9ff64154 100644 --- a/yt_dlp/extractor/weiqitv.py +++ b/yt_dlp/extractor/weiqitv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/whowatch.py b/yt_dlp/extractor/whowatch.py index e4b610d00..21574471c 100644 --- a/yt_dlp/extractor/whowatch.py +++ b/yt_dlp/extractor/whowatch.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from 
..utils import ( int_or_none, diff --git a/yt_dlp/extractor/willow.py b/yt_dlp/extractor/willow.py index 4d3d62f95..6c71e9a04 100644 --- a/yt_dlp/extractor/willow.py +++ b/yt_dlp/extractor/willow.py @@ -1,4 +1,3 @@ -# coding: utf-8 from ..utils import ExtractorError from .common import InfoExtractor diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py index ea953bf77..263844d72 100644 --- a/yt_dlp/extractor/wimtv.py +++ b/yt_dlp/extractor/wimtv.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -15,14 +12,14 @@ from ..utils import ( class WimTVIE(InfoExtractor): _player = None _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' - _VALID_URL = r'''(?x) + _VALID_URL = r'''(?x: https?://platform.wim.tv/ (?: (?:embed/)?\? |\#/webtv/.+?/ ) (?P<type>vod|live|cast)[=/] - (?P<id>%s).*?''' % _UUID_RE + (?P<id>%s).*?)''' % _UUID_RE _TESTS = [{ # vod stream 'url': 'https://platform.wim.tv/embed/?vod=db29fb32-bade-47b6-a3a6-cb69fe80267a', diff --git a/yt_dlp/extractor/wistia.py b/yt_dlp/extractor/wistia.py index a170966c3..3cbcb4aa0 100644 --- a/yt_dlp/extractor/wistia.py +++ b/yt_dlp/extractor/wistia.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -14,7 +12,7 @@ from ..utils import ( class WistiaBaseIE(InfoExtractor): _VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})' - _VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/' + _VALID_URL_BASE = r'https?://(?:\w+\.)?wistia\.(?:net|com)/(?:embed/)?' _EMBED_BASE_URL = 'http://fast.wistia.com/embed/' def _download_embed_config(self, config_type, config_id, referer): @@ -175,7 +173,7 @@ class WistiaIE(WistiaBaseIE): class WistiaPlaylistIE(WistiaBaseIE): - _VALID_URL = r'%splaylists/%s' % (WistiaIE._VALID_URL_BASE, WistiaIE._VALID_ID_REGEX) + _VALID_URL = r'%splaylists/%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX) _TEST = { 'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc', diff --git a/yt_dlp/extractor/worldstarhiphop.py b/yt_dlp/extractor/worldstarhiphop.py index 82587b4ce..c6948a1eb 100644 --- a/yt_dlp/extractor/worldstarhiphop.py +++ b/yt_dlp/extractor/worldstarhiphop.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py index 3003a0f10..6349e5326 100644 --- a/yt_dlp/extractor/wppilot.py +++ b/yt_dlp/extractor/wppilot.py @@ -1,5 +1,3 @@ -# coding: utf-8 - from .common import InfoExtractor from ..utils import ( try_get, diff --git a/yt_dlp/extractor/wsj.py b/yt_dlp/extractor/wsj.py index 67236f377..8be3645e3 100644 --- a/yt_dlp/extractor/wsj.py +++ b/yt_dlp/extractor/wsj.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/wwe.py b/yt_dlp/extractor/wwe.py index bebc77bb5..9bbd477c3 100644 --- a/yt_dlp/extractor/wwe.py +++ b/yt_dlp/extractor/wwe.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xbef.py b/yt_dlp/extractor/xbef.py index 4c41e98b2..ac69528a3 100644 --- a/yt_dlp/extractor/xbef.py +++ b/yt_dlp/extractor/xbef.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote diff --git a/yt_dlp/extractor/xboxclips.py b/yt_dlp/extractor/xboxclips.py 
index 9bac982f8..235b567d9 100644 --- a/yt_dlp/extractor/xboxclips.py +++ b/yt_dlp/extractor/xboxclips.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xfileshare.py b/yt_dlp/extractor/xfileshare.py index cd97c77dc..28b6ecb6e 100644 --- a/yt_dlp/extractor/xfileshare.py +++ b/yt_dlp/extractor/xfileshare.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xhamster.py b/yt_dlp/extractor/xhamster.py index 9d4ed47d4..ff15d3707 100644 --- a/yt_dlp/extractor/xhamster.py +++ b/yt_dlp/extractor/xhamster.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/xiami.py b/yt_dlp/extractor/xiami.py index 769aab331..71b2956a8 100644 --- a/yt_dlp/extractor/xiami.py +++ b/yt_dlp/extractor/xiami.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_urllib_parse_unquote from ..utils import int_or_none diff --git a/yt_dlp/extractor/ximalaya.py b/yt_dlp/extractor/ximalaya.py index 802d1bb1b..c3447fba0 100644 --- a/yt_dlp/extractor/ximalaya.py +++ b/yt_dlp/extractor/ximalaya.py @@ -1,7 +1,3 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/xinpianchang.py b/yt_dlp/extractor/xinpianchang.py index 9832d2398..96e23bb8d 100644 --- a/yt_dlp/extractor/xinpianchang.py +++ b/yt_dlp/extractor/xinpianchang.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( int_or_none, diff --git a/yt_dlp/extractor/xminus.py b/yt_dlp/extractor/xminus.py index 36e5ead1e..5f113810f 100644 --- a/yt_dlp/extractor/xminus.py +++ b/yt_dlp/extractor/xminus.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import time diff --git a/yt_dlp/extractor/xnxx.py b/yt_dlp/extractor/xnxx.py index 27f991627..14beb1347 100644 --- a/yt_dlp/extractor/xnxx.py +++ b/yt_dlp/extractor/xnxx.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xstream.py b/yt_dlp/extractor/xstream.py index 792843df5..42bffb071 100644 --- a/yt_dlp/extractor/xstream.py +++ b/yt_dlp/extractor/xstream.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xtube.py b/yt_dlp/extractor/xtube.py index abd319188..93a6a3f33 100644 --- a/yt_dlp/extractor/xtube.py +++ b/yt_dlp/extractor/xtube.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/xuite.py b/yt_dlp/extractor/xuite.py index 0276c0dbb..52423a327 100644 --- a/yt_dlp/extractor/xuite.py +++ b/yt_dlp/extractor/xuite.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index d5261b6ab..50b939496 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/xxxymovies.py b/yt_dlp/extractor/xxxymovies.py index 0d536015c..e3e3a9fe6 
100644 --- a/yt_dlp/extractor/xxxymovies.py +++ b/yt_dlp/extractor/xxxymovies.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( parse_duration, diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py index 20504de2c..3fe6192bf 100644 --- a/yt_dlp/extractor/yahoo.py +++ b/yt_dlp/extractor/yahoo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import itertools import re diff --git a/yt_dlp/extractor/yandexdisk.py b/yt_dlp/extractor/yandexdisk.py index c15f3a4f3..d87a7f9be 100644 --- a/yt_dlp/extractor/yandexdisk.py +++ b/yt_dlp/extractor/yandexdisk.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor diff --git a/yt_dlp/extractor/yandexmusic.py b/yt_dlp/extractor/yandexmusic.py index a3558cc12..8ea416a1d 100644 --- a/yt_dlp/extractor/yandexmusic.py +++ b/yt_dlp/extractor/yandexmusic.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import hashlib import itertools diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py index 7d3966bf1..37ff514b3 100644 --- a/yt_dlp/extractor/yandexvideo.py +++ b/yt_dlp/extractor/yandexvideo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools import re diff --git a/yt_dlp/extractor/yapfiles.py b/yt_dlp/extractor/yapfiles.py index cfb368de9..8fabdf81c 100644 --- a/yt_dlp/extractor/yapfiles.py +++ b/yt_dlp/extractor/yapfiles.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/yesjapan.py b/yt_dlp/extractor/yesjapan.py index 681338c96..b45fa8f14 100644 --- a/yt_dlp/extractor/yesjapan.py +++ b/yt_dlp/extractor/yesjapan.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( HEADRequest, diff --git a/yt_dlp/extractor/yinyuetai.py b/yt_dlp/extractor/yinyuetai.py index 1fd8d35c6..b28c39380 100644 --- a/yt_dlp/extractor/yinyuetai.py +++ b/yt_dlp/extractor/yinyuetai.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ExtractorError diff --git a/yt_dlp/extractor/ynet.py b/yt_dlp/extractor/ynet.py index c4ae4d88e..444785947 100644 --- a/yt_dlp/extractor/ynet.py +++ b/yt_dlp/extractor/ynet.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re import json diff --git a/yt_dlp/extractor/youjizz.py b/yt_dlp/extractor/youjizz.py index 111623ffe..cd12be500 100644 --- a/yt_dlp/extractor/youjizz.py +++ b/yt_dlp/extractor/youjizz.py @@ -1,6 +1,3 @@ -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( determine_ext, diff --git a/yt_dlp/extractor/youku.py b/yt_dlp/extractor/youku.py index b50579915..45856fbbe 100644 --- a/yt_dlp/extractor/youku.py +++ b/yt_dlp/extractor/youku.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import random import re import string diff --git a/yt_dlp/extractor/younow.py b/yt_dlp/extractor/younow.py index 583aea38d..76d89f3ce 100644 --- a/yt_dlp/extractor/younow.py +++ b/yt_dlp/extractor/younow.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import itertools from .common import InfoExtractor diff --git a/yt_dlp/extractor/youporn.py b/yt_dlp/extractor/youporn.py index 
5feb568e7..5aea82295 100644 --- a/yt_dlp/extractor/youporn.py +++ b/yt_dlp/extractor/youporn.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/yourporn.py b/yt_dlp/extractor/yourporn.py index 98347491e..38f42a991 100644 --- a/yt_dlp/extractor/yourporn.py +++ b/yt_dlp/extractor/yourporn.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - from .common import InfoExtractor from ..compat import compat_str from ..utils import ( diff --git a/yt_dlp/extractor/yourupload.py b/yt_dlp/extractor/yourupload.py index 9fa772838..def63293a 100644 --- a/yt_dlp/extractor/yourupload.py +++ b/yt_dlp/extractor/yourupload.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import urljoin diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index f284487b8..5546aa9a3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1,7 +1,4 @@ -# coding: utf-8 - -from __future__ import unicode_literals - +import base64 import calendar import copy import datetime @@ -14,9 +11,9 @@ import os.path import random import re import sys +import threading import time import traceback -import threading from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( @@ -31,12 +28,14 @@ from ..compat import ( ) from ..jsinterp import JSInterpreter from ..utils import ( + NO_DEFAULT, + ExtractorError, bug_reports_message, + classproperty, clean_html, datetime_from_str, dict_get, error_to_compat_str, - ExtractorError, float_or_none, format_field, get_first, @@ -46,7 +45,6 @@ from ..utils import ( js_to_json, mimetype2ext, network_exceptions, - NO_DEFAULT, orderedSet, parse_codecs, parse_count, @@ -72,7 +70,6 @@ from ..utils import ( variadic, ) - # any clients starting with _ cannot be explicity requested by the user INNERTUBE_CLIENTS = { 'web': { @@ -292,7 +289,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): # invidious-redirect websites r'(?:www\.)?redirect\.invidious\.io', r'(?:(?:www|dev)\.)?invidio\.us', - # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md + # Invidious instances taken from https://github.com/iv-org/documentation/blob/master/docs/instances.md r'(?:www\.)?invidious\.pussthecat\.org', r'(?:www\.)?invidious\.zee\.li', r'(?:www\.)?invidious\.ethibox\.fr', @@ -352,6 +349,16 @@ class YoutubeBaseInfoExtractor(InfoExtractor): r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion', r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion', r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion', + # piped instances from https://github.com/TeamPiped/Piped/wiki/Instances + r'(?:www\.)?piped\.kavin\.rocks', + r'(?:www\.)?piped\.silkky\.cloud', + r'(?:www\.)?piped\.tokhmi\.xyz', + r'(?:www\.)?piped\.moomoo\.me', + r'(?:www\.)?il\.ax', + r'(?:www\.)?piped\.syncpundit\.com', + r'(?:www\.)?piped\.mha\.fi', + r'(?:www\.)?piped\.mint\.lgbt', + r'(?:www\.)?piped\.privacy\.com\.de', ) def _initialize_consent(self): @@ -387,9 +394,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): self._check_login_required() def _check_login_required(self): - if (self._LOGIN_REQUIRED - and self.get_param('cookiefile') is None - and self.get_param('cookiesfrombrowser') is None): + if self._LOGIN_REQUIRED and not self._cookies_passed: self.raise_login_required('Login details are needed to download this 
content', method='cookies') _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' @@ -452,7 +457,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return None # SAPISIDHASH algorithm from https://stackoverflow.com/a/32065323 sapisidhash = hashlib.sha1( - f'{time_now} {self._SAPISID} {origin}'.encode('utf-8')).hexdigest() + f'{time_now} {self._SAPISID} {origin}'.encode()).hexdigest() return f'SAPISIDHASH {time_now}_{sapisidhash}' def _call_api(self, ep, query, video_id, fatal=True, headers=None, @@ -466,14 +471,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if headers: real_headers.update(headers) return self._download_json( - 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep), + f'https://{api_hostname or self._get_innertube_host(default_client)}/youtubei/v1/{ep}', video_id=video_id, fatal=fatal, note=note, errnote=errnote, data=json.dumps(data).encode('utf8'), headers=real_headers, query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'}) def extract_yt_initial_data(self, item_id, webpage, fatal=True): data = self._search_regex( - (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE), + (fr'{self._YT_INITIAL_DATA_RE}\s*{self._YT_INITIAL_BOUNDARY_RE}', self._YT_INITIAL_DATA_RE), webpage, 'yt initial data', fatal=fatal) if data: return self._parse_json(data, item_id, fatal=fatal) @@ -657,7 +662,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): warnings.append([alert_type, alert_message]) for alert_type, alert_message in (warnings + errors[:-1]): - self.report_warning('YouTube said: %s - %s' % (alert_type, alert_message), only_once=only_once) + self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once) if errors: raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected) @@ -2204,7 +2209,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:2ef1d002cad520f65825346e2084e49d', }, 'params': {'skip_download': True} - }, + }, { + # Story. Requires specific player params to work. 
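+ # (the params in question are the '8AEB' value sent as 'params' in the player request and appended as '&pp=8AEB' to the webpage URL; see the changes further down)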
+ # Note: stories get removed after some period of time + 'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA', + 'info_dict': { + 'id': 'yN3x1t3sieA', + 'ext': 'mp4', + 'uploader': 'Linus Tech Tips', + 'duration': 13, + 'channel': 'Linus Tech Tips', + 'playable_in_embed': True, + 'tags': [], + 'age_limit': 0, + 'uploader_url': 'http://www.youtube.com/user/LinusTechTips', + 'upload_date': '20220402', + 'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp', + 'title': 'Story', + 'live_status': 'not_live', + 'uploader_id': 'LinusTechTips', + 'view_count': int, + 'description': '', + 'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw', + 'categories': ['Science & Technology'], + 'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw', + 'availability': 'unlisted', + } + } ] @classmethod @@ -2214,10 +2245,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): qs = parse_qs(url) if qs.get('list', [None])[0]: return False - return super(YoutubeIE, cls).suitable(url) + return super().suitable(url) def __init__(self, *args, **kwargs): - super(YoutubeIE, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self._code_cache = {} self._player_cache = {} @@ -2413,8 +2444,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_id = self._extract_player_info(player_url) # Read from filesystem cache - func_id = 'js_%s_%s' % ( - player_id, self._signature_cache_id(example_sig)) + func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}' assert os.path.basename(func_id) == func_id cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id) @@ -2441,7 +2471,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): starts = '' if start == 0 else str(start) ends = (':%d' % (end + step)) if end + step >= 0 else ':' steps = '' if step == 1 else (':%d' % step) - return 's[%s%s%s]' % (starts, ends, steps) + return f's[{starts}{ends}{steps}]' step = None # Quelch pyflakes warnings - start will be set when step is set @@ -2603,7 +2633,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # cpn generation algorithm is reverse engineered from base.js. # In fact it works even with dummy cpn. 
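# (a cpn is 16 characters drawn from the 64-character alphabet below; '& 63' masks each random value to a valid index)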
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' - cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))) + cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)) qs.update({ 'ver': ['2'], @@ -2714,7 +2744,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_yt_initial_variable(self, webpage, regex, video_id, name): return self._parse_json(self._search_regex( - (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE), + (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', regex), webpage, name, default='{}'), video_id, fatal=False) def _extract_comment(self, comment_renderer, parent=None): @@ -2812,8 +2842,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): comment_entries_iter = self._comment_entries( comment_replies_renderer, ytcfg, video_id, parent=comment.get('id'), tracker=tracker) - for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))): - yield reply_comment + yield from itertools.islice(comment_entries_iter, min( + max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))) # Keeps track of counts across recursive calls if not tracker: @@ -2837,12 +2867,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4) continuation = self._extract_continuation(root_continuation_data) - message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1) - if message and not parent: - self.report_warning(message, video_id=video_id) response = None + is_forced_continuation = False is_first_continuation = parent is None + if is_first_continuation and not continuation: + # Sometimes you can get comments by generating the continuation yourself, + # even if YouTube initially reports them being disabled - e.g. stories comments. + # Note: if the comment section is actually disabled, YouTube may return a response with + # required check_get_keys missing. So we will disable that check initially in this case. 
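+ # (the token is a protobuf blob, base64-encoded by _generate_comment_continuation below)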
+ continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id)) + is_forced_continuation = True for page_num in itertools.count(0): if not continuation: @@ -2863,8 +2898,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): response = self._extract_response( item_id=None, query=continuation, ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix, - check_get_keys='onResponseReceivedEndpoints') - + check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None) + is_forced_continuation = False continuation_contents = traverse_obj( response, 'onResponseReceivedEndpoints', expected_type=list, default=[]) @@ -2889,6 +2924,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if continuation: break + message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1) + if message and not parent and tracker['running_total'] == 0: + self.report_warning(f'Youtube said: {message}', video_id=video_id, only_once=True) + + @staticmethod + def _generate_comment_continuation(video_id): + """ + Generates initial comment section continuation token from given video id + """ + token = f'\x12\r\x12\x0b{video_id}\x18\x062\'"\x11"\x0b{video_id}0\x00x\x020\x00B\x10comments-section' + return base64.b64encode(token.encode()).decode() + def _get_comments(self, ytcfg, video_id, contents, webpage): """Entry for comment extraction""" def _real_comment_extract(contents): @@ -2942,7 +2989,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): headers = self.generate_api_headers( ytcfg=player_ytcfg, account_syncid=syncid, session_index=session_index, default_client=client) - yt_query = {'videoId': video_id} + yt_query = { + 'videoId': video_id, + 'params': '8AEB' # enable stories + } yt_query.update(self._generate_player_context(sts)) return self._extract_response( item_id=video_id, ep='player', query=yt_query, @@ -2955,7 +3005,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): requested_clients = [] default = ['android', 'web'] allowed_clients = sorted( - [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'], + (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'), key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) for client in self._configuration_arg('player_client'): if client in allowed_clients: @@ -3113,7 +3163,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)}) except ExtractorError as e: self.report_warning( - f'nsig extraction failed: You may experience throttling for some formats\n' + 'nsig extraction failed: You may experience throttling for some formats\n' f'n = {query["n"][0]} ; player = {player_url}\n{e}', only_once=True) throttled = True @@ -3132,7 +3182,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Eg: __2ABJjxzNo, ySuUZEjARPY is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500) if is_damaged: - self.report_warning(f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) + self.report_warning( + f'{video_id}: Some formats are possibly damaged. 
They will be deprioritized', only_once=True) dct = { 'asr': int_or_none(fmt.get('audioSampleRate')), 'filesize': int_or_none(fmt.get('contentLength')), @@ -3142,7 +3193,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ' (default)' if language_preference > 0 else ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '), - 'source_preference': -10 if throttled else -1, + # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372 + 'source_preference': -10 if throttled else -5 if itag == '22' else -1, 'fps': int_or_none(fmt.get('fps')) or None, 'height': height, 'quality': q(quality), @@ -3180,6 +3232,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): skip_manifests = self._configuration_arg('skip') if not self.get_param('youtube_include_hls_manifest', True): skip_manifests.append('hls') + if not self.get_param('youtube_include_dash_manifest', True): + skip_manifests.append('dash') get_dash = 'dash' not in skip_manifests and ( not is_live or live_from_start or self._configuration_arg('include_live_dash')) get_hls = not live_from_start and 'hls' not in skip_manifests @@ -3257,7 +3311,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): webpage = None if 'webpage' not in self._configuration_arg('player_skip'): webpage = self._download_webpage( - webpage_url + '&bpctr=9999999999&has_verified=1', video_id, fatal=False) + webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False) master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg() @@ -3408,13 +3462,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): original_thumbnails = thumbnails.copy() # The best resolution thumbnails sometimes does not appear in the webpage - # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340 + # See: https://github.com/yt-dlp/yt-dlp/issues/340 # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029> thumbnail_names = [ - 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3', - 'hqdefault', 'hq1', 'hq2', 'hq3', '0', - 'mqdefault', 'mq1', 'mq2', 'mq3', - 'default', '1', '2', '3' + # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants + # in resolution, these are not the custom thumbnail. 
So de-prioritize them + 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', + 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3' ] n_thumbnail_names = len(thumbnail_names) thumbnails.extend({ @@ -3592,17 +3646,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor): headers=self.generate_api_headers(ytcfg=master_ytcfg), note='Downloading initial data API JSON') - try: - # This will error if there is no livechat + try: # This will error if there is no livechat initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'] + except (KeyError, IndexError, TypeError): + pass + else: info.setdefault('subtitles', {})['live_chat'] = [{ - 'url': 'https://www.youtube.com/watch?v=%s' % video_id, # url is needed to set cookies + 'url': f'https://www.youtube.com/watch?v={video_id}', # url is needed to set cookies 'video_id': video_id, 'ext': 'json', 'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay', }] - except (KeyError, IndexError, TypeError): - pass if initial_data: info['chapters'] = ( @@ -3624,7 +3678,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN': info['location'] = stl else: - mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl) + mobj = re.search(r'(.+?)\s*S(\d+)\s*•?\s*E(\d+)', stl) if mobj: info.update({ 'series': mobj.group(1), @@ -3702,7 +3756,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): unified_strdate(get_first(microformats, 'uploadDate')) or unified_strdate(search_meta('uploadDate'))) if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'): - upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') + upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date info['upload_date'] = upload_date for to, frm in fallbacks.items(): @@ -3865,8 +3919,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): # TODO: add support for nested playlists so each shelf is processed # as separate playlist # TODO: this includes only first N items - for entry in self._grid_entries(renderer): - yield entry + yield from self._grid_entries(renderer) renderer = content.get('horizontalListRenderer') if renderer: # TODO @@ -3886,8 +3939,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): title = self._get_text(shelf_renderer, 'title') yield self.url_result(shelf_url, video_title=title) # Shelf may not contain shelf URL, fallback to extraction from content - for entry in self._shelf_entries_from_content(shelf_renderer): - yield entry + yield from self._shelf_entries_from_content(shelf_renderer) def _playlist_entries(self, video_list_renderer): for content in video_list_renderer['contents']: @@ -3965,8 +4017,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): renderer = content.get('backstagePostThreadRenderer') if not isinstance(renderer, dict): continue - for entry in self._post_thread_entries(renderer): - yield entry + yield from self._post_thread_entries(renderer) r''' # unused def _rich_grid_entries(self, contents): @@ -4036,8 +4087,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): parent_renderer = ( try_get(tab_content, lambda x: x['sectionListRenderer'], dict) or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {}) - for entry in extract_entries(parent_renderer): 
- yield entry + yield from extract_entries(parent_renderer) continuation = continuation_list[0] for page_num in itertools.count(1): @@ -4046,7 +4096,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): headers = self.generate_api_headers( ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data) response = self._extract_response( - item_id='%s page %s' % (item_id, page_num), + item_id=f'{item_id} page {page_num}', query=continuation, headers=headers, ytcfg=ytcfg, check_get_keys=('continuationContents', 'onResponseReceivedActions', 'onResponseReceivedEndpoints')) @@ -4070,8 +4120,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): continue continuation_renderer = value continuation_list = [None] - for entry in known_continuation_renderers[key](continuation_renderer): - yield entry + yield from known_continuation_renderers[key](continuation_renderer) continuation = continuation_list[0] or self._extract_continuation(continuation_renderer) break if continuation_renderer: @@ -4097,8 +4146,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): continue video_items_renderer = {known_renderers[key][1]: continuation_items} continuation_list = [None] - for entry in known_renderers[key][0](video_items_renderer): - yield entry + yield from known_renderers[key][0](video_items_renderer) continuation = continuation_list[0] or self._extract_continuation(video_items_renderer) break if video_items_renderer: @@ -4223,7 +4271,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): self._extract_visitor_data(data, ytcfg)), **metadata) - def _extract_mix_playlist(self, playlist, playlist_id, data, ytcfg): + def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg): first_id = last_id = response = None for page_num in itertools.count(1): videos = list(self._playlist_entries(playlist)) @@ -4232,11 +4280,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): start = next((i for i, v in enumerate(videos) if v['id'] == last_id), -1) + 1 if start >= len(videos): return - for video in videos[start:]: - if video['id'] == first_id: - self.to_screen('First video %s found again; Assuming end of Mix' % first_id) - return - yield video + yield from videos[start:] first_id = first_id or videos[0]['id'] last_id = videos[-1]['id'] watch_endpoint = try_get( @@ -4267,13 +4311,18 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): playlist_url = urljoin(url, try_get( playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], compat_str)) - if playlist_url and playlist_url != url: + + # Some playlists are unviewable but YouTube still provides a link to the (broken) playlist page [1] + # [1] MLCT, RLTDwFCb4jeqaKWnciAYM-ZVHg + is_known_unviewable = re.fullmatch(r'MLCT|RLTD[\w-]{22}', playlist_id) + + if playlist_url and playlist_url != url and not is_known_unviewable: return self.url_result( playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title) return self.playlist_result( - self._extract_mix_playlist(playlist, playlist_id, data, ytcfg), + self._extract_inline_playlist(playlist, playlist_id, data, ytcfg), playlist_id=playlist_id, playlist_title=title) def _extract_availability(self, data): @@ -4470,7 +4519,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): ('continuationContents', ), ) display_id = f'query "{query}"' - check_get_keys = tuple(set(keys[0] for keys in content_keys)) + check_get_keys = tuple({keys[0] for keys in content_keys}) ytcfg = 
self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {} self._report_playlist_authcheck(ytcfg, fatal=False) @@ -5180,8 +5229,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): @classmethod def suitable(cls, url): - return False if YoutubeIE.suitable(url) else super( - YoutubeTabIE, cls).suitable(url) + return False if YoutubeIE.suitable(url) else super().suitable(url) _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$') @@ -5228,7 +5276,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): # Handle both video/playlist URLs qs = parse_qs(url) - video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')] + video_id, playlist_id = (qs.get(key, [None])[0] for key in ('v', 'list')) if not video_id and mobj['not_channel'].startswith('watch'): if not playlist_id: @@ -5414,7 +5462,7 @@ class YoutubePlaylistIE(InfoExtractor): qs = parse_qs(url) if qs.get('v', [None])[0]: return False - return super(YoutubePlaylistIE, cls).suitable(url) + return super().suitable(url) def _real_extract(self, url): playlist_id = self._match_id(url) @@ -5741,16 +5789,17 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): class YoutubeFeedsInfoExtractor(InfoExtractor): """ Base class for feed extractors - Subclasses must define the _FEED_NAME property. + Subclasses must re-define the _FEED_NAME property. """ _LOGIN_REQUIRED = True + _FEED_NAME = 'feeds' def _real_initialize(self): YoutubeBaseInfoExtractor._check_login_required(self) - @property + @classproperty def IE_NAME(self): - return 'youtube:%s' % self._FEED_NAME + return f'youtube:{self._FEED_NAME}' def _real_extract(self, url): return self.url_result( @@ -5811,6 +5860,22 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): }] +class YoutubeStoriesIE(InfoExtractor): + IE_DESC = 'YouTube channel stories; "ytstories:" prefix' + IE_NAME = 'youtube:stories' + _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$' + _TESTS = [{ + 'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = f'RLTD{self._match_id(url)}' + return self.url_result( + f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', + ie=YoutubeTabIE, video_id=playlist_id) + + class YoutubeTruncatedURLIE(InfoExtractor): IE_NAME = 'youtube:truncated_url' IE_DESC = False # Do not list @@ -5883,5 +5948,5 @@ class YoutubeTruncatedIDIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) raise ExtractorError( - 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url), + f'Incomplete YouTube ID {video_id}. 
URL {url} looks truncated.', expected=True) diff --git a/yt_dlp/extractor/zapiks.py b/yt_dlp/extractor/zapiks.py index 161b011ab..a1546fd88 100644 --- a/yt_dlp/extractor/zapiks.py +++ b/yt_dlp/extractor/zapiks.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py index 8614ca23d..16f827a7e 100644 --- a/yt_dlp/extractor/zattoo.py +++ b/yt_dlp/extractor/zattoo.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from uuid import uuid4 diff --git a/yt_dlp/extractor/zdf.py b/yt_dlp/extractor/zdf.py index 5f4d26622..a388ff562 100644 --- a/yt_dlp/extractor/zdf.py +++ b/yt_dlp/extractor/zdf.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py index 9e411d83f..9ff36052e 100644 --- a/yt_dlp/extractor/zee5.py +++ b/yt_dlp/extractor/zee5.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import json from .common import InfoExtractor @@ -96,14 +93,14 @@ class Zee5IE(InfoExtractor): def _perform_login(self, username, password): if len(username) == 10 and username.isdigit() and self._USER_TOKEN is None: self.report_login() - otp_request_json = self._download_json('https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{}'.format(username), + otp_request_json = self._download_json(f'https://b2bapi.zee5.com/device/sendotp_v1.php?phoneno=91{username}', None, note='Sending OTP') if otp_request_json['code'] == 0: self.to_screen(otp_request_json['message']) else: raise ExtractorError(otp_request_json['message'], expected=True) otp_code = self._get_tfa_info('OTP') - otp_verify_json = self._download_json('https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{}&otp={}&guest_token={}&platform=web'.format(username, otp_code, self._DEVICE_ID), + otp_verify_json = self._download_json(f'https://b2bapi.zee5.com/device/verifyotp_v1.php?phoneno=91{username}&otp={otp_code}&guest_token={self._DEVICE_ID}&platform=web', None, note='Verifying OTP', fatal=False) if not otp_verify_json: raise ExtractorError('Unable to verify OTP.', expected=True) @@ -227,13 +224,13 @@ class Zee5SeriesIE(InfoExtractor): 'X-Access-Token': access_token_request['token'], 'Referer': 'https://www.zee5.com/', } - show_url = 'https://gwapi.zee5.com/content/tvshow/{}?translation=en&country=IN'.format(show_id) + show_url = f'https://gwapi.zee5.com/content/tvshow/{show_id}?translation=en&country=IN' page_num = 0 show_json = self._download_json(show_url, video_id=show_id, headers=headers) for season in show_json.get('seasons') or []: season_id = try_get(season, lambda x: x['id'], compat_str) - next_url = 'https://gwapi.zee5.com/content/tvshow/?season_id={}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100'.format(season_id) + next_url = f'https://gwapi.zee5.com/content/tvshow/?season_id={season_id}&type=episode&translation=en&country=IN&on_air=false&asset_subtype=tvshow&page=1&limit=100' while next_url: page_num += 1 episodes_json = self._download_json( diff --git a/yt_dlp/extractor/zhihu.py b/yt_dlp/extractor/zhihu.py index 278a9438e..70eb3ccd1 100644 --- a/yt_dlp/extractor/zhihu.py +++ b/yt_dlp/extractor/zhihu.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import format_field, float_or_none, 
int_or_none diff --git a/yt_dlp/extractor/zingmp3.py b/yt_dlp/extractor/zingmp3.py index 419bf30d8..8b2d842ff 100644 --- a/yt_dlp/extractor/zingmp3.py +++ b/yt_dlp/extractor/zingmp3.py @@ -1,131 +1,77 @@ -# coding: utf-8 -from __future__ import unicode_literals - +import functools import hashlib import hmac +import json import urllib.parse from .common import InfoExtractor from ..utils import ( + OnDemandPagedList, int_or_none, traverse_obj, + urljoin, ) class ZingMp3BaseIE(InfoExtractor): - _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:%s))/[^/]+/(?P<id>\w+)(?:\.html|\?)' + _VALID_URL_TMPL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>(?:%s))/[^/?#]+/(?P<id>\w+)(?:\.html|\?)' _GEO_COUNTRIES = ['VN'] _DOMAIN = 'https://zingmp3.vn' - _SLUG_API = { + _PER_PAGE = 50 + _API_SLUGS = { + # Audio/video 'bai-hat': '/api/v2/page/get/song', 'embed': '/api/v2/page/get/song', 'video-clip': '/api/v2/page/get/video', + 'lyric': '/api/v2/lyric/get/lyric', + 'song-streaming': '/api/v2/song/get/streaming', + # Playlist 'playlist': '/api/v2/page/get/playlist', 'album': '/api/v2/page/get/playlist', - 'lyric': '/api/v2/lyric/get/lyric', - 'song_streaming': '/api/v2/song/get/streaming', + # Chart + 'zing-chart': '/api/v2/page/get/chart-home', + 'zing-chart-tuan': '/api/v2/page/get/week-chart', + 'moi-phat-hanh': '/api/v2/page/get/newrelease-chart', + 'the-loai-video': '/api/v2/video/get/list', + # User + 'info-artist': '/api/v2/page/get/artist', + 'user-list-song': '/api/v2/song/get/list', + 'user-list-video': '/api/v2/video/get/list', } - _API_KEY = '88265e23d4284f25963e6eedac8fbfa3' - _SECRET_KEY = b'2aa2d1c561e809b267f3638c4a307aab' - - def _extract_item(self, item, song_id, type_url, fatal): - item_id = item.get('encodeId') or song_id - title = item.get('title') or item.get('alias') - - if type_url == 'video-clip': - source = item.get('streaming') - else: - api = self.get_api_with_signature(name_api=self._SLUG_API.get('song_streaming'), param={'id': item_id}) - source = self._download_json(api, video_id=item_id).get('data') - - formats = [] - for k, v in (source or {}).items(): - if not v: - continue - if k in ('mp4', 'hls'): - for res, video_url in v.items(): - if not video_url: - continue - if k == 'hls': - formats.extend(self._extract_m3u8_formats( - video_url, item_id, 'mp4', - 'm3u8_native', m3u8_id=k, fatal=False)) - elif k == 'mp4': - formats.append({ - 'format_id': 'mp4-' + res, - 'url': video_url, - 'height': int_or_none(self._search_regex( - r'^(\d+)p', res, 'resolution', default=None)), - }) - continue - elif v == 'VIP': - continue - formats.append({ - 'ext': 'mp3', - 'format_id': k, - 'tbr': int_or_none(k), - 'url': self._proto_relative_url(v), - 'vcodec': 'none', - }) - if not formats: - if not fatal: - return - msg = item.get('msg') - if msg == 'Sorry, this content is not available in your country.': - self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) - self.raise_no_formats(msg, expected=True) - self._sort_formats(formats) - - lyric = item.get('lyric') - if not lyric: - api = self.get_api_with_signature(name_api=self._SLUG_API.get("lyric"), param={'id': item_id}) - info_lyric = self._download_json(api, video_id=item_id) - lyric = traverse_obj(info_lyric, ('data', 'file')) - subtitles = { - 'origin': [{ - 'url': lyric, - }], - } if lyric else None - - album = item.get('album') or {} - - return { - 'id': item_id, - 'title': title, - 'formats': formats, - 'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'), - 'subtitles': subtitles, 
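The signing scheme visible in the removed get_api_with_signature (and kept by the new _api_url just below) is worth spelling out: the sorted request parameters are hashed with SHA-256, and that digest, prefixed with the API path, is authenticated with HMAC-SHA512; both the key and the signature ride along as query arguments. A standalone sketch using the key constants from the code above:

import hashlib
import hmac
import urllib.parse

def signed_api_url(domain, api_slug, params, api_key, secret_key):
    params = {**params, 'ctime': '1'}
    # sha256 over the concatenated 'k=v' pairs of the sorted params
    sha256 = hashlib.sha256(
        ''.join(f'{k}={v}' for k, v in sorted(params.items())).encode()).hexdigest()
    query = {
        **params,
        'apiKey': api_key,
        # HMAC-SHA512 of the API path plus the digest, keyed with the secret
        'sig': hmac.new(secret_key, f'{api_slug}{sha256}'.encode(), hashlib.sha512).hexdigest(),
    }
    return f'{domain}{api_slug}?{urllib.parse.urlencode(query)}'

print(signed_api_url('https://zingmp3.vn', '/api/v2/page/get/song',
                     {'id': 'ZWZB9WAB'}, '88265e23d4284f25963e6eedac8fbfa3',
                     b'2aa2d1c561e809b267f3638c4a307aab'))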
- 'duration': int_or_none(item.get('duration')), - 'track': title, - 'artist': traverse_obj(item, 'artistsNames', 'artists_names'), - 'album': traverse_obj(album, 'name', 'title'), - 'album_artist': traverse_obj(album, 'artistsNames', 'artists_names'), + def _api_url(self, url_type, params): + api_slug = self._API_SLUGS[url_type] + params.update({'ctime': '1'}) + sha256 = hashlib.sha256( + ''.join(f'{k}={v}' for k, v in sorted(params.items())).encode()).hexdigest() + data = { + **params, + 'apiKey': '88265e23d4284f25963e6eedac8fbfa3', + 'sig': hmac.new( + b'2aa2d1c561e809b267f3638c4a307aab', f'{api_slug}{sha256}'.encode(), hashlib.sha512).hexdigest(), } + return f'{self._DOMAIN}{api_slug}?{urllib.parse.urlencode(data)}' - def _real_initialize(self): - if not self.get_param('cookiefile') and not self.get_param('cookiesfrombrowser'): - self._request_webpage(self.get_api_with_signature(name_api=self._SLUG_API['bai-hat'], param={'id': ''}), - None, note='Updating cookies') + def _call_api(self, url_type, params, display_id=None, **kwargs): + resp = self._download_json( + self._api_url(url_type, params), display_id or params.get('id'), + note=f'Downloading {url_type} JSON metadata', **kwargs) + return (resp or {}).get('data') or {} - def _real_extract(self, url): - song_id, type_url = self._match_valid_url(url).group('id', 'type') - api = self.get_api_with_signature(name_api=self._SLUG_API[type_url], param={'id': song_id}) - return self._process_data(self._download_json(api, song_id)['data'], song_id, type_url) + def _real_initialize(self): + if not self._cookies_passed: + self._request_webpage( + self._api_url('bai-hat', {'id': ''}), None, note='Updating cookies') - def get_api_with_signature(self, name_api, param): - param.update({'ctime': '1'}) - sha256 = hashlib.sha256(''.join(f'{i}={param[i]}' for i in sorted(param)).encode('utf-8')).hexdigest() - data = { - 'apiKey': self._API_KEY, - 'sig': hmac.new(self._SECRET_KEY, f'{name_api}{sha256}'.encode('utf-8'), hashlib.sha512).hexdigest(), - **param, - } - return f'{self._DOMAIN}{name_api}?{urllib.parse.urlencode(data)}' + def _parse_items(self, items): + for url in traverse_obj(items, (..., 'link')) or []: + yield self.url_result(urljoin(self._DOMAIN, url)) class ZingMp3IE(ZingMp3BaseIE): _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'bai-hat|video-clip|embed' + IE_NAME = 'zingmp3' + IE_DESC = 'zingmp3.vn' _TESTS = [{ 'url': 'https://mp3.zing.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html', 'md5': 'ead7ae13693b3205cbc89536a077daed', @@ -147,7 +93,7 @@ class ZingMp3IE(ZingMp3BaseIE): }, }, { 'url': 'https://zingmp3.vn/video-clip/Suong-Hoa-Dua-Loi-K-ICM-RYO/ZO8ZF7C7.html', - 'md5': 'c7f23d971ac1a4f675456ed13c9b9612', + 'md5': '3c2081e79471a2f4a3edd90b70b185ea', 'info_dict': { 'id': 'ZO8ZF7C7', 'title': 'Sฦฐฦกng Hoa ฤฦฐa Lแปi', @@ -180,11 +126,64 @@ class ZingMp3IE(ZingMp3BaseIE): 'url': 'https://zingmp3.vn/bai-hat/Xa-Mai-Xa-Bao-Thy/ZWZB9WAB.html', 'only_matching': True, }] - IE_NAME = 'zingmp3' - IE_DESC = 'zingmp3.vn' - def _process_data(self, data, song_id, type_url): - return self._extract_item(data, song_id, type_url, True) + def _real_extract(self, url): + song_id, url_type = self._match_valid_url(url).group('id', 'type') + item = self._call_api(url_type, {'id': song_id}) + + item_id = item.get('encodeId') or song_id + if url_type == 'video-clip': + source = item.get('streaming') + source['mp4'] = self._download_json( + 'http://api.mp3.zing.vn/api/mobile/video/getvideoinfo', item_id, + query={'requestdata': json.dumps({'id': item_id})}, + 
note='Downloading mp4 JSON metadata').get('source') + else: + source = self._call_api('song-streaming', {'id': item_id}) + + formats = [] + for k, v in (source or {}).items(): + if not v or v == 'VIP': + continue + if k not in ('mp4', 'hls'): + formats.append({ + 'ext': 'mp3', + 'format_id': k, + 'tbr': int_or_none(k), + 'url': self._proto_relative_url(v), + 'vcodec': 'none', + }) + continue + for res, video_url in v.items(): + if not video_url: + continue + if k == 'hls': + formats.extend(self._extract_m3u8_formats(video_url, item_id, 'mp4', m3u8_id=k, fatal=False)) + continue + formats.append({ + 'format_id': f'mp4-{res}', + 'url': video_url, + 'height': int_or_none(res), + }) + + if not formats and item.get('msg') == 'Sorry, this content is not available in your country.': + self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) + self._sort_formats(formats) + + lyric = item.get('lyric') or self._call_api('lyric', {'id': item_id}, fatal=False).get('file') + + return { + 'id': item_id, + 'title': traverse_obj(item, 'title', 'alias'), + 'thumbnail': traverse_obj(item, 'thumbnail', 'thumbnailM'), + 'duration': int_or_none(item.get('duration')), + 'track': traverse_obj(item, 'title', 'alias'), + 'artist': traverse_obj(item, 'artistsNames', 'artists_names'), + 'album': traverse_obj(item, ('album', ('name', 'title')), get_all=False), + 'album_artist': traverse_obj(item, ('album', ('artistsNames', 'artists_names')), get_all=False), + 'formats': formats, + 'subtitles': {'origin': [{'url': lyric}]} if lyric else None, + } class ZingMp3AlbumIE(ZingMp3BaseIE): @@ -192,19 +191,17 @@ class ZingMp3AlbumIE(ZingMp3BaseIE): _TESTS = [{ 'url': 'http://mp3.zing.vn/album/Lau-Dai-Tinh-Ai-Bang-Kieu-Minh-Tuyet/ZWZBWDAF.html', 'info_dict': { - '_type': 'playlist', 'id': 'ZWZBWDAF', 'title': 'Lรขu ฤร i Tรฌnh รi', }, - 'playlist_count': 9, + 'playlist_mincount': 9, }, { 'url': 'https://zingmp3.vn/album/Nhung-Bai-Hat-Hay-Nhat-Cua-Mr-Siro-Mr-Siro/ZWZAEZZD.html', 'info_dict': { - '_type': 'playlist', 'id': 'ZWZAEZZD', 'title': 'Nhแปฏng Bร i Hรกt Hay Nhแบฅt Cแปงa Mr. 
Siro', }, - 'playlist_count': 49, + 'playlist_mincount': 49, }, { 'url': 'http://mp3.zing.vn/playlist/Duong-Hong-Loan-apollobee/IWCAACCB.html', 'only_matching': True, @@ -214,12 +211,176 @@ class ZingMp3AlbumIE(ZingMp3BaseIE): }] IE_NAME = 'zingmp3:album' - def _process_data(self, data, song_id, type_url): - def entries(): - for item in traverse_obj(data, ('song', 'items')) or []: - entry = self._extract_item(item, song_id, type_url, False) - if entry: - yield entry + def _real_extract(self, url): + song_id, url_type = self._match_valid_url(url).group('id', 'type') + data = self._call_api(url_type, {'id': song_id}) + return self.playlist_result( + self._parse_items(traverse_obj(data, ('song', 'items'))), + traverse_obj(data, 'id', 'encodeId'), traverse_obj(data, 'name', 'title')) + + +class ZingMp3ChartHomeIE(ZingMp3BaseIE): + _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<id>(?:zing-chart|moi-phat-hanh))/?(?:[#?]|$)' + _TESTS = [{ + 'url': 'https://zingmp3.vn/zing-chart', + 'info_dict': { + 'id': 'zing-chart', + }, + 'playlist_mincount': 100, + }, { + 'url': 'https://zingmp3.vn/moi-phat-hanh', + 'info_dict': { + 'id': 'moi-phat-hanh', + }, + 'playlist_mincount': 100, + }] + IE_NAME = 'zingmp3:chart-home' + + def _real_extract(self, url): + url_type = self._match_id(url) + data = self._call_api(url_type, {'id': url_type}) + items = traverse_obj(data, ('RTChart', 'items') if url_type == 'zing-chart' else 'items') + return self.playlist_result(self._parse_items(items), url_type) + + +class ZingMp3WeekChartIE(ZingMp3BaseIE): + _VALID_URL = ZingMp3BaseIE._VALID_URL_TMPL % 'zing-chart-tuan' + IE_NAME = 'zingmp3:week-chart' + _TESTS = [{ + 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-Viet-Nam/IWZ9Z08I.html', + 'info_dict': { + 'id': 'IWZ9Z08I', + 'title': 'zing-chart-vn', + }, + 'playlist_mincount': 10, + }, { + 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-US-UK/IWZ9Z0BW.html', + 'info_dict': { + 'id': 'IWZ9Z0BW', + 'title': 'zing-chart-us', + }, + 'playlist_mincount': 10, + }, { + 'url': 'https://zingmp3.vn/zing-chart-tuan/Bai-hat-KPop/IWZ9Z0BO.html', + 'info_dict': { + 'id': 'IWZ9Z0BO', + 'title': 'zing-chart-korea', + }, + 'playlist_mincount': 10, + }] + + def _real_extract(self, url): + song_id, url_type = self._match_valid_url(url).group('id', 'type') + data = self._call_api(url_type, {'id': song_id}) + return self.playlist_result( + self._parse_items(data['items']), song_id, f'zing-chart-{data.get("country", "")}') + - return self.playlist_result(entries(), traverse_obj(data, 'id', 'encodeId'), - traverse_obj(data, 'name', 'title')) +class ZingMp3ChartMusicVideoIE(ZingMp3BaseIE): + _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<type>the-loai-video)/(?P<regions>[^/]+)/(?P<id>[^\.]+)' + IE_NAME = 'zingmp3:chart-music-video' + _TESTS = [{ + 'url': 'https://zingmp3.vn/the-loai-video/Viet-Nam/IWZ9Z08I.html', + 'info_dict': { + 'id': 'IWZ9Z08I', + 'title': 'the-loai-video_Viet-Nam', + }, + 'playlist_mincount': 400, + }, { + 'url': 'https://zingmp3.vn/the-loai-video/Au-My/IWZ9Z08O.html', + 'info_dict': { + 'id': 'IWZ9Z08O', + 'title': 'the-loai-video_Au-My', + }, + 'playlist_mincount': 40, + }, { + 'url': 'https://zingmp3.vn/the-loai-video/Han-Quoc/IWZ9Z08W.html', + 'info_dict': { + 'id': 'IWZ9Z08W', + 'title': 'the-loai-video_Han-Quoc', + }, + 'playlist_mincount': 30, + }, { + 'url': 'https://zingmp3.vn/the-loai-video/Khong-Loi/IWZ9Z086.html', + 'info_dict': { + 'id': 'IWZ9Z086', + 'title': 'the-loai-video_Khong-Loi', + }, + 'playlist_mincount': 10, + }] + + def 
_fetch_page(self, song_id, url_type, page): + return self._parse_items(self._call_api(url_type, { + 'id': song_id, + 'type': 'genre', + 'page': page + 1, + 'count': self._PER_PAGE + }).get('items')) + + def _real_extract(self, url): + song_id, regions, url_type = self._match_valid_url(url).group('id', 'regions', 'type') + return self.playlist_result( + OnDemandPagedList(functools.partial(self._fetch_page, song_id, url_type), self._PER_PAGE), + song_id, f'{url_type}_{regions}') + + +class ZingMp3UserIE(ZingMp3BaseIE): + _VALID_URL = r'https?://(?:mp3\.zing|zingmp3)\.vn/(?P<user>[^/]+)/(?P<type>bai-hat|single|album|video)/?(?:[?#]|$)' + IE_NAME = 'zingmp3:user' + _TESTS = [{ + 'url': 'https://zingmp3.vn/Mr-Siro/bai-hat', + 'info_dict': { + 'id': 'IWZ98609', + 'title': 'Mr. Siro - bai-hat', + 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', + }, + 'playlist_mincount': 91, + }, { + 'url': 'https://zingmp3.vn/Mr-Siro/album', + 'info_dict': { + 'id': 'IWZ98609', + 'title': 'Mr. Siro - album', + 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', + }, + 'playlist_mincount': 3, + }, { + 'url': 'https://zingmp3.vn/Mr-Siro/single', + 'info_dict': { + 'id': 'IWZ98609', + 'title': 'Mr. Siro - single', + 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', + }, + 'playlist_mincount': 20, + }, { + 'url': 'https://zingmp3.vn/Mr-Siro/video', + 'info_dict': { + 'id': 'IWZ98609', + 'title': 'Mr. Siro - video', + 'description': 'md5:85ab29bd7b21725c12bf76fd1d6922e5', + }, + 'playlist_mincount': 15, + }] + + def _fetch_page(self, user_id, url_type, page): + url_type = 'user-list-song' if url_type == 'bai-hat' else 'user-list-video' + return self._parse_items(self._call_api(url_type, { + 'id': user_id, + 'type': 'artist', + 'page': page + 1, + 'count': self._PER_PAGE + }, query={'sort': 'new', 'sectionId': 'aSong'}).get('items')) + + def _real_extract(self, url): + user_alias, url_type = self._match_valid_url(url).group('user', 'type') + if not url_type: + url_type = 'bai-hat' + + user_info = self._call_api('info-artist', {}, user_alias, query={'alias': user_alias}) + if url_type in ('bai-hat', 'video'): + entries = OnDemandPagedList( + functools.partial(self._fetch_page, user_info['id'], url_type), self._PER_PAGE) + else: + entries = self._parse_items(traverse_obj(user_info, ( + 'sections', lambda _, v: v['link'] == f'/{user_alias}/{url_type}', 'items', ...))) + return self.playlist_result( + entries, user_info['id'], f'{user_info.get("name")} - {url_type}', user_info.get('biography')) diff --git a/yt_dlp/extractor/zoom.py b/yt_dlp/extractor/zoom.py index c00548839..a455f8c04 100644 --- a/yt_dlp/extractor/zoom.py +++ b/yt_dlp/extractor/zoom.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - - from .common import InfoExtractor from ..utils import ( ExtractorError, diff --git a/yt_dlp/extractor/zype.py b/yt_dlp/extractor/zype.py index 7663cb36b..6f2fbb9e9 100644 --- a/yt_dlp/extractor/zype.py +++ b/yt_dlp/extractor/zype.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 350b44dd0..70857b798 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -1,12 +1,10 @@ -from collections.abc import MutableMapping +import collections +import contextlib import json import operator import re -from .utils import ( - ExtractorError, - remove_quotes, -) +from .utils import ExtractorError, remove_quotes _OPERATORS = [ ('|', operator.or_), @@ -38,40 +36,19 
@@ class JS_Continue(ExtractorError): ExtractorError.__init__(self, 'Invalid continue') -class LocalNameSpace(MutableMapping): - def __init__(self, *stack): - self.stack = tuple(stack) - - def __getitem__(self, key): - for scope in self.stack: - if key in scope: - return scope[key] - raise KeyError(key) - +class LocalNameSpace(collections.ChainMap): def __setitem__(self, key, value): - for scope in self.stack: + for scope in self.maps: if key in scope: scope[key] = value - break - else: - self.stack[0][key] = value - return value + return + self.maps[0][key] = value def __delitem__(self, key): raise NotImplementedError('Deleting is not supported') - def __iter__(self): - for scope in self.stack: - yield from scope - - def __len__(self, key): - return len(iter(self)) - - def __repr__(self): - return f'LocalNameSpace{self.stack}' - -class JSInterpreter(object): +class JSInterpreter: def __init__(self, code, objects=None): if objects is None: objects = {} @@ -232,7 +209,7 @@ class JSInterpreter(object): for default in (False, True): matched = False for item in items: - case, stmt = [i.strip() for i in self._separate(item, ':', 1)] + case, stmt = (i.strip() for i in self._separate(item, ':', 1)) if default: matched = matched or case == 'default' elif not matched: @@ -268,10 +245,10 @@ class JSInterpreter(object): expr = expr[:start] + json.dumps(ret) + expr[end:] for op, opfunc in _ASSIGN_OPERATORS: - m = re.match(r'''(?x) - (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])? - \s*%s - (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr) + m = re.match(rf'''(?x) + (?P<out>{_NAME_RE})(?:\[(?P<index>[^\]]+?)\])? + \s*{re.escape(op)} + (?P<expr>.*)$''', expr) if not m: continue right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion) @@ -305,10 +282,8 @@ class JSInterpreter(object): if var_m: return local_vars[var_m.group('name')] - try: + with contextlib.suppress(ValueError): return json.loads(expr) - except ValueError: - pass m = re.match( r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr) @@ -451,9 +426,9 @@ class JSInterpreter(object): m = re.match(r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) if m: fname = m.group('func') - argvals = tuple([ + argvals = tuple( int(v) if v.isdigit() else local_vars[v] - for v in self._separate(m.group('args'))]) + for v in self._separate(m.group('args'))) if fname in local_vars: return local_vars[fname](argvals) elif fname not in self._functions: @@ -524,14 +499,13 @@ class JSInterpreter(object): def build_function(self, argnames, code, *global_stack): global_stack = list(global_stack) or [{}] - local_vars = global_stack.pop(0) def resf(args, **kwargs): - local_vars.update({ + global_stack[0].update({ **dict(zip(argnames, args)), **kwargs }) - var_stack = LocalNameSpace(local_vars, *global_stack) + var_stack = LocalNameSpace(*global_stack) for stmt in self._separate(code.replace('\n', ''), ';'): ret, should_abort = self.interpret_statement(stmt, var_stack) if should_abort: diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index d7a8ffddd..a867fd289 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -1,7 +1,7 @@ import functools from threading import Lock -from .utils import supports_terminal_sequences, write_string +from .utils import supports_terminal_sequences, write_string CONTROL_SEQUENCES = { 'DOWN': '\n', @@ -69,6 +69,7 @@ def format_text(text, f): raise SyntaxError(f'Invalid format {" ".join(tokens)!r} in {f!r}') if fg_color or bg_color: + text = text.replace(CONTROL_SEQUENCES['RESET'], 
f'{fg_color}{bg_color}') return f'{fg_color}{bg_color}{text}{CONTROL_SEQUENCES["RESET"]}' else: return text diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index e47631eb6..f168be46a 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -1,27 +1,25 @@ # flake8: noqa: F401 -from ..utils import load_plugins - from .common import PostProcessor from .embedthumbnail import EmbedThumbnailPP -from .exec import ExecPP, ExecAfterDownloadPP +from .exec import ExecAfterDownloadPP, ExecPP from .ffmpeg import ( - FFmpegPostProcessor, - FFmpegCopyStreamPP, FFmpegConcatPP, + FFmpegCopyStreamPP, FFmpegEmbedSubtitlePP, FFmpegExtractAudioPP, FFmpegFixupDuplicateMoovPP, FFmpegFixupDurationPP, - FFmpegFixupStretchedPP, - FFmpegFixupTimestampPP, FFmpegFixupM3u8PP, FFmpegFixupM4aPP, + FFmpegFixupStretchedPP, + FFmpegFixupTimestampPP, FFmpegMergerPP, FFmpegMetadataPP, + FFmpegPostProcessor, + FFmpegSplitChaptersPP, FFmpegSubtitlesConvertorPP, FFmpegThumbnailsConvertorPP, - FFmpegSplitChaptersPP, FFmpegVideoConvertorPP, FFmpegVideoRemuxerPP, ) @@ -35,6 +33,7 @@ from .movefilesafterdownload import MoveFilesAfterDownloadPP from .sponskrub import SponSkrubPP from .sponsorblock import SponsorBlockPP from .xattrpp import XAttrMetadataPP +from ..utils import load_plugins _PLUGIN_CLASSES = load_plugins('postprocessor', 'PP', globals()) diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index 8420ee864..addc46e5b 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -1,5 +1,3 @@ -from __future__ import unicode_literals - import functools import itertools import json @@ -8,10 +6,10 @@ import time import urllib.error from ..utils import ( + PostProcessingError, _configuration_args, encodeFilename, network_exceptions, - PostProcessingError, sanitized_Request, write_string, ) @@ -71,9 +69,9 @@ class PostProcessor(metaclass=PostProcessorMetaClass): return name[6:] if name[:6].lower() == 'ffmpeg' else name def to_screen(self, text, prefix=True, *args, **kwargs): - tag = '[%s] ' % self.PP_NAME if prefix else '' if self._downloader: - return self._downloader.to_screen('%s%s' % (tag, text), *args, **kwargs) + tag = '[%s] ' % self.PP_NAME if prefix else '' + return self._downloader.to_screen(f'{tag}{text}', *args, **kwargs) def report_warning(self, text, *args, **kwargs): if self._downloader: @@ -85,7 +83,8 @@ class PostProcessor(metaclass=PostProcessorMetaClass): write_string(f'DeprecationWarning: {text}') def report_error(self, text, *args, **kwargs): - # Exists only for compatibility. Do not use + self.deprecation_warning('"yt_dlp.postprocessor.PostProcessor.report_error" is deprecated. 
' + 'raise "yt_dlp.utils.PostProcessingError" instead') if self._downloader: return self._downloader.report_error(text, *args, **kwargs) @@ -93,6 +92,12 @@ class PostProcessor(metaclass=PostProcessorMetaClass): if self._downloader: return self._downloader.write_debug(text, *args, **kwargs) + def _delete_downloaded_files(self, *files_to_delete, **kwargs): + if self._downloader: + return self._downloader._delete_downloaded_files(*files_to_delete, **kwargs) + for filename in set(filter(None, files_to_delete)): + os.remove(filename) + def get_param(self, name, default=None, *args, **kwargs): if self._downloader: return self._downloader.params.get(name, default, *args, **kwargs) diff --git a/yt_dlp/postprocessor/embedthumbnail.py b/yt_dlp/postprocessor/embedthumbnail.py index 057007f2e..d36e0008e 100644 --- a/yt_dlp/postprocessor/embedthumbnail.py +++ b/yt_dlp/postprocessor/embedthumbnail.py @@ -1,37 +1,29 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 import imghdr import os -import subprocess import re - -try: - from mutagen.flac import Picture, FLAC - from mutagen.mp4 import MP4, MP4Cover - from mutagen.oggopus import OggOpus - from mutagen.oggvorbis import OggVorbis - has_mutagen = True -except ImportError: - has_mutagen = False +import subprocess from .common import PostProcessor -from .ffmpeg import ( - FFmpegPostProcessor, - FFmpegThumbnailsConvertorPP, -) +from .ffmpeg import FFmpegPostProcessor, FFmpegThumbnailsConvertorPP +from ..dependencies import mutagen from ..utils import ( + Popen, + PostProcessingError, check_executable, encodeArgument, encodeFilename, error_to_compat_str, - Popen, - PostProcessingError, prepend_extension, shell_quote, ) +if mutagen: + from mutagen.flac import FLAC, Picture + from mutagen.mp4 import MP4, MP4Cover + from mutagen.oggopus import OggOpus + from mutagen.oggvorbis import OggVorbis + class EmbedThumbnailPPError(PostProcessingError): pass @@ -61,7 +53,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): return int(mobj.group('w')), int(mobj.group('h')) def _report_run(self, exe, filename): - self.to_screen('%s: Adding thumbnail to "%s"' % (exe, filename)) + self.to_screen(f'{exe}: Adding thumbnail to "{filename}"') @PostProcessor._restrict_to(images=False) def run(self, info): @@ -87,12 +79,10 @@ class EmbedThumbnailPP(FFmpegPostProcessor): original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath'] - # Convert unsupported thumbnail formats to PNG (see #25687, #25717) - # Original behavior was to convert to JPG, but since JPG is a lossy - # format, there will be some additional data loss. - # PNG, on the other hand, is lossless. 
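Stepping back to the jsinterp hunk a little further up: LocalNameSpace now derives from collections.ChainMap and only overrides assignment, so a write lands in the innermost scope that already defines the name and otherwise creates the name locally. A runnable sketch of exactly that behaviour:

import collections

class LocalNameSpace(collections.ChainMap):
    def __setitem__(self, key, value):
        for scope in self.maps:
            if key in scope:  # update the nearest scope that has the name
                scope[key] = value
                return
        self.maps[0][key] = value  # otherwise define it locally

inner, outer = {}, {'x': 1}
ns = LocalNameSpace(inner, outer)
ns['x'] = 2  # updates outer, where x already lives
ns['y'] = 3  # new name, goes into inner
print(outer, inner)  # {'x': 2} {'y': 3}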
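Back in this embedthumbnail hunk, the added lines just below also narrow when a conversion happens: Matroska targets can attach image streams of any type, so only non-mkv/mka containers force unsupported thumbnails into PNG. A sketch of that gate (the helper name is hypothetical):

import os

def needs_png_conversion(video_ext, thumbnail_path):
    # mkv/mka can embed e.g. webp as an attachment; other containers
    # only get jpg/png thumbnails, so anything else is converted
    thumb_ext = os.path.splitext(thumbnail_path)[1][1:].lower()
    return video_ext not in ('mkv', 'mka') and thumb_ext not in ('jpg', 'jpeg', 'png')

print(needs_png_conversion('mp4', 'thumb.webp'))  # True
print(needs_png_conversion('mkv', 'thumb.webp'))  # False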
+ # Convert unsupported thumbnail formats (see #25687, #25717) + # PNG is preferred since JPEG is lossy thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:] - if thumbnail_ext not in ('jpg', 'jpeg', 'png'): + if info['ext'] not in ('mkv', 'mka') and thumbnail_ext not in ('jpg', 'jpeg', 'png'): thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png') thumbnail_ext = 'png' @@ -110,7 +100,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): elif info['ext'] in ['mkv', 'mka']: options = list(self.stream_copy_opts()) - mimetype = 'image/%s' % ('png' if thumbnail_ext == 'png' else 'jpeg') + mimetype = f'image/{thumbnail_ext.replace("jpg", "jpeg")}' old_stream, new_stream = self.get_stream_number( filename, ('tags', 'mimetype'), mimetype) if old_stream is not None: @@ -127,7 +117,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): elif info['ext'] in ['m4a', 'mp4', 'mov']: prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', []) # Method 1: Use mutagen - if not has_mutagen or prefer_atomicparsley: + if not mutagen or prefer_atomicparsley: success = False else: try: @@ -200,7 +190,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): raise EmbedThumbnailPPError(f'Unable to embed using ffprobe & ffmpeg; {err}') elif info['ext'] in ['ogg', 'opus', 'flac']: - if not has_mutagen: + if not mutagen: raise EmbedThumbnailPPError('module mutagen was not found. Please install using `python -m pip install mutagen`') self._report_run('mutagen', filename) @@ -230,11 +220,9 @@ class EmbedThumbnailPP(FFmpegPostProcessor): os.replace(temp_filename, filename) self.try_utime(filename, mtime, mtime) - - files_to_delete = [thumbnail_filename] - if self._already_have_thumbnail: - if original_thumbnail == thumbnail_filename: - files_to_delete = [] - elif original_thumbnail != thumbnail_filename: - files_to_delete.append(original_thumbnail) - return files_to_delete, info + converted = original_thumbnail != thumbnail_filename + self._delete_downloaded_files( + thumbnail_filename if converted or not self._already_have_thumbnail else None, + original_thumbnail if converted and not self._already_have_thumbnail else None, + info=info) + return [], info diff --git a/yt_dlp/postprocessor/exec.py b/yt_dlp/postprocessor/exec.py index 63f4d23f2..cfc83167c 100644 --- a/yt_dlp/postprocessor/exec.py +++ b/yt_dlp/postprocessor/exec.py @@ -1,14 +1,8 @@ -from __future__ import unicode_literals - import subprocess from .common import PostProcessor from ..compat import compat_shlex_quote -from ..utils import ( - encodeArgument, - PostProcessingError, - variadic, -) +from ..utils import PostProcessingError, encodeArgument, variadic class ExecPP(PostProcessor): diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 27d06cbde..d1d8e1687 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -1,30 +1,26 @@ -from __future__ import unicode_literals - import collections -import io import itertools +import json import os +import re import subprocess import time -import re -import json from .common import AudioConversionError, PostProcessor - from ..compat import compat_str from ..utils import ( + ISO639Utils, + Popen, + PostProcessingError, + _get_exe_version_output, + detect_exe_version, determine_ext, dfxp2srt, encodeArgument, encodeFilename, float_or_none, - _get_exe_version_output, - detect_exe_version, is_outdated_version, - ISO639Utils, orderedSet, - Popen, - PostProcessingError, prepend_extension, replace_extension, shell_quote, @@ 
-33,7 +29,6 @@ from ..utils import ( write_json_file, ) - EXT_TO_OUT_FORMATS = { 'aac': 'adts', 'flac': 'flac', @@ -73,11 +68,9 @@ class FFmpegPostProcessor(PostProcessor): raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location') required_version = '10-0' if self.basename == 'avconv' else '1.0' - if is_outdated_version( - self._versions[self.basename], required_version): - warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( - self.basename, self.basename, required_version) - self.report_warning(warning) + if is_outdated_version(self._versions[self.basename], required_version): + self.report_warning(f'Your copy of {self.basename} is outdated, update {self.basename} ' + f'to version {required_version} or newer if you encounter any errors') @staticmethod def get_versions_and_features(downloader=None): @@ -147,13 +140,14 @@ class FFmpegPostProcessor(PostProcessor): if basename in ('ffmpeg', 'ffprobe'): prefer_ffmpeg = True - self._paths = dict( - (p, os.path.join(dirname, p)) for p in programs) + self._paths = { + p: os.path.join(dirname, p) for p in programs} if basename: self._paths[basename] = location self._versions = {} - executables = {'basename': ('ffmpeg', 'avconv'), 'probe_basename': ('ffprobe', 'avprobe')} + # NB: probe must be first for _features to be poulated correctly + executables = {'probe_basename': ('ffprobe', 'avprobe'), 'basename': ('ffmpeg', 'avconv')} if prefer_ffmpeg is False: executables = {k: v[::-1] for k, v in executables.items()} for var, prefs in executables.items(): @@ -194,8 +188,7 @@ class FFmpegPostProcessor(PostProcessor): yield from ('-dn', '-ignore_unknown') if copy: yield from ('-c', 'copy') - # For some reason, '-c copy -map 0' is not enough to copy subtitles - if ext in ('mp4', 'mov'): + if ext in ('mp4', 'mov', 'm4a'): yield from ('-c:s', 'mov_text') def get_audio_codec(self, path): @@ -211,13 +204,13 @@ class FFmpegPostProcessor(PostProcessor): encodeFilename(self.executable, True), encodeArgument('-i')] cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) - self.write_debug('%s command line: %s' % (self.basename, shell_quote(cmd))) + self.write_debug(f'{self.basename} command line: {shell_quote(cmd)}') handle = Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout_data, stderr_data = handle.communicate_or_kill() expected_ret = 0 if self.probe_available else 1 if handle.wait() != expected_ret: return None - except (IOError, OSError): + except OSError: return None output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore') if self.probe_available: @@ -381,7 +374,7 @@ class FFmpegPostProcessor(PostProcessor): self.real_run_ffmpeg( [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])], [(out_file, out_flags)]) - os.remove(concat_file) + self._delete_downloaded_files(concat_file) @classmethod def _concat_spec(cls, in_files, concat_opts=None): @@ -539,7 +532,7 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor): _ACTION = 'converting' def __init__(self, downloader=None, preferedformat=None): - super(FFmpegVideoConvertorPP, self).__init__(downloader) + super().__init__(downloader) self._preferedformats = preferedformat.lower().split('/') def _target_ext(self, source_ext): @@ -584,14 +577,16 @@ class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP): class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): + SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 
'mka') + def __init__(self, downloader=None, already_have_subtitle=False): - super(FFmpegEmbedSubtitlePP, self).__init__(downloader) + super().__init__(downloader) self._already_have_subtitle = already_have_subtitle @PostProcessor._restrict_to(images=False) def run(self, info): - if info['ext'] not in ('mp4', 'webm', 'mkv'): - self.to_screen('Subtitles can only be embedded in mp4, webm or mkv files') + if info['ext'] not in self.SUPPORTED_EXTS: + self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files') return [], info subtitles = info.get('requested_subtitles') if not subtitles: @@ -706,14 +701,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor): self.run_ffmpeg_multiple_files( (filename, metadata_filename), temp_filename, itertools.chain(self._options(info['ext']), *options)) - for file in filter(None, files_to_delete): - os.remove(file) # Don't obey --keep-files + self._delete_downloaded_files(*files_to_delete) os.replace(temp_filename, filename) return [], info @staticmethod def _get_chapter_opts(chapters, metadata_filename): - with io.open(metadata_filename, 'wt', encoding='utf-8') as f: + with open(metadata_filename, 'wt', encoding='utf-8') as f: def ffmpeg_escape(text): return re.sub(r'([\\=;#\n])', r'\\\1', text) @@ -737,6 +731,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list)) if info.get(key) is not None), None) if value not in ('', None): + value = value.replace('\0', '') # nul character cannot be passed in command line metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)}) # See [1-4] for some info on media metadata/metadata supported @@ -804,8 +799,11 @@ class FFmpegMetadataPP(FFmpegPostProcessor): yield ('-map', '-0:%d' % old_stream) new_stream -= 1 - yield ('-attach', infofn, - '-metadata:s:%d' % new_stream, 'mimetype=application/json') + yield ( + '-attach', infofn, + f'-metadata:s:{new_stream}', 'mimetype=application/json', + f'-metadata:s:{new_stream}', 'filename=info.json', + ) class FFmpegMergerPP(FFmpegPostProcessor): @@ -898,7 +896,7 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor): def __init__(self, downloader=None, trim=0.001): # "trim" should be used when the video contains unintended packets - super(FFmpegFixupTimestampPP, self).__init__(downloader) + super().__init__(downloader) assert isinstance(trim, (int, float)) self.trim = str(trim) @@ -936,7 +934,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc') def __init__(self, downloader=None, format=None): - super(FFmpegSubtitlesConvertorPP, self).__init__(downloader) + super().__init__(downloader) self.format = format def run(self, info): @@ -978,7 +976,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): with open(dfxp_file, 'rb') as f: srt_data = dfxp2srt(f.read()) - with io.open(srt_file, 'wt', encoding='utf-8') as f: + with open(srt_file, 'wt', encoding='utf-8') as f: f.write(srt_data) old_file = srt_file @@ -995,7 +993,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): self.run_ffmpeg(old_file, new_file, ['-f', new_format]) - with io.open(new_file, 'rt', encoding='utf-8') as f: + with open(new_file, encoding='utf-8') as f: subs[lang] = { 'ext': new_ext, 'data': f.read(), @@ -1050,7 +1048,7 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor): destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info) self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())]) 
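The ffmpeg_escape helper in the metadata hunk above exists because ffmpeg's FFMETADATA files treat backslash, '=', ';', '#' and newlines as syntax. A sketch of emitting one chapter block with it; the 1/1000 timebase (START/END in milliseconds) is an assumption modelled on the usual chapter writer, not copied from this diff:

import re

def ffmpeg_escape(text):
    # backslash-escape the characters FFMETADATA treats specially
    return re.sub(r'([\\=;#\n])', r'\\\1', text)

def chapter_block(start, end, title):
    return ('[CHAPTER]\nTIMEBASE=1/1000\n'
            f'START={int(start * 1000)}\nEND={int(end * 1000)}\n'
            f'title={ffmpeg_escape(title)}\n')

print(chapter_block(0, 12.5, 'Intro; take #1'))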
if in_file != info['filepath']: - os.remove(in_file) + self._delete_downloaded_files(in_file, msg=None) return [], info @@ -1058,7 +1056,7 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor): SUPPORTED_EXTS = ('jpg', 'png', 'webp') def __init__(self, downloader=None, format=None): - super(FFmpegThumbnailsConvertorPP, self).__init__(downloader) + super().__init__(downloader) self.format = format @staticmethod @@ -1089,7 +1087,7 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor): def convert_thumbnail(self, thumbnail_filename, target_ext): thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext) - self.to_screen('Converting thumbnail "%s" to %s' % (thumbnail_filename, target_ext)) + self.to_screen(f'Converting thumbnail "{thumbnail_filename}" to {target_ext}') self.real_run_ffmpeg( [(thumbnail_filename, ['-f', 'image2', '-pattern_type', 'none'])], [(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))]) @@ -1156,7 +1154,7 @@ class FFmpegConcatPP(FFmpegPostProcessor): entries = info.get('entries') or [] if not any(entries) or (self._only_multi_video and info['_type'] != 'multi_video'): return [], info - elif traverse_obj(entries, (..., 'requested_downloads', lambda _, v: len(v) > 1)): + elif traverse_obj(entries, (..., lambda k, v: k == 'requested_downloads' and len(v) > 1)): raise PostProcessingError('Concatenation is not supported when downloading multiple separate formats') in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath')) or [] diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py index 5bc435da3..51b927b91 100644 --- a/yt_dlp/postprocessor/metadataparser.py +++ b/yt_dlp/postprocessor/metadataparser.py @@ -1,31 +1,27 @@ import re -from enum import Enum from .common import PostProcessor +from ..utils import Namespace class MetadataParserPP(PostProcessor): - class Actions(Enum): - INTERPRET = 'interpretter' - REPLACE = 'replacer' - def __init__(self, downloader, actions): - PostProcessor.__init__(self, downloader) + super().__init__(downloader) self._actions = [] for f in actions: - action = f[0] - assert isinstance(action, self.Actions) - self._actions.append(getattr(self, action.value)(*f[1:])) + action, *args = f + assert action in self.Actions + self._actions.append(action(self, *args)) @classmethod def validate_action(cls, action, *data): - ''' Each action can be: + """Each action can be: (Actions.INTERPRET, from, to) OR (Actions.REPLACE, field, search, replace) - ''' - if not isinstance(action, cls.Actions): + """ + if action not in cls.Actions: raise ValueError(f'{action!r} is not a valid action') - getattr(cls, action.value)(cls, *data) # So this can raise error to validate + action(cls, *data) # So this can raise error to validate @staticmethod def field_to_template(tmpl): @@ -99,6 +95,8 @@ class MetadataParserPP(PostProcessor): search_re = re.compile(search) return f + Actions = Namespace(INTERPRET=interpretter, REPLACE=replacer) + class MetadataFromFieldPP(MetadataParserPP): @classmethod diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py index 22506bc21..8a2ef9065 100644 --- a/yt_dlp/postprocessor/modify_chapters.py +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -3,17 +3,9 @@ import heapq import os from .common import PostProcessor -from .ffmpeg import ( - FFmpegPostProcessor, - FFmpegSubtitlesConvertorPP -) +from .ffmpeg import FFmpegPostProcessor, FFmpegSubtitlesConvertorPP from .sponsorblock import SponsorBlockPP 
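The metadataparser rewrite above drops the Actions enum for a Namespace whose members are the handler functions themselves, which is why both `action in self.Actions` and `action(self, *args)` work. A minimal sketch, assuming Namespace behaves like a SimpleNamespace that iterates over its values (its real definition lives in yt_dlp.utils and is not shown in this diff):

import types

class Namespace(types.SimpleNamespace):
    def __iter__(self):  # assumed: `x in ns` tests membership among the values
        return iter(self.__dict__.values())

class Parser:
    def interpret(self, inp, out):
        return f'interpret {inp} -> {out}'

    def replace(self, field, search, repl):
        return f'replace {search!r} in {field}'

    Actions = Namespace(INTERPRET=interpret, REPLACE=replace)

action = Parser.Actions.INTERPRET
assert action in Parser.Actions  # validation, as in validate_action
print(action(Parser(), 'title', 'series'))  # handlers are called with self explicitly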
-from ..utils import ( - orderedSet, - PostProcessingError, - prepend_extension, -) - +from ..utils import PostProcessingError, orderedSet, prepend_extension _TINY_CHAPTER_DURATION = 1 DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l' @@ -322,7 +314,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): self.to_screen(f'Removing chapters from {filename}') self.concat_files([in_file] * len(concat_opts), out_file, concat_opts) if in_file != filename: - os.remove(in_file) + self._delete_downloaded_files(in_file, msg=None) return out_file @staticmethod diff --git a/yt_dlp/postprocessor/movefilesafterdownload.py b/yt_dlp/postprocessor/movefilesafterdownload.py index 1064a8cb8..436d13227 100644 --- a/yt_dlp/postprocessor/movefilesafterdownload.py +++ b/yt_dlp/postprocessor/movefilesafterdownload.py @@ -1,13 +1,12 @@ -from __future__ import unicode_literals import os import shutil from .common import PostProcessor from ..utils import ( + PostProcessingError, decodeFilename, encodeFilename, make_dir, - PostProcessingError, ) @@ -47,7 +46,7 @@ class MoveFilesAfterDownloadPP(PostProcessor): % (oldfile, newfile)) continue make_dir(newfile, PostProcessingError) - self.to_screen('Moving file "%s" to "%s"' % (oldfile, newfile)) + self.to_screen(f'Moving file "{oldfile}" to "{newfile}"') shutil.move(oldfile, newfile) # os.rename cannot move between volumes info['filepath'] = finalpath diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index 86149aeef..1a9f5dc66 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -1,19 +1,18 @@ -from __future__ import unicode_literals import os +import shlex import subprocess from .common import PostProcessor -from ..compat import compat_shlex_split from ..utils import ( + Popen, + PostProcessingError, check_executable, cli_option, encodeArgument, encodeFilename, + prepend_extension, shell_quote, str_or_none, - Popen, - PostProcessingError, - prepend_extension, ) @@ -79,7 +78,7 @@ class SponSkrubPP(PostProcessor): if not self.cutout: cmd += ['-chapter'] cmd += cli_option(self._downloader.params, '-proxy', 'proxy') - cmd += compat_shlex_split(self.args) # For backward compatibility + cmd += shlex.split(self.args) # For backward compatibility cmd += self._configuration_args(self._exe_name, use_compat=False) cmd += ['--', information['id'], filename, temp_filename] cmd = [encodeArgument(i) for i in cmd] diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py index 7943014e2..7f75561db 100644 --- a/yt_dlp/postprocessor/sponsorblock.py +++ b/yt_dlp/postprocessor/sponsorblock.py @@ -1,4 +1,4 @@ -from hashlib import sha256 +import hashlib import json import re @@ -38,7 +38,7 @@ class SponsorBlockPP(FFmpegPostProcessor): return [], info self.to_screen('Fetching SponsorBlock segments') - info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration']) + info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration')) return [], info def _get_sponsor_chapters(self, info, duration): @@ -84,7 +84,7 @@ class SponsorBlockPP(FFmpegPostProcessor): return sponsor_chapters def _get_sponsor_segments(self, video_id, service): - hash = sha256(video_id.encode('ascii')).hexdigest() + hash = hashlib.sha256(video_id.encode('ascii')).hexdigest() # SponsorBlock API recommends using first 4 hash characters. url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' 
+ compat_urllib_parse_urlencode({ 'service': service, diff --git a/yt_dlp/postprocessor/xattrpp.py b/yt_dlp/postprocessor/xattrpp.py index 93acd6d13..f822eff41 100644 --- a/yt_dlp/postprocessor/xattrpp.py +++ b/yt_dlp/postprocessor/xattrpp.py @@ -1,78 +1,63 @@ -from __future__ import unicode_literals +import os from .common import PostProcessor from ..compat import compat_os_name from ..utils import ( - hyphenate_date, - write_xattr, PostProcessingError, XAttrMetadataError, XAttrUnavailableError, + hyphenate_date, + write_xattr, ) class XAttrMetadataPP(PostProcessor): - # - # More info about extended attributes for media: - # http://freedesktop.org/wiki/CommonExtendedAttributes/ - # http://www.freedesktop.org/wiki/PhreedomDraft/ - # http://dublincore.org/documents/usageguide/elements.shtml - # - # TODO: - # * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated) - # * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution' - # + """Set extended attributes on downloaded file (if xattr support is found) + + More info about extended attributes for media: + http://freedesktop.org/wiki/CommonExtendedAttributes/ + http://www.freedesktop.org/wiki/PhreedomDraft/ + http://dublincore.org/documents/usageguide/elements.shtml + + TODO: + * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated) + * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution' + """ + + XATTR_MAPPING = { + 'user.xdg.referrer.url': 'webpage_url', + # 'user.xdg.comment': 'description', + 'user.dublincore.title': 'title', + 'user.dublincore.date': 'upload_date', + 'user.dublincore.description': 'description', + 'user.dublincore.contributor': 'uploader', + 'user.dublincore.format': 'format', + } def run(self, info): - """ Set extended attributes on downloaded file (if xattr support is found). """ - - # Write the metadata to the file's xattrs + mtime = os.stat(info['filepath']).st_mtime self.to_screen('Writing metadata to file\'s xattrs') - - filename = info['filepath'] - try: - xattr_mapping = { - 'user.xdg.referrer.url': 'webpage_url', - # 'user.xdg.comment': 'description', - 'user.dublincore.title': 'title', - 'user.dublincore.date': 'upload_date', - 'user.dublincore.description': 'description', - 'user.dublincore.contributor': 'uploader', - 'user.dublincore.format': 'format', - } - - num_written = 0 - for xattrname, infoname in xattr_mapping.items(): - + for xattrname, infoname in self.XATTR_MAPPING.items(): value = info.get(infoname) - if value: if infoname == 'upload_date': value = hyphenate_date(value) - - byte_value = value.encode('utf-8') - write_xattr(filename, xattrname, byte_value) - num_written += 1 - - return [], info + write_xattr(info['filepath'], xattrname, value.encode()) except XAttrUnavailableError as e: raise PostProcessingError(str(e)) - except XAttrMetadataError as e: if e.reason == 'NO_SPACE': self.report_warning( 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. ' - + (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize()) + 'Some extended attributes are not written') elif e.reason == 'VALUE_TOO_LONG': - self.report_warning( - 'Unable to write extended attributes due to too long values.') + self.report_warning('Unable to write extended attributes due to too long values.') else: - msg = 'This filesystem doesn\'t support extended attributes. ' - if compat_os_name == 'nt': - msg += 'You need to use NTFS.' 
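One detail worth calling out in the sponsorblock hunk above: as the neighbouring comment says, the API recommends querying by only the first 4 characters of the video ID's SHA-256, so the server never learns exactly which video was requested and the client filters the response locally. The prefix computation, runnable on its own:

import hashlib

def sponsorblock_hash_prefix(video_id):
    # send a 4-hex-char prefix instead of the full ID for privacy
    return hashlib.sha256(video_id.encode('ascii')).hexdigest()[:4]

print(sponsorblock_hash_prefix('dQw4w9WgXcQ'))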
- else: - msg += '(You may have to enable them in your /etc/fstab)' - raise PostProcessingError(str(e)) - return [], info + tip = ('You need to use NTFS' if compat_os_name == 'nt' + else 'You may have to enable them in your "/etc/fstab"') + raise PostProcessingError(f'This filesystem doesn\'t support extended attributes. {tip}') + + self.try_utime(info['filepath'], mtime, mtime) + return [], info diff --git a/yt_dlp/socks.py b/yt_dlp/socks.py index 5d4adbe72..34ba1394a 100644 --- a/yt_dlp/socks.py +++ b/yt_dlp/socks.py @@ -1,8 +1,5 @@ # Public Domain SOCKS proxy protocol implementation # Adapted from https://gist.github.com/bluec0re/cafd3764412967417fd3 - -from __future__ import unicode_literals - # References: # SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol # SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol @@ -12,11 +9,7 @@ from __future__ import unicode_literals import collections import socket -from .compat import ( - compat_ord, - compat_struct_pack, - compat_struct_unpack, -) +from .compat import compat_ord, compat_struct_pack, compat_struct_unpack __author__ = 'Timo Schmid <coding@timoschmid.de>' @@ -33,7 +26,7 @@ SOCKS5_USER_AUTH_VERSION = 0x01 SOCKS5_USER_AUTH_SUCCESS = 0x00 -class Socks4Command(object): +class Socks4Command: CMD_CONNECT = 0x01 CMD_BIND = 0x02 @@ -42,14 +35,14 @@ class Socks5Command(Socks4Command): CMD_UDP_ASSOCIATE = 0x03 -class Socks5Auth(object): +class Socks5Auth: AUTH_NONE = 0x00 AUTH_GSSAPI = 0x01 AUTH_USER_PASS = 0x02 AUTH_NO_ACCEPTABLE = 0xFF # For server response -class Socks5AddressType(object): +class Socks5AddressType: ATYP_IPV4 = 0x01 ATYP_DOMAINNAME = 0x03 ATYP_IPV6 = 0x04 @@ -61,14 +54,14 @@ class ProxyError(socket.error): def __init__(self, code=None, msg=None): if code is not None and msg is None: msg = self.CODES.get(code) or 'unknown error' - super(ProxyError, self).__init__(code, msg) + super().__init__(code, msg) class InvalidVersionError(ProxyError): def __init__(self, expected_version, got_version): - msg = ('Invalid response version from server. Expected {0:02x} got ' - '{1:02x}'.format(expected_version, got_version)) - super(InvalidVersionError, self).__init__(0, msg) + msg = ('Invalid response version from server. 
Expected {:02x} got ' + '{:02x}'.format(expected_version, got_version)) + super().__init__(0, msg) class Socks4Error(ProxyError): @@ -98,7 +91,7 @@ class Socks5Error(ProxyError): } -class ProxyType(object): +class ProxyType: SOCKS4 = 0 SOCKS4A = 1 SOCKS5 = 2 @@ -111,7 +104,7 @@ Proxy = collections.namedtuple('Proxy', ( class sockssocket(socket.socket): def __init__(self, *args, **kwargs): self._proxy = None - super(sockssocket, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) def setproxy(self, proxytype, addr, port, rdns=True, username=None, password=None): assert proxytype in (ProxyType.SOCKS4, ProxyType.SOCKS4A, ProxyType.SOCKS5) @@ -123,13 +116,13 @@ class sockssocket(socket.socket): while len(data) < cnt: cur = self.recv(cnt - len(data)) if not cur: - raise EOFError('{0} bytes missing'.format(cnt - len(data))) + raise EOFError(f'{cnt - len(data)} bytes missing') data += cur return data def _recv_bytes(self, cnt): data = self.recvall(cnt) - return compat_struct_unpack('!{0}B'.format(cnt), data) + return compat_struct_unpack(f'!{cnt}B', data) @staticmethod def _len_and_data(data): @@ -143,7 +136,7 @@ class sockssocket(socket.socket): def _resolve_address(self, destaddr, default, use_remote_dns): try: return socket.inet_aton(destaddr) - except socket.error: + except OSError: if use_remote_dns and self._proxy.remote_dns: return default else: @@ -156,11 +149,11 @@ class sockssocket(socket.socket): packet = compat_struct_pack('!BBH', SOCKS4_VERSION, Socks4Command.CMD_CONNECT, port) + ipaddr - username = (self._proxy.username or '').encode('utf-8') + username = (self._proxy.username or '').encode() packet += username + b'\x00' if is_4a and self._proxy.remote_dns: - packet += destaddr.encode('utf-8') + b'\x00' + packet += destaddr.encode() + b'\x00' self.sendall(packet) @@ -185,7 +178,7 @@ class sockssocket(socket.socket): auth_methods.append(Socks5Auth.AUTH_USER_PASS) packet += compat_struct_pack('!B', len(auth_methods)) - packet += compat_struct_pack('!{0}B'.format(len(auth_methods)), *auth_methods) + packet += compat_struct_pack(f'!{len(auth_methods)}B', *auth_methods) self.sendall(packet) @@ -199,8 +192,8 @@ class sockssocket(socket.socket): raise Socks5Error(Socks5Auth.AUTH_NO_ACCEPTABLE) if method == Socks5Auth.AUTH_USER_PASS: - username = self._proxy.username.encode('utf-8') - password = self._proxy.password.encode('utf-8') + username = self._proxy.username.encode() + password = self._proxy.password.encode() packet = compat_struct_pack('!B', SOCKS5_USER_AUTH_VERSION) packet += self._len_and_data(username) + self._len_and_data(password) self.sendall(packet) @@ -223,7 +216,7 @@ class sockssocket(socket.socket): reserved = 0 packet = compat_struct_pack('!BBB', SOCKS5_VERSION, Socks5Command.CMD_CONNECT, reserved) if ipaddr is None: - destaddr = destaddr.encode('utf-8') + destaddr = destaddr.encode() packet += compat_struct_pack('!B', Socks5AddressType.ATYP_DOMAINNAME) packet += self._len_and_data(destaddr) else: diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 546264b1f..8c8ea384b 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1,9 +1,4 @@ #!/usr/bin/env python3 -# coding: utf-8 - -from __future__ import unicode_literals - -import asyncio import atexit import base64 import binascii @@ -13,8 +8,8 @@ import collections import contextlib import ctypes import datetime -import email.utils import email.header +import email.utils import errno import functools import gzip @@ -26,11 +21,13 @@ import itertools import json import locale import math +import mimetypes 
import operator import os import platform import random import re +import shlex import socket import ssl import subprocess @@ -38,58 +35,37 @@ import sys import tempfile import time import traceback +import urllib.parse import xml.etree.ElementTree import zlib -import mimetypes from .compat import ( - compat_HTMLParseError, - compat_HTMLParser, - compat_HTTPError, - compat_basestring, - compat_brotli, + asyncio, compat_chr, compat_cookiejar, - compat_ctypes_WINFUNCTYPE, compat_etree_fromstring, compat_expanduser, compat_html_entities, compat_html_entities_html5, + compat_HTMLParseError, + compat_HTMLParser, compat_http_client, - compat_integer_types, - compat_numeric_types, - compat_kwargs, + compat_HTTPError, compat_os_name, compat_parse_qs, - compat_shlex_split, compat_shlex_quote, compat_str, compat_struct_pack, compat_struct_unpack, compat_urllib_error, - compat_urllib_parse, + compat_urllib_parse_unquote_plus, compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, - compat_urllib_parse_urlunparse, - compat_urllib_parse_quote, - compat_urllib_parse_quote_plus, - compat_urllib_parse_unquote_plus, compat_urllib_request, compat_urlparse, - compat_websockets, - compat_xpath, -) - -from .socks import ( - ProxyType, - sockssocket, ) - -try: - import certifi - has_certifi = True -except ImportError: - has_certifi = False +from .dependencies import brotli, certifi, websockets +from .socks import ProxyType, sockssocket def register_socks_protocols(): @@ -153,7 +129,7 @@ def random_user_agent(): SUPPORTED_ENCODINGS = [ 'gzip', 'deflate' ] -if compat_brotli: +if brotli: SUPPORTED_ENCODINGS.append('br') std_headers = { @@ -269,6 +245,8 @@ DATE_FORMATS_MONTH_FIRST.extend([ PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>' +NUMBER_RE = r'\d+(?:\.\d+)?' + def preferredencoding(): """Get preferred encoding. @@ -288,37 +266,9 @@ def preferredencoding(): def write_json_file(obj, fn): """ Encode obj as JSON and write it to fn, atomically if possible """ - fn = encodeFilename(fn) - if sys.version_info < (3, 0) and sys.platform != 'win32': - encoding = get_filesystem_encoding() - # os.path.basename returns a bytes object, but NamedTemporaryFile - # will fail if the filename contains non ascii characters unless we - # use a unicode object - path_basename = lambda f: os.path.basename(fn).decode(encoding) - # the same for os.path.dirname - path_dirname = lambda f: os.path.dirname(fn).decode(encoding) - else: - path_basename = os.path.basename - path_dirname = os.path.dirname - - args = { - 'suffix': '.tmp', - 'prefix': path_basename(fn) + '.', - 'dir': path_dirname(fn), - 'delete': False, - } - - # In Python 2.x, json.dump expects a bytestream. - # In Python 3.x, it writes to a character stream - if sys.version_info < (3, 0): - args['mode'] = 'wb' - else: - args.update({ - 'mode': 'w', - 'encoding': 'utf-8', - }) - - tf = tempfile.NamedTemporaryFile(**compat_kwargs(args)) + tf = tempfile.NamedTemporaryFile( + prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn), + suffix='.tmp', delete=False, mode='w', encoding='utf-8') try: with tf: @@ -326,39 +276,24 @@ def write_json_file(obj, fn): if sys.platform == 'win32': # Need to remove existing file on Windows, else os.rename raises # WindowsError or FileExistsError. 
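The write_json_file rewrite above collapses the old Python 2 branches into a single NamedTemporaryFile call. For reference, a minimal standalone sketch of the same atomic-write pattern (the helper name is invented for illustration; os.replace is used here, which overwrites atomically on POSIX and also overwrites on Windows):

import json
import os
import tempfile

def atomic_write_json(obj, fn):
    # Create the temporary file in the destination directory so the
    # final rename cannot cross a filesystem boundary.
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(fn) or '.', suffix='.tmp')
    try:
        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            json.dump(obj, f)
        os.replace(tmp, fn)  # atomic rename over any existing file
    except Exception:
        os.unlink(tmp)
        raise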
- try: + with contextlib.suppress(OSError): os.unlink(fn) - except OSError: - pass - try: + with contextlib.suppress(OSError): mask = os.umask(0) os.umask(mask) os.chmod(tf.name, 0o666 & ~mask) - except OSError: - pass os.rename(tf.name, fn) except Exception: - try: + with contextlib.suppress(OSError): os.remove(tf.name) - except OSError: - pass raise -if sys.version_info >= (2, 7): - def find_xpath_attr(node, xpath, key, val=None): - """ Find the xpath xpath[@key=val] """ - assert re.match(r'^[a-zA-Z_-]+$', key) - expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val)) - return node.find(expr) -else: - def find_xpath_attr(node, xpath, key, val=None): - for f in node.findall(compat_xpath(xpath)): - if key not in f.attrib: - continue - if val is None or f.attrib.get(key) == val: - return f - return None +def find_xpath_attr(node, xpath, key, val=None): + """ Find the xpath xpath[@key=val] """ + assert re.match(r'^[a-zA-Z_-]+$', key) + expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']") + return node.find(expr) # On python2.6 the xml.etree.ElementTree.Element methods don't support # the namespace parameter @@ -378,7 +313,7 @@ def xpath_with_ns(path, ns_map): def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT): def _find_xpath(xpath): - return node.find(compat_xpath(xpath)) + return node.find(xpath) if isinstance(xpath, (str, compat_str)): n = _find_xpath(xpath) @@ -420,7 +355,7 @@ def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT): if default is not NO_DEFAULT: return default elif fatal: - name = '%s[@%s]' % (xpath, key) if name is None else name + name = f'{xpath}[@{key}]' if name is None else name raise ExtractorError('Could not find XML attribute %s' % name) else: return None @@ -489,15 +424,15 @@ def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value attribute in the passed HTML document """ - value_quote_optional = '' if re.match(r'''[\s"'`=<>]''', value) else '?' + quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?' value = re.escape(value) if escape_value else value - partial_element_re = r'''(?x) + partial_element_re = rf'''(?x) <(?P<tag>[a-zA-Z0-9:._-]+) (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)? - \s%(attribute)s\s*=\s*(?P<_q>['"]%(vqo)s)(?-x:%(value)s)(?P=_q) - ''' % {'attribute': re.escape(attribute), 'value': value, 'vqo': value_quote_optional} + \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q) + ''' for m in re.finditer(partial_element_re, html): content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():]) @@ -626,16 +561,11 @@ def extract_attributes(html_element): 'empty': '', 'noval': None, 'entity': '&', 'sq': '"', 'dq': '\'' }. - NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions, - but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5. 
""" parser = HTMLAttributeParser() - try: + with contextlib.suppress(compat_HTMLParseError): parser.feed(html_element) parser.close() - # Older Python may throw HTMLParseError in case of malformed HTML - except compat_HTMLParseError: - pass return parser.attrs @@ -692,7 +622,7 @@ def sanitize_open(filename, open_mode): except LockingUnsupportedError: stream = open(filename, open_mode) return (stream, filename) - except (IOError, OSError) as err: + except OSError as err: if attempt or err.errno in (errno.EACCES,): raise old_filename, filename = filename, sanitize_path(filename) @@ -763,8 +693,6 @@ def sanitize_path(s, force=False): if sys.platform == 'win32': force = False drive_or_unc, _ = os.path.splitdrive(s) - if sys.version_info < (2, 7) and not drive_or_unc: - drive_or_unc, _ = os.path.splitunc(s) elif force: drive_or_unc = '' else: @@ -809,8 +737,8 @@ def extract_basic_auth(url): parts.hostname if parts.port is None else '%s:%d' % (parts.hostname, parts.port)))) auth_payload = base64.b64encode( - ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8')) - return url, 'Basic ' + auth_payload.decode('utf-8') + ('%s:%s' % (parts.username, parts.password or '')).encode()) + return url, f'Basic {auth_payload.decode()}' def sanitized_Request(url, *args, **kwargs): @@ -857,10 +785,8 @@ def _htmlentity_transform(entity_with_semicolon): else: base = 10 # See https://github.com/ytdl-org/youtube-dl/issues/7518 - try: + with contextlib.suppress(ValueError): return compat_chr(int(numstr, base)) - except ValueError: - pass # Unknown entity in name, return its literal representation return '&%s;' % entity @@ -869,7 +795,7 @@ def _htmlentity_transform(entity_with_semicolon): def unescapeHTML(s): if s is None: return None - assert type(s) == compat_str + assert isinstance(s, str) return re.sub( r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s) @@ -903,7 +829,7 @@ class Popen(subprocess.Popen): _startupinfo = None def __init__(self, *args, **kwargs): - super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo) + super().__init__(*args, **kwargs, startupinfo=self._startupinfo) def communicate_or_kill(self, *args, **kwargs): return process_communicate_or_kill(self, *args, **kwargs) @@ -922,51 +848,23 @@ def get_subprocess_encoding(): def encodeFilename(s, for_subprocess=False): - """ - @param s The name of the file - """ - - assert type(s) == compat_str - - # Python 3 has a Unicode API - if sys.version_info >= (3, 0): - return s - - # Pass '' directly to use Unicode APIs on Windows 2000 and up - # (Detecting Windows NT 4 is tricky because 'major >= 4' would - # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
- if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5: - return s - - # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible - if sys.platform.startswith('java'): - return s - - return s.encode(get_subprocess_encoding(), 'ignore') + assert isinstance(s, str) + return s def decodeFilename(b, for_subprocess=False): - - if sys.version_info >= (3, 0): - return b - - if not isinstance(b, bytes): - return b - - return b.decode(get_subprocess_encoding(), 'ignore') + return b def encodeArgument(s): - if not isinstance(s, compat_str): - # Legacy code that uses byte strings - # Uncomment the following line after fixing all post processors - # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) - s = s.decode('ascii') - return encodeFilename(s, True) + # Legacy code that uses byte strings + # Uncomment the following line after fixing all post processors + # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s)) + return s if isinstance(s, str) else s.decode('ascii') def decodeArgument(b): - return decodeFilename(b, True) + return b def decodeOption(optval): @@ -1009,10 +907,8 @@ def _ssl_load_windows_store_certs(ssl_context, storename): except PermissionError: return for cert in certs: - try: + with contextlib.suppress(ssl.SSLError): ssl_context.load_verify_locations(cadata=cert) - except ssl.SSLError: - pass def make_HTTPS_handler(params, **kwargs): @@ -1021,6 +917,8 @@ def make_HTTPS_handler(params, **kwargs): context.check_hostname = opts_check_certificate if params.get('legacyserverconnect'): context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT + # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998 + context.set_ciphers('DEFAULT') context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE if opts_check_certificate: if has_certifi and 'no-certifi' not in params.get('compat_opts', []): @@ -1034,12 +932,17 @@ def make_HTTPS_handler(params, **kwargs): except ssl.SSLError: # enum_certificates is not present in mingw python. 
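The set_ciphers('DEFAULT') call above relaxes the cipher list that Python 3.10 tightened for new contexts, and certifi's CA bundle is preferred when installed. A rough standalone equivalent (create_default_context stands in for the handler's own context setup; certifi is an optional third-party package):

import ssl

try:
    import certifi
except ImportError:
    certifi = None

context = ssl.create_default_context()
# Python 3.10+ restricts ciphers per https://bugs.python.org/issue43998;
# 'DEFAULT' restores the interpreter-wide default so older servers keep working.
context.set_ciphers('DEFAULT')
if certifi is not None:
    # Use Mozilla's CA bundle instead of relying on the system store
    context.load_verify_locations(cafile=certifi.where())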
See https://github.com/yt-dlp/yt-dlp/issues/1151 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'): - # Create a new context to discard any certificates that were already loaded - context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) - context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED for storename in ('CA', 'ROOT'): _ssl_load_windows_store_certs(context, storename) context.set_default_verify_paths() + client_certfile = params.get('client_certificate') + if client_certfile: + try: + context.load_cert_chain( + client_certfile, keyfile=params.get('client_certificate_key'), + password=params.get('client_certificate_password')) + except ssl.SSLError: + raise YoutubeDLError('Unable to load client certificate') return YoutubeDLHTTPSHandler(params, context=context, **kwargs) @@ -1091,7 +994,7 @@ class ExtractorError(YoutubeDLError): self.ie = ie self.exc_info = sys.exc_info() # preserve original exception - super(ExtractorError, self).__init__(''.join(( + super().__init__(''.join(( format_field(ie, template='[%s] '), format_field(video_id, template='%s: '), msg, @@ -1107,7 +1010,7 @@ class ExtractorError(YoutubeDLError): class UnsupportedError(ExtractorError): def __init__(self, url): - super(UnsupportedError, self).__init__( + super().__init__( 'Unsupported URL: %s' % url, expected=True) self.url = url @@ -1126,7 +1029,7 @@ class GeoRestrictedError(ExtractorError): def __init__(self, msg, countries=None, **kwargs): kwargs['expected'] = True - super(GeoRestrictedError, self).__init__(msg, **kwargs) + super().__init__(msg, **kwargs) self.countries = countries @@ -1140,7 +1043,7 @@ class DownloadError(YoutubeDLError): def __init__(self, msg, exc_info=None): """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). 
""" - super(DownloadError, self).__init__(msg) + super().__init__(msg) self.exc_info = exc_info @@ -1234,9 +1137,7 @@ class ContentTooShortError(YoutubeDLError): """ def __init__(self, downloaded, expected): - super(ContentTooShortError, self).__init__( - 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected) - ) + super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes') # Both in bytes self.downloaded = downloaded self.expected = expected @@ -1244,7 +1145,7 @@ class ContentTooShortError(YoutubeDLError): class XAttrMetadataError(YoutubeDLError): def __init__(self, code=None, msg='Unknown error'): - super(XAttrMetadataError, self).__init__(msg) + super().__init__(msg) self.code = code self.msg = msg @@ -1263,12 +1164,7 @@ class XAttrUnavailableError(YoutubeDLError): def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): - # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting - # expected HTTP responses to meet HTTP/1.0 or later (see also - # https://github.com/ytdl-org/youtube-dl/issues/6727) - if sys.version_info < (3, 0): - kwargs['strict'] = True - hc = http_class(*args, **compat_kwargs(kwargs)) + hc = http_class(*args, **kwargs) source_address = ydl_handler._params.get('source_address') if source_address is not None: @@ -1285,7 +1181,7 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): ip_addrs = [addr for addr in addrs if addr[0] == af] if addrs and not ip_addrs: ip_version = 'v4' if af == socket.AF_INET else 'v6' - raise socket.error( + raise OSError( "No remote IP%s addresses available for connect, can't use '%s' as source address" % (ip_version, source_address[0])) for res in ip_addrs: @@ -1299,30 +1195,17 @@ def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): sock.connect(sa) err = None # Explicitly break reference cycle return sock - except socket.error as _: + except OSError as _: err = _ if sock is not None: sock.close() if err is not None: raise err else: - raise socket.error('getaddrinfo returns an empty list') + raise OSError('getaddrinfo returns an empty list') if hasattr(hc, '_create_connection'): hc._create_connection = _create_connection - sa = (source_address, 0) - if hasattr(hc, 'source_address'): # Python 2.7+ - hc.source_address = sa - else: # Python 2.6 - def _hc_connect(self, *args, **kwargs): - sock = _create_connection( - (self.host, self.port), self.timeout, sa) - if is_https: - self.sock = ssl.wrap_socket( - sock, self.key_file, self.cert_file, - ssl_version=ssl.PROTOCOL_TLSv1) - else: - self.sock = sock - hc.connect = functools.partial(_hc_connect, hc) + hc.source_address = (source_address, 0) return hc @@ -1331,7 +1214,7 @@ def handle_youtubedl_headers(headers): filtered_headers = headers if 'Youtubedl-no-compression' in filtered_headers: - filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding') + filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'} del filtered_headers['Youtubedl-no-compression'] return filtered_headers @@ -1384,7 +1267,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): def brotli(data): if not data: return data - return compat_brotli.decompress(data) + return brotli.decompress(data) def http_request(self, req): # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not @@ -1413,11 +1296,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): 
req.headers = handle_youtubedl_headers(req.headers) - if sys.version_info < (2, 7) and '#' in req.get_full_url(): - # Python 2.6 is brain-dead when it comes to fragments - req._Request__original = req._Request__original.partition('#')[0] - req._Request__r_type = req._Request__r_type.partition('#')[0] - return req def http_response(self, req, resp): @@ -1428,14 +1306,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb') try: uncompressed = io.BytesIO(gz.read()) - except IOError as original_ioerror: + except OSError as original_ioerror: # There may be junk add the end of the file # See http://stackoverflow.com/q/4928560/35070 for details for i in range(1, 1024): try: gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb') uncompressed = io.BytesIO(gz.read()) - except IOError: + except OSError: continue break else: @@ -1461,15 +1339,10 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): location = resp.headers.get('Location') if location: # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 - if sys.version_info >= (3, 0): - location = location.encode('iso-8859-1').decode('utf-8') - else: - location = location.decode('utf-8') + location = location.encode('iso-8859-1').decode() location_escaped = escape_url(location) if location != location_escaped: del resp.headers['Location'] - if sys.version_info < (3, 0): - location_escaped = location_escaped.encode('utf-8') resp.headers['Location'] = location_escaped return resp @@ -1506,7 +1379,7 @@ def make_socks_conn_class(base_class, socks_proxy): def connect(self): self.sock = sockssocket() self.sock.setproxy(*proxy_args) - if type(self.timeout) in (int, float): + if isinstance(self.timeout, (int, float)): self.sock.settimeout(self.timeout) self.sock.connect((self.host, self.port)) @@ -1540,9 +1413,14 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): conn_class = make_socks_conn_class(conn_class, socks_proxy) del req.headers['Ytdl-socks-proxy'] - return self.do_open(functools.partial( - _create_http_connection, self, conn_class, True), - req, **kwargs) + try: + return self.do_open( + functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs) + except urllib.error.URLError as e: + if (isinstance(e.reason, ssl.SSLError) + and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'): + raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect') + raise class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): @@ -1561,57 +1439,71 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): 'CookieFileEntry', ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) - def save(self, filename=None, ignore_discard=False, ignore_expires=False): + def __init__(self, filename=None, *args, **kwargs): + super().__init__(None, *args, **kwargs) + if self.is_path(filename): + filename = os.fspath(filename) + self.filename = filename + + @staticmethod + def _true_or_false(cndn): + return 'TRUE' if cndn else 'FALSE' + + @staticmethod + def is_path(file): + return isinstance(file, (str, bytes, os.PathLike)) + + @contextlib.contextmanager + def open(self, file, *, write=False): + if self.is_path(file): + with open(file, 'w' if write else 'r', encoding='utf-8') as f: + yield f + else: + if write: + file.truncate(0) + yield file + + def _really_save(self, f, ignore_discard=False, ignore_expires=False): + now = time.time() + for cookie in 
self: + if (not ignore_discard and cookie.discard + or not ignore_expires and cookie.is_expired(now)): + continue + name, value = cookie.name, cookie.value + if value is None: + # cookies.txt regards 'Set-Cookie: foo' as a cookie + # with no name, whereas http.cookiejar regards it as a + # cookie with no value. + name, value = '', name + f.write('%s\n' % '\t'.join(( + cookie.domain, + self._true_or_false(cookie.domain.startswith('.')), + cookie.path, + self._true_or_false(cookie.secure), + str_or_none(cookie.expires, default=''), + name, value + ))) + + def save(self, filename=None, *args, **kwargs): """ Save cookies to a file. + Code is taken from CPython 3.6 + https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """ - Most of the code is taken from CPython 3.8 and slightly adapted - to support cookie files with UTF-8 in both python 2 and 3. - """ if filename is None: if self.filename is not None: filename = self.filename else: raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) - # Store session cookies with `expires` set to 0 instead of an empty - # string + # Store session cookies with `expires` set to 0 instead of an empty string for cookie in self: if cookie.expires is None: cookie.expires = 0 - with io.open(filename, 'w', encoding='utf-8') as f: + with self.open(filename, write=True) as f: f.write(self._HEADER) - now = time.time() - for cookie in self: - if not ignore_discard and cookie.discard: - continue - if not ignore_expires and cookie.is_expired(now): - continue - if cookie.secure: - secure = 'TRUE' - else: - secure = 'FALSE' - if cookie.domain.startswith('.'): - initial_dot = 'TRUE' - else: - initial_dot = 'FALSE' - if cookie.expires is not None: - expires = compat_str(cookie.expires) - else: - expires = '' - if cookie.value is None: - # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas http.cookiejar regards it as a - # cookie with no value. - name = '' - value = cookie.name - else: - name = cookie.name - value = cookie.value - f.write( - '\t'.join([cookie.domain, initial_dot, cookie.path, - secure, expires, name, value]) + '\n') + self._really_save(f, *args, **kwargs) def load(self, filename=None, ignore_discard=False, ignore_expires=False): """Load cookies from a file.""" @@ -1636,14 +1528,16 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): return line cf = io.StringIO() - with io.open(filename, encoding='utf-8') as f: + with self.open(filename) as f: for line in f: try: cf.write(prepare_line(line)) except compat_cookiejar.LoadError as e: - write_string( - 'WARNING: skipping cookie file entry due to %s: %r\n' - % (e, line), sys.stderr) + if f'{line.strip()} '[0] in '[{"': + raise compat_cookiejar.LoadError( + 'Cookies file must be Netscape formatted, not JSON. See ' + 'https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl') + write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n') continue cf.seek(0) self._really_load(cf, filename, ignore_discard, ignore_expires) @@ -1668,19 +1562,6 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor): compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar) def http_response(self, request, response): - # Python 2 will choke on next HTTP request in row if there are non-ASCII - # characters in Set-Cookie HTTP header of last response (see - # https://github.com/ytdl-org/youtube-dl/issues/6769). 
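_really_save above writes the classic Netscape cookies.txt layout: seven tab-separated fields in the order domain, include_subdomains, path, https_only, expires_at, name, value. A toy writer for a single record mirroring that field order (standalone, not the class method itself):

import time

def cookie_line(domain, path, secure, expires, name, value):
    return '\t'.join((
        domain,
        'TRUE' if domain.startswith('.') else 'FALSE',  # include_subdomains
        path,
        'TRUE' if secure else 'FALSE',                  # https_only
        str(expires),
        name,
        value,
    ))

print('# Netscape HTTP Cookie File')
print(cookie_line('.example.com', '/', True, int(time.time()) + 3600, 'sid', 'abc123'))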
- # In order to at least prevent crashing we will percent encode Set-Cookie - # header before HTTPCookieProcessor starts processing it. - # if sys.version_info < (3, 0) and response.headers: - # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'): - # set_cookie = response.headers.get(set_cookie_header) - # if set_cookie: - # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ") - # if set_cookie != set_cookie_escaped: - # del response.headers[set_cookie_header] - # response.headers[set_cookie_header] = set_cookie_escaped return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response) https_request = compat_urllib_request.HTTPCookieProcessor.http_request @@ -1724,12 +1605,6 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): # essentially all clients do redirect in this case, so we do # the same. - # On python 2 urlh.geturl() may sometimes return redirect URL - # as byte string instead of unicode. This workaround allows - # to force it always return unicode. - if sys.version_info[0] < 3: - newurl = compat_str(newurl) - # Be conciliant with URIs containing a space. This is mainly # redundant with the more complete encoding done in http_error_302(), # but it is kept for compatibility with other callers. @@ -1737,11 +1612,22 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): CONTENT_HEADERS = ("content-length", "content-type") # NB: don't use dict comprehension for python 2.6 compatibility - newheaders = dict((k, v) for k, v in req.headers.items() - if k.lower() not in CONTENT_HEADERS) + newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS} + + # A 303 must either use GET or HEAD for subsequent request + # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4 + if code == 303 and m != 'HEAD': + m = 'GET' + # 301 and 302 redirects are commonly turned into a GET from a POST + # for subsequent requests by browsers, so we'll do the same. 
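The redirect rule being introduced here (and completed just below) is compact enough to state as a pure function; this mirrors the hunk's logic exactly, outside the handler:

def redirect_method(method, code):
    """Return the HTTP method to use after following a redirect."""
    # RFC 7231 6.4.4: a 303 means "fetch the result with GET" (HEAD stays HEAD)
    if code == 303 and method != 'HEAD':
        return 'GET'
    # Browsers historically rewrite POST to GET on 301/302, so do the same
    if code in (301, 302) and method == 'POST':
        return 'GET'
    return method

assert redirect_method('POST', 303) == 'GET'
assert redirect_method('POST', 302) == 'GET'
assert redirect_method('PUT', 301) == 'PUT'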
+ # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2 + # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3 + if code in (301, 302) and m == 'POST': + m = 'GET' + return compat_urllib_request.Request( newurl, headers=newheaders, origin_req_host=req.origin_req_host, - unverifiable=True) + unverifiable=True, method=m) def extract_timezone(date_str): @@ -1781,12 +1667,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): if timezone is None: timezone, date_str = extract_timezone(date_str) - try: - date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) + with contextlib.suppress(ValueError): + date_format = f'%Y-%m-%d{delimiter}%H:%M:%S' dt = datetime.datetime.strptime(date_str, date_format) - timezone return calendar.timegm(dt.timetuple()) - except ValueError: - pass def date_formats(day_first=True): @@ -1806,17 +1690,13 @@ def unified_strdate(date_str, day_first=True): _, date_str = extract_timezone(date_str) for expression in date_formats(day_first): - try: + with contextlib.suppress(ValueError): upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') - except ValueError: - pass if upload_date is None: timetuple = email.utils.parsedate_tz(date_str) if timetuple: - try: + with contextlib.suppress(ValueError): upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') - except ValueError: - pass if upload_date is not None: return compat_str(upload_date) @@ -1844,11 +1724,9 @@ def unified_timestamp(date_str, day_first=True): date_str = m.group(1) for expression in date_formats(day_first): - try: + with contextlib.suppress(ValueError): dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta) return calendar.timegm(dt.timetuple()) - except ValueError: - pass timetuple = email.utils.parsedate_tz(date_str) if timetuple: return calendar.timegm(timetuple) + pm_delta * 3600 @@ -1872,14 +1750,14 @@ def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None): def datetime_from_str(date_str, precision='auto', format='%Y%m%d'): - """ - Return a datetime object from a string in the format YYYYMMDD or - (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)? - - format: string date format used to return datetime object from - precision: round the time portion of a datetime object. - auto|microsecond|second|minute|hour|day. - auto: round to the unit provided in date_str (if applicable). + R""" + Return a datetime object from a string. + Supported format: + (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)? + + @param format strftime format of DATE + @param precision Round the datetime object: auto|microsecond|second|minute|hour|day + auto: round to the unit provided in date_str (if applicable). 
""" auto_precision = False if precision == 'auto': @@ -1891,7 +1769,7 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'): if date_str == 'yesterday': return today - datetime.timedelta(days=1) match = re.match( - r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?', + r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?', date_str) if match is not None: start_time = datetime_from_str(match.group('start'), precision, format) @@ -1914,16 +1792,14 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'): def date_from_str(date_str, format='%Y%m%d', strict=False): - """ - Return a datetime object from a string in the format YYYYMMDD or - (now|today|yesterday|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)? - - If "strict", only (now|today)[+-][0-9](day|week|month|year)(s)? is allowed + R""" + Return a date object from a string using datetime_from_str - format: string date format used to return datetime object from + @param strict Restrict allowed patterns to "YYYYMMDD" and + (now|today|yesterday)(-\d+(day|week|month|year)s?)? """ - if strict and not re.fullmatch(r'\d{8}|(now|today)[+-]\d+(day|week|month|year)(s)?', date_str): - raise ValueError(f'Invalid date format {date_str}') + if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str): + raise ValueError(f'Invalid date format "{date_str}"') return datetime_from_str(date_str, precision='microsecond', format=format).date() @@ -1964,7 +1840,7 @@ def hyphenate_date(date_str): return date_str -class DateRange(object): +class DateRange: """Represents a time interval between two dates""" def __init__(self, start=None, end=None): @@ -1992,7 +1868,7 @@ class DateRange(object): return self.start <= date <= self.end def __str__(self): - return '%s - %s' % (self.start.isoformat(), self.end.isoformat()) + return f'{self.start.isoformat()} - {self.end.isoformat()}' def platform_name(): @@ -2013,91 +1889,14 @@ def get_windows_version(): return None -def _windows_write_string(s, out): - """ Returns True if the string was written using special methods, - False if it has yet to be written out.""" - # Adapted from http://stackoverflow.com/a/3259271/35070 - - import ctypes.wintypes - - WIN_OUTPUT_IDS = { - 1: -11, - 2: -12, - } - - try: - fileno = out.fileno() - except AttributeError: - # If the output stream doesn't have a fileno, it's virtual - return False - except io.UnsupportedOperation: - # Some strange Windows pseudo files? 
- return False - if fileno not in WIN_OUTPUT_IDS: - return False - - GetStdHandle = compat_ctypes_WINFUNCTYPE( - ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)( - ('GetStdHandle', ctypes.windll.kernel32)) - h = GetStdHandle(WIN_OUTPUT_IDS[fileno]) - - WriteConsoleW = compat_ctypes_WINFUNCTYPE( - ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR, - ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD), - ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32)) - written = ctypes.wintypes.DWORD(0) - - GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32)) - FILE_TYPE_CHAR = 0x0002 - FILE_TYPE_REMOTE = 0x8000 - GetConsoleMode = compat_ctypes_WINFUNCTYPE( - ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, - ctypes.POINTER(ctypes.wintypes.DWORD))( - ('GetConsoleMode', ctypes.windll.kernel32)) - INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value - - def not_a_console(handle): - if handle == INVALID_HANDLE_VALUE or handle is None: - return True - return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR - or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) - - if not_a_console(h): - return False - - def next_nonbmp_pos(s): - try: - return next(i for i, c in enumerate(s) if ord(c) > 0xffff) - except StopIteration: - return len(s) - - while s: - count = min(next_nonbmp_pos(s), 1024) - - ret = WriteConsoleW( - h, s, count if count else 2, ctypes.byref(written), None) - if ret == 0: - raise OSError('Failed to write string') - if not count: # We just wrote a non-BMP character - assert written.value == 2 - s = s[1:] - else: - assert written.value > 0 - s = s[written.value:] - return True - - def write_string(s, out=None, encoding=None): - if out is None: - out = sys.stderr - assert type(s) == compat_str + assert isinstance(s, str) + out = out or sys.stderr - if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'): - if _windows_write_string(s, out): - return + if compat_os_name == 'nt' and supports_terminal_sequences(out): + s = re.sub(r'([\r\n]+)', r' \1', s) - if ('b' in getattr(out, 'mode', '') - or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr + if 'b' in getattr(out, 'mode', ''): byt = s.encode(encoding or preferredencoding(), 'ignore') out.write(byt) elif hasattr(out, 'buffer'): @@ -2216,7 +2015,7 @@ else: raise LockingUnsupportedError() -class locked_file(object): +class locked_file: locked = False def __init__(self, filename, mode, block=True, encoding=None): @@ -2243,11 +2042,15 @@ class locked_file(object): try: _lock_file(self.f, exclusive, self.block) self.locked = True - except IOError: + except OSError: self.f.close() raise if 'w' in self.mode: - self.f.truncate() + try: + self.f.truncate() + except OSError as e: + if e.errno != 29: # Illegal seek, expected when self.f is a FIFO + raise e return self def unlock(self): @@ -2517,7 +2320,7 @@ def setproctitle(title): # a bytestring, but since unicode_literals turns # every string into a unicode string, it fails. 
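The truncate() guard added to locked_file above special-cases errno 29, which is ESPIPE ('Illegal seek'): truncation fails harmlessly when the output "file" is really a FIFO. The same check spelled via the errno module (a sketch; the hunk compares the bare number):

import errno

def safe_truncate(f):
    try:
        f.truncate()
    except OSError as e:
        # A FIFO (named pipe) is not seekable, so truncate() raises ESPIPE;
        # that is expected and safe to ignore when streaming to a pipe.
        if e.errno != errno.ESPIPE:
            raise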
return - title_bytes = title.encode('utf-8') + title_bytes = title.encode() buf = ctypes.create_string_buffer(len(title_bytes)) buf.value = title_bytes try: @@ -2559,13 +2362,13 @@ def base_url(url): def urljoin(base, path): if isinstance(path, bytes): - path = path.decode('utf-8') + path = path.decode() if not isinstance(path, compat_str) or not path: return None if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path): return path if isinstance(base, bytes): - base = base.decode('utf-8') + base = base.decode() if not isinstance(base, compat_str) or not re.match( r'^(?:https?:)?//', base): return None @@ -2597,7 +2400,7 @@ def str_or_none(v, default=None): def str_to_int(int_str): """ A more relaxed version of int_or_none """ - if isinstance(int_str, compat_integer_types): + if isinstance(int_str, int): return int_str elif isinstance(int_str, compat_str): int_str = re.sub(r'[,\.\+]', '', int_str) @@ -2638,7 +2441,7 @@ def request_to_url(req): def strftime_or_none(timestamp, date_format, default=None): datetime_object = None try: - if isinstance(timestamp, compat_numeric_types): # unix timestamp + if isinstance(timestamp, (int, float)): # unix timestamp datetime_object = datetime.datetime.utcfromtimestamp(timestamp) elif isinstance(timestamp, compat_str): # assume YYYYMMDD datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d') @@ -2648,7 +2451,7 @@ def strftime_or_none(timestamp, date_format, default=None): def parse_duration(s): - if not isinstance(s, compat_basestring): + if not isinstance(s, str): return None s = s.strip() if not s: @@ -2697,31 +2500,23 @@ def parse_duration(s): else: return None - duration = 0 - if secs: - duration += float(secs) - if mins: - duration += float(mins) * 60 - if hours: - duration += float(hours) * 60 * 60 - if days: - duration += float(days) * 24 * 60 * 60 if ms: - duration += float(ms.replace(':', '.')) - return duration + ms = ms.replace(':', '.') + return sum(float(part or 0) * mult for part, mult in ( + (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1))) def prepend_extension(filename, ext, expected_real_ext=None): name, real_ext = os.path.splitext(filename) return ( - '{0}.{1}{2}'.format(name, ext, real_ext) + f'{name}.{ext}{real_ext}' if not expected_real_ext or real_ext[1:] == expected_real_ext - else '{0}.{1}'.format(filename, ext)) + else f'{filename}.{ext}') def replace_extension(filename, ext, expected_real_ext=None): name, real_ext = os.path.splitext(filename) - return '{0}.{1}'.format( + return '{}.{}'.format( name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename, ext) @@ -2773,49 +2568,48 @@ def get_exe_version(exe, args=['--version'], class LazyList(collections.abc.Sequence): - ''' Lazy immutable list from an iterable - Note that slices of a LazyList are lists and not LazyList''' + """Lazy immutable list from an iterable + Note that slices of a LazyList are lists and not LazyList""" class IndexError(IndexError): pass def __init__(self, iterable, *, reverse=False, _cache=None): - self.__iterable = iter(iterable) - self.__cache = [] if _cache is None else _cache - self.__reversed = reverse + self._iterable = iter(iterable) + self._cache = [] if _cache is None else _cache + self._reversed = reverse def __iter__(self): - if self.__reversed: + if self._reversed: # We need to consume the entire iterable to iterate in reverse yield from self.exhaust() return - yield from self.__cache - for item in self.__iterable: - self.__cache.append(item) + yield from self._cache + for item in self._iterable: + 
self._cache.append(item) yield item - def __exhaust(self): - self.__cache.extend(self.__iterable) - # Discard the emptied iterable to make it pickle-able - self.__iterable = [] - return self.__cache + def _exhaust(self): + self._cache.extend(self._iterable) + self._iterable = [] # Discard the emptied iterable to make it pickle-able + return self._cache def exhaust(self): - ''' Evaluate the entire iterable ''' - return self.__exhaust()[::-1 if self.__reversed else 1] + """Evaluate the entire iterable""" + return self._exhaust()[::-1 if self._reversed else 1] @staticmethod - def __reverse_index(x): + def _reverse_index(x): return None if x is None else -(x + 1) def __getitem__(self, idx): if isinstance(idx, slice): - if self.__reversed: - idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1)) + if self._reversed: + idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1)) start, stop, step = idx.start, idx.stop, idx.step or 1 elif isinstance(idx, int): - if self.__reversed: - idx = self.__reverse_index(idx) + if self._reversed: + idx = self._reverse_index(idx) start, stop, step = idx, idx, 0 else: raise TypeError('indices must be integers or slices') @@ -2824,35 +2618,35 @@ class LazyList(collections.abc.Sequence): or (stop is None and step > 0)): # We need to consume the entire iterable to be able to slice from the end # Obviously, never use this with infinite iterables - self.__exhaust() + self._exhaust() try: - return self.__cache[idx] + return self._cache[idx] except IndexError as e: raise self.IndexError(e) from e - n = max(start or 0, stop or 0) - len(self.__cache) + 1 + n = max(start or 0, stop or 0) - len(self._cache) + 1 if n > 0: - self.__cache.extend(itertools.islice(self.__iterable, n)) + self._cache.extend(itertools.islice(self._iterable, n)) try: - return self.__cache[idx] + return self._cache[idx] except IndexError as e: raise self.IndexError(e) from e def __bool__(self): try: - self[-1] if self.__reversed else self[0] + self[-1] if self._reversed else self[0] except self.IndexError: return False return True def __len__(self): - self.__exhaust() - return len(self.__cache) + self._exhaust() + return len(self._cache) def __reversed__(self): - return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache) + return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache) def __copy__(self): - return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache) + return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache) def __repr__(self): # repr and str should mimic a list. 
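The LazyList changes above only de-mangle the double-underscore attributes to single-underscore ones (so pickling and subclassing behave); the semantics are unchanged. Its contract in use, assuming yt_dlp is importable:

from yt_dlp.utils import LazyList

def gen():
    for i in range(5):
        print('producing', i)
        yield i

items = LazyList(gen())
print(items[1])    # consumes and caches 0 and 1
print(items[0])    # served from the cache; nothing new is produced
print(items[2:4])  # per the docstring, slices are plain lists, not LazyLists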
So we exhaust the iterable @@ -2904,6 +2698,7 @@ class PagedList: class OnDemandPagedList(PagedList): """Download pages until a page with less than maximum results""" + def _getslice(self, start, end): for pagenum in itertools.count(start // self._pagesize): firstid = pagenum * self._pagesize @@ -2944,6 +2739,7 @@ class OnDemandPagedList(PagedList): class InAdvancePagedList(PagedList): """PagedList with total number of pages known in advance""" + def __init__(self, pagefunc, pagecount, pagesize): PagedList.__init__(self, pagefunc, pagesize, True) self._pagecount = pagecount @@ -2985,9 +2781,7 @@ def lowercase_escape(s): def escape_rfc3986(s): """Escape non-ASCII characters as suggested by RFC 3986""" - if sys.version_info < (3, 0) and isinstance(s, compat_str): - s = s.encode('utf-8') - return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") + return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") def escape_url(url): @@ -3066,9 +2860,9 @@ def _multipart_encode_impl(data, boundary): for k, v in data.items(): out += b'--' + boundary.encode('ascii') + b'\r\n' if isinstance(k, compat_str): - k = k.encode('utf-8') + k = k.encode() if isinstance(v, compat_str): - v = v.encode('utf-8') + v = v.encode() # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578 # suggests sending UTF-8 directly. Firefox sends UTF-8, too content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n' @@ -3171,9 +2965,10 @@ TV_PARENTAL_GUIDELINES = { def parse_age_limit(s): - if type(s) == int: + # isinstance(False, int) is True. So type() must be used instead + if type(s) is int: return s if 0 <= s <= 21 else None - if not isinstance(s, compat_basestring): + elif not isinstance(s, str): return None m = re.match(r'^(?P<age>\d{1,2})\+?$', s) if m: @@ -3200,10 +2995,10 @@ def strip_jsonp(code): def js_to_json(code, vars={}): # vars is a dict of var, val pairs to substitute COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n' - SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE) + SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*' INTEGER_TABLE = ( - (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16), - (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8), + (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16), + (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8), ) def fix_kv(m): @@ -3257,7 +3052,7 @@ def qualities(quality_ids): return q -POSTPROCESS_WHEN = {'pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'} +POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist') DEFAULT_OUTTMPL = { @@ -3335,12 +3130,7 @@ def args_to_str(args): def error_to_compat_str(err): - err_str = str(err) - # On python 2 error byte string must be decoded with proper - # encoding rather than ascii - if sys.version_info[0] < 3: - err_str = err_str.decode(preferredencoding()) - return err_str + return str(err) def error_to_str(err): @@ -3425,7 +3215,7 @@ def parse_codecs(codecs_str): return {} split_codecs = list(filter(None, map( str.strip, codecs_str.strip().strip(',').split(',')))) - vcodec, acodec, tcodec, hdr = None, None, None, None + vcodec, acodec, scodec, hdr = None, None, None, None for full_codec in split_codecs: parts = full_codec.split('.') codec = parts[0].replace('0', '') @@ -3443,16 +3233,16 @@ def parse_codecs(codecs_str): if not acodec: acodec = full_codec elif codec in ('stpp', 'wvtt',): - if not tcodec: - tcodec = full_codec + if not scodec: + scodec = full_codec else: - 
write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr) - if vcodec or acodec or tcodec: + write_string(f'WARNING: Unknown codec {full_codec}\n') + if vcodec or acodec or scodec: return { 'vcodec': vcodec or 'none', 'acodec': acodec or 'none', 'dynamic_range': hdr, - **({'tcodec': tcodec} if tcodec is not None else {}), + **({'scodec': scodec} if scodec is not None else {}), } elif len(split_codecs) == 2: return { @@ -3608,7 +3398,7 @@ def _match_one(filter_part, dct, incomplete): comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote']) actual_value = dct.get(m['key']) numeric_comparison = None - if isinstance(actual_value, compat_numeric_types): + if isinstance(actual_value, (int, float)): # If the original field is a string and matching comparisonvalue is # a number we should respect the origin of the original field # and process comparison value as a string (see @@ -3661,11 +3451,15 @@ def match_str(filter_str, dct, incomplete=False): def match_filter_func(filters): if not filters: return None - filters = variadic(filters) + filters = set(variadic(filters)) - def _match_func(info_dict, *args, **kwargs): - if any(match_str(f, info_dict, *args, **kwargs) for f in filters): - return None + interactive = '-' in filters + if interactive: + filters.remove('-') + + def _match_func(info_dict, incomplete=False): + if not filters or any(match_str(f, info_dict, incomplete) for f in filters): + return NO_DEFAULT if interactive and not incomplete else None else: video_title = info_dict.get('title') or info_dict.get('id') or 'video' filter_str = ') | ('.join(map(str.strip, filters)) @@ -3677,7 +3471,7 @@ def parse_dfxp_time_expr(time_expr): if not time_expr: return - mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr) + mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr) if mobj: return float(mobj.group('time_offset')) @@ -3729,7 +3523,7 @@ def dfxp2srt(dfxp_data): styles = {} default_style = {} - class TTMLPElementParser(object): + class TTMLPElementParser: _out = '' _unclosed_elements = [] _applied_styles = [] @@ -3914,7 +3708,7 @@ def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compa return cli_configuration_args(argdict, keys, default, use_compat) -class ISO639Utils(object): +class ISO639Utils: # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt _lang_map = { 'aa': 'aar', @@ -4119,7 +3913,7 @@ class ISO639Utils(object): return short_name -class ISO3166Utils(object): +class ISO3166Utils: # From http://data.okfn.org/data/core/country-list _country_map = { 'AF': 'Afghanistan', @@ -4379,7 +4173,7 @@ class ISO3166Utils(object): return cls._country_map.get(code.upper()) -class GeoUtils(object): +class GeoUtils: # Major IPv4 address blocks per country _country_ip_map = { 'AD': '46.172.224.0/19', @@ -4816,7 +4610,7 @@ def decode_png(png_data): header = png_data[8:] if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR': - raise IOError('Not a valid PNG file.') + raise OSError('Not a valid PNG file.') int_map = {1: '>B', 2: '>H', 4: '>I'} unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0] @@ -4853,7 +4647,7 @@ def decode_png(png_data): idat += chunk['data'] if not idat: - raise IOError('Unable to read PNG data.') + raise OSError('Unable to read PNG data.') decompressed_data = bytearray(zlib.decompress(idat)) @@ -4917,87 +4711,56 @@ def decode_png(png_data): def write_xattr(path, key, value): - # This mess below finds the best xattr tool for the job - try: - # try the 
pyxattr module... - import xattr - - if hasattr(xattr, 'set'): # pyxattr - # Unicode arguments are not supported in python-pyxattr until - # version 0.5.0 - # See https://github.com/ytdl-org/youtube-dl/issues/5498 - pyxattr_required_version = '0.5.0' - if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version): - # TODO: fallback to CLI tools - raise XAttrUnavailableError( - 'python-pyxattr is detected but is too old. ' - 'yt-dlp requires %s or above while your version is %s. ' - 'Falling back to other xattr implementations' % ( - pyxattr_required_version, xattr.__version__)) - - setxattr = xattr.set - else: # xattr - setxattr = xattr.setxattr + # Windows: Write xattrs to NTFS Alternate Data Streams: + # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 + if compat_os_name == 'nt': + assert ':' not in key + assert os.path.exists(path) try: - setxattr(path, key, value) - except EnvironmentError as e: + with open(f'{path}:{key}', 'wb') as f: + f.write(value) + except OSError as e: raise XAttrMetadataError(e.errno, e.strerror) + return - except ImportError: - if compat_os_name == 'nt': - # Write xattrs to NTFS Alternate Data Streams: - # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 - assert ':' not in key - assert os.path.exists(path) - - ads_fn = path + ':' + key - try: - with open(ads_fn, 'wb') as f: - f.write(value) - except EnvironmentError as e: - raise XAttrMetadataError(e.errno, e.strerror) - else: - user_has_setfattr = check_executable('setfattr', ['--version']) - user_has_xattr = check_executable('xattr', ['-h']) - - if user_has_setfattr or user_has_xattr: + # UNIX Method 1. Use xattrs/pyxattrs modules + from .dependencies import xattr - value = value.decode('utf-8') - if user_has_setfattr: - executable = 'setfattr' - opts = ['-n', key, '-v', value] - elif user_has_xattr: - executable = 'xattr' - opts = ['-w', key, value] + setxattr = None + if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr': + # Unicode arguments are not supported in pyxattr until version 0.5.0 + # See https://github.com/ytdl-org/youtube-dl/issues/5498 + if version_tuple(xattr.__version__) >= (0, 5, 0): + setxattr = xattr.set + elif xattr: + setxattr = xattr.setxattr - cmd = ([encodeFilename(executable, True)] - + [encodeArgument(o) for o in opts] - + [encodeFilename(path, True)]) + if setxattr: + try: + setxattr(path, key, value) + except OSError as e: + raise XAttrMetadataError(e.errno, e.strerror) + return - try: - p = Popen( - cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - except EnvironmentError as e: - raise XAttrMetadataError(e.errno, e.strerror) - stdout, stderr = p.communicate_or_kill() - stderr = stderr.decode('utf-8', 'replace') - if p.returncode != 0: - raise XAttrMetadataError(p.returncode, stderr) + # UNIX Method 2. Use setfattr/xattr executables + exe = ('setfattr' if check_executable('setfattr', ['--version']) + else 'xattr' if check_executable('xattr', ['-h']) else None) + if not exe: + raise XAttrUnavailableError( + 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the ' + + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)')) - else: - # On Unix, and can't find pyxattr, setfattr, or xattr. - if sys.platform.startswith('linux'): - raise XAttrUnavailableError( - "Couldn't find a tool to set the xattrs. 
" - "Install either the python 'pyxattr' or 'xattr' " - "modules, or the GNU 'attr' package " - "(which contains the 'setfattr' tool).") - else: - raise XAttrUnavailableError( - "Couldn't find a tool to set the xattrs. " - "Install either the python 'xattr' module, " - "or the 'xattr' binary.") + value = value.decode() + try: + p = Popen( + [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + except OSError as e: + raise XAttrMetadataError(e.errno, e.strerror) + stderr = p.communicate_or_kill()[1].decode('utf-8', 'replace') + if p.returncode: + raise XAttrMetadataError(p.returncode, stderr) def random_birthday(year_field, month_field, day_field): @@ -5013,12 +4776,12 @@ def random_birthday(year_field, month_field, day_field): # Templates for internet shortcut files, which are plain text files. -DOT_URL_LINK_TEMPLATE = ''' +DOT_URL_LINK_TEMPLATE = '''\ [InternetShortcut] URL=%(url)s -'''.lstrip() +''' -DOT_WEBLOC_LINK_TEMPLATE = ''' +DOT_WEBLOC_LINK_TEMPLATE = '''\ <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> <plist version="1.0"> @@ -5027,16 +4790,16 @@ DOT_WEBLOC_LINK_TEMPLATE = ''' \t<string>%(url)s</string> </dict> </plist> -'''.lstrip() +''' -DOT_DESKTOP_LINK_TEMPLATE = ''' +DOT_DESKTOP_LINK_TEMPLATE = '''\ [Desktop Entry] Encoding=UTF-8 Name=%(filename)s Type=Link URL=%(url)s Icon=text-html -'''.lstrip() +''' LINK_TEMPLATES = { 'url': DOT_URL_LINK_TEMPLATE, @@ -5062,29 +4825,29 @@ def iri_to_uri(iri): net_location = '' if iri_parts.username: - net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~") + net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~") if iri_parts.password is not None: - net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~") + net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~") net_location += '@' - net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames. + net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames. # The 'idna' encoding produces ASCII text. if iri_parts.port is not None and iri_parts.port != 80: net_location += ':' + str(iri_parts.port) - return compat_urllib_parse_urlunparse( + return urllib.parse.urlunparse( (iri_parts.scheme, net_location, - compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"), + urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"), # Unsure about the `safe` argument, since this is a legacy way of handling parameters. - compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"), + urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"), # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component. - compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"), + urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"), - compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~"))) + urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~"))) # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes. 
@@ -5092,7 +4855,7 @@ def iri_to_uri(iri): def to_high_limit_path(path): if sys.platform in ['win32', 'cygwin']: # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited. - return r'\\?\ '.rstrip() + os.path.abspath(path) + return '\\\\?\\' + os.path.abspath(path) return path @@ -5134,7 +4897,7 @@ def make_dir(path, to_screen=None): if dn and not os.path.exists(dn): os.makedirs(dn) return True - except (OSError, IOError) as err: + except OSError as err: if callable(to_screen) is not None: to_screen('unable to create directory ' + error_to_compat_str(err)) return False @@ -5144,7 +4907,7 @@ def get_executable_path(): from zipimport import zipimporter if hasattr(sys, 'frozen'): # Running from PyInstaller path = os.path.dirname(sys.executable) - elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP + elif isinstance(__loader__, zipimporter): # Running from ZIP path = os.path.join(os.path.dirname(__file__), '../..') else: path = os.path.join(os.path.dirname(__file__), '..') @@ -5153,7 +4916,7 @@ def get_executable_path(): def load_plugins(name, suffix, namespace): classes = {} - try: + with contextlib.suppress(FileNotFoundError): plugins_spec = importlib.util.spec_from_file_location( name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py')) plugins = importlib.util.module_from_spec(plugins_spec) @@ -5166,8 +4929,6 @@ def load_plugins(name, suffix, namespace): continue klass = getattr(plugins, name) classes[name] = namespace[name] = klass - except FileNotFoundError: - pass return classes @@ -5176,13 +4937,14 @@ def traverse_obj( casesense=True, is_user_input=False, traverse_string=False): ''' Traverse nested list/dict/tuple @param path_list A list of paths which are checked one by one. - Each path is a list of keys where each key is a string, - a function, a tuple of strings/None or "...". - When a fuction is given, it takes the key and value as arguments - and returns whether the key matches or not. When a tuple is given, - all the keys given in the tuple are traversed, and - "..." traverses all the keys in the object - "None" returns the object without traversal + Each path is a list of keys where each key is a: + - None: Do nothing + - string: A dictionary key + - int: An index into a list + - tuple: A list of keys all of which will be traversed + - Ellipsis: Fetch all values in the object + - Function: Takes the key and value as arguments + and returns whether the key matches or not @param default Default value to return @param expected_type Only accept final value of this type (Can also be any callable) @param get_all Return all the values obtained from a path or only the first one @@ -5311,9 +5073,9 @@ def jwt_encode_hs256(payload_data, key, headers={}): } if headers: header_data.update(headers) - header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8')) - payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8')) - h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256) + header_b64 = base64.b64encode(json.dumps(header_data).encode()) + payload_b64 = base64.b64encode(json.dumps(payload_data).encode()) + h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256) signature_b64 = base64.b64encode(h.digest()) token = header_b64 + b'.' + payload_b64 + b'.' 
+ signature_b64 return token @@ -5366,7 +5128,7 @@ def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re): """ _keys = ('width', 'height') max_dimensions = max( - [tuple(format.get(k) or 0 for k in _keys) for format in formats], + (tuple(format.get(k) or 0 for k in _keys) for format in formats), default=(0, 0)) if not max_dimensions[0]: return thumbnails @@ -5431,21 +5193,19 @@ class Config: def read_file(filename, default=[]): try: optionf = open(filename) - except IOError: + except OSError: return default # silently skip if file is not present try: # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56 contents = optionf.read() - if sys.version_info < (3,): - contents = contents.decode(preferredencoding()) - res = compat_shlex_split(contents, comments=True) + res = shlex.split(contents, comments=True) finally: optionf.close() return res @staticmethod def hide_login_info(opts): - PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username']) + PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'} eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') def _scrub_eq(o): @@ -5474,15 +5234,17 @@ class Config: yield from self.own_args or [] def parse_args(self): - return self._parser.parse_args(list(self.all_args)) + return self._parser.parse_args(self.all_args) class WebSocketsWrapper(): """Wraps websockets module to use in non-async scopes""" + pool = None def __init__(self, url, headers=None, connect=True): - self.loop = asyncio.events.new_event_loop() - self.conn = compat_websockets.connect( + self.loop = asyncio.new_event_loop() + # XXX: "loop" is deprecated + self.conn = websockets.connect( url, extra_headers=headers, ping_interval=None, close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf')) if connect: @@ -5511,7 +5273,7 @@ class WebSocketsWrapper(): # for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class @staticmethod def run_with_loop(main, loop): - if not asyncio.coroutines.iscoroutine(main): + if not asyncio.iscoroutine(main): raise ValueError(f'a coroutine was expected, got {main!r}') try: @@ -5523,7 +5285,7 @@ class WebSocketsWrapper(): @staticmethod def _cancel_all_tasks(loop): - to_cancel = asyncio.tasks.all_tasks(loop) + to_cancel = asyncio.all_tasks(loop) if not to_cancel: return @@ -5531,8 +5293,9 @@ class WebSocketsWrapper(): for task in to_cancel: task.cancel() + # XXX: "loop" is removed in python 3.10+ loop.run_until_complete( - asyncio.tasks.gather(*to_cancel, loop=loop, return_exceptions=True)) + asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)) for task in to_cancel: if task.cancelled(): @@ -5545,9 +5308,6 @@ class WebSocketsWrapper(): }) -has_websockets = bool(compat_websockets) - - def merge_headers(*dicts): """Merge dicts of http headers case insensitively, prioritizing the latter ones""" return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))} @@ -5555,7 +5315,17 @@ def merge_headers(*dicts): class classproperty: def __init__(self, f): + functools.update_wrapper(self, f) self.f = f def __get__(self, _, cls): return self.f(cls) + + +def Namespace(**kwargs): + return collections.namedtuple('Namespace', kwargs)(**kwargs) + + +# Deprecated +has_certifi = bool(certifi) +has_websockets = bool(websockets) diff --git a/yt_dlp/webvtt.py 
b/yt_dlp/webvtt.py index 962aa57ad..b8974f883 100644 --- a/yt_dlp/webvtt.py +++ b/yt_dlp/webvtt.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals, print_function, division - """ A partial parser for WebVTT segments. Interprets enough of the WebVTT stream to be able to assemble a single stand-alone subtitle file, suitably adjusting @@ -11,17 +8,13 @@ Regular expressions based on the W3C WebVTT specification in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>. """ -import re import io + +from .compat import re from .utils import int_or_none, timetuple_from_msec -from .compat import ( - compat_str as str, - compat_Pattern, - compat_Match, -) -class _MatchParser(object): +class _MatchParser: """ An object that maintains the current parsing position and allows conveniently advancing it as syntax elements are successfully parsed. @@ -32,7 +25,7 @@ class _MatchParser(object): self._pos = 0 def match(self, r): - if isinstance(r, compat_Pattern): + if isinstance(r, re.Pattern): return r.match(self._data, self._pos) if isinstance(r, str): if self._data.startswith(r, self._pos): @@ -43,7 +36,7 @@ class _MatchParser(object): def advance(self, by): if by is None: amt = 0 - elif isinstance(by, compat_Match): + elif isinstance(by, re.Match): amt = len(by.group(0)) elif isinstance(by, str): amt = len(by) @@ -70,7 +63,7 @@ class _MatchChildParser(_MatchParser): """ def __init__(self, parent): - super(_MatchChildParser, self).__init__(parent._data) + super().__init__(parent._data) self.__parent = parent self._pos = parent._pos @@ -84,7 +77,7 @@ class _MatchChildParser(_MatchParser): class ParseError(Exception): def __init__(self, parser): - super(ParseError, self).__init__("Parse error at position %u (near %r)" % ( + super().__init__("Parse error at position %u (near %r)" % ( parser._pos, parser._data[parser._pos:parser._pos + 20] )) @@ -109,14 +102,8 @@ def _parse_ts(ts): Convert a parsed WebVTT timestamp (a re.Match obtained from _REGEX_TS) into an MPEG PES timestamp: a tick counter at 90 kHz resolution. """ - - h, min, s, ms = ts.groups() - return 90 * ( - int(h or 0) * 3600000 + # noqa: W504,E221,E222 - int(min) * 60000 + # noqa: W504,E221,E222 - int(s) * 1000 + # noqa: W504,E221,E222 - int(ms) # noqa: W504,E221,E222 - ) + return 90 * sum( + int(part or 0) * mult for part, mult in zip(ts.groups(), (3600_000, 60_000, 1000, 1))) def _format_ts(ts): @@ -127,7 +114,7 @@ return '%02u:%02u:%02u.%03u' % timetuple_from_msec(int((ts + 45) // 90)) -class Block(object): +class Block: """ An abstract WebVTT block. """ @@ -359,7 +346,7 @@ def parse_fragment(frag_content): a bytes object containing the raw contents of a WebVTT file. """ - parser = _MatchParser(frag_content.decode('utf-8')) + parser = _MatchParser(frag_content.decode()) yield Magic.parse(parser)
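The `_parse_ts` rewrite above is easy to gloss over: it folds the hour/minute/second/millisecond arithmetic into a single `sum` over `(group, multiplier)` pairs, still producing 90 kHz MPEG PES ticks. A standalone check follows; the regex is a stand-in with the same four capture groups as webvtt's `_REGEX_TS`, whose exact pattern is not shown in this diff:

```python
# 90 kHz MPEG PES ticks from an (H)H:MM:SS.mmm WebVTT timestamp,
# mirroring the zip/sum rewrite of _parse_ts above.
import re

TS = re.compile(r'(?:(\d+):)?(\d{2}):(\d{2})\.(\d{3})')  # stand-in regex

def parse_ts(text):
    groups = TS.match(text).groups()  # (hours or None, minutes, seconds, ms)
    return 90 * sum(
        int(part or 0) * mult
        for part, mult in zip(groups, (3600_000, 60_000, 1000, 1)))

assert parse_ts('01:02:03.500') == 90 * (3600_000 + 2 * 60_000 + 3000 + 500)
assert parse_ts('02:03.500') == 90 * (2 * 60_000 + 3000 + 500)  # hours optional
```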
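Stepping back to the utils.py hunks earlier in this diff: the rewritten `traverse_obj` docstring now enumerates the supported key types explicitly. The toy traversal below makes those types concrete; it is nothing like the real function, which also handles `default`, `expected_type`, `get_all`, case-insensitivity, and string traversal:

```python
# Toy illustration of the key types listed in the new docstring.
# (Tuple keys are handled for dicts only, for brevity.)
def tiny_traverse(obj, *path):
    objs = [obj]
    for key in path:
        if key is None:          # None: do nothing
            continue
        new = []
        for o in objs:
            if key is Ellipsis:  # ...: fetch all values in the object
                new.extend(o.values() if isinstance(o, dict) else o)
            elif callable(key):  # function: filter on (key, value)
                items = o.items() if isinstance(o, dict) else enumerate(o)
                new.extend(v for k, v in items if key(k, v))
            elif isinstance(key, tuple):  # tuple: traverse several keys
                new.extend(o[k] for k in key if k in o)
            else:                # str: dict key; int: list index
                new.append(o[key])
        objs = new
    return objs

data = {'formats': [{'url': 'a', 'height': 720}, {'url': 'b'}]}
print(tiny_traverse(data, 'formats', ..., 'url'))                  # ['a', 'b']
print(tiny_traverse(data, 'formats', lambda _, v: 'height' in v))  # [{'url': 'a', 'height': 720}]
```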
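Similarly, the `jwt_encode_hs256` hunk only trims redundant `'utf-8'` arguments, but the function is compact enough to restate whole. The sketch below mirrors the diff's logic (it keeps the module's plain `base64.b64encode`, rather than the unpadded base64url that RFC 7515 specifies for interoperable JWTs); the sample payload and key are invented:

```python
# Standalone HS256 JWT construction mirroring jwt_encode_hs256 above.
import base64
import hashlib
import hmac
import json

def jwt_encode_hs256(payload_data, key, headers={}):
    header_data = {'alg': 'HS256', 'typ': 'JWT', **headers}
    header_b64 = base64.b64encode(json.dumps(header_data).encode())
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
    h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
    return header_b64 + b'.' + payload_b64 + b'.' + base64.b64encode(h.digest())

token = jwt_encode_hs256({'sub': 'example'}, 'secret-key')
print(token.decode())
```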
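Finally, two of the new helpers added at the bottom of utils.py are easy to misread at a glance. `merge_headers` normalizes keys with `str.title()`, so later dicts win case-insensitively, and `Namespace` builds an immutable attribute bag out of keyword arguments. The definitions below are copied from the diff; the example data is invented:

```python
# Usage sketch for the new merge_headers and Namespace helpers.
import collections
import itertools

def merge_headers(*dicts):
    """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
    return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}

def Namespace(**kwargs):
    return collections.namedtuple('Namespace', kwargs)(**kwargs)

print(merge_headers({'user-agent': 'curl'}, {'User-Agent': 'yt-dlp'}))
# -> {'User-Agent': 'yt-dlp'}: the later dict wins, whatever the key case

opts = Namespace(before='pre_process', after='post_process')  # hypothetical fields
print(opts.before)  # plain attribute access; the underlying tuple is immutable
```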