author    Jesús <heckyel@hyperbola.info>  2022-03-30 01:24:15 +0800
committer Jesús <heckyel@hyperbola.info>  2022-03-30 01:24:15 +0800
commit    950cc067b8c41ac246deb4725177a372c95d8341 (patch)
tree      66d5284ff86faec8c3000be1e7d0bd856b4c4dbe
parent    7a74bc5d1e54299e51b73492e09c70da994f4b35 (diff)
parent    e7870111e83033e0ac728d5a2d565d1eb146c335 (diff)
download  hypervideo-pre-950cc067b8c41ac246deb4725177a372c95d8341.tar.lz
          hypervideo-pre-950cc067b8c41ac246deb4725177a372c95d8341.tar.xz
          hypervideo-pre-950cc067b8c41ac246deb4725177a372c95d8341.zip

    updated from upstream | 30/03/2022 at 01:24
 devscripts/make_supportedsites.py    |   5
 pyinst.py                            |   2
 requirements.txt                     |   3
 test/helper.py                       |  10
 test/test_YoutubeDL.py               |   4
 test/test_utils.py                   |   8
 yt_dlp/YoutubeDL.py                  |  81
 yt_dlp/__init__.py                   |   4
 yt_dlp/downloader/external.py        |  45
 yt_dlp/extractor/adobepass.py        |  26
 yt_dlp/extractor/banbye.py           | 153
 yt_dlp/extractor/bilibili.py         |  67
 yt_dlp/extractor/common.py           |  12
 yt_dlp/extractor/ellentube.py        |   3
 yt_dlp/extractor/extractors.py       |  20
 yt_dlp/extractor/fc2.py              |   1
 yt_dlp/extractor/generic.py          |  22
 yt_dlp/extractor/go.py               |   8
 yt_dlp/extractor/itprotv.py          | 141
 yt_dlp/extractor/lastfm.py           | 129
 yt_dlp/extractor/rai.py              |   5
 yt_dlp/extractor/tver.py             |  19
 yt_dlp/extractor/viki.py             |   2
 yt_dlp/extractor/vimeo.py            | 100
 yt_dlp/extractor/viu.py              | 219
 yt_dlp/extractor/wasdtv.py           | 161
 yt_dlp/extractor/youtube.py          |  89
 yt_dlp/options.py                    |  25
 yt_dlp/postprocessor/common.py       |  31
 yt_dlp/postprocessor/ffmpeg.py       |  46
 yt_dlp/postprocessor/sponsorblock.py |  28
 yt_dlp/utils.py                      | 146
 32 files changed, 1229 insertions(+), 386 deletions(-)
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 4c11e25f2..729f60a0e 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -24,10 +24,9 @@ def main():
def gen_ies_md(ies):
for ie in ies:
ie_md = '**{0}**'.format(ie.IE_NAME)
- ie_desc = getattr(ie, 'IE_DESC', None)
- if ie_desc is False:
+ if ie.IE_DESC is False:
continue
- if ie_desc is not None:
+ if ie.IE_DESC is not None:
ie_md += ': {0}'.format(ie.IE_DESC)
search_key = getattr(ie, 'SEARCH_KEY', None)
if search_key is not None:
diff --git a/pyinst.py b/pyinst.py
index 7b336aa9e..1033cda8f 100644
--- a/pyinst.py
+++ b/pyinst.py
@@ -74,7 +74,7 @@ def version_to_list(version):
def dependency_options():
- dependencies = [pycryptodome_module(), 'mutagen', 'brotli'] + collect_submodules('websockets')
+ dependencies = [pycryptodome_module(), 'mutagen', 'brotli', 'certifi'] + collect_submodules('websockets')
excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc']
yield from (f'--hidden-import={module}' for module in dependencies)
diff --git a/requirements.txt b/requirements.txt
index 7818aca78..b65d25456 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,5 @@ mutagen
pycryptodome
websockets
brotli; platform_python_implementation=='CPython'
-brotlicffi; platform_python_implementation!='CPython'
\ No newline at end of file
+brotlicffi; platform_python_implementation!='CPython'
+certifi
\ No newline at end of file
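
Note: certifi is now a hard dependency (added to requirements.txt here and to
the PyInstaller hidden imports in pyinst.py above), and its presence is
reported in the verbose header via utils.has_certifi (see the YoutubeDL.py
hunks below). That flag is presumably just a guarded import along these
lines (a sketch, not the exact upstream code):

    try:
        import certifi
        has_certifi = True
    except ImportError:
        has_certifi = False
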
diff --git a/test/helper.py b/test/helper.py
index 1070e0668..28c21b2eb 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -196,15 +196,7 @@ def expect_dict(self, got_dict, expected_dict):
def sanitize_got_info_dict(got_dict):
IGNORED_FIELDS = (
- # Format keys
- 'url', 'manifest_url', 'format', 'format_id', 'format_note', 'width', 'height', 'resolution',
- 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'vbr', 'fps', 'vcodec', 'container', 'filesize',
- 'filesize_approx', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'preference',
- 'language', 'language_preference', 'quality', 'source_preference', 'http_headers',
- 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
-
- # RTMP formats
- 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time',
+ *YoutubeDL._format_fields,
# Lists
'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries',
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 7637297be..c9108c5b6 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -818,6 +818,8 @@ class TestYoutubeDL(unittest.TestCase):
test('%(id&foo)s.bar', 'foo.bar')
test('%(title&foo)s.bar', 'NA.bar')
test('%(title&foo|baz)s.bar', 'baz.bar')
+ test('%(x,id&foo|baz)s.bar', 'foo.bar')
+ test('%(x,title&foo|baz)s.bar', 'baz.bar')
# Laziness
def gen():
@@ -931,7 +933,7 @@ class TestYoutubeDL(unittest.TestCase):
res = get_videos()
self.assertEqual(res, ['1', '2'])
- def f(v):
+ def f(v, incomplete):
if v['id'] == '1':
return None
else:
diff --git a/test/test_utils.py b/test/test_utils.py
index a7f1b0e94..31f168998 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -160,10 +160,12 @@ class TestUtil(unittest.TestCase):
sanitize_filename('New World record at 0:12:34'),
'New World record at 0_12_34')
- self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf')
+ self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf')
self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
- self.assertEqual(sanitize_filename('.gasdgf'), 'gasdgf')
+ self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf')
self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
+ self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf')
forbidden = '"\0\\/'
for fc in forbidden:
@@ -625,6 +627,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(parse_duration('3h 11m 53s'), 11513)
self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
+ self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513)
+ self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513)
self.assertEqual(parse_duration('62m45s'), 3765)
self.assertEqual(parse_duration('6m59s'), 419)
self.assertEqual(parse_duration('49s'), 49)
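
Note: per the sanitize_filename tests above, leading '.' and '-' are now kept
by default; the legacy stripping ('--x' -> '_-x', '.x' -> 'x') only happens
when is_id=False is passed explicitly, which YoutubeDL.py below does only
under the 'filename-sanitization' compat option. Roughly:

    from yt_dlp.utils import sanitize_filename

    sanitize_filename('--gasdgf')               # '--gasdgf' (new default)
    sanitize_filename('--gasdgf', is_id=True)   # '--gasdgf'
    sanitize_filename('--gasdgf', is_id=False)  # '_-gasdgf' (legacy behaviour)
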
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index a5c7348b2..e57716e00 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -65,6 +65,7 @@ from .utils import (
ExistingVideoReached,
expand_path,
ExtractorError,
+ filter_dict,
float_or_none,
format_bytes,
format_field,
@@ -72,6 +73,7 @@ from .utils import (
formatSeconds,
GeoRestrictedError,
get_domain,
+ has_certifi,
HEADRequest,
InAdvancePagedList,
int_or_none,
@@ -86,6 +88,7 @@ from .utils import (
MaxDownloadsReached,
merge_headers,
network_exceptions,
+ NO_DEFAULT,
number_of_digits,
orderedSet,
OUTTMPL_TYPES,
@@ -511,6 +514,16 @@ class YoutubeDL(object):
'track_number', 'disc_number', 'release_year',
))
+ _format_fields = {
+ # NB: Keep in sync with the docstring of extractor/common.py
+ 'url', 'manifest_url', 'ext', 'format', 'format_id', 'format_note',
+ 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr',
+ 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx',
+ 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start',
+ 'preference', 'language', 'language_preference', 'quality', 'source_preference',
+ 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options',
+ 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time'
+ }
_format_selection_exts = {
'audio': {'m4a', 'mp3', 'ogg', 'aac'},
'video': {'mp4', 'flv', 'webm', '3gp'},
@@ -786,7 +799,7 @@ class YoutubeDL(object):
def to_stdout(self, message, skip_eol=False, quiet=None):
"""Print message to stdout"""
if quiet is not None:
- self.deprecation_warning('"ydl.to_stdout" no longer accepts the argument quiet. Use "ydl.to_screen" instead')
+ self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. Use "YoutubeDL.to_screen" instead')
self._write_string(
'%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
self._out_files['print'])
@@ -1087,10 +1100,11 @@ class YoutubeDL(object):
(?P<fields>{field})
(?P<maths>(?:{math_op}{math_field})*)
(?:>(?P<strf_format>.+?))?
- (?P<alternate>(?<!\\),[^|&)]+)?
- (?:&(?P<replacement>.*?))?
- (?:\|(?P<default>.*?))?
- $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
+ (?P<remaining>
+ (?P<alternate>(?<!\\),[^|&)]+)?
+ (?:&(?P<replacement>.*?))?
+ (?:\|(?P<default>.*?))?
+ )$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
def _traverse_infodict(k):
k = k.split('.')
@@ -1137,8 +1151,10 @@ class YoutubeDL(object):
na = self.params.get('outtmpl_na_placeholder', 'NA')
def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
- return sanitize_filename(str(value), restricted=restricted,
- is_id=re.search(r'(^|[_.])id(\.|$)', key))
+ return sanitize_filename(str(value), restricted=restricted, is_id=(
+ bool(re.search(r'(^|[_.])id(\.|$)', key))
+ if 'filename-sanitization' in self.params.get('compat_opts', [])
+ else NO_DEFAULT))
sanitizer = sanitize if callable(sanitize) else filename_sanitizer
sanitize = bool(sanitize)
@@ -1161,7 +1177,7 @@ class YoutubeDL(object):
value = get_value(mobj)
replacement = mobj['replacement']
if value is None and mobj['alternate']:
- mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:])
+ mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:])
else:
break
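
Note: this regex change fixes how alternate fields combine with '&'
replacement and '|' default in output templates. The old code re-parsed only
the 'alternate' group (which stops at '&'/'|'), silently dropping any trailing
replacement/default; re-parsing the new 'remaining' group keeps them. Per the
tests added above:

    %(x,id&foo|baz)s     -> 'foo'  (x missing, id present, so '&foo' applies)
    %(x,title&foo|baz)s  -> 'baz'  (both missing, so the '|baz' default applies)
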
@@ -1558,13 +1574,9 @@ class YoutubeDL(object):
if not info:
return info
- force_properties = dict(
- (k, v) for k, v in ie_result.items() if v is not None)
- for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
- if f in force_properties:
- del force_properties[f]
new_result = info.copy()
- new_result.update(force_properties)
+ new_result.update(filter_dict(ie_result, lambda k, v: (
+ v is not None and k not in {'_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'})))
# Extracted info may not be a video result (i.e.
# info.get('_type', 'video') != video) but rather an url or
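
Note: filter_dict is a new utils helper this commit uses to replace the
dict((k, v) for k, v in ... if v is not None) pattern here and in common.py,
rai.py and bilibili.py below. Presumably something like this sketch (not
necessarily the exact definition):

    def filter_dict(dct, cndn=lambda _, v: v is not None):
        """Keep only the items of dct for which cndn(key, value) holds."""
        return {k: v for k, v in dct.items() if cndn(k, v)}
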
@@ -1802,7 +1814,7 @@ class YoutubeDL(object):
ie_result['entries'] = playlist_results
# Write the updated info to json
- if _infojson_written and self._write_info_json(
+ if _infojson_written is True and self._write_info_json(
'updated playlist', ie_result,
self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None:
return
@@ -2443,6 +2455,11 @@ class YoutubeDL(object):
info_dict['__has_drm'] = any(f.get('has_drm') for f in formats)
if not self.params.get('allow_unplayable_formats'):
formats = [f for f in formats if not f.get('has_drm')]
+ if info_dict['__has_drm'] and all(
+ f.get('acodec') == f.get('vcodec') == 'none' for f in formats):
+ self.report_warning(
+ 'This video is DRM protected and only images are available for download. '
+ 'Use --list-formats to see them')
get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start'))
if not get_from_start:
@@ -2539,7 +2556,7 @@ class YoutubeDL(object):
info_dict, _ = self.pre_process(info_dict)
- if self._match_entry(info_dict) is not None:
+ if self._match_entry(info_dict, incomplete=self._format_fields) is not None:
return info_dict
self.post_extract(info_dict)
@@ -2615,8 +2632,9 @@ class YoutubeDL(object):
if not formats_to_download:
if not self.params.get('ignore_no_formats_error'):
- raise ExtractorError('Requested format is not available', expected=True,
- video_id=info_dict['id'], ie=info_dict['extractor'])
+ raise ExtractorError(
+ 'Requested format is not available. Use --list-formats for a list of available formats',
+ expected=True, video_id=info_dict['id'], ie=info_dict['extractor'])
self.report_warning('Requested format is not available')
# Process what we can, even without any available formats.
formats_to_download = [{}]
@@ -3675,6 +3693,7 @@ class YoutubeDL(object):
lib_str = join_nonempty(
compat_brotli and compat_brotli.__name__,
+ has_certifi and 'certifi',
compat_pycrypto_AES and compat_pycrypto_AES.__name__.split('.')[0],
SECRETSTORAGE_AVAILABLE and 'secretstorage',
has_mutagen and 'mutagen',
@@ -3766,7 +3785,7 @@ class YoutubeDL(object):
return encoding
def _write_info_json(self, label, ie_result, infofn, overwrite=None):
- ''' Write infojson and returns True = written, False = skip, None = error '''
+ ''' Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error '''
if overwrite is None:
overwrite = self.params.get('overwrites', True)
if not self.params.get('writeinfojson'):
@@ -3778,14 +3797,15 @@ class YoutubeDL(object):
return None
elif not overwrite and os.path.exists(infofn):
self.to_screen(f'[info] {label.title()} metadata is already present')
- else:
- self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
- try:
- write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
- except (OSError, IOError):
- self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
- return None
- return True
+ return 'exists'
+
+ self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}')
+ try:
+ write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn)
+ return True
+ except (OSError, IOError):
+ self.report_error(f'Cannot write {label} metadata to JSON file {infofn}')
+ return None
def _write_description(self, label, ie_result, descfn):
''' Write description and returns True = written, False = skip, None = error '''
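
Note: _write_info_json now has four distinct results, which is why the
playlist hunk above was tightened from 'if _infojson_written' to
'if _infojson_written is True' -- an 'exists' result (truthy) must not count
as freshly written. A caller sketch:

    ret = self._write_info_json('video', ie_result, infofn)
    if ret is None:         # error while writing
        return
    elif ret is True:       # freshly written
        ...
    elif ret == 'exists':   # already on disk and not overwritten
        ...
    else:                   # False: --write-info-json not requested
        ...
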
@@ -3856,9 +3876,12 @@ class YoutubeDL(object):
sub_info['filepath'] = sub_filename
ret.append((sub_filename, sub_filename_final))
except (DownloadError, ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
+ msg = f'Unable to download video subtitles for {sub_lang!r}: {err}'
if self.params.get('ignoreerrors') is not True: # False or 'only_download'
- raise DownloadError(f'Unable to download video subtitles for {sub_lang!r}: {err}', err)
- self.report_warning(f'Unable to download video subtitles for {sub_lang!r}: {err}')
+ if not self.params.get('ignoreerrors'):
+ self.report_error(msg)
+ raise DownloadError(msg)
+ self.report_warning(msg)
return ret
def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None):
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 6c6ac7adf..6d5a64336 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -93,9 +93,9 @@ def print_extractor_information(opts, urls):
for ie in list_extractors(opts.age_limit):
if not ie.working():
continue
- desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
- if desc is False:
+ if ie.IE_DESC is False:
continue
+ desc = ie.IE_DESC or ie.IE_NAME
if getattr(ie, 'SEARCH_KEY', None) is not None:
_SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
_COUNTS = ('', '5', '10', 'all')
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index be6202eef..71af705ea 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -13,6 +13,7 @@ from ..compat import (
)
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
from ..utils import (
+ classproperty,
cli_option,
cli_valueless_option,
cli_bool_option,
@@ -73,17 +74,23 @@ class ExternalFD(FragmentFD):
def get_basename(cls):
return cls.__name__[:-2].lower()
+ @classproperty
+ def EXE_NAME(cls):
+ return cls.get_basename()
+
@property
def exe(self):
- return self.get_basename()
+ return self.EXE_NAME
@classmethod
def available(cls, path=None):
- path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
- if path:
- cls.exe = path
- return path
- return False
+ path = check_executable(
+ cls.EXE_NAME if path in (None, cls.get_basename()) else path,
+ [cls.AVAILABLE_OPT])
+ if not path:
+ return False
+ cls.exe = path
+ return path
@classmethod
def supports(cls, info_dict):
@@ -106,7 +113,7 @@ class ExternalFD(FragmentFD):
def _configuration_args(self, keys=None, *args, **kwargs):
return _configuration_args(
- self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(),
+ self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
keys, *args, **kwargs)
def _call_downloader(self, tmpfilename, info_dict):
@@ -169,7 +176,7 @@ class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V'
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '--location', '-o', tmpfilename]
+ cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
@@ -219,7 +226,7 @@ class WgetFD(ExternalFD):
AVAILABLE_OPT = '--version'
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
+ cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
@@ -230,7 +237,10 @@ class WgetFD(ExternalFD):
retry[1] = '0'
cmd += retry
cmd += self._option('--bind-address', 'source_address')
- cmd += self._option('--proxy', 'proxy')
+ proxy = self.params.get('proxy')
+ if proxy:
+ for var in ('http_proxy', 'https_proxy'):
+ cmd += ['--execute', '%s=%s' % (var, proxy)]
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
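
Note: wget has no '--proxy URL' option (the old code emitted an unsupported
flag); it reads proxies from wgetrc-style variables, which '--execute' can set
per invocation. The resulting command now looks roughly like:

    wget --execute http_proxy=http://127.0.0.1:3128 \
         --execute https_proxy=http://127.0.0.1:3128 \
         -O out.mp4 -nv --no-cookies --compression=auto -- 'https://example.com/v.mp4'
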
@@ -303,10 +313,7 @@ class Aria2cFD(ExternalFD):
class HttpieFD(ExternalFD):
AVAILABLE_OPT = '--version'
-
- @classmethod
- def available(cls, path=None):
- return super().available(path or 'http')
+ EXE_NAME = 'http'
def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
@@ -507,11 +514,13 @@ class AVconvFD(FFmpegFD):
pass
-_BY_NAME = dict(
- (klass.get_basename(), klass)
+_BY_NAME = {
+ klass.get_basename(): klass
for name, klass in globals().items()
if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
-)
+}
+
+_BY_EXE = {klass.EXE_NAME: klass for klass in _BY_NAME.values()}
def list_external_downloaders():
@@ -523,4 +532,4 @@ def get_external_downloader(external_downloader):
downloader . """
# Drop .exe extension on Windows
bn = os.path.splitext(os.path.basename(external_downloader))[0]
- return _BY_NAME.get(bn)
+ return _BY_NAME.get(bn, _BY_EXE.get(bn))
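
Note: EXE_NAME decouples a downloader's registry name from its executable
(HttpieFD registers as 'httpie' but invokes 'http'), replacing HttpieFD's
available() override. get_external_downloader now resolves either name:

    from yt_dlp.downloader.external import get_external_downloader

    get_external_downloader('httpie')  # HttpieFD, via _BY_NAME
    get_external_downloader('http')    # HttpieFD, via the new _BY_EXE fallback
    get_external_downloader('aria2c')  # Aria2cFD, as before
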
diff --git a/yt_dlp/extractor/adobepass.py b/yt_dlp/extractor/adobepass.py
index f0eba8844..5d98301b8 100644
--- a/yt_dlp/extractor/adobepass.py
+++ b/yt_dlp/extractor/adobepass.py
@@ -1650,21 +1650,27 @@ class AdobePassIE(InfoExtractor):
hidden_data = self._hidden_inputs(first_bookend_page)
hidden_data['history_val'] = 1
- provider_login_redirect_page = self._download_webpage(
+ provider_login_redirect_page_res = self._download_webpage_handle(
urlh.geturl(), video_id, 'Sending First Bookend',
query=hidden_data)
- provider_tryauth_url = self._html_search_regex(
- r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
+ provider_login_redirect_page, urlh = provider_login_redirect_page_res
- provider_tryauth_page = self._download_webpage(
- provider_tryauth_url, video_id, 'Submitting TryAuth',
- query=hidden_data)
+ # Some website partners seem to not have the extra ajaxurl redirect step, so we check if we already
+ # have the login prompt or not
+ if 'id="password" type="password" name="password"' in provider_login_redirect_page:
+ provider_login_page_res = provider_login_redirect_page_res
+ else:
+ provider_tryauth_url = self._html_search_regex(
+ r'url:\s*[\'"]([^\'"]+)', provider_login_redirect_page, 'ajaxurl')
+ provider_tryauth_page = self._download_webpage(
+ provider_tryauth_url, video_id, 'Submitting TryAuth',
+ query=hidden_data)
- provider_login_page_res = self._download_webpage_handle(
- f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
- video_id, 'Getting Login Page',
- query=hidden_data)
+ provider_login_page_res = self._download_webpage_handle(
+ f'https://authorize.suddenlink.net/saml/module.php/authSynacor/login.php?AuthState={provider_tryauth_page}',
+ video_id, 'Getting Login Page',
+ query=hidden_data)
provider_association_redirect, urlh = post_form(
provider_login_page_res, 'Logging in', {
diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py
new file mode 100644
index 000000000..3d4d36ec3
--- /dev/null
+++ b/yt_dlp/extractor/banbye.py
@@ -0,0 +1,153 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import math
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_urlparse,
+ compat_parse_qs,
+)
+from ..utils import (
+ format_field,
+ InAdvancePagedList,
+ traverse_obj,
+ unified_timestamp,
+)
+
+
+class BanByeBaseIE(InfoExtractor):
+ _API_BASE = 'https://api.banbye.com'
+ _CDN_BASE = 'https://cdn.banbye.com'
+ _VIDEO_BASE = 'https://banbye.com/watch'
+
+ @staticmethod
+ def _extract_playlist_id(url, param='playlist'):
+ return compat_parse_qs(
+ compat_urllib_parse_urlparse(url).query).get(param, [None])[0]
+
+ def _extract_playlist(self, playlist_id):
+ data = self._download_json(f'{self._API_BASE}/playlists/{playlist_id}', playlist_id)
+ return self.playlist_result([
+ self.url_result(f'{self._VIDEO_BASE}/{video_id}', BanByeIE)
+ for video_id in data['videoIds']], playlist_id, data.get('name'))
+
+
+class BanByeIE(BanByeBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?watch/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://banbye.com/watch/v_ytfmvkVYLE8T',
+ 'md5': '2f4ea15c5ca259a73d909b2cfd558eb5',
+ 'info_dict': {
+ 'id': 'v_ytfmvkVYLE8T',
+ 'ext': 'mp4',
+ 'title': 'md5:5ec098f88a0d796f987648de6322ba0f',
+ 'description': 'md5:4d94836e73396bc18ef1fa0f43e5a63a',
+ 'uploader': 'wRealu24',
+ 'channel_id': 'ch_wrealu24',
+ 'channel_url': 'https://banbye.com/channel/ch_wrealu24',
+ 'timestamp': 1647604800,
+ 'upload_date': '20220318',
+ 'duration': 1931,
+ 'thumbnail': r're:https?://.*\.webp',
+ 'tags': 'count:5',
+ 'like_count': int,
+ 'dislike_count': int,
+ 'view_count': int,
+ 'comment_count': int,
+ },
+ }, {
+ 'url': 'https://banbye.com/watch/v_2JjQtqjKUE_F?playlistId=p_Ld82N6gBw_OJ',
+ 'info_dict': {
+ 'title': 'Krzysztof Karoń',
+ 'id': 'p_Ld82N6gBw_OJ',
+ },
+ 'playlist_count': 9,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ playlist_id = self._extract_playlist_id(url, 'playlistId')
+
+ if self._yes_playlist(playlist_id, video_id):
+ return self._extract_playlist(playlist_id)
+
+ data = self._download_json(f'{self._API_BASE}/videos/{video_id}', video_id)
+ thumbnails = [{
+ 'id': f'{quality}p',
+ 'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.webp',
+ } for quality in [48, 96, 144, 240, 512, 1080]]
+ formats = [{
+ 'format_id': f'http-{quality}p',
+ 'quality': quality,
+ 'url': f'{self._CDN_BASE}/video/{video_id}/{quality}.mp4',
+ } for quality in data['quality']]
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': data.get('title'),
+ 'description': data.get('desc'),
+ 'uploader': traverse_obj(data, ('channel', 'name')),
+ 'channel_id': data.get('channelId'),
+ 'channel_url': format_field(data, 'channelId', 'https://banbye.com/channel/%s'),
+ 'timestamp': unified_timestamp(data.get('publishedAt')),
+ 'duration': data.get('duration'),
+ 'tags': data.get('tags'),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'like_count': data.get('likes'),
+ 'dislike_count': data.get('dislikes'),
+ 'view_count': data.get('views'),
+ 'comment_count': data.get('commentCount'),
+ }
+
+
+class BanByeChannelIE(BanByeBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?banbye.com/(?:en/)?channel/(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://banbye.com/channel/ch_wrealu24',
+ 'info_dict': {
+ 'title': 'wRealu24',
+ 'id': 'ch_wrealu24',
+ 'description': 'md5:da54e48416b74dfdde20a04867c0c2f6',
+ },
+ 'playlist_mincount': 791,
+ }, {
+ 'url': 'https://banbye.com/channel/ch_wrealu24?playlist=p_Ld82N6gBw_OJ',
+ 'info_dict': {
+ 'title': 'Krzysztof Karoń',
+ 'id': 'p_Ld82N6gBw_OJ',
+ },
+ 'playlist_count': 9,
+ }]
+ _PAGE_SIZE = 100
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ playlist_id = self._extract_playlist_id(url)
+
+ if playlist_id:
+ return self._extract_playlist(playlist_id)
+
+ def page_func(page_num):
+ data = self._download_json(f'{self._API_BASE}/videos', channel_id, query={
+ 'channelId': channel_id,
+ 'sort': 'new',
+ 'limit': self._PAGE_SIZE,
+ 'offset': page_num * self._PAGE_SIZE,
+ }, note=f'Downloading page {page_num+1}')
+ return [
+ self.url_result(f"{self._VIDEO_BASE}/{video['_id']}", BanByeIE)
+ for video in data['items']
+ ]
+
+ channel_data = self._download_json(f'{self._API_BASE}/channels/{channel_id}', channel_id)
+ entries = InAdvancePagedList(
+ page_func,
+ math.ceil(channel_data['videoCount'] / self._PAGE_SIZE),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(
+ entries, channel_id, channel_data.get('name'), channel_data.get('description'))
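
Note: the BanBye extractor builds formats and thumbnails from predictable CDN
paths instead of a manifest. Example invocations, using URLs from the test
cases above:

    yt-dlp 'https://banbye.com/watch/v_ytfmvkVYLE8T'
    # a watch URL with ?playlistId=... extracts the whole playlist unless
    # --no-playlist is given (via _yes_playlist):
    yt-dlp --no-playlist 'https://banbye.com/watch/v_2JjQtqjKUE_F?playlistId=p_Ld82N6gBw_OJ'
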
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index b4eb20642..dd1ff512e 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -15,6 +15,7 @@ from ..compat import (
)
from ..utils import (
ExtractorError,
+ filter_dict,
int_or_none,
float_or_none,
mimetype2ext,
@@ -755,15 +756,21 @@ class BiliIntlBaseIE(InfoExtractor):
for i, line in enumerate(json['body']) if line.get('content'))
return data
- def _get_subtitles(self, ep_id):
- sub_json = self._call_api(f'/web/v2/subtitle?episode_id={ep_id}&platform=web', ep_id)
+ def _get_subtitles(self, *, ep_id=None, aid=None):
+ sub_json = self._call_api(
+ '/web/v2/subtitle', ep_id or aid, note='Downloading subtitles list',
+ errnote='Unable to download subtitles list', query=filter_dict({
+ 'platform': 'web',
+ 'episode_id': ep_id,
+ 'aid': aid,
+ }))
subtitles = {}
for sub in sub_json.get('subtitles') or []:
sub_url = sub.get('url')
if not sub_url:
continue
sub_data = self._download_json(
- sub_url, ep_id, errnote='Unable to download subtitles', fatal=False,
+ sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
if not sub_data:
continue
@@ -773,9 +780,14 @@ class BiliIntlBaseIE(InfoExtractor):
})
return subtitles
- def _get_formats(self, ep_id):
- video_json = self._call_api(f'/web/playurl?ep_id={ep_id}&platform=web', ep_id,
- note='Downloading video formats', errnote='Unable to download video formats')
+ def _get_formats(self, *, ep_id=None, aid=None):
+ video_json = self._call_api(
+ '/web/playurl', ep_id or aid, note='Downloading video formats',
+ errnote='Unable to download video formats', query=filter_dict({
+ 'platform': 'web',
+ 'ep_id': ep_id,
+ 'aid': aid,
+ }))
video_json = video_json['playurl']
formats = []
for vid in video_json.get('video') or []:
@@ -809,15 +821,15 @@ class BiliIntlBaseIE(InfoExtractor):
self._sort_formats(formats)
return formats
- def _extract_ep_info(self, episode_data, ep_id):
+ def _extract_video_info(self, video_data, *, ep_id=None, aid=None):
return {
- 'id': ep_id,
- 'title': episode_data.get('title_display') or episode_data['title'],
- 'thumbnail': episode_data.get('cover'),
+ 'id': ep_id or aid,
+ 'title': video_data.get('title_display') or video_data.get('title'),
+ 'thumbnail': video_data.get('cover'),
'episode_number': int_or_none(self._search_regex(
- r'^E(\d+)(?:$| - )', episode_data.get('title_display'), 'episode number', default=None)),
- 'formats': self._get_formats(ep_id),
- 'subtitles': self._get_subtitles(ep_id),
+ r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
+ 'formats': self._get_formats(ep_id=ep_id, aid=aid),
+ 'subtitles': self._get_subtitles(ep_id=ep_id, aid=aid),
'extractor_key': BiliIntlIE.ie_key(),
}
@@ -854,7 +866,7 @@ class BiliIntlBaseIE(InfoExtractor):
class BiliIntlIE(BiliIntlBaseIE):
- _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<season_id>\d+)/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
_TESTS = [{
# Bstation page
'url': 'https://www.bilibili.tv/en/play/34613/341736',
@@ -889,24 +901,35 @@ class BiliIntlIE(BiliIntlBaseIE):
}, {
'url': 'https://www.biliintl.com/en/play/34613/341736',
'only_matching': True,
+ }, {
+ # User-generated content (as opposed to a series licensed from a studio)
+ 'url': 'https://bilibili.tv/en/video/2019955076',
+ 'only_matching': True,
+ }, {
+ # No language in URL
+ 'url': 'https://www.bilibili.tv/video/2019955076',
+ 'only_matching': True,
}]
def _real_extract(self, url):
- season_id, video_id = self._match_valid_url(url).groups()
+ season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
+ video_id = ep_id or aid
webpage = self._download_webpage(url, video_id)
# Bstation layout
initial_data = self._parse_json(self._search_regex(
- r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
+ r'window\.__INITIAL_(?:DATA|STATE)__\s*=\s*({.+?});', webpage,
'preload state', default='{}'), video_id, fatal=False) or {}
- episode_data = traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict)
+ video_data = (
+ traverse_obj(initial_data, ('OgvVideo', 'epDetail'), expected_type=dict)
+ or traverse_obj(initial_data, ('UgcVideo', 'videoData'), expected_type=dict) or {})
- if not episode_data:
+ if season_id and not video_data:
# Non-Bstation layout, read through episode list
season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
- episode_data = next(
+ video_data = next(
episode for episode in traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict)
- if str(episode.get('episode_id')) == video_id)
- return self._extract_ep_info(episode_data, video_id)
+ if str(episode.get('episode_id')) == ep_id)
+ return self._extract_video_info(video_data, ep_id=ep_id, aid=aid)
class BiliIntlSeriesIE(BiliIntlBaseIE):
@@ -934,7 +957,7 @@ class BiliIntlSeriesIE(BiliIntlBaseIE):
series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[]):
episode_id = str(episode.get('episode_id'))
- yield self._extract_ep_info(episode, episode_id)
+ yield self._extract_video_info(episode, ep_id=episode_id)
def _real_extract(self, url):
series_id = self._match_id(url)
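
Note: BiliIntlIE now covers both licensed episodes (/play/<season_id>/<ep_id>)
and user uploads (/video/<aid>). Exactly one of the 'ep_id'/'aid' groups
matches, and the API helpers forward whichever is set (filter_dict drops the
None one), e.g.:

    /web/playurl?platform=web&ep_id=341736       # episode page
    /web/playurl?platform=web&aid=2019955076     # user-generated video
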
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index f3ae3fd4c..d0e57da23 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -49,6 +49,7 @@ from ..utils import (
error_to_compat_str,
extract_attributes,
ExtractorError,
+ filter_dict,
fix_xml_ampersands,
float_or_none,
format_field,
@@ -248,14 +249,14 @@ class InfoExtractor(object):
license: License name the video is licensed under.
creator: The creator of the video.
timestamp: UNIX timestamp of the moment the video was uploaded
- upload_date: Video upload date (YYYYMMDD).
+ upload_date: Video upload date in UTC (YYYYMMDD).
If not explicitly set, calculated from timestamp
release_timestamp: UNIX timestamp of the moment the video was released.
If it is not clear whether to use timestamp or this, use the former
- release_date: The date (YYYYMMDD) when the video was released.
+ release_date: The date (YYYYMMDD) when the video was released in UTC.
If not explicitly set, calculated from release_timestamp
modified_timestamp: UNIX timestamp of the moment the video was last modified.
- modified_date: The date (YYYYMMDD) when the video was last modified.
+ modified_date: The date (YYYYMMDD) when the video was last modified in UTC.
If not explicitly set, calculated from modified_timestamp
uploader_id: Nickname or id of the video uploader.
uploader_url: Full URL to a personal webpage of the video uploader.
@@ -469,6 +470,7 @@ class InfoExtractor(object):
_GEO_IP_BLOCKS = None
_WORKING = True
_NETRC_MACHINE = None
+ IE_DESC = None
_LOGIN_HINTS = {
'any': 'Use --cookies, --cookies-from-browser, --username and --password, or --netrc to provide account credentials',
@@ -1033,7 +1035,7 @@ class InfoExtractor(object):
if transform_source:
json_string = transform_source(json_string)
try:
- return json.loads(json_string)
+ return json.loads(json_string, strict=False)
except ValueError as ve:
errmsg = '%s: Failed to parse JSON ' % video_id
if fatal:
@@ -1587,7 +1589,7 @@ class InfoExtractor(object):
break
traverse_json_ld(json_ld)
- return dict((k, v) for k, v in info.items() if v is not None)
+ return filter_dict(info)
def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw):
return self._parse_json(
diff --git a/yt_dlp/extractor/ellentube.py b/yt_dlp/extractor/ellentube.py
index 544473274..d451bc048 100644
--- a/yt_dlp/extractor/ellentube.py
+++ b/yt_dlp/extractor/ellentube.py
@@ -26,7 +26,7 @@ class EllenTubeBaseIE(InfoExtractor):
duration = None
for entry in data.get('media'):
if entry.get('id') == 'm3u8':
- formats = self._extract_m3u8_formats(
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
entry['url'], video_id, 'mp4',
entry_protocol='m3u8_native', m3u8_id='hls')
duration = int_or_none(entry.get('duration'))
@@ -48,6 +48,7 @@ class EllenTubeBaseIE(InfoExtractor):
'view_count': get_insight('view'),
'like_count': get_insight('like'),
'formats': formats,
+ 'subtitles': subtitles,
}
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 4eda27cdc..e5ae12a7d 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -122,6 +122,10 @@ from .awaan import (
)
from .azmedien import AZMedienIE
from .baidu import BaiduVideoIE
+from .banbye import (
+ BanByeIE,
+ BanByeChannelIE,
+)
from .bandaichannel import BandaiChannelIE
from .bandcamp import (
BandcampIE,
@@ -674,6 +678,12 @@ from .iqiyi import (
IqIE,
IqAlbumIE
)
+
+from .itprotv import (
+ ITProTVIE,
+ ITProTVCourseIE
+)
+
from .itv import (
ITVIE,
ITVBTCCIE,
@@ -731,6 +741,11 @@ from .laola1tv import (
EHFTVIE,
ITTFIE,
)
+from .lastfm import (
+ LastFMIE,
+ LastFMPlaylistIE,
+ LastFMUserIE,
+)
from .lbry import (
LBRYIE,
LBRYChannelIE,
@@ -1962,6 +1977,11 @@ from .washingtonpost import (
WashingtonPostIE,
WashingtonPostArticleIE,
)
+from .wasdtv import (
+ WASDTVStreamIE,
+ WASDTVRecordIE,
+ WASDTVClipIE,
+)
from .wat import WatIE
from .watchbox import WatchBoxIE
from .watchindianporn import WatchIndianPornIE
diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py
index 7fc6b0e3d..54a83aa16 100644
--- a/yt_dlp/extractor/fc2.py
+++ b/yt_dlp/extractor/fc2.py
@@ -212,7 +212,6 @@ class FC2LiveIE(InfoExtractor):
'Accept': '*/*',
'User-Agent': std_headers['User-Agent'],
})
- ws.__enter__()
self.write_debug('[debug] Sending HLS server request')
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 97e34808f..4a2e30158 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -17,6 +17,7 @@ from ..compat import (
)
from ..utils import (
determine_ext,
+ dict_get,
ExtractorError,
float_or_none,
HEADRequest,
@@ -31,6 +32,7 @@ from ..utils import (
parse_resolution,
sanitized_Request,
smuggle_url,
+ str_or_none,
unescapeHTML,
unified_timestamp,
unsmuggle_url,
@@ -3778,11 +3780,12 @@ class GenericIE(InfoExtractor):
# Video.js embed
mobj = re.search(
- r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
+ r'(?s)\bvideojs\s*\(.+?([a-zA-Z0-9_$]+)\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
webpage)
if mobj is not None:
+ varname = mobj.group(1)
sources = self._parse_json(
- mobj.group(1), video_id, transform_source=js_to_json,
+ mobj.group(2), video_id, transform_source=js_to_json,
fatal=False) or []
if not isinstance(sources, list):
sources = [sources]
@@ -3819,6 +3822,21 @@ class GenericIE(InfoExtractor):
'Referer': full_response.geturl(),
},
})
+ # https://docs.videojs.com/player#addRemoteTextTrack
+ # https://html.spec.whatwg.org/multipage/media.html#htmltrackelement
+ for sub_match in re.finditer(rf'(?s){re.escape(varname)}' r'\.addRemoteTextTrack\(({.+?})\s*,\s*(?:true|false)\)', webpage):
+ sub = self._parse_json(
+ sub_match.group(1), video_id, transform_source=js_to_json, fatal=False) or {}
+ src = str_or_none(sub.get('src'))
+ if not src:
+ continue
+ subtitles.setdefault(dict_get(sub, ('language', 'srclang')) or 'und', []).append({
+ 'url': compat_urlparse.urljoin(url, src),
+ 'name': sub.get('label'),
+ 'http_headers': {
+ 'Referer': full_response.geturl(),
+ },
+ })
if formats or subtitles:
self.report_detected('video.js embed')
self._sort_formats(formats)
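
Note: the Video.js handler now captures which variable received the player
object and scans for addRemoteTextTrack() calls on that same variable. A
hypothetical embed that the added code would pick up:

    <script>
    var player = videojs('my-player');
    player.src({src: '/stream.m3u8', type: 'application/x-mpegURL'});
    player.addRemoteTextTrack({src: '/subs/en.vtt', srclang: 'en', label: 'English'}, false);
    </script>
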
diff --git a/yt_dlp/extractor/go.py b/yt_dlp/extractor/go.py
index 2ccc6df21..f92e16600 100644
--- a/yt_dlp/extractor/go.py
+++ b/yt_dlp/extractor/go.py
@@ -217,6 +217,7 @@ class GoIE(AdobePassIE):
title = video_data['title']
formats = []
+ subtitles = {}
for asset in video_data.get('assets', {}).get('asset', []):
asset_url = asset.get('value')
if not asset_url:
@@ -256,8 +257,10 @@ class GoIE(AdobePassIE):
error_message = ', '.join([error['message'] for error in errors])
raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
asset_url += '?' + entitlement['uplynkData']['sessionKey']
- formats.extend(self._extract_m3u8_formats(
- asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
+ fmts, subs = self._extract_m3u8_formats_and_subtitles(
+ asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)
+ formats.extend(fmts)
+ self._merge_subtitles(subs, target=subtitles)
else:
f = {
'format_id': format_id,
@@ -281,7 +284,6 @@ class GoIE(AdobePassIE):
formats.append(f)
self._sort_formats(formats)
- subtitles = {}
for cc in video_data.get('closedcaption', {}).get('src', []):
cc_url = cc.get('value')
if not cc_url:
diff --git a/yt_dlp/extractor/itprotv.py b/yt_dlp/extractor/itprotv.py
new file mode 100644
index 000000000..64cb4e69a
--- /dev/null
+++ b/yt_dlp/extractor/itprotv.py
@@ -0,0 +1,141 @@
+# coding: utf-8
+
+import re
+
+from .common import InfoExtractor
+
+from ..utils import (
+ int_or_none,
+ str_or_none,
+ traverse_obj,
+ urljoin
+)
+
+
+class ITProTVBaseIE(InfoExtractor):
+ _ENDPOINTS = {
+ 'course': 'course?url={}&brand=00002560-0000-3fa9-0000-1d61000035f3',
+ 'episode': 'brand/00002560-0000-3fa9-0000-1d61000035f3/episode?url={}'
+ }
+
+ def _call_api(self, ep, item_id, webpage):
+ return self._download_json(
+ f'https://api.itpro.tv/api/urza/v3/consumer-web/{self._ENDPOINTS[ep].format(item_id)}',
+ item_id, note=f'Fetching {ep} data API',
+ headers={'Authorization': f'Bearer {self._fetch_jwt(webpage)}'})[ep]
+
+ def _fetch_jwt(self, webpage):
+ return self._search_regex(r'{"passedToken":"([\w-]+\.[\w-]+\.[\w-]+)",', webpage, 'jwt')
+
+ def _check_if_logged_in(self, webpage):
+ if re.match(r'{\s*member\s*:\s*null', webpage):
+ self.raise_login_required()
+
+
+class ITProTVIE(ITProTVBaseIE):
+ _VALID_URL = r'https://app.itpro.tv/course/(?P<course>[\w-]+)/(?P<id>[\w-]+)'
+ _TESTS = [{
+ 'url': 'https://app.itpro.tv/course/guided-tour/introductionitprotv',
+ 'md5': 'bca4a28c2667fd1a63052e71a94bb88c',
+ 'info_dict': {
+ 'id': 'introductionitprotv',
+ 'ext': 'mp4',
+ 'title': 'An Introduction to ITProTV 101',
+ 'thumbnail': 'https://itprotv-image-bucket.s3.amazonaws.com/getting-started/itprotv-101-introduction-PGM.11_39_56_02.Still001.png',
+ 'description': 'md5:b175c2c3061ce35a4dd33865b2c1da4e',
+ 'duration': 269,
+ 'series': 'ITProTV 101',
+ 'series_id': 'guided-tour',
+ 'availability': 'needs_auth',
+ 'chapter': 'ITProTV 101',
+ 'chapter_number': 1,
+ 'chapter_id': '5dbb3de426b46c0010b5d1b6'
+ },
+ },
+ {
+ 'url': 'https://app.itpro.tv/course/beyond-tech/job-interview-tips',
+ 'md5': '101a299b98c47ccf4c67f9f0951defa8',
+ 'info_dict': {
+ 'id': 'job-interview-tips',
+ 'ext': 'mp4',
+ 'title': 'Job Interview Tips',
+ 'thumbnail': 'https://s3.amazonaws.com:443/production-itprotv-thumbnails/2f370bf5-294d-4bbe-ab80-c0b5781630ea.png',
+ 'description': 'md5:30d8ba483febdf89ec85623aad3c3cb6',
+ 'duration': 267,
+ 'series': 'Beyond Tech',
+ 'series_id': 'beyond-tech',
+ 'availability': 'needs_auth',
+ 'chapter': 'Job Development',
+ 'chapter_number': 2,
+ 'chapter_id': '5f7c78d424330c000edf04d9'
+ },
+ }]
+
+ def _real_extract(self, url):
+ episode_id, course_name = self._match_valid_url(url).group('id', 'course')
+ webpage = self._download_webpage(url, episode_id)
+ self._check_if_logged_in(webpage)
+ course = self._call_api('course', course_name, webpage)
+ episode = self._call_api('episode', episode_id, webpage)
+
+ chapter_number, chapter = next((
+ (i, topic) for i, topic in enumerate(course.get('topics') or [], 1)
+            if traverse_obj(topic, 'id') == episode.get('topic')), (None, {}))
+
+ return {
+ 'id': episode_id,
+ 'title': episode.get('title'),
+ 'description': episode.get('description'),
+ 'thumbnail': episode.get('thumbnail'),
+ 'formats': [
+ {'url': episode[f'jwVideo{h}Embed'], 'height': h}
+ for h in (320, 480, 720, 1080) if episode.get(f'jwVideo{h}Embed')
+ ],
+ 'duration': int_or_none(episode.get('length')),
+ 'series': course.get('name'),
+ 'series_id': course.get('url'),
+ 'chapter': str_or_none(chapter.get('title')),
+ 'chapter_number': chapter_number,
+ 'chapter_id': str_or_none(chapter.get('id')),
+ 'subtitles': {
+ 'en': [{'ext': 'vtt', 'data': episode['enCaptionData']}]
+ } if episode.get('enCaptionData') else None,
+ }
+
+
+class ITProTVCourseIE(ITProTVBaseIE):
+ _VALID_URL = r'https?://app.itpro.tv/course/(?P<id>[\w-]+)/?(?:$|[#?])'
+ _TESTS = [
+ {
+ 'url': 'https://app.itpro.tv/course/guided-tour',
+ 'info_dict': {
+ 'id': 'guided-tour',
+ 'description': 'md5:b175c2c3061ce35a4dd33865b2c1da4e',
+ 'title': 'ITProTV 101',
+ },
+ 'playlist_count': 6
+ },
+ {
+ 'url': 'https://app.itpro.tv/course/beyond-tech',
+ 'info_dict': {
+ 'id': 'beyond-tech',
+ 'description': 'md5:44cd99855e7f81a15ce1269bd0621fed',
+ 'title': 'Beyond Tech'
+ },
+ 'playlist_count': 15
+ },
+ ]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+ webpage = self._download_webpage(url, course_id)
+ self._check_if_logged_in(webpage)
+ course = self._call_api('course', course_id, webpage)
+
+ entries = [self.url_result(
+ urljoin(url, f'{course_id}/{episode["url"]}'), ITProTVIE,
+ episode['url'], episode.get('title'), url_transparent=True)
+ for episode in course['episodes']]
+
+ return self.playlist_result(
+ entries, course_id, course.get('name'), course.get('description'))
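
Note: both ITProTV extractors need an authenticated session -- the JWT is
scraped from the logged-in page ('{"passedToken":...}'), and anonymous pages
trigger raise_login_required(). So cookies have to be supplied, e.g.:

    yt-dlp --cookies-from-browser firefox 'https://app.itpro.tv/course/guided-tour'
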
diff --git a/yt_dlp/extractor/lastfm.py b/yt_dlp/extractor/lastfm.py
new file mode 100644
index 000000000..5215717e8
--- /dev/null
+++ b/yt_dlp/extractor/lastfm.py
@@ -0,0 +1,129 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none, format_field
+
+
+class LastFMPlaylistBaseIE(InfoExtractor):
+ def _entries(self, url, playlist_id):
+ webpage = self._download_webpage(url, playlist_id)
+ start_page_number = int_or_none(self._search_regex(
+ r'\bpage=(\d+)', url, 'page', default=None)) or 1
+ last_page_number = int_or_none(self._search_regex(
+ r'>(\d+)</a>[^<]*</li>[^<]*<li[^>]+class="pagination-next', webpage, 'last_page', default=None))
+
+ for page_number in range(start_page_number, (last_page_number or start_page_number) + 1):
+ webpage = self._download_webpage(
+ url, playlist_id,
+ note='Downloading page %d%s' % (page_number, format_field(last_page_number, template=' of %d')),
+ query={'page': page_number})
+ page_entries = [
+ self.url_result(player_url, 'Youtube')
+ for player_url in set(re.findall(r'data-youtube-url="([^"]+)"', webpage))
+ ]
+
+ for e in page_entries:
+ yield e
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ return self.playlist_result(self._entries(url, playlist_id), playlist_id)
+
+
+class LastFMPlaylistIE(LastFMPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?last\.fm/(music|tag)/(?P<id>[^/]+)(?:/[^/]+)?/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.last.fm/music/Oasis/(What%27s+the+Story)+Morning+Glory%3F',
+ 'info_dict': {
+ 'id': 'Oasis',
+ },
+ 'playlist_count': 11,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis?top_tracks_date_preset=ALL#top-tracks',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis/+tracks',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis/+tracks?page=2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis/+tracks?date_preset=LAST_90_DAYS#top-tracks',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/tag/rock',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/tag/rock/tracks',
+ 'only_matching': True,
+ }]
+
+
+class LastFMUserIE(LastFMPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?last\.fm/user/[^/]+/playlists/(?P<id>[^/#?]+)'
+ _TESTS = [{
+ 'url': 'https://www.last.fm/user/mehq/playlists/12319471',
+ 'info_dict': {
+ 'id': '12319471',
+ },
+ 'playlist_count': 30,
+ }]
+
+
+class LastFMIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?last\.fm/music(?:/[^/]+){2}/(?P<id>[^/#?]+)'
+ _TESTS = [{
+ 'url': 'https://www.last.fm/music/Oasis/_/Wonderwall',
+ 'md5': '9c4a70c2e84c03d54fe24229b9e13b7b',
+ 'info_dict': {
+ 'id': '6hzrDeceEKc',
+ 'ext': 'mp4',
+ 'title': 'Oasis - Wonderwall (Official Video)',
+ 'thumbnail': r're:^https?://i.ytimg.com/.*\.jpg$',
+ 'description': 'md5:0848669853c10687cc28e88b5756738f',
+ 'uploader': 'Oasis',
+ 'uploader_id': 'oasisinetofficial',
+ 'upload_date': '20080207',
+ 'album': '(What\'s The Story) Morning Glory? (Remastered)',
+ 'track': 'Wonderwall (Remastered)',
+ 'channel_id': 'UCUDVBtnOQi4c7E8jebpjc9Q',
+ 'view_count': int,
+ 'live_status': 'not_live',
+ 'channel_url': 'https://www.youtube.com/channel/UCUDVBtnOQi4c7E8jebpjc9Q',
+ 'tags': 'count:39',
+ 'creator': 'Oasis',
+ 'uploader_url': 're:^https?://www.youtube.com/user/oasisinetofficial',
+ 'duration': 279,
+ 'alt_title': 'Wonderwall (Remastered)',
+ 'age_limit': 0,
+ 'channel': 'Oasis',
+ 'channel_follower_count': int,
+ 'categories': ['Music'],
+ 'availability': 'public',
+ 'like_count': int,
+ 'playable_in_embed': True,
+ 'artist': 'Oasis',
+ },
+ 'add_ie': ['Youtube'],
+ }, {
+ 'url': 'https://www.last.fm/music/Oasis/_/Don%27t+Look+Back+In+Anger+-+Remastered/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.last.fm/music/Guns+N%27+Roses/_/Sweet+Child+o%27+Mine',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ player_url = self._search_regex(r'(?s)class="header-new-playlink"\s+href="([^"]+)"', webpage, 'player_url')
+ return self.url_result(player_url, 'Youtube')
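
Note: none of the Last.fm extractors download media themselves; they scrape
the YouTube links out of the page ('data-youtube-url' attributes on playlist
pages, the 'header-new-playlink' anchor on track pages) and delegate to the
YouTube extractor:

    yt-dlp 'https://www.last.fm/music/Oasis/_/Wonderwall'
    # -> resolves to YouTube video 6hzrDeceEKc (per the test above)
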
diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py
index 34f127285..9d243b2be 100644
--- a/yt_dlp/extractor/rai.py
+++ b/yt_dlp/extractor/rai.py
@@ -11,6 +11,7 @@ from ..compat import (
from ..utils import (
determine_ext,
ExtractorError,
+ filter_dict,
find_xpath_attr,
fix_xml_ampersands,
GeoRestrictedError,
@@ -110,11 +111,11 @@ class RaiBaseIE(InfoExtractor):
if not audio_only:
formats.extend(self._create_http_urls(relinker_url, formats))
- return dict((k, v) for k, v in {
+ return filter_dict({
'is_live': is_live,
'duration': duration,
'formats': formats,
- }.items() if v is not None)
+ })
def _create_http_urls(self, relinker_url, fmts):
_RELINKER_REG = r'https?://(?P<host>[^/]+?)/(?:i/)?(?P<extra>[^/]+?)/(?P<path>.+?)/(?P<id>\d+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4|/playlist\.m3u8).+?'
diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py
index b8ac41483..9ff3136e2 100644
--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@@ -14,7 +14,7 @@ from ..utils import (
class TVerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>(?:corner|episode|feature)/(?P<id>f?\d+))'
+ _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>corner|episode|feature|lp|tokyo2020/video)/(?P<id>[fc]?\d+)'
# videos are only available for 7 days
_TESTS = [{
'url': 'https://tver.jp/corner/f0062178',
@@ -29,6 +29,15 @@ class TVerIE(InfoExtractor):
# subtitle = ' '
'url': 'https://tver.jp/corner/f0068870',
'only_matching': True,
+ }, {
+ 'url': 'https://tver.jp/lp/f0009694',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tver.jp/lp/c0000239',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tver.jp/tokyo2020/video/6264525510001',
+ 'only_matching': True,
}]
_TOKEN = None
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
@@ -39,9 +48,11 @@ class TVerIE(InfoExtractor):
def _real_extract(self, url):
path, video_id = self._match_valid_url(url).groups()
- api_response = self._download_json(
- 'https://api.tver.jp/v4/' + path, video_id,
- query={'token': self._TOKEN})
+ if path == 'lp':
+ webpage = self._download_webpage(url, video_id)
+ redirect_path = self._search_regex(r'to_href="([^"]+)', webpage, 'redirect path')
+ path, video_id = self._match_valid_url(f'https://tver.jp{redirect_path}').groups()
+ api_response = self._download_json(f'https://api.tver.jp/v4/{path}/{video_id}', video_id, query={'token': self._TOKEN})
p_id = traverse_obj(api_response, ('main', 'publisher_id'))
if not p_id:
error_msg, expected = traverse_obj(api_response, ('episode', 0, 'textbar', 0, ('text', 'longer')), get_all=False), True
diff --git a/yt_dlp/extractor/viki.py b/yt_dlp/extractor/viki.py
index 8234ba7df..8a930798d 100644
--- a/yt_dlp/extractor/viki.py
+++ b/yt_dlp/extractor/viki.py
@@ -261,7 +261,7 @@ class VikiIE(VikiBaseIE):
mpd_content = self._download_webpage(mpd_url, video_id, note='Downloading initial MPD manifest')
mpd_url = self._search_regex(
r'(?mi)<BaseURL>(http.+.mpd)', mpd_content, 'new manifest', default=mpd_url)
- if 'mpdhd_high' not in mpd_url:
+ if 'mpdhd_high' not in mpd_url and 'sig=' not in mpd_url:
# Modify the URL to get 1080p
mpd_url = mpd_url.replace('mpdhd', 'mpdhd_high')
formats = self._extract_mpd_formats(mpd_url, video_id)
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index 051cf1b17..972fb480b 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -327,7 +327,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'info_dict': {
'id': '56015672',
'ext': 'mp4',
- 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
+ 'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc",
'description': 'md5:2d3305bad981a06ff79f027f19865021',
'timestamp': 1355990239,
'upload_date': '20121220',
@@ -340,6 +340,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'params': {
'format': 'best[protocol=https]',
},
+ 'skip': 'No longer available'
},
{
'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
@@ -357,6 +358,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'upload_date': '20130610',
'timestamp': 1370893156,
'license': 'by',
+ 'thumbnail': 'https://i.vimeocdn.com/video/440260469-19b0d92fca3bd84066623b53f1eb8aaa3980c6c809e2d67b6b39ab7b4a77a344-d_960',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
},
'params': {
'format': 'best[protocol=https]',
@@ -364,7 +369,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
},
{
'url': 'http://player.vimeo.com/video/54469442',
- 'md5': '619b811a4417aa4abe78dc653becf511',
+ 'md5': 'b3e7f4d2cbb53bd7dc3bb6ff4ed5cfbd',
'note': 'Videos that embed the url in the player page',
'info_dict': {
'id': '54469442',
@@ -375,6 +380,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_id': 'businessofsoftware',
'duration': 3610,
'description': None,
+ 'thumbnail': 'https://i.vimeocdn.com/video/376682406-f34043e7b766af6bef2af81366eacd6724f3fc3173179a11a97a1e26587c9529-d_1280',
},
'params': {
'format': 'best[protocol=https]',
@@ -395,6 +401,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'Jaime Marquínez Ferrándiz',
'duration': 10,
'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
+ 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_960',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
},
'params': {
'format': 'best[protocol=https]',
@@ -417,6 +427,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'timestamp': 1380339469,
'upload_date': '20130928',
'duration': 187,
+ 'thumbnail': 'https://i.vimeocdn.com/video/450239872-a05512d9b1e55d707a7c04365c10980f327b06d966351bc403a5d5d65c95e572-d_1280',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
},
'params': {'format': 'http-1080p'},
},
@@ -425,7 +439,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'note': 'Video with subtitles',
'info_dict': {
'id': '76979871',
- 'ext': 'mp4',
+ 'ext': 'mov',
'title': 'The New Vimeo Player (You Know, For Videos)',
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
'timestamp': 1381846109,
@@ -454,6 +468,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'Tulio Gonçalves',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593',
'uploader_id': 'user28849593',
+ 'duration': 118,
+ 'thumbnail': 'https://i.vimeocdn.com/video/478636036-c18440305ef3df9decfb6bf207a61fe39d2d17fa462a96f6f2d93d30492b037d-d_1280',
},
},
{
@@ -470,6 +486,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
'timestamp': 1324343742,
'upload_date': '20111220',
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
+ 'duration': 60,
+ 'comment_count': int,
+ 'view_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/231174622-dd07f015e9221ff529d451e1cc31c982b5d87bfafa48c4189b1da72824ee289a-d_1280',
+ 'like_count': int,
},
},
{
@@ -485,6 +506,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'Framework Studio',
'description': 'md5:f2edc61af3ea7a5592681ddbb683db73',
'upload_date': '20200225',
+ 'duration': 176,
+ 'thumbnail': 'https://i.vimeocdn.com/video/859377297-836494a4ef775e9d4edbace83937d9ad34dc846c688c0c419c0e87f7ab06c4b3-d_1280',
+ 'uploader_url': 'https://vimeo.com/frameworkla',
},
},
{
@@ -503,6 +527,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
'timestamp': 1250886430,
'upload_date': '20090821',
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
+ 'duration': 321,
+ 'comment_count': int,
+ 'view_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/22728298-bfc22146f930de7cf497821c7b0b9f168099201ecca39b00b6bd31fcedfca7a6-d_1280',
+ 'like_count': int,
},
'params': {
'skip_download': True,
@@ -535,10 +564,17 @@ class VimeoIE(VimeoBaseInfoExtractor):
'id': '68375962',
'ext': 'mp4',
'title': 'youtube-dl password protected test video',
+ 'timestamp': 1371200155,
+ 'upload_date': '20130614',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
'uploader_id': 'user18948128',
'uploader': 'Jaime Marquínez Ferrándiz',
'duration': 10,
+ 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
+ 'thumbnail': 'https://i.vimeocdn.com/video/440665496-b2c5aee2b61089442c794f64113a8e8f7d5763c3e6b3ebfaf696ae6413f8b1f4-d_960',
+ 'view_count': int,
+ 'comment_count': int,
+ 'like_count': int,
},
'params': {
'format': 'best[protocol=https]',
@@ -568,12 +604,18 @@ class VimeoIE(VimeoBaseInfoExtractor):
'info_dict': {
'id': '119195465',
'ext': 'mp4',
- 'title': 'youtube-dl test video \'ä"BaW_jenozKc',
+ 'title': "youtube-dl test video '' ä↭𝕐-BaW jenozKc",
'uploader': 'Philipp Hagemeister',
'uploader_id': 'user20132939',
'description': 'md5:fa7b6c6d8db0bdc353893df2f111855b',
'upload_date': '20150209',
'timestamp': 1423518307,
+ 'thumbnail': 'https://i.vimeocdn.com/video/default_1280',
+ 'duration': 10,
+ 'like_count': int,
+ 'uploader_url': 'https://vimeo.com/user20132939',
+ 'view_count': int,
+ 'comment_count': int,
},
'params': {
'format': 'best[protocol=https]',
@@ -596,6 +638,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
'title': 'Harrisville New Hampshire',
'timestamp': 1459259666,
'upload_date': '20160329',
+ 'release_timestamp': 1459259666,
+ 'license': 'by-nc',
+ 'duration': 159,
+ 'comment_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/562802436-585eeb13b5020c6ac0f171a2234067938098f84737787df05ff0d767f6d54ee9-d_1280',
+ 'like_count': int,
+ 'uploader_url': 'https://vimeo.com/aliniamedia',
+ 'release_date': '20160329',
},
'params': {'skip_download': True},
},
@@ -627,6 +677,14 @@ class VimeoIE(VimeoBaseInfoExtractor):
'title': 'The Shoes - Submarine Feat. Blaine Harrison',
'uploader_id': 'karimhd',
'description': 'md5:8e2eea76de4504c2e8020a9bcfa1e843',
+ 'channel_id': 'staffpicks',
+ 'duration': 336,
+ 'comment_count': int,
+ 'view_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/541243181-b593db36a16db2f0096f655da3f5a4dc46b8766d77b0f440df937ecb0c418347-d_1280',
+ 'like_count': int,
+ 'uploader_url': 'https://vimeo.com/karimhd',
+ 'channel_url': 'https://vimeo.com/channels/staffpicks',
},
'params': {'skip_download': 'm3u8'},
},
@@ -641,13 +699,19 @@ class VimeoIE(VimeoBaseInfoExtractor):
'url': 'https://vimeo.com/581039021/9603038895',
'info_dict': {
'id': '581039021',
- # these have to be provided but we don't care
'ext': 'mp4',
'timestamp': 1627621014,
- 'title': 're:.+',
- 'uploader_id': 're:.+',
- 'uploader': 're:.+',
- 'upload_date': r're:\d+',
+ 'release_timestamp': 1627621014,
+ 'duration': 976,
+ 'comment_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/1202249320-4ddb2c30398c0dc0ee059172d1bd5ea481ad12f0e0e3ad01d2266f56c744b015-d_1280',
+ 'like_count': int,
+ 'uploader_url': 'https://vimeo.com/txwestcapital',
+ 'release_date': '20210730',
+ 'uploader': 'Christopher Inks',
+ 'title': 'Thursday, July 29, 2021 BMA Evening Video Update',
+ 'uploader_id': 'txwestcapital',
+ 'upload_date': '20210730',
},
'params': {
'skip_download': True,
@@ -961,9 +1025,15 @@ class VimeoOndemandIE(VimeoIE):
'uploader': 'גם סרטים',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
'uploader_id': 'gumfilms',
- 'description': 'md5:4c027c965e439de4baab621e48b60791',
+ 'description': 'md5:aeeba3dbd4d04b0fa98a4fdc9c639998',
'upload_date': '20140906',
'timestamp': 1410032453,
+ 'thumbnail': 'https://i.vimeocdn.com/video/488238335-d7bf151c364cff8d467f1b73784668fe60aae28a54573a35d53a1210ae283bd8-d_1280',
+ 'comment_count': int,
+ 'license': 'https://creativecommons.org/licenses/by-nc-nd/3.0/',
+ 'duration': 53,
+ 'view_count': int,
+ 'like_count': int,
},
'params': {
'format': 'best[protocol=https]',
@@ -982,6 +1052,11 @@ class VimeoOndemandIE(VimeoIE):
'description': 'md5:c3c46a90529612c8279fb6af803fc0df',
'upload_date': '20150502',
'timestamp': 1430586422,
+ 'duration': 121,
+ 'comment_count': int,
+ 'view_count': int,
+ 'thumbnail': 'https://i.vimeocdn.com/video/517077723-7066ae1d9a79d3eb361334fb5d58ec13c8f04b52f8dd5eadfbd6fb0bcf11f613-d_1280',
+ 'like_count': int,
},
'params': {
'skip_download': True,
@@ -1011,7 +1086,7 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
'id': 'tributes',
'title': 'Vimeo Tributes',
},
- 'playlist_mincount': 25,
+ 'playlist_mincount': 22,
}]
_BASE_URL_TEMPL = 'https://vimeo.com/channels/%s'
@@ -1196,6 +1271,9 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'uploader': 'Richard Hardwick',
'uploader_id': 'user21297594',
'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks",
+ 'duration': 304,
+ 'thumbnail': 'https://i.vimeocdn.com/video/450115033-43303819d9ebe24c2630352e18b7056d25197d09b3ae901abdac4c4f1d68de71-d_1280',
+ 'uploader_url': 'https://vimeo.com/user21297594',
},
}, {
'note': 'video player needs Referer',
diff --git a/yt_dlp/extractor/viu.py b/yt_dlp/extractor/viu.py
index b633df95d..b0a1fca68 100644
--- a/yt_dlp/extractor/viu.py
+++ b/yt_dlp/extractor/viu.py
@@ -1,55 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
-import json
import re
+import json
+import uuid
+import random
+import urllib.parse
from .common import InfoExtractor
-from ..compat import (
- compat_kwargs,
- compat_str,
- compat_urlparse,
- compat_urllib_request,
-)
+from ..compat import compat_str
from ..utils import (
ExtractorError,
int_or_none,
+ strip_or_none,
try_get,
smuggle_url,
unsmuggle_url,
+ url_or_none,
)
class ViuBaseIE(InfoExtractor):
- def _real_initialize(self):
- viu_auth_res = self._request_webpage(
- 'https://www.viu.com/api/apps/v2/authenticate', None,
- 'Requesting Viu auth', query={
- 'acct': 'test',
- 'appid': 'viu_desktop',
- 'fmt': 'json',
- 'iid': 'guest',
- 'languageid': 'default',
- 'platform': 'desktop',
- 'userid': 'guest',
- 'useridtype': 'guest',
- 'ver': '1.0'
- }, headers=self.geo_verification_headers())
- self._auth_token = viu_auth_res.info()['X-VIU-AUTH']
-
- def _call_api(self, path, *args, **kwargs):
- headers = self.geo_verification_headers()
- headers.update({
- 'X-VIU-AUTH': self._auth_token
- })
- headers.update(kwargs.get('headers', {}))
- kwargs['headers'] = headers
+ def _call_api(self, path, *args, headers={}, **kwargs):
response = self._download_json(
- 'https://www.viu.com/api/' + path, *args,
- **compat_kwargs(kwargs))['response']
+ f'https://www.viu.com/api/{path}', *args, **kwargs,
+ headers={**self.geo_verification_headers(), **headers})['response']
if response.get('status') != 'success':
- raise ExtractorError('%s said: %s' % (
- self.IE_NAME, response['message']), expected=True)
+ raise ExtractorError(f'{self.IE_NAME} said: {response["message"]}', expected=True)
return response
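
A minimal sketch of the header-merging default introduced in _call_api above: per-call headers override the geo-verification defaults (the header values here are hypothetical):

    def call(headers={}):
        defaults = {'X-Forwarded-For': '1.2.3.4'}  # stand-in for geo_verification_headers()
        return {**defaults, **headers}

    print(call())                                # {'X-Forwarded-For': '1.2.3.4'}
    print(call({'X-Forwarded-For': '5.6.7.8'}))  # the caller's value wins
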
@@ -101,6 +78,7 @@ class ViuIE(ViuBaseIE):
tdirforwhole = video_data.get('tdirforwhole')
# #EXT-X-BYTERANGE is not supported by native hls downloader
# and ffmpeg (#10955)
+ # FIXME: It is supported in yt-dlp
# hls_file = video_data.get('hlsfile')
hls_file = video_data.get('jwhlsfile')
if url_path and tdirforwhole and hls_file:
@@ -110,10 +88,9 @@ class ViuIE(ViuBaseIE):
# r'(/hlsc_)[a-z]+(\d+\.m3u8)',
# r'\1whe\2', video_data['href'])
m3u8_url = video_data['href']
- formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
self._sort_formats(formats)
- subtitles = {}
for key, value in video_data.items():
mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
if not mobj:
@@ -227,42 +204,63 @@ class ViuOTTIE(InfoExtractor):
'zh-cn': 2,
'en-us': 3,
}
- _user_info = None
+
+ _user_token = None
+ _auth_codes = {}
def _detect_error(self, response):
- code = response.get('status', {}).get('code')
- if code > 0:
+ code = try_get(response, lambda x: x['status']['code'])
+ if code and code > 0:
message = try_get(response, lambda x: x['status']['message'])
- raise ExtractorError('%s said: %s (%s)' % (
- self.IE_NAME, message, code), expected=True)
- return response['data']
-
- def _raise_login_required(self):
- raise ExtractorError(
- 'This video requires login. '
- 'Specify --username and --password or --netrc (machine: %s) '
- 'to provide account credentials.' % self._NETRC_MACHINE,
- expected=True)
+ raise ExtractorError(f'{self.IE_NAME} said: {message} ({code})', expected=True)
+ return response.get('data') or {}
def _login(self, country_code, video_id):
- if not self._user_info:
+ if self._user_token is None:
username, password = self._get_login_info()
- if username is None or password is None:
+ if username is None:
return
+ headers = {
+ 'Authorization': f'Bearer {self._auth_codes[country_code]}',
+ 'Content-Type': 'application/json'
+ }
+ data = self._download_json(
+ 'https://api-gateway-global.viu.com/api/account/validate',
+ video_id, 'Validating email address', headers=headers,
+ data=json.dumps({
+ 'principal': username,
+ 'provider': 'email'
+ }).encode())
+ if not data.get('exists'):
+ raise ExtractorError('Invalid email address')
data = self._download_json(
- compat_urllib_request.Request(
- 'https://www.viu.com/ott/%s/index.php' % country_code, method='POST'),
- video_id, 'Logging in', errnote=False, fatal=False,
- query={'r': 'user/login'},
+ 'https://api-gateway-global.viu.com/api/auth/login',
+ video_id, 'Logging in', headers=headers,
data=json.dumps({
- 'username': username,
+ 'email': username,
'password': password,
- 'platform_flag_label': 'web',
+ 'provider': 'email',
}).encode())
- self._user_info = self._detect_error(data)['user']
-
- return self._user_info
+ self._detect_error(data)
+ self._user_token = data.get('identity')
+            # the auth code must be updated with the logged-in user's token, else later requests will fail again
+ self._auth_codes[country_code] = data.get('token')
+ return self._user_token
+
+ def _get_token(self, country_code, video_id):
+ rand = ''.join(random.choice('0123456789') for _ in range(10))
+ return self._download_json(
+ f'https://api-gateway-global.viu.com/api/auth/token?v={rand}000', video_id,
+ headers={'Content-Type': 'application/json'}, note='Getting bearer token',
+ data=json.dumps({
+ 'countryCode': country_code.upper(),
+ 'platform': 'browser',
+ 'platformFlagLabel': 'web',
+ 'language': 'en',
+ 'uuid': str(uuid.uuid4()),
+ 'carrierId': '0'
+ }).encode('utf-8'))['token']
def _real_extract(self, url):
url, idata = unsmuggle_url(url, {})
@@ -279,16 +277,16 @@ class ViuOTTIE(InfoExtractor):
query['area_id'] = area_id
product_data = self._download_json(
- 'http://www.viu.com/ott/%s/index.php' % country_code, video_id,
+ f'http://www.viu.com/ott/{country_code}/index.php', video_id,
'Downloading video info', query=query)['data']
video_data = product_data.get('current_product')
if not video_data:
- raise ExtractorError('This video is not available in your region.', expected=True)
+ self.raise_geo_restricted()
series_id = video_data.get('series_id')
if self._yes_playlist(series_id, video_id, idata):
- series = product_data.get('series', {})
+ series = product_data.get('series') or {}
product = series.get('product')
if product:
entries = []
@@ -296,14 +294,10 @@ class ViuOTTIE(InfoExtractor):
item_id = entry.get('product_id')
if not item_id:
continue
- item_id = compat_str(item_id)
entries.append(self.url_result(
- smuggle_url(
- 'http://www.viu.com/ott/%s/%s/vod/%s/' % (country_code, lang_code, item_id),
- {'force_noplaylist': True}), # prevent infinite recursion
- 'ViuOTT',
- item_id,
- entry.get('synopsis', '').strip()))
+ smuggle_url(f'http://www.viu.com/ott/{country_code}/{lang_code}/vod/{item_id}/',
+ {'force_noplaylist': True}),
+ ViuOTTIE, str(item_id), entry.get('synopsis', '').strip()))
return self.playlist_result(entries, series_id, series.get('name'), series.get('description'))
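
A minimal sketch of the URL smuggling used above to keep the playlist from re-expanding itself (smuggle_url/unsmuggle_url are from yt_dlp.utils; the ids are hypothetical):

    from yt_dlp.utils import smuggle_url, unsmuggle_url

    url = smuggle_url('http://www.viu.com/ott/sg/en-us/vod/12345/', {'force_noplaylist': True})
    url, data = unsmuggle_url(url, {})
    print(data)  # {'force_noplaylist': True} - the child extraction skips playlist handling
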
@@ -312,69 +306,65 @@ class ViuOTTIE(InfoExtractor):
'ccs_product_id': video_data['ccs_product_id'],
'language_flag_id': self._LANGUAGE_FLAG.get(lang_code.lower()) or '3',
}
- headers = {
- 'Referer': url,
- 'Origin': url,
- }
- try:
+
+ def download_playback():
stream_data = self._download_json(
- 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
- video_id, 'Downloading stream info', query=query, headers=headers)
- stream_data = self._detect_error(stream_data)['stream']
- except (ExtractorError, KeyError):
- stream_data = None
- if video_data.get('user_level', 0) > 0:
- user = self._login(country_code, video_id)
- if user:
- query['identity'] = user['identity']
- stream_data = self._download_json(
- 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
- video_id, 'Downloading stream info', query=query, headers=headers)
- stream_data = self._detect_error(stream_data).get('stream')
- else:
- # preview is limited to 3min for non-members
- # try to bypass the duration limit
- duration_limit = True
- query['duration'] = '180'
- stream_data = self._download_json(
- 'https://d1k2us671qcoau.cloudfront.net/distribute_web_%s.php' % country_code,
- video_id, 'Downloading stream info', query=query, headers=headers)
- try:
- stream_data = self._detect_error(stream_data)['stream']
- except (ExtractorError, KeyError): # if still not working, give up
- self._raise_login_required()
+ 'https://api-gateway-global.viu.com/api/playback/distribute',
+ video_id=video_id, query=query, fatal=False, note='Downloading stream info',
+ headers={
+ 'Authorization': f'Bearer {self._auth_codes[country_code]}',
+ 'Referer': url,
+ 'Origin': url
+ })
+ return self._detect_error(stream_data).get('stream')
+
+ if not self._auth_codes.get(country_code):
+ self._auth_codes[country_code] = self._get_token(country_code, video_id)
+ stream_data = None
+ try:
+ stream_data = download_playback()
+ except (ExtractorError, KeyError):
+ token = self._login(country_code, video_id)
+ if token is not None:
+ query['identity'] = token
+ else:
+                # previews are limited to 3 minutes for non-members, but we can try to bypass the limit
+ duration_limit, query['duration'] = True, '180'
+ try:
+ stream_data = download_playback()
+ except (ExtractorError, KeyError):
+ if token is not None:
+ raise
+ self.raise_login_required(method='password')
if not stream_data:
raise ExtractorError('Cannot get stream info', expected=True)
- stream_sizes = stream_data.get('size', {})
formats = []
- for vid_format, stream_url in stream_data.get('url', {}).items():
- height = int_or_none(self._search_regex(
- r's(\d+)p', vid_format, 'height', default=None))
+ for vid_format, stream_url in (stream_data.get('url') or {}).items():
+            height = int_or_none(self._search_regex(r's(\d+)p', vid_format, 'height', default=None))
# bypass preview duration limit
if duration_limit:
- stream_url = compat_urlparse.urlparse(stream_url)
- query = dict(compat_urlparse.parse_qsl(stream_url.query, keep_blank_values=True))
- time_duration = int_or_none(video_data.get('time_duration'))
+ stream_url = urllib.parse.urlparse(stream_url)
query.update({
- 'duration': time_duration if time_duration > 0 else '9999999',
+ 'duration': video_data.get('time_duration') or '9999999',
'duration_start': '0',
})
- stream_url = stream_url._replace(query=compat_urlparse.urlencode(query)).geturl()
+ stream_url = stream_url._replace(query=urllib.parse.urlencode(dict(
+ urllib.parse.parse_qsl(stream_url.query, keep_blank_values=True)))).geturl()
formats.append({
'format_id': vid_format,
'url': stream_url,
'height': height,
'ext': 'mp4',
- 'filesize': int_or_none(stream_sizes.get(vid_format))
+ 'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int)
})
self._sort_formats(formats)
subtitles = {}
- for sub in video_data.get('subtitle', []):
+ for sub in video_data.get('subtitle') or []:
sub_url = sub.get('url')
if not sub_url:
continue
@@ -383,17 +373,16 @@ class ViuOTTIE(InfoExtractor):
'ext': 'srt',
})
- title = video_data['synopsis'].strip()
-
+ title = strip_or_none(video_data.get('synopsis'))
return {
'id': video_id,
'title': title,
'description': video_data.get('description'),
- 'series': product_data.get('series', {}).get('name'),
+ 'series': try_get(product_data, lambda x: x['series']['name']),
'episode': title,
'episode_number': int_or_none(video_data.get('number')),
'duration': int_or_none(stream_data.get('duration')),
- 'thumbnail': video_data.get('cover_image_url'),
+ 'thumbnail': url_or_none(video_data.get('cover_image_url')),
'formats': formats,
'subtitles': subtitles,
}
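
A minimal sketch of the try_get pattern now used for the optional filesize above (try_get is from yt_dlp.utils; the dict shape is hypothetical):

    from yt_dlp.utils import try_get

    stream_data = {'size': {'s1080p': 1234567}}
    print(try_get(stream_data, lambda x: x['size']['s1080p'], int))  # 1234567
    print(try_get(stream_data, lambda x: x['size']['s720p'], int))   # None - the KeyError is swallowed
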
diff --git a/yt_dlp/extractor/wasdtv.py b/yt_dlp/extractor/wasdtv.py
new file mode 100644
index 000000000..38c10dc62
--- /dev/null
+++ b/yt_dlp/extractor/wasdtv.py
@@ -0,0 +1,161 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ traverse_obj,
+ try_get,
+)
+
+
+class WASDTVBaseIE(InfoExtractor):
+
+ def _fetch(self, path, video_id, description, query={}):
+ response = self._download_json(
+ f'https://wasd.tv/api/{path}', video_id, query=query,
+ note=f'Downloading {description} metadata',
+ errnote=f'Unable to download {description} metadata')
+ error = response.get('error')
+ if error:
+ raise ExtractorError(f'{self.IE_NAME} returned error: {error}', expected=True)
+ return response.get('result')
+
+ def _extract_thumbnails(self, thumbnails_dict):
+ return [{
+ 'url': url,
+ 'preference': index,
+ } for index, url in enumerate(
+ traverse_obj(thumbnails_dict, (('small', 'medium', 'large'),))) if url]
+
+ def _real_extract(self, url):
+ container = self._get_container(url)
+ stream = traverse_obj(container, ('media_container_streams', 0))
+ media = try_get(stream, lambda x: x['stream_media'][0])
+ if not media:
+            raise ExtractorError('Cannot extract media data.', expected=True)
+ media_meta = media.get('media_meta')
+ media_url, is_live = self._get_media_url(media_meta)
+ video_id = media.get('media_id') or container.get('media_container_id')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(media_url, video_id, 'mp4')
+ self._sort_formats(formats)
+ return {
+ 'id': str(video_id),
+ 'title': container.get('media_container_name') or self._og_search_title(self._download_webpage(url, video_id)),
+ 'description': container.get('media_container_description'),
+ 'thumbnails': self._extract_thumbnails(media_meta.get('media_preview_images')),
+ 'timestamp': parse_iso8601(container.get('created_at')),
+ 'view_count': int_or_none(stream.get('stream_current_viewers' if is_live else 'stream_total_viewers')),
+ 'is_live': is_live,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _get_container(self, url):
+        raise NotImplementedError('Subclasses must implement _get_container')
+
+ def _get_media_url(self, media_meta):
+        raise NotImplementedError('Subclasses must implement _get_media_url')
+
+
+class WASDTVStreamIE(WASDTVBaseIE):
+ IE_NAME = 'wasdtv:stream'
+ _VALID_URL = r'https?://wasd\.tv/(?P<id>[^/#?]+)$'
+ _TESTS = [{
+ 'url': 'https://wasd.tv/24_7',
+ 'info_dict': {
+ 'id': '559738',
+ 'ext': 'mp4',
+ 'title': 'Live 24/7 Music',
+ 'description': '24&#x2F;7 Music',
+ 'timestamp': int,
+ 'upload_date': r're:^\d{8}$',
+ 'is_live': True,
+ 'view_count': int,
+ },
+ }]
+
+ def _get_container(self, url):
+ nickname = self._match_id(url)
+ channel = self._fetch(f'channels/nicknames/{nickname}', video_id=nickname, description='channel')
+ channel_id = channel.get('channel_id')
+ containers = self._fetch(
+ 'v2/media-containers', channel_id, 'running media containers',
+ query={
+ 'channel_id': channel_id,
+ 'media_container_type': 'SINGLE',
+ 'media_container_status': 'RUNNING',
+ })
+ if not containers:
+ raise ExtractorError(f'{nickname} is offline', expected=True)
+ return containers[0]
+
+ def _get_media_url(self, media_meta):
+ return media_meta['media_url'], True
+
+
+class WASDTVRecordIE(WASDTVBaseIE):
+ IE_NAME = 'wasdtv:record'
+ _VALID_URL = r'https?://wasd\.tv/[^/#?]+/videos\?record=(?P<id>\d+)$'
+ _TESTS = [{
+ 'url': 'https://wasd.tv/spacemita/videos?record=907755',
+ 'md5': 'c9899dd85be4cc997816ff9f9ca516ce',
+ 'info_dict': {
+ 'id': '906825',
+ 'ext': 'mp4',
+ 'title': 'Музыкальный',
+ 'description': 'md5:f510388d929ff60ae61d4c3cab3137cc',
+ 'timestamp': 1645812079,
+ 'upload_date': '20220225',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'is_live': False,
+ 'view_count': int,
+ },
+ }]
+
+ def _get_container(self, url):
+ container_id = self._match_id(url)
+ return self._fetch(
+ f'v2/media-containers/{container_id}', container_id, 'media container')
+
+ def _get_media_url(self, media_meta):
+ media_archive_url = media_meta.get('media_archive_url')
+ if media_archive_url:
+ return media_archive_url, False
+ return media_meta['media_url'], True
+
+
+class WASDTVClipIE(WASDTVBaseIE):
+ IE_NAME = 'wasdtv:clip'
+ _VALID_URL = r'https?://wasd\.tv/[^/#?]+/clips\?clip=(?P<id>\d+)$'
+ _TESTS = [{
+ 'url': 'https://wasd.tv/spacemita/clips?clip=26804',
+ 'md5': '818885e720143d7a4e776ff66fcff148',
+ 'info_dict': {
+ 'id': '26804',
+ 'ext': 'mp4',
+ 'title': 'Пуш флексит на голове стримера',
+ 'timestamp': 1646682908,
+ 'upload_date': '20220307',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'view_count': int,
+ },
+ }]
+
+ def _real_extract(self, url):
+ clip_id = self._match_id(url)
+ clip = self._fetch(f'v2/clips/{clip_id}', video_id=clip_id, description='clip')
+ clip_data = clip.get('clip_data')
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(clip_data.get('url'), video_id=clip_id, ext='mp4')
+ self._sort_formats(formats)
+ return {
+ 'id': clip_id,
+ 'title': clip.get('clip_title') or self._og_search_title(self._download_webpage(url, clip_id, fatal=False)),
+ 'thumbnails': self._extract_thumbnails(clip_data.get('preview')),
+ 'timestamp': parse_iso8601(clip.get('created_at')),
+ 'view_count': int_or_none(clip.get('clip_views_count')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
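
A minimal sketch of the traverse_obj branching used by _extract_thumbnails above: a tuple of alternative keys fans out into a list, which the extractor then filters (traverse_obj is from yt_dlp.utils; the URLs are hypothetical):

    from yt_dlp.utils import traverse_obj

    thumbs = {'small': 'https://example.com/s.jpg', 'large': 'https://example.com/l.jpg'}
    urls = [url for url in traverse_obj(thumbs, (('small', 'medium', 'large'),)) or [] if url]
    print(urls)  # ['https://example.com/s.jpg', 'https://example.com/l.jpg']
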
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index d74d5b0e9..19b4985f6 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -217,15 +217,35 @@ INNERTUBE_CLIENTS = {
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 2
- }
+ },
+ # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
+ # See: https://github.com/zerodytrash/YouTube-Internal-Clients
+ 'tv_embedded': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
+ 'clientVersion': '2.0',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 85
+ },
}
+def _split_innertube_client(client_name):
+ variant, *base = client_name.rsplit('.', 1)
+ if base:
+ return variant, base[0], variant
+ base, *variant = client_name.split('_', 1)
+ return client_name, base, variant[0] if variant else None
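+# e.g. the three name shapes handled above (a sketch, traced by hand):
+#   _split_innertube_client('tv_embedded.web') == ('tv_embedded', 'web', 'tv_embedded')
+#   _split_innertube_client('web_embedded') == ('web_embedded', 'web', 'embedded')
+#   _split_innertube_client('android') == ('android', 'android', None)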
+
+
def build_innertube_clients():
THIRD_PARTY = {
- 'embedUrl': 'https://google.com', # Can be any valid URL
+ 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL
}
- BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
+ BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
@@ -234,15 +254,15 @@ def build_innertube_clients():
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
- base_client, *variant = client.split('_')
+ _, base_client, variant = _split_innertube_client(client)
ytcfg['priority'] = 10 * priority(base_client)
if not variant:
- INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
- agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
- agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
- agegate_ytcfg['priority'] -= 1
- elif variant == ['embedded']:
+ INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg)
+ embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
+ embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
+ embedscreen['priority'] -= 3
+ elif variant == 'embedded':
ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
ytcfg['priority'] -= 2
else:
@@ -807,6 +827,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
description = self._get_text(renderer, 'descriptionSnippet')
duration = parse_duration(self._get_text(
renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text')))
+ if duration is None:
+ duration = parse_duration(self._search_regex(
+ r'(?i)(ago)(?!.*\1)\s+(?P<duration>[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$',
+ traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
+ video_id, default=None, group='duration'))
+
view_count = self._get_count(renderer, 'viewCountText')
uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
@@ -818,12 +844,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str)
badges = self._extract_badges(renderer)
thumbnails = self._extract_thumbnails(renderer, 'thumbnail')
+ navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
+ renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), expected_type=str))
+ url = f'https://www.youtube.com/watch?v={video_id}'
+ if overlay_style == 'SHORTS' or (navigation_url and '/shorts/' in navigation_url):
+ url = f'https://www.youtube.com/shorts/{video_id}'
return {
'_type': 'url',
'ie_key': YoutubeIE.ie_key(),
'id': video_id,
- 'url': f'https://www.youtube.com/watch?v={video_id}',
+ 'url': url,
'title': title,
'description': description,
'duration': duration,
@@ -2940,13 +2971,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE,
video_id, 'initial player response')
- original_clients = clients
+ all_clients = set(clients)
clients = clients[::-1]
prs = []
- def append_client(client_name):
- if client_name in INNERTUBE_CLIENTS and client_name not in original_clients:
- clients.append(client_name)
+ def append_client(*client_names):
+        """ Append the first client name that exists but is not already used """
+ for client_name in client_names:
+ actual_client = _split_innertube_client(client_name)[0]
+ if actual_client in INNERTUBE_CLIENTS:
+ if actual_client not in all_clients:
+ clients.append(client_name)
+ all_clients.add(actual_client)
+ return
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
@@ -2961,7 +2998,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
tried_iframe_fallback = False
player_url = None
while clients:
- client = clients.pop()
+ client, base_client, variant = _split_innertube_client(clients.pop())
player_ytcfg = master_ytcfg if client == 'web' else {}
if 'configs' not in self._configuration_arg('player_skip'):
player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
@@ -2989,10 +3026,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
- if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated:
- append_client(client.replace('_agegate', '_creator'))
+ if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
+ append_client(f'{base_client}_creator')
elif self._is_agegated(pr):
- append_client(f'{client}_agegate')
+ if variant == 'tv_embedded':
+ append_client(f'{base_client}_embedded')
+ elif not variant:
+ append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded')
if last_error:
if not len(prs):
@@ -3013,7 +3053,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
for fmt in streaming_formats:
- if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
+ if fmt.get('targetDurationSec'):
continue
itag = str_or_none(fmt.get('itag'))
@@ -3095,6 +3135,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'fps': int_or_none(fmt.get('fps')) or None,
'height': height,
'quality': q(quality),
+ 'has_drm': bool(fmt.get('drmFamilies')),
'tbr': tbr,
'url': fmt_url,
'width': int_or_none(fmt.get('width')),
@@ -3468,6 +3509,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
subtitles, automatic_captions = {}, {}
for lang_code, caption_track in captions.items():
base_url = caption_track.get('baseUrl')
             if not base_url:
                 continue
+            orig_lang = parse_qs(base_url).get('lang', [None])[-1]
lang_name = self._get_text(caption_track, 'name', max_runs=1)
@@ -3481,19 +3523,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for trans_code, trans_name in translation_languages.items():
if not trans_code:
continue
+ orig_trans_code = trans_code
if caption_track.get('kind') != 'asr':
+ if 'translated_subs' in self._configuration_arg('skip'):
+ continue
trans_code += f'-{lang_code}'
trans_name += format_field(lang_name, template=' from %s')
# Add an "-orig" label to the original language so that it can be distinguished.
# The subs are returned without "-orig" as well for compatibility
- if lang_code == f'a-{trans_code}':
+ if lang_code == f'a-{orig_trans_code}':
process_language(
automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
# Setting tlang=lang returns damaged subtitles.
- # Not using lang_code == f'a-{trans_code}' here for future-proofing
- orig_lang = parse_qs(base_url).get('lang', [None])[-1]
process_language(automatic_captions, base_url, trans_code, trans_name,
- {} if orig_lang == trans_code else {'tlang': trans_code})
+ {} if orig_lang == orig_trans_code else {'tlang': trans_code})
info['automatic_captions'] = automatic_captions
info['subtitles'] = subtitles
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 9f6b45ec6..936cc8b6f 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -334,10 +334,10 @@ def create_parser():
action='callback', callback=_set_from_options_callback,
callback_kwargs={
'allowed_values': {
- 'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
+ 'filename', 'filename-sanitization', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles',
'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge',
'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', 'embed-metadata',
- 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs',
+ 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', 'no-certifi',
}, 'aliases': {
'youtube-dl': ['-multistreams', 'all'],
'youtube-dlc': ['-no-youtube-channel-redirect', '-no-live-chat', 'all'],
@@ -461,19 +461,18 @@ def create_parser():
metavar='COUNT', dest='max_views', default=None, type=int,
help=optparse.SUPPRESS_HELP)
selection.add_option(
- '--match-filter',
- metavar='FILTER', dest='match_filter', default=None,
+ '--match-filters',
+ metavar='FILTER', dest='match_filter', action='append',
help=(
'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
'number or a string using the operators defined in "Filtering formats". '
- 'You can also simply specify a field to match if the field is present '
- 'and "!field" to check if the field is not present. In addition, '
- 'Python style regular expression matching can be done using "~=", '
- 'and multiple filters can be checked with "&". '
- 'Use a "\\" to escape "&" or quotes if needed. Eg: --match-filter '
- '"!is_live & like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
- 'matches only videos that are not live, has a like count more than 100 '
- '(or the like field is not available), and also has a description '
+ 'You can also simply specify a field to match if the field is present, '
+ 'use "!field" to check if the field is not present, and "&" to check multiple conditions. '
+ 'Use a "\\" to escape "&" or quotes if needed. If used multiple times, '
+            'the filter matches if at least one of the conditions is met. Eg: --match-filter '
+ '!is_live --match-filter "like_count>?100 & description~=\'(?i)\\bcats \\& dogs\\b\'" '
+ 'matches only videos that are not live OR those that have a like count more than 100 '
+            '(or the like field is not available) and also have a description '
'that contains the phrase "cats & dogs" (ignoring case)'))
selection.add_option(
'--no-match-filter',
@@ -1312,7 +1311,7 @@ def create_parser():
postproc.add_option(
'--audio-quality', metavar='QUALITY',
dest='audioquality', default='5',
- help='Specify ffmpeg audio quality, insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)')
+ help='Specify ffmpeg audio quality to use when converting the audio with -x. Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)')
postproc.add_option(
'--remux-video',
metavar='FORMAT', dest='remuxvideo', default=None,
diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py
index d761c9303..8420ee864 100644
--- a/yt_dlp/postprocessor/common.py
+++ b/yt_dlp/postprocessor/common.py
@@ -1,13 +1,18 @@
from __future__ import unicode_literals
import functools
+import itertools
+import json
import os
+import time
+import urllib.error
-from ..compat import compat_str
from ..utils import (
_configuration_args,
encodeFilename,
+ network_exceptions,
PostProcessingError,
+ sanitized_Request,
write_string,
)
@@ -63,7 +68,7 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
@classmethod
def pp_key(cls):
name = cls.__name__[:-2]
- return compat_str(name[6:]) if name[:6].lower() == 'ffmpeg' else name
+ return name[6:] if name[:6].lower() == 'ffmpeg' else name
def to_screen(self, text, prefix=True, *args, **kwargs):
tag = '[%s] ' % self.PP_NAME if prefix else ''
@@ -180,6 +185,28 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
progress_template.get('postprocess-title') or 'yt-dlp %(progress._default_template)s',
progress_dict))
+ def _download_json(self, url, *, expected_http_errors=(404,)):
+        # While this is not an extractor, it behaves similarly to one,
+        # so obey extractor_retries and sleep_interval_requests
+ max_retries = self.get_param('extractor_retries', 3)
+ sleep_interval = self.get_param('sleep_interval_requests') or 0
+
+ self.write_debug(f'{self.PP_NAME} query: {url}')
+ for retries in itertools.count():
+ try:
+ rsp = self._downloader.urlopen(sanitized_Request(url))
+ return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
+ except network_exceptions as e:
+ if isinstance(e, urllib.error.HTTPError) and e.code in expected_http_errors:
+ return None
+ if retries < max_retries:
+ self.report_warning(f'{e}. Retrying...')
+ if sleep_interval > 0:
+ self.to_screen(f'Sleeping {sleep_interval} seconds ...')
+ time.sleep(sleep_interval)
+ continue
+ raise PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}')
+
class AudioConversionError(PostProcessingError):
pass
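
A minimal usage sketch of the shared helper added above (the subclass, URL and field names are hypothetical):

    class ExamplePP(PostProcessor):
        def run(self, info):
            data = self._download_json(f'https://api.example.com/videos/{info["id"]}')
            if data:  # None signals an expected HTTP error (404 by default)
                info['example_field'] = data.get('value')
            return [], info  # nothing to delete, possibly enriched info
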
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 234ddeff0..0b18e8774 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -86,13 +86,18 @@ class FFmpegPostProcessor(PostProcessor):
@staticmethod
def get_versions(downloader=None):
- return FFmpegPostProcessor.get_version_and_features(downloader)[0]
+ return FFmpegPostProcessor.get_versions_and_features(downloader)[0]
+
+ _version_cache, _features_cache = {}, {}
def _determine_executables(self):
programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
def get_ffmpeg_version(path, prog):
- out = _get_exe_version_output(path, ['-bsfs'])
+ if path in self._version_cache:
+                self._versions[prog], self._features = self._version_cache[path], self._features_cache.get(path, {})
+ return
+ out = _get_exe_version_output(path, ['-bsfs'], to_screen=self.write_debug)
ver = detect_exe_version(out) if out else False
if ver:
regexs = [
@@ -104,13 +109,13 @@ class FFmpegPostProcessor(PostProcessor):
mobj = re.match(regex, ver)
if mobj:
ver = mobj.group(1)
- self._versions[prog] = ver
+ self._versions[prog] = self._version_cache[path] = ver
if prog != 'ffmpeg' or not out:
return
mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. ]+)', out)
lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None
- self._features = {
+ self._features = self._features_cache[path] = {
'fdk': '--enable-libfdk-aac' in out,
'setts': 'setts' in out.splitlines(),
'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False),
@@ -148,26 +153,15 @@ class FFmpegPostProcessor(PostProcessor):
self._paths[basename] = location
self._versions = {}
- for p in programs:
- get_ffmpeg_version(self._paths[p], p)
-
+ executables = {'basename': ('ffmpeg', 'avconv'), 'probe_basename': ('ffprobe', 'avprobe')}
if prefer_ffmpeg is False:
- prefs = ('avconv', 'ffmpeg')
- else:
- prefs = ('ffmpeg', 'avconv')
- for p in prefs:
- if self._versions[p]:
- self.basename = p
- break
-
- if prefer_ffmpeg is False:
- prefs = ('avprobe', 'ffprobe')
- else:
- prefs = ('ffprobe', 'avprobe')
- for p in prefs:
- if self._versions[p]:
- self.probe_basename = p
- break
+ executables = {k: v[::-1] for k, v in executables.items()}
+ for var, prefs in executables.items():
+ for p in prefs:
+ get_ffmpeg_version(self._paths[p], p)
+ if self._versions[p]:
+ setattr(self, var, p)
+ break
if self.basename == 'avconv':
self.deprecation_warning(
@@ -553,9 +547,9 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor):
@staticmethod
def _options(target_ext):
+ yield from FFmpegPostProcessor.stream_copy_opts(False)
if target_ext == 'avi':
- return ['-c:v', 'libxvid', '-vtag', 'XVID']
- return []
+ yield from ('-c:v', 'libxvid', '-vtag', 'XVID')
@PostProcessor._restrict_to(images=False)
def run(self, info):
@@ -1129,6 +1123,8 @@ class FFmpegConcatPP(FFmpegPostProcessor):
super().__init__(downloader)
def concat_files(self, in_files, out_file):
+ if not self._downloader._ensure_dir_exists(out_file):
+ return
if len(in_files) == 1:
if os.path.realpath(in_files[0]) != os.path.realpath(out_file):
self.to_screen(f'Moving "{in_files[0]}" to "{out_file}"')
diff --git a/yt_dlp/postprocessor/sponsorblock.py b/yt_dlp/postprocessor/sponsorblock.py
index e7e04e86e..7943014e2 100644
--- a/yt_dlp/postprocessor/sponsorblock.py
+++ b/yt_dlp/postprocessor/sponsorblock.py
@@ -1,12 +1,9 @@
from hashlib import sha256
-import itertools
import json
import re
-import time
from .ffmpeg import FFmpegPostProcessor
-from ..compat import compat_urllib_parse_urlencode, compat_HTTPError
-from ..utils import PostProcessingError, network_exceptions, sanitized_Request
+from ..compat import compat_urllib_parse_urlencode
class SponsorBlockPP(FFmpegPostProcessor):
@@ -94,28 +91,7 @@ class SponsorBlockPP(FFmpegPostProcessor):
'categories': json.dumps(self._categories),
'actionTypes': json.dumps(['skip', 'poi'])
})
- self.write_debug(f'SponsorBlock query: {url}')
- for d in self._get_json(url):
+ for d in self._download_json(url) or []:
if d['videoID'] == video_id:
return d['segments']
return []
-
- def _get_json(self, url):
- # While this is not an extractor, it behaves similar to one and
- # so obey extractor_retries and sleep_interval_requests
- max_retries = self.get_param('extractor_retries', 3)
- sleep_interval = self.get_param('sleep_interval_requests') or 0
- for retries in itertools.count():
- try:
- rsp = self._downloader.urlopen(sanitized_Request(url))
- return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))
- except network_exceptions as e:
- if isinstance(e, compat_HTTPError) and e.code == 404:
- return []
- if retries < max_retries:
- self.report_warning(f'{e}. Retrying...')
- if sleep_interval > 0:
- self.to_screen(f'Sleeping {sleep_interval} seconds ...')
- time.sleep(sleep_interval)
- continue
- raise PostProcessingError(f'Unable to communicate with SponsorBlock API: {e}')
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index da6f27801..62a1800d4 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -85,6 +85,12 @@ from .socks import (
sockssocket,
)
+try:
+ import certifi
+ has_certifi = True
+except ImportError:
+ has_certifi = False
+
def register_socks_protocols():
# "Register" SOCKS protocols
@@ -153,7 +159,6 @@ if compat_brotli:
std_headers = {
'User-Agent': random_user_agent(),
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
- 'Accept-Encoding': ', '.join(SUPPORTED_ENCODINGS),
'Accept-Language': 'en-us,en;q=0.5',
'Sec-Fetch-Mode': 'navigate',
}
@@ -700,36 +705,40 @@ def timeconvert(timestr):
return timestamp
-def sanitize_filename(s, restricted=False, is_id=False):
+def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
"""Sanitizes a string so it could be used as part of a filename.
- If restricted is set, use a stricter subset of allowed characters.
- Set is_id if this is not an arbitrary string, but an ID that should be kept
- if possible.
+ @param restricted Use a stricter subset of allowed characters
+ @param is_id Whether this is an ID that should be kept unchanged if possible.
+ If unset, yt-dlp's new sanitization rules are in effect
"""
+ if s == '':
+ return ''
+
def replace_insane(char):
if restricted and char in ACCENT_CHARS:
return ACCENT_CHARS[char]
elif not restricted and char == '\n':
- return ' '
+ return '\0 '
elif char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':
return '' if restricted else '\''
elif char == ':':
- return '_-' if restricted else ' -'
+ return '\0_\0-' if restricted else '\0 \0-'
elif char in '\\/|*<>':
- return '_'
- if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
- return '_'
- if restricted and ord(char) > 127:
- return '_'
+ return '\0_'
+ if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
+ return '\0_'
return char
- if s == '':
- return ''
- # Handle timestamps
- s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
+ s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
result = ''.join(map(replace_insane, s))
+ if is_id is NO_DEFAULT:
+ result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result) # Remove repeated substitute chars
+ STRIP_RE = '(?:\0.|[ _-])*'
+ result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
+ result = result.replace('\0', '') or '_'
+
if not is_id:
while '__' in result:
result = result.replace('__', '_')
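
A minimal sketch of the NUL-marker trick above: substitute characters are emitted with a '\0' prefix so that repeated, leading and trailing substitutes can be collapsed before the markers are stripped (traced by hand against the regexes above):

    import re

    result = 'a\0_\0_b\0 \0-'  # e.g. what replace_insane produces for r'a\/b:'
    result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)          # 'a\0_b\0 \0-'
    STRIP_RE = '(?:\0.|[ _-])*'
    result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # 'a\0_b'
    print(result.replace('\0', '') or '_')                         # 'a_b'
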
@@ -1010,20 +1019,23 @@ def make_HTTPS_handler(params, **kwargs):
context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT
context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
if opts_check_certificate:
- try:
- context.load_default_certs()
- # Work around the issue in load_default_certs when there are bad certificates. See:
- # https://github.com/yt-dlp/yt-dlp/issues/1060,
- # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
- except ssl.SSLError:
- # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
- if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
- # Create a new context to discard any certificates that were already loaded
- context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
- context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
- for storename in ('CA', 'ROOT'):
- _ssl_load_windows_store_certs(context, storename)
- context.set_default_verify_paths()
+ if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
+ context.load_verify_locations(cafile=certifi.where())
+ else:
+ try:
+ context.load_default_certs()
+ # Work around the issue in load_default_certs when there are bad certificates. See:
+ # https://github.com/yt-dlp/yt-dlp/issues/1060,
+ # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
+ except ssl.SSLError:
+ # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
+ if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
+ # Create a new context to discard any certificates that were already loaded
+ context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+ context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
+ for storename in ('CA', 'ROOT'):
+ _ssl_load_windows_store_certs(context, storename)
+ context.set_default_verify_paths()
return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
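
A minimal sketch of the certifi fallback above, outside of yt-dlp (PROTOCOL_TLS_CLIENT already enables hostname checks and CERT_REQUIRED):

    import ssl

    try:
        import certifi
        has_certifi = True
    except ImportError:
        has_certifi = False

    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    if has_certifi:
        context.load_verify_locations(cafile=certifi.where())  # Mozilla CA bundle
    else:
        context.load_default_certs()  # system store, with the caveats noted above
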
@@ -1392,6 +1404,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
if h.capitalize() not in req.headers:
req.add_header(h, v)
+ if 'Accept-encoding' not in req.headers:
+ req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))
+
req.headers = handle_youtubedl_headers(req.headers)
if sys.version_info < (2, 7) and '#' in req.get_full_url():
@@ -2629,23 +2644,23 @@ def parse_duration(s):
m = re.match(
r'''(?ix)(?:P?
(?:
- [0-9]+\s*y(?:ears?)?\s*
+ [0-9]+\s*y(?:ears?)?,?\s*
)?
(?:
- [0-9]+\s*m(?:onths?)?\s*
+ [0-9]+\s*m(?:onths?)?,?\s*
)?
(?:
- [0-9]+\s*w(?:eeks?)?\s*
+ [0-9]+\s*w(?:eeks?)?,?\s*
)?
(?:
- (?P<days>[0-9]+)\s*d(?:ays?)?\s*
+ (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
)?
T)?
(?:
- (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
+ (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s*
)?
(?:
- (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
+ (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
)?
(?:
(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
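
With the commas now tolerated by the pattern above, spans like these parse (values traced by hand; parse_duration returns seconds):

    from yt_dlp.utils import parse_duration

    print(parse_duration('1 day, 3 hours'))    # 97200.0
    print(parse_duration('2 hours, 10 mins'))  # 7800.0
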
@@ -2698,7 +2713,9 @@ def check_executable(exe, args=[]):
return exe
-def _get_exe_version_output(exe, args):
+def _get_exe_version_output(exe, args, *, to_screen=None):
+ if to_screen:
+ to_screen(f'Checking exe version: {shell_quote([exe] + args)}')
try:
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if yt-dlp is run in the background.
@@ -3090,16 +3107,16 @@ def try_get(src, getter, expected_type=None):
return v
+def filter_dict(dct, cndn=lambda _, v: v is not None):
+ return {k: v for k, v in dct.items() if cndn(k, v)}
+
+
def merge_dicts(*dicts):
merged = {}
for a_dict in dicts:
for k, v in a_dict.items():
- if v is None:
- continue
- if (k not in merged
- or (isinstance(v, compat_str) and v
- and isinstance(merged[k], compat_str)
- and not merged[k])):
+ if (v is not None and k not in merged
+ or isinstance(v, str) and merged[k] == ''):
merged[k] = v
return merged
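
A quick check of the simplified merging rule above: None never wins, and an empty-string placeholder is upgraded by a later non-empty value:

    print(merge_dicts({'a': None, 'b': ''}, {'a': 1, 'b': 'x'}))
    # {'b': 'x', 'a': 1}
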
@@ -3534,6 +3551,11 @@ def _match_one(filter_part, dct, incomplete):
'=': operator.eq,
}
+ if isinstance(incomplete, bool):
+ is_incomplete = lambda _: incomplete
+ else:
+ is_incomplete = lambda k: k in incomplete
+
operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-z_]+)
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
@@ -3572,7 +3594,7 @@ def _match_one(filter_part, dct, incomplete):
if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
raise ValueError('Operator %s only supports string values!' % m['op'])
if actual_value is None:
- return incomplete or m['none_inclusive']
+ return is_incomplete(m['key']) or m['none_inclusive']
return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
UNARY_OPERATORS = {
@@ -3587,7 +3609,7 @@ def _match_one(filter_part, dct, incomplete):
if m:
op = UNARY_OPERATORS[m.group('op')]
actual_value = dct.get(m.group('key'))
- if incomplete and actual_value is None:
+ if is_incomplete(m.group('key')) and actual_value is None:
return True
return op(actual_value)
@@ -3595,24 +3617,29 @@ def _match_one(filter_part, dct, incomplete):
def match_str(filter_str, dct, incomplete=False):
- """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
- When incomplete, all conditions passes on missing fields
+ """ Filter a dictionary with a simple string syntax.
+ @returns Whether the filter passes
+ @param incomplete Set of keys that is expected to be missing from dct.
+ Can be True/False to indicate all/none of the keys may be missing.
+ All conditions on incomplete keys pass if the key is missing
"""
return all(
_match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
for filter_part in re.split(r'(?<!\\)&', filter_str))
-def match_filter_func(filter_str):
- if filter_str is None:
+def match_filter_func(filters):
+ if not filters:
return None
+ filters = variadic(filters)
def _match_func(info_dict, *args, **kwargs):
- if match_str(filter_str, info_dict, *args, **kwargs):
+ if any(match_str(f, info_dict, *args, **kwargs) for f in filters):
return None
else:
- video_title = info_dict.get('title', info_dict.get('id', 'video'))
- return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
+ video_title = info_dict.get('title') or info_dict.get('id') or 'video'
+ filter_str = ') | ('.join(map(str.strip, filters))
+ return f'{video_title} does not pass filter ({filter_str}), skipping ..'
return _match_func
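
A minimal sketch of the new OR-of-filters semantics (hypothetical info dict; each individual filter may still AND several conditions):

    match = match_filter_func(['!is_live', 'like_count>?100'])
    info = {'id': 'x', 'title': 'demo', 'is_live': True, 'like_count': 50}
    print(match(info))
    # 'demo does not pass filter (!is_live) | (like_count>?100), skipping ..'
    info['is_live'] = False
    print(match(info))  # None - it passes, since the first filter now matches
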
@@ -5423,15 +5450,18 @@ class Config:
class WebSocketsWrapper():
"""Wraps websockets module to use in non-async scopes"""
- def __init__(self, url, headers=None):
+ def __init__(self, url, headers=None, connect=True):
self.loop = asyncio.events.new_event_loop()
self.conn = compat_websockets.connect(
url, extra_headers=headers, ping_interval=None,
close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
+ if connect:
+ self.__enter__()
atexit.register(self.__exit__, None, None, None)
def __enter__(self):
- self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
+ if not self.pool:
+ self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
return self
def send(self, *args):
@@ -5491,3 +5521,11 @@ has_websockets = bool(compat_websockets)
def merge_headers(*dicts):
"""Merge dicts of http headers case insensitively, prioritizing the latter ones"""
return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
+
+
+class classproperty:
+ def __init__(self, f):
+ self.f = f
+
+ def __get__(self, _, cls):
+ return self.f(cls)
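
A minimal usage sketch of the classproperty helper above (names hypothetical):

    class Extractor:
        _NAME = 'demo'

        @classproperty
        def ie_key(cls):
            return cls._NAME.upper()

    print(Extractor.ie_key)  # 'DEMO' - computed on the class, no instance needed
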