32 files changed, 1020 insertions(+), 1910 deletions(-)
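Among the changes below, the output-template parser in YoutubeDL.py gains a '&replacement' operator (see the INTERNAL_FORMAT_RE hunk and the new tests in test_YoutubeDL.py): when the field resolves to a value, the literal replacement text is emitted in its place, while '|default' still covers the case where the field is missing. A minimal sketch of that rule in isolation; apply_replacement is an illustrative stand-in, not a function from the codebase:

    def apply_replacement(value, replacement=None, default='NA'):
        # Mirrors the rule added in YoutubeDL.py below:
        #   value = default if value is None else value if replacement is None else replacement
        if value is None:
            return default
        return value if replacement is None else replacement

    # Matches the new tests in test_YoutubeDL.py:
    assert apply_replacement('abc123', replacement='foo') == 'foo'              # %(id&foo)s.bar    -> foo.bar
    assert apply_replacement(None, replacement='foo') == 'NA'                   # %(title&foo)s.bar -> NA.bar
    assert apply_replacement(None, replacement='foo', default='baz') == 'baz'   # %(title&foo|baz)s.bar -> baz.bar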
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py index a144499bb..4ef96510f 100644 --- a/devscripts/prepare_manpage.py +++ b/devscripts/prepare_manpage.py @@ -13,12 +13,14 @@ PREFIX = r'''%YT-DLP(1) # NAME -youtube\-dl \- download videos from youtube.com or other video platforms +yt\-dlp \- A youtube-dl fork with additional features and patches # SYNOPSIS **yt-dlp** \[OPTIONS\] URL [URL...] +# DESCRIPTION + ''' @@ -33,47 +35,63 @@ def main(): with io.open(README_FILE, encoding='utf-8') as f: readme = f.read() - readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme) - readme = re.sub(r'\s+yt-dlp \[OPTIONS\] URL \[URL\.\.\.\]', '', readme) - readme = PREFIX + readme - + readme = filter_excluded_sections(readme) + readme = move_sections(readme) readme = filter_options(readme) with io.open(outfile, 'w', encoding='utf-8') as outf: - outf.write(readme) + outf.write(PREFIX + readme) + + +def filter_excluded_sections(readme): + EXCLUDED_SECTION_BEGIN_STRING = re.escape('<!-- MANPAGE: BEGIN EXCLUDED SECTION -->') + EXCLUDED_SECTION_END_STRING = re.escape('<!-- MANPAGE: END EXCLUDED SECTION -->') + return re.sub( + rf'(?s){EXCLUDED_SECTION_BEGIN_STRING}.+?{EXCLUDED_SECTION_END_STRING}\n', + '', readme) + + +def move_sections(readme): + MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->' + sections = re.findall(r'(?m)^%s$' % ( + re.escape(MOVE_TAG_TEMPLATE).replace(r'\%', '%') % '(.+)'), readme) + + for section_name in sections: + move_tag = MOVE_TAG_TEMPLATE % section_name + if readme.count(move_tag) > 1: + raise Exception(f'There is more than one occurrence of "{move_tag}". This is unexpected') + + sections = re.findall(rf'(?sm)(^# {re.escape(section_name)}.+?)(?=^# )', readme) + if len(sections) < 1: + raise Exception(f'The section {section_name} does not exist') + elif len(sections) > 1: + raise Exception(f'There are multiple occurrences of section {section_name}, this is unhandled') + + readme = readme.replace(sections[0], '', 1).replace(move_tag, sections[0], 1) + return readme def filter_options(readme): - ret = '' - in_options = False - for line in readme.split('\n'): - if line.startswith('# '): - if line[2:].startswith('OPTIONS'): - in_options = True - else: - in_options = False - - if in_options: - if line.lstrip().startswith('-'): - split = re.split(r'\s{2,}', line.lstrip()) - # Description string may start with `-` as well. If there is - # only one piece then it's a description bit not an option. - if len(split) > 1: - option, description = split - split_option = option.split(' ') - - if not split_option[-1].startswith('-'): # metavar - option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]]) - - # Pandoc's definition_lists. See http://pandoc.org/README.html - # for more information. - ret += '\n%s\n: %s\n' % (option, description) - continue - ret += line.lstrip() + '\n' - else: - ret += line + '\n' - - return ret + section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0) + options = '# OPTIONS\n' + for line in section.split('\n')[1:]: + if line.lstrip().startswith('-'): + split = re.split(r'\s{2,}', line.lstrip()) + # Description string may start with `-` as well. If there is + # only one piece then it's a description bit not an option. + if len(split) > 1: + option, description = split + split_option = option.split(' ') + + if not split_option[-1].startswith('-'): # metavar + option = ' '.join(split_option[:-1] + [f'*{split_option[-1]}*']) + + # Pandoc's definition_lists. 
See http://pandoc.org/README.html + options += f'\n{option}\n: {description}\n' + continue + options += line.lstrip() + '\n' + + return readme.replace(section, options, 1) if __name__ == '__main__': @@ -40,7 +40,7 @@ def main(): '--icon=devscripts/logo.ico', '--upx-exclude=vcruntime140.dll', '--noconfirm', - *dependancy_options(), + *dependency_options(), *opts, 'yt_dlp/__main__.py', ] @@ -73,11 +73,11 @@ def version_to_list(version): return list(map(int, version_list)) + [0] * (4 - len(version_list)) -def dependancy_options(): - dependancies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets') +def dependency_options(): + dependencies = [pycryptodome_module(), 'mutagen'] + collect_submodules('websockets') excluded_modules = ['test', 'ytdlp_plugins', 'youtube-dl', 'youtube-dlc'] - yield from (f'--hidden-import={module}' for module in dependancies) + yield from (f'--hidden-import={module}' for module in dependencies) yield from (f'--exclude-module={module}' for module in excluded_modules) diff --git a/test/helper.py b/test/helper.py index 9fb4f2120..b63a5c897 100644 --- a/test/helper.py +++ b/test/helper.py @@ -194,20 +194,8 @@ def expect_dict(self, got_dict, expected_dict): expect_value(self, got, expected, info_field) -def expect_info_dict(self, got_dict, expected_dict): - expect_dict(self, got_dict, expected_dict) - # Check for the presence of mandatory fields - if got_dict.get('_type') not in ('playlist', 'multi_video'): - mandatory_fields = ['id', 'title'] - if expected_dict.get('ext'): - mandatory_fields.extend(('url', 'ext')) - for key in mandatory_fields: - self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) - # Check for mandatory fields that are automatically set by YoutubeDL - for key in ['webpage_url', 'extractor', 'extractor_key']: - self.assertTrue(got_dict.get(key), 'Missing field: %s' % key) - - ignored_fields = ( +def sanitize_got_info_dict(got_dict): + IGNORED_FIELDS = ( # Format keys 'url', 'manifest_url', 'format', 'format_id', 'format_note', 'width', 'height', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'vbr', 'fps', 'vcodec', 'container', 'filesize', @@ -222,14 +210,14 @@ def expect_info_dict(self, got_dict, expected_dict): 'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries', # Auto-generated - 'playlist', 'format_index', 'webpage_url', 'video_ext', 'audio_ext', 'duration_string', 'epoch', 'fulltitle', - 'extractor', 'extractor_key', 'original_url', 'webpage_url_basename', 'webpage_url_domain', 'filepath', 'infojson_filename', + 'autonumber', 'playlist', 'format_index', 'video_ext', 'audio_ext', 'duration_string', 'epoch', + 'fulltitle', 'extractor', 'extractor_key', 'filepath', 'infojson_filename', 'original_url', # Only live_status needs to be checked 'is_live', 'was_live', ) - ignored_prefixes = ('', 'playlist', 'requested') + IGNORED_PREFIXES = ('', 'playlist', 'requested', 'webpage') def sanitize(key, value): if isinstance(value, str) and len(value) > 100: @@ -240,14 +228,32 @@ def expect_info_dict(self, got_dict, expected_dict): test_info_dict = { key: sanitize(key, value) for key, value in got_dict.items() - if value is not None and key not in ignored_fields and not any( - key.startswith(f'{prefix}_') for prefix in ignored_prefixes) + if value is not None and key not in IGNORED_FIELDS and not any( + key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES) } # display_id may be generated from id if test_info_dict.get('display_id') == test_info_dict['id']: 
test_info_dict.pop('display_id') + return test_info_dict + + +def expect_info_dict(self, got_dict, expected_dict): + expect_dict(self, got_dict, expected_dict) + # Check for the presence of mandatory fields + if got_dict.get('_type') not in ('playlist', 'multi_video'): + mandatory_fields = ['id', 'title'] + if expected_dict.get('ext'): + mandatory_fields.extend(('url', 'ext')) + for key in mandatory_fields: + self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key) + # Check for mandatory fields that are automatically set by YoutubeDL + for key in ['webpage_url', 'extractor', 'extractor_key']: + self.assertTrue(got_dict.get(key), 'Missing field: %s' % key) + + test_info_dict = sanitize_got_info_dict(got_dict) + missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) if missing_keys: def _repr(v): diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index cbca22c91..cf06dbde4 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -99,10 +99,10 @@ class TestInfoExtractor(unittest.TestCase): self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) def test_search_json_ld_realworld(self): - # https://github.com/ytdl-org/youtube-dl/issues/23306 - expect_dict( - self, - self.ie._search_json_ld(r'''<script type="application/ld+json"> + _TESTS = [ + # https://github.com/ytdl-org/youtube-dl/issues/23306 + ( + r'''<script type="application/ld+json"> { "@context": "http://schema.org/", "@type": "VideoObject", @@ -135,17 +135,86 @@ class TestInfoExtractor(unittest.TestCase): "name": "Kleio Valentien", "url": "https://www.eporner.com/pornstar/kleio-valentien/" }]} -</script>''', None), - { - 'title': '1 On 1 With Kleio', - 'description': 'Kleio Valentien', - 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', - 'timestamp': 1449347075, - 'duration': 743.0, - 'view_count': 1120958, - 'width': 1920, - 'height': 1080, - }) + </script>''', + { + 'title': '1 On 1 With Kleio', + 'description': 'Kleio Valentien', + 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', + 'timestamp': 1449347075, + 'duration': 743.0, + 'view_count': 1120958, + 'width': 1920, + 'height': 1080, + }, + {}, + ), + ( + r'''<script type="application/ld+json"> + { + "@context": "https://schema.org", + "@graph": [ + { + "@type": "NewsArticle", + "mainEntityOfPage": { + "@type": "WebPage", + "@id": "https://www.ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn" + }, + "headline": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν", + "name": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν", + "description": "Τα παιδιά δέχθηκαν την επίθεση επειδή αρνήθηκαν να γίνουν μέλη της συμμορίας, ανέφερε ο Γ. 
Ζαχαρόπουλος.", + "image": { + "@type": "ImageObject", + "url": "https://ant1media.azureedge.net/imgHandler/1100/a635c968-be71-447c-bf9c-80d843ece21e.jpg", + "width": 1100, + "height": 756 }, + "datePublished": "2021-11-10T08:50:00+03:00", + "dateModified": "2021-11-10T08:52:53+03:00", + "author": { + "@type": "Person", + "@id": "https://www.ant1news.gr/", + "name": "Ant1news", + "image": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png", + "url": "https://www.ant1news.gr/" + }, + "publisher": { + "@type": "Organization", + "@id": "https://www.ant1news.gr#publisher", + "name": "Ant1news", + "url": "https://www.ant1news.gr", + "logo": { + "@type": "ImageObject", + "url": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png", + "width": 400, + "height": 400 }, + "sameAs": [ + "https://www.facebook.com/Ant1news.gr", + "https://twitter.com/antennanews", + "https://www.youtube.com/channel/UC0smvAbfczoN75dP0Hw4Pzw", + "https://www.instagram.com/ant1news/" + ] + }, + + "keywords": "μαχαίρωμα,συμμορία ανηλίκων,ΕΙΔΗΣΕΙΣ,ΕΙΔΗΣΕΙΣ ΣΗΜΕΡΑ,ΝΕΑ,Κοινωνία - Ant1news", + + + "articleSection": "Κοινωνία" + } + ] + } + </script>''', + { + 'timestamp': 1636523400, + 'title': 'md5:91fe569e952e4d146485740ae927662b', + }, + {'expected_type': 'NewsArticle'}, + ), + ] + for html, expected_dict, search_json_ld_kwargs in _TESTS: + expect_dict( + self, + self.ie._search_json_ld(html, None, **search_json_ld_kwargs), + expected_dict + ) def test_download_json(self): uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 63ef50e1a..6c2530046 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -836,6 +836,11 @@ class TestYoutubeDL(unittest.TestCase): test('%(title3)s', ('foo/bar\\test', 'foo_bar_test')) test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo_bar_test' % os.path.sep)) + # Replacement + test('%(id&foo)s.bar', 'foo.bar') + test('%(title&foo)s.bar', 'NA.bar') + test('%(title&foo|baz)s.bar', 'baz.bar') + def test_format_note(self): ydl = YoutubeDL() self.assertEqual(ydl._format_note({}), '') diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 0a5a2611b..165f2ecc3 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -5,7 +5,6 @@ from __future__ import absolute_import, unicode_literals import collections import contextlib -import copy import datetime import errno import fileinput @@ -144,6 +143,7 @@ from .downloader.rtmp import rtmpdump_version from .postprocessor import ( get_postprocessor, EmbedThumbnailPP, + FFmpegFixupDuplicateMoovPP, FFmpegFixupDurationPP, FFmpegFixupM3u8PP, FFmpegFixupM4aPP, @@ -1054,7 +1054,8 @@ class YoutubeDL(object): (?P<fields>{field}) (?P<maths>(?:{math_op}{math_field})*) (?:>(?P<strf_format>.+?))? - (?P<alternate>(?<!\\),[^|)]+)? + (?P<alternate>(?<!\\),[^|&)]+)? + (?:&(?P<replacement>.*?))? (?:\|(?P<default>.*?))? 
$'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE)) @@ -1105,7 +1106,7 @@ class YoutubeDL(object): def _dumpjson_default(obj): if isinstance(obj, (set, LazyList)): return list(obj) - raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable') + return repr(obj) def create_key(outer_mobj): if not outer_mobj.group('has_key'): @@ -1113,11 +1114,12 @@ class YoutubeDL(object): key = outer_mobj.group('key') mobj = re.match(INTERNAL_FORMAT_RE, key) initial_field = mobj.group('fields').split('.')[-1] if mobj else '' - value, default = None, na + value, replacement, default = None, None, na while mobj: mobj = mobj.groupdict() default = mobj['default'] if mobj['default'] is not None else default value = get_value(mobj) + replacement = mobj['replacement'] if value is None and mobj['alternate']: mobj = re.match(INTERNAL_FORMAT_RE, mobj['alternate'][1:]) else: @@ -1127,7 +1129,7 @@ class YoutubeDL(object): if fmt == 's' and value is not None and key in field_size_compat_map.keys(): fmt = '0{:d}d'.format(field_size_compat_map[key]) - value = default if value is None else value + value = default if value is None else value if replacement is None else replacement flags = outer_mobj.group('conversion') or '' str_fmt = f'{fmt[:-1]}s' @@ -2068,8 +2070,7 @@ class YoutubeDL(object): selector_1, selector_2 = map(_build_selector_function, selector.selector) def selector_function(ctx): - for pair in itertools.product( - selector_1(copy.deepcopy(ctx)), selector_2(copy.deepcopy(ctx))): + for pair in itertools.product(selector_1(ctx), selector_2(ctx)): yield _merge(pair) elif selector.type == SINGLE: # atom @@ -2139,7 +2140,7 @@ class YoutubeDL(object): filters = [self._build_format_filter(f) for f in selector.filters] def final_selector(ctx): - ctx_copy = copy.deepcopy(ctx) + ctx_copy = dict(ctx) for _filter in filters: ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats'])) return selector_function(ctx_copy) @@ -2351,6 +2352,10 @@ class YoutubeDL(object): if not self.params.get('allow_unplayable_formats'): formats = [f for f in formats if not f.get('has_drm')] + if info_dict.get('is_live'): + get_from_start = bool(self.params.get('live_from_start')) + formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start] + if not formats: self.raise_no_formats(info_dict) @@ -2657,7 +2662,9 @@ class YoutubeDL(object): urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']]) self.write_debug('Invoking downloader on "%s"' % urls) - new_info = copy.deepcopy(self._copy_infodict(info)) + # Note: Ideally info should be a deep-copied so that hooks cannot modify it. 
+ # But it may contain objects that are not deep-copyable + new_info = self._copy_infodict(info) if new_info.get('http_headers') is None: new_info['http_headers'] = self._calc_headers(new_info) return fd.download(name, new_info, subtitle) @@ -2672,7 +2679,7 @@ class YoutubeDL(object): if self._num_downloads >= int(max_downloads): raise MaxDownloadsReached() - if info_dict.get('is_live'): + if info_dict.get('is_live') and not self.params.get('live_from_start'): info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M') # TODO: backward compatibility, to be removed @@ -2727,7 +2734,7 @@ class YoutubeDL(object): _infojson_written = self._write_info_json('video', info_dict, infofn) if _infojson_written: info_dict['infojson_filename'] = infofn - # For backward compatability, even though it was a private field + # For backward compatibility, even though it was a private field info_dict['__infojson_filename'] = infofn elif _infojson_written is None: return @@ -2886,15 +2893,22 @@ class YoutubeDL(object): dl_filename = existing_file(full_filename, temp_filename) info_dict['__real_download'] = False + downloaded = [] + merger = FFmpegMergerPP(self) + + fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') if dl_filename is not None: self.report_file_already_downloaded(dl_filename) - elif get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-'): + elif fd: + for f in requested_formats if fd != FFmpegFD else []: + f['filepath'] = fname = prepend_extension( + correct_ext(temp_filename, info_dict['ext']), + 'f%s' % f['format_id'], info_dict['ext']) + downloaded.append(fname) info_dict['url'] = '\n'.join(f['url'] for f in requested_formats) success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download else: - downloaded = [] - merger = FFmpegMergerPP(self) if self.params.get('allow_unplayable_formats'): self.report_warning( 'You have requested merging of multiple formats ' @@ -2906,7 +2920,7 @@ class YoutubeDL(object): 'The formats won\'t be merged.') if temp_filename == '-': - reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict) + reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params) else 'but the formats are incompatible for simultaneous download' if merger.available else 'but ffmpeg is not installed') self.report_warning( @@ -2928,14 +2942,15 @@ class YoutubeDL(object): partial_success, real_download = self.dl(fname, new_info) info_dict['__real_download'] = info_dict['__real_download'] or real_download success = success and partial_success - if merger.available and not self.params.get('allow_unplayable_formats'): - info_dict['__postprocessors'].append(merger) - info_dict['__files_to_merge'] = downloaded - # Even if there were no downloads, it is being merged only now - info_dict['__real_download'] = True - else: - for file in downloaded: - files_to_move[file] = None + + if downloaded and merger.available and not self.params.get('allow_unplayable_formats'): + info_dict['__postprocessors'].append(merger) + info_dict['__files_to_merge'] = downloaded + # Even if there were no downloads, it is being merged only now + info_dict['__real_download'] = True + else: + for file in downloaded: + files_to_move[file] = None else: # Just a single file dl_filename = existing_file(full_filename, temp_filename) @@ -3002,9 +3017,14 @@ class YoutubeDL(object): downloader = get_suitable_downloader(info_dict, self.params) if 
'protocol' in info_dict else None downloader = downloader.__name__ if downloader else None - ffmpeg_fixup(info_dict.get('requested_formats') is None and downloader == 'HlsFD', - 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', - FFmpegFixupM3u8PP) + + if info_dict.get('requested_formats') is None: # Not necessary if doing merger + ffmpeg_fixup(downloader == 'HlsFD', + 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', + FFmpegFixupM3u8PP) + ffmpeg_fixup(info_dict.get('is_live') and downloader == 'DashSegmentsFD', + 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP) + ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed timestamps detected', FFmpegFixupTimestampPP) ffmpeg_fixup(downloader == 'WebSocketFragmentFD', 'Malformed duration detected', FFmpegFixupDurationPP) @@ -3101,10 +3121,17 @@ class YoutubeDL(object): k.startswith('_') or k in remove_keys or v in empty_values) else: reject = lambda k, v: k in remove_keys - filter_fn = lambda obj: ( - list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set)) - else obj if not isinstance(obj, dict) - else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v))) + + def filter_fn(obj): + if isinstance(obj, dict): + return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} + elif isinstance(obj, (list, tuple, set, LazyList)): + return list(map(filter_fn, obj)) + elif obj is None or isinstance(obj, (str, int, float, bool)): + return obj + else: + return repr(obj) + return filter_fn(info_dict) @staticmethod @@ -3682,7 +3709,7 @@ class YoutubeDL(object): self.write_debug(f'Skipping writing {label} thumbnail') return ret - for t in thumbnails[::-1]: + for idx, t in list(enumerate(thumbnails))[::-1]: thumb_ext = (f'{t["id"]}.' 
if multiple else '') + determine_ext(t['url'], 'jpg') thumb_display_id = f'{label} thumbnail {t["id"]}' thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) @@ -3703,6 +3730,7 @@ class YoutubeDL(object): ret.append((thumb_filename, thumb_filename_final)) t['filepath'] = thumb_filename except network_exceptions as err: + thumbnails.pop(idx) self.report_warning(f'Unable to download {thumb_display_id}: {err}') if ret and not write_all: break diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index baba5411e..2a13f61c5 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -742,6 +742,7 @@ def _real_main(argv=None): 'youtube_include_hls_manifest': opts.youtube_include_hls_manifest, 'encoding': opts.encoding, 'extract_flat': opts.extract_flat, + 'live_from_start': opts.live_from_start, 'wait_for_video': opts.wait_for_video, 'mark_watched': opts.mark_watched, 'merge_output_format': opts.merge_output_format, diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 5270e8081..acc19f43a 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -12,10 +12,15 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N info_copy = info_dict.copy() info_copy['to_stdout'] = to_stdout - downloaders = [_get_suitable_downloader(info_copy, proto, params, default) - for proto in (protocol or info_copy['protocol']).split('+')] + protocols = (protocol or info_copy['protocol']).split('+') + downloaders = [_get_suitable_downloader(info_copy, proto, params, default) for proto in protocols] + if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params): return FFmpegFD + elif (set(downloaders) == {DashSegmentsFD} + and not (to_stdout and len(protocols) > 1) + and set(protocols) == {'http_dash_segments_generator'}): + return DashSegmentsFD elif len(downloaders) == 1: return downloaders[0] return None @@ -49,6 +54,7 @@ PROTOCOL_MAP = { 'rtsp': RtspFD, 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, + 'http_dash_segments_generator': DashSegmentsFD, 'ism': IsmFD, 'mhtml': MhtmlFD, 'niconico_dmc': NiconicoDmcFD, @@ -63,6 +69,7 @@ def shorten_protocol_name(proto, simplify=False): 'm3u8_native': 'm3u8_n', 'rtmp_ffmpeg': 'rtmp_f', 'http_dash_segments': 'dash', + 'http_dash_segments_generator': 'dash_g', 'niconico_dmc': 'dmc', 'websocket_frag': 'WSfrag', } @@ -71,6 +78,7 @@ def shorten_protocol_name(proto, simplify=False): 'https': 'http', 'ftps': 'ftp', 'm3u8_native': 'm3u8', + 'http_dash_segments_generator': 'dash', 'rtmp_ffmpeg': 'rtmp', 'm3u8_frag_urls': 'm3u8', 'dash_frag_urls': 'dash', diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index 6444ad692..8dd43f4fa 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -1,4 +1,5 @@ from __future__ import unicode_literals +import time from ..downloader import get_suitable_downloader from .fragment import FragmentFD @@ -15,27 +16,53 @@ class DashSegmentsFD(FragmentFD): FD_NAME = 'dashsegments' def real_download(self, filename, info_dict): - if info_dict.get('is_live'): + if info_dict.get('is_live') and set(info_dict['protocol'].split('+')) != {'http_dash_segments_generator'}: self.report_error('Live DASH videos are not supported') - fragment_base_url = info_dict.get('fragment_base_url') - fragments = info_dict['fragments'][:1] if self.params.get( - 'test', False) else info_dict['fragments'] - + real_start = time.time() real_downloader = get_suitable_downloader( info_dict, self.params, None, 
protocol='dash_frag_urls', to_stdout=(filename == '-')) - ctx = { - 'filename': filename, - 'total_frags': len(fragments), - } + requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] + args = [] + for fmt in requested_formats or [info_dict]: + try: + fragment_count = 1 if self.params.get('test') else len(fmt['fragments']) + except TypeError: + fragment_count = None + ctx = { + 'filename': fmt.get('filepath') or filename, + 'live': 'is_from_start' if fmt.get('is_from_start') else fmt.get('is_live'), + 'total_frags': fragment_count, + } + + if real_downloader: + self._prepare_external_frag_download(ctx) + else: + self._prepare_and_start_frag_download(ctx, fmt) + ctx['start'] = real_start + + fragments_to_download = self._get_fragments(fmt, ctx) + + if real_downloader: + self.to_screen( + '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) + info_dict['fragments'] = fragments_to_download + fd = real_downloader(self.ydl, self.params) + return fd.real_download(filename, info_dict) + + args.append([ctx, fragments_to_download, fmt]) - if real_downloader: - self._prepare_external_frag_download(ctx) - else: - self._prepare_and_start_frag_download(ctx, info_dict) + return self.download_and_append_fragments_multiple(*args) + + def _resolve_fragments(self, fragments, ctx): + fragments = fragments(ctx) if callable(fragments) else fragments + return [next(fragments)] if self.params.get('test') else fragments + + def _get_fragments(self, fmt, ctx): + fragment_base_url = fmt.get('fragment_base_url') + fragments = self._resolve_fragments(fmt['fragments'], ctx) - fragments_to_download = [] frag_index = 0 for i, fragment in enumerate(fragments): frag_index += 1 @@ -46,17 +73,8 @@ class DashSegmentsFD(FragmentFD): assert fragment_base_url fragment_url = urljoin(fragment_base_url, fragment['path']) - fragments_to_download.append({ + yield { 'frag_index': frag_index, 'index': i, 'url': fragment_url, - }) - - if real_downloader: - self.to_screen( - '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) - info_dict['fragments'] = fragments_to_download - fd = real_downloader(self.ydl, self.params) - return fd.real_download(filename, info_dict) - - return self.download_and_append_fragments(ctx, fragments_to_download, info_dict) + } diff --git a/yt_dlp/downloader/f4m.py b/yt_dlp/downloader/f4m.py index 9da2776d9..0008b7c28 100644 --- a/yt_dlp/downloader/f4m.py +++ b/yt_dlp/downloader/f4m.py @@ -366,7 +366,7 @@ class F4mFD(FragmentFD): ctx = { 'filename': filename, 'total_frags': total_frags, - 'live': live, + 'live': bool(live), } self._prepare_frag_download(ctx) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 04b0f68c0..79c6561c7 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -1,9 +1,10 @@ from __future__ import division, unicode_literals +import http.client +import json +import math import os import time -import json -from math import ceil try: import concurrent.futures @@ -15,6 +16,7 @@ from .common import FileDownloader from .http import HttpFD from ..aes import aes_cbc_decrypt_bytes from ..compat import ( + compat_os_name, compat_urllib_error, compat_struct_pack, ) @@ -90,7 +92,7 @@ class FragmentFD(FileDownloader): self._start_frag_download(ctx, info_dict) def __do_ytdl_file(self, ctx): - return not ctx['live'] and not ctx['tmpfilename'] == '-' and not self.params.get('_no_ytdl_file') + return ctx['live'] is not 
True and ctx['tmpfilename'] != '-' and not self.params.get('_no_ytdl_file') def _read_ytdl_file(self, ctx): assert 'ytdl_corrupt' not in ctx @@ -375,17 +377,20 @@ class FragmentFD(FileDownloader): @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... all args must be either tuple or list ''' + interrupt_trigger = [True] max_progress = len(args) if max_progress == 1: return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func) - max_workers = self.params.get('concurrent_fragment_downloads', max_progress) + max_workers = self.params.get('concurrent_fragment_downloads', 1) if max_progress > 1: self._prepare_multiline_status(max_progress) def thread_func(idx, ctx, fragments, info_dict, tpe): ctx['max_progress'] = max_progress ctx['progress_idx'] = idx - return self.download_and_append_fragments(ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, tpe=tpe) + return self.download_and_append_fragments( + ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, + tpe=tpe, interrupt_trigger=interrupt_trigger) class FTPE(concurrent.futures.ThreadPoolExecutor): # has to stop this or it's going to wait on the worker thread itself @@ -393,8 +398,11 @@ class FragmentFD(FileDownloader): pass spins = [] + if compat_os_name == 'nt': + self.report_warning('Ctrl+C does not work on Windows when used with parallel threads. ' + 'This is a known issue and patches are welcome') for idx, (ctx, fragments, info_dict) in enumerate(args): - tpe = FTPE(ceil(max_workers / max_progress)) + tpe = FTPE(math.ceil(max_workers / max_progress)) job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe) spins.append((tpe, job)) @@ -402,18 +410,32 @@ class FragmentFD(FileDownloader): for tpe, job in spins: try: result = result and job.result() + except KeyboardInterrupt: + interrupt_trigger[0] = False finally: tpe.shutdown(wait=True) + if not interrupt_trigger[0]: + raise KeyboardInterrupt() return result - def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, tpe=None): + def download_and_append_fragments( + self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, + tpe=None, interrupt_trigger=None): + if not interrupt_trigger: + interrupt_trigger = (True, ) + fragment_retries = self.params.get('fragment_retries', 0) - is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True) + is_fatal = ( + ((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0)) + if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)) + if not pack_func: pack_func = lambda frag_content, _: frag_content def download_fragment(fragment, ctx): frag_index = ctx['fragment_index'] = fragment['frag_index'] + if not interrupt_trigger[0]: + return False, frag_index headers = info_dict.get('http_headers', {}).copy() byte_range = fragment.get('byte_range') if byte_range: @@ -428,7 +450,7 @@ class FragmentFD(FileDownloader): if not success: return False, frag_index break - except compat_urllib_error.HTTPError as err: + except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err: # Unavailable (possibly temporary) fragments may be served. # First we try to retry then either skip or abort. 
# See https://github.com/ytdl-org/youtube-dl/issues/10165, @@ -466,7 +488,8 @@ class FragmentFD(FileDownloader): decrypt_fragment = self.decrypter(info_dict) - max_workers = self.params.get('concurrent_fragment_downloads', 1) + max_workers = math.ceil( + self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1)) if can_threaded_download and max_workers > 1: def _download_fragment(fragment): @@ -477,6 +500,8 @@ class FragmentFD(FileDownloader): self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments): + if not interrupt_trigger[0]: + break ctx['fragment_filename_sanitized'] = frag_filename ctx['fragment_index'] = frag_index result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) @@ -484,6 +509,8 @@ class FragmentFD(FileDownloader): return False else: for fragment in fragments: + if not interrupt_trigger[0]: + break frag_content, frag_index = download_fragment(fragment, ctx) result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx) if not result: diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index e3369306c..354453a27 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -8,6 +8,7 @@ import time from .common import InfoExtractor from ..compat import compat_str from ..utils import ( + dict_get, ExtractorError, js_to_json, int_or_none, @@ -253,3 +254,66 @@ class ABCIViewIE(InfoExtractor): 'subtitles': subtitles, 'is_live': is_live, } + + +class ABCIViewShowSeriesIE(InfoExtractor): + IE_NAME = 'abc.net.au:iview:showseries' + _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P<id>[^/]+)(?:/series/\d+)?$' + _GEO_COUNTRIES = ['AU'] + + _TESTS = [{ + 'url': 'https://iview.abc.net.au/show/upper-middle-bogan', + 'info_dict': { + 'id': '124870-1', + 'title': 'Series 1', + 'description': 'md5:93119346c24a7c322d446d8eece430ff', + 'series': 'Upper Middle Bogan', + 'season': 'Series 1', + 'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$' + }, + 'playlist_count': 8, + }, { + 'url': 'https://iview.abc.net.au/show/upper-middle-bogan', + 'info_dict': { + 'id': 'CO1108V001S00', + 'ext': 'mp4', + 'title': 'Series 1 Ep 1 I\'m A Swan', + 'description': 'md5:7b676758c1de11a30b79b4d301e8da93', + 'series': 'Upper Middle Bogan', + 'uploader_id': 'abc1', + 'upload_date': '20210630', + 'timestamp': 1625036400, + }, + 'params': { + 'noplaylist': True, + 'skip_download': 'm3u8', + }, + }] + + def _real_extract(self, url): + show_id = self._match_id(url) + webpage = self._download_webpage(url, show_id) + webpage_data = self._search_regex( + r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;', + webpage, 'initial state') + video_data = self._parse_json( + unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id) + video_data = video_data['route']['pageData']['_embedded'] + + if self.get_param('noplaylist') and 'highlightVideo' in video_data: + self.to_screen('Downloading just the highlight video because of --no-playlist') + return self.url_result(video_data['highlightVideo']['shareUrl'], ie=ABCIViewIE.ie_key()) + + self.to_screen(f'Downloading playlist {show_id} - add --no-playlist to just download the highlight video') + series = video_data['selectedSeries'] + return { + '_type': 'playlist', + 'entries': [self.url_result(episode['shareUrl']) + 
for episode in series['_embedded']['videoEpisodes']], + 'id': series.get('id'), + 'title': dict_get(series, ('title', 'displaySubtitle')), + 'description': series.get('description'), + 'series': dict_get(series, ('showTitle', 'displayTitle')), + 'season': dict_get(series, ('title', 'displaySubtitle')), + 'thumbnail': series.get('thumbnail'), + } diff --git a/yt_dlp/extractor/audiomack.py b/yt_dlp/extractor/audiomack.py index cc7771354..31fb859ae 100644 --- a/yt_dlp/extractor/audiomack.py +++ b/yt_dlp/extractor/audiomack.py @@ -14,7 +14,7 @@ from ..utils import ( class AudiomackIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?audiomack\.com/song/(?P<id>[\w/-]+)' + _VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:song/|(?=.+/song/))(?P<id>[\w/-]+)' IE_NAME = 'audiomack' _TESTS = [ # hosted on audiomack @@ -39,15 +39,16 @@ class AudiomackIE(InfoExtractor): 'title': 'Black Mamba Freestyle [Prod. By Danny Wolf]', 'uploader': 'ILOVEMAKONNEN', 'upload_date': '20160414', - } + }, + 'skip': 'Song has been removed from the site', }, ] def _real_extract(self, url): - # URLs end with [uploader name]/[uploader title] + # URLs end with [uploader name]/song/[uploader title] # this title is whatever the user types in, and is rarely # the proper song title. Real metadata is in the api response - album_url_tag = self._match_id(url) + album_url_tag = self._match_id(url).replace('/song/', '/') # Request the extended version of the api for extra fields like artist and title api_response = self._download_json( @@ -73,13 +74,13 @@ class AudiomackIE(InfoExtractor): class AudiomackAlbumIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?audiomack\.com/album/(?P<id>[\w/-]+)' + _VALID_URL = r'https?://(?:www\.)?audiomack\.com/(?:album/|(?=.+/album/))(?P<id>[\w/-]+)' IE_NAME = 'audiomack:album' _TESTS = [ # Standard album playlist { 'url': 'http://www.audiomack.com/album/flytunezcom/tha-tour-part-2-mixtape', - 'playlist_count': 15, + 'playlist_count': 11, 'info_dict': { 'id': '812251', @@ -95,24 +96,27 @@ class AudiomackAlbumIE(InfoExtractor): }, 'playlist': [{ 'info_dict': { - 'title': 'PPP (Pistol P Project) - 9. Heaven or Hell (CHIMACA) ft Zuse (prod by DJ FU)', - 'id': '837577', + 'title': 'PPP (Pistol P Project) - 8. Real (prod by SYK SENSE )', + 'id': '837576', + 'ext': 'mp3', + 'uploader': 'Lil Herb a.k.a. G Herbo', + } + }, { + 'info_dict': { + 'title': 'PPP (Pistol P Project) - 10. 4 Minutes Of Hell Part 4 (prod by DY OF 808 MAFIA)', + 'id': '837580', 'ext': 'mp3', 'uploader': 'Lil Herb a.k.a. G Herbo', } }], - 'params': { - 'playliststart': 9, - 'playlistend': 9, - } } ] def _real_extract(self, url): - # URLs end with [uploader name]/[uploader title] + # URLs end with [uploader name]/album/[uploader title] # this title is whatever the user types in, and is rarely # the proper song title. 
Real metadata is in the api response - album_url_tag = self._match_id(url) + album_url_tag = self._match_id(url).replace('/album/', '/') result = {'_type': 'playlist', 'entries': []} # There is no one endpoint for album metadata - instead it is included/repeated in each song's metadata # Therefore we don't know how many songs the album has and must infi-loop until failure @@ -134,7 +138,7 @@ class AudiomackAlbumIE(InfoExtractor): # Pull out the album metadata and add to result (if it exists) for resultkey, apikey in [('id', 'album_id'), ('title', 'album_title')]: if apikey in api_response and resultkey not in result: - result[resultkey] = api_response[apikey] + result[resultkey] = compat_str(api_response[apikey]) song_id = url_basename(api_response['url']).rpartition('.')[0] result['entries'].append({ 'id': compat_str(api_response.get('id', song_id)), diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index ebf2e3cea..9abbaf04f 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -163,9 +163,8 @@ class InfoExtractor(object): * filesize_approx An estimate for the number of bytes * player_url SWF Player URL (used for rtmpdump). * protocol The protocol that will be used for the actual - download, lower-case. - "http", "https", "rtsp", "rtmp", "rtmp_ffmpeg", "rtmpe", - "m3u8", "m3u8_native" or "http_dash_segments". + download, lower-case. One of "http", "https" or + one of the protocols defined in downloader.PROTOCOL_MAP * fragment_base_url Base URL for fragments. Each fragment's path value (if present) will be relative to @@ -181,6 +180,8 @@ class InfoExtractor(object): fragment_base_url * "duration" (optional, int or float) * "filesize" (optional, int) + * is_from_start Is a live format that can be downloaded + from the start. Boolean * preference Order number of this format. If this field is present and not None, the formats get sorted by this field, regardless of all other values. 
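The new is_from_start format field documented above pairs with the format-selection change in the YoutubeDL.py hunk earlier in this diff. A minimal sketch of that rule, assuming plain format dicts; filter_live_formats is an illustrative helper, not part of the codebase:

    def filter_live_formats(formats, info_dict, params):
        # Mirrors the process_video_result change earlier in this diff: for a
        # live video, keep only the formats whose is_from_start flag matches
        # the requested live_from_start behaviour.
        if info_dict.get('is_live'):
            get_from_start = bool(params.get('live_from_start'))
            formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start]
        return formats

    fmts = [{'format_id': 'dash-from-start', 'is_from_start': True}, {'format_id': 'dash-live'}]
    assert filter_live_formats(fmts, {'is_live': True}, {'live_from_start': True}) == [fmts[0]]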
@@ -1451,8 +1452,13 @@ class InfoExtractor(object): }) extract_interaction_statistic(e) - for e in json_ld: - if '@context' in e: + def traverse_json_ld(json_ld, at_top_level=True): + for e in json_ld: + if at_top_level and '@context' not in e: + continue + if at_top_level and set(e.keys()) == {'@context', '@graph'}: + traverse_json_ld(variadic(e['@graph'], allowed_types=(dict,)), at_top_level=False) + break item_type = e.get('@type') if expected_type is not None and expected_type != item_type: continue @@ -1488,7 +1494,7 @@ class InfoExtractor(object): info.update({ 'timestamp': parse_iso8601(e.get('datePublished')), 'title': unescapeHTML(e.get('headline')), - 'description': unescapeHTML(e.get('articleBody')), + 'description': unescapeHTML(e.get('articleBody') or e.get('description')), }) elif item_type == 'VideoObject': extract_video_object(e) @@ -1503,6 +1509,8 @@ class InfoExtractor(object): continue else: break + traverse_json_ld(json_ld) + return dict((k, v) for k, v in info.items() if v is not None) def _search_nextjs_data(self, webpage, video_id, **kw): diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index e4755b3d1..ee5ea533f 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals from .abc import ( ABCIE, ABCIViewIE, + ABCIViewShowSeriesIE, ) from .abcnews import ( AbcNewsIE, @@ -434,6 +435,7 @@ from .eyedotv import EyedoTVIE from .facebook import ( FacebookIE, FacebookPluginsVideoIE, + FacebookRedirectURLIE, ) from .fancode import ( FancodeVodIE, @@ -563,6 +565,10 @@ from .hrti import ( HRTiIE, HRTiPlaylistIE, ) +from .hse import ( + HSEShowIE, + HSEProductIE, +) from .huajiao import HuajiaoIE from .huffpost import HuffPostIE from .hungama import ( @@ -1357,6 +1363,7 @@ from .soundcloud import ( SoundcloudEmbedIE, SoundcloudIE, SoundcloudSetIE, + SoundcloudRelatedIE, SoundcloudUserIE, SoundcloudTrackStationIE, SoundcloudPlaylistIE, diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 44d3dc0d7..6dbcd690d 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -23,9 +23,11 @@ from ..utils import ( merge_dicts, network_exceptions, parse_count, + parse_qs, qualities, sanitized_Request, try_get, + url_or_none, urlencode_postdata, urljoin, ) @@ -746,3 +748,42 @@ class FacebookPluginsVideoIE(InfoExtractor): return self.url_result( compat_urllib_parse_unquote(self._match_id(url)), FacebookIE.ie_key()) + + +class FacebookRedirectURLIE(InfoExtractor): + IE_DESC = False # Do not list + _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/flx/warn[/?]' + _TESTS = [{ + 'url': 'https://www.facebook.com/flx/warn/?h=TAQHsoToz&u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&s=1', + 'info_dict': { + 'id': 'pO8h3EaFRdo', + 'ext': 'mp4', + 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set', + 'description': 'md5:2d713ccbb45b686a1888397b2c77ca6b', + 'channel_id': 'UCGBpxWJr9FNOcFYA5GkKrMg', + 'playable_in_embed': True, + 'categories': ['Music'], + 'channel': 'Boiler Room', + 'uploader_id': 'brtvofficial', + 'uploader': 'Boiler Room', + 'tags': 'count:11', + 'duration': 3332, + 'live_status': 'not_live', + 'thumbnail': 'https://i.ytimg.com/vi/pO8h3EaFRdo/maxresdefault.jpg', + 'channel_url': 'https://www.youtube.com/channel/UCGBpxWJr9FNOcFYA5GkKrMg', + 'availability': 'public', + 'uploader_url': 'http://www.youtube.com/user/brtvofficial', + 'upload_date': '20150917', + 'age_limit': 0, + 'view_count': int, + 'like_count': int, + 
}, + 'add_ie': ['Youtube'], + 'params': {'skip_download': 'Youtube'}, + }] + + def _real_extract(self, url): + redirect_url = url_or_none(parse_qs(url).get('u', [None])[-1]) + if not redirect_url: + raise ExtractorError('Invalid facebook redirect URL', expected=True) + return self.url_result(redirect_url) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 51557f0f1..1ec0ce986 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -3653,6 +3653,10 @@ class GenericIE(InfoExtractor): json_ld = self._search_json_ld(webpage, video_id, default={}) if json_ld.get('url'): self.report_detected('JSON LD') + if determine_ext(json_ld.get('url')) == 'm3u8': + json_ld['formats'], json_ld['subtitles'] = self._extract_m3u8_formats_and_subtitles( + json_ld['url'], video_id, 'mp4') + json_ld.pop('url') return merge_dicts(json_ld, info_dict) def check_video(vurl): diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py index 58cd59511..c9f1dd256 100644 --- a/yt_dlp/extractor/gronkh.py +++ b/yt_dlp/extractor/gronkh.py @@ -6,7 +6,7 @@ from ..utils import unified_strdate class GronkhIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/(?:watch/)?stream/(?P<id>\d+)' _TESTS = [{ 'url': 'https://gronkh.tv/stream/536', @@ -19,6 +19,9 @@ class GronkhIE(InfoExtractor): 'upload_date': '20211001' }, 'params': {'skip_download': True} + }, { + 'url': 'https://gronkh.tv/watch/stream/546', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/yt_dlp/extractor/hse.py b/yt_dlp/extractor/hse.py new file mode 100644 index 000000000..9144ff8dc --- /dev/null +++ b/yt_dlp/extractor/hse.py @@ -0,0 +1,95 @@ +# coding: utf-8 +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + traverse_obj, + unified_timestamp, +) + + +class HSEShowBaseInfoExtractor(InfoExtractor): + _GEO_COUNTRIES = ['DE'] + + def _extract_redux_data(self, url, video_id): + webpage = self._download_webpage(url, video_id) + redux = self._html_search_regex( + r'window\.__REDUX_DATA__\s*=\s*({.*});?', webpage, 'redux data') + return self._parse_json(redux.replace('\n', ''), video_id) + + def _extract_formats_and_subtitles(self, sources, video_id): + if not sources: + raise ExtractorError('No video found', expected=True, video_id=video_id) + formats, subtitles = [], {} + for src in sources: + if src['mimetype'] != 'application/x-mpegURL': + continue + fmts, subs = self._extract_m3u8_formats_and_subtitles(src['url'], video_id, ext='mp4') + formats.extend(fmts) + subtitles = self._merge_subtitles(subtitles, subs) + self._sort_formats(formats) + return formats, subtitles + + +class HSEShowIE(HSEShowBaseInfoExtractor): + _VALID_URL = r'https?://(?:www\.)?hse\.de/dpl/c/tv-shows/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://www.hse.de/dpl/c/tv-shows/505350', + 'info_dict': { + 'id': '505350', + 'ext': 'mp4', + 'title': 'Pfeffinger Mode & Accessoires', + 'timestamp': 1638810000, + 'upload_date': '20211206', + 'channel': 'HSE24', + 'uploader': 'Arina Pirayesh' + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._extract_redux_data(url, video_id) + formats, subtitles = self._extract_formats_and_subtitles( + traverse_obj(json_data, ('tvShowPage', 'tvShowVideo', 'sources')), video_id) + + show = traverse_obj(json_data, ('tvShowPage', 'tvShow')) or {} + return { + 'id': video_id, + 'title': show.get('title') or 
video_id, + 'formats': formats, + 'timestamp': unified_timestamp(f'{show.get("date")} {show.get("hour")}:00'), + 'thumbnail': traverse_obj(json_data, ('tvShowVideo', 'poster')), + 'channel': self._search_regex( + r'tvShow \| ([A-Z0-9]+)_', show.get('actionFieldText') or '', video_id, fatal=False), + 'uploader': show.get('presenter'), + 'subtitles': subtitles, + } + + +class HSEProductIE(HSEShowBaseInfoExtractor): + _VALID_URL = r'https?://(?:www\.)?hse\.de/dpl/p/product/(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://www.hse.de/dpl/p/product/408630', + 'info_dict': { + 'id': '408630', + 'ext': 'mp4', + 'title': 'Hose im Ponte-Mix', + 'uploader': 'Judith Williams' + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = self._extract_redux_data(url, video_id) + video = traverse_obj(json_data, ('productContent', 'productContent', 'videos', 0)) or {} + formats, subtitles = self._extract_formats_and_subtitles(video.get('sources'), video_id) + + return { + 'id': video_id, + 'title': traverse_obj(json_data, ('productDetail', 'product', 'name', 'short')) or video_id, + 'formats': formats, + 'subtitles': subtitles, + 'thumbnail': video.get('poster'), + 'uploader': traverse_obj(json_data, ('productDetail', 'product', 'brand', 'brandName')), + } diff --git a/yt_dlp/extractor/ondemandkorea.py b/yt_dlp/extractor/ondemandkorea.py index cc3c587bc..e933ea2cc 100644 --- a/yt_dlp/extractor/ondemandkorea.py +++ b/yt_dlp/extractor/ondemandkorea.py @@ -1,6 +1,8 @@ # coding: utf-8 from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -71,8 +73,8 @@ class OnDemandKoreaIE(InfoExtractor): jw_config = self._parse_json( self._search_regex( - r'(?s)odkPlayer\.init.*?(?P<options>{[^;]+}).*?;', - webpage, 'jw config', group='options'), + r'playlist\s*=\s*\[(?P<options>.+)];?$', + webpage, 'jw config', flags=re.MULTILINE, group='options'), video_id, transform_source=js_to_json) info = self._parse_jwplayer_data( jw_config, video_id, require_title=False, m3u8_id='hls', diff --git a/yt_dlp/extractor/plutotv.py b/yt_dlp/extractor/plutotv.py index 0cf82466a..26aff1af5 100644 --- a/yt_dlp/extractor/plutotv.py +++ b/yt_dlp/extractor/plutotv.py @@ -20,11 +20,11 @@ from ..utils import ( class PlutoTVIE(InfoExtractor): _VALID_URL = r'''(?x) - https?://(?:www\.)?pluto\.tv(?:/en)?/on-demand + https?://(?:www\.)?pluto\.tv(?:/[^/]+)?/on-demand /(?P<video_type>movies|series) /(?P<series_or_movie_slug>[^/]+) (?: - /seasons?/(?P<season_no>\d+) + (?:/seasons?/(?P<season_no>\d+))? (?:/episode/(?P<episode_slug>[^/]+))? )? 
/?(?:$|[#?])''' @@ -84,6 +84,9 @@ class PlutoTVIE(InfoExtractor): }, { 'url': 'https://pluto.tv/en/on-demand/series/manhunters-fugitive-task-force/seasons/1/episode/third-times-the-charm-1-1', 'only_matching': True, + }, { + 'url': 'https://pluto.tv/it/on-demand/series/csi-vegas/episode/legacy-2021-1-1', + 'only_matching': True, } ] diff --git a/yt_dlp/extractor/sendtonews.py b/yt_dlp/extractor/sendtonews.py index bc38a0f1e..858547b54 100644 --- a/yt_dlp/extractor/sendtonews.py +++ b/yt_dlp/extractor/sendtonews.py @@ -80,7 +80,7 @@ class SendtoNewsIE(InfoExtractor): 'format_id': '%s-%d' % (determine_protocol(f), tbr), 'tbr': tbr, }) - # 'tbr' was explicitly set to be prefered over 'height' originally, + # 'tbr' was explicitly set to be preferred over 'height' originally, # So this is being kept unless someone can confirm this is unnecessary self._sort_formats(info_dict['formats'], ('tbr', 'res')) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index d5cbe70ea..f251e5599 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -214,8 +214,9 @@ class SoundcloudIE(SoundcloudBaseIE): (?!stations/track) (?P<uploader>[\w\d-]+)/ (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#])) - (?P<title>[\w\d-]+)/? - (?P<token>[^?]+?)?(?:[?].*)?$) + (?P<title>[\w\d-]+) + (?:/(?P<token>(?!(?:albums|sets|recommended))[^?]+?))? + (?:[?].*)?$) |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+) (?:/?\?secret_token=(?P<secret_token>[^&]+))?) ) @@ -827,6 +828,54 @@ class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE): track_id, 'Track station: %s' % track['title']) +class SoundcloudRelatedIE(SoundcloudPagedPlaylistBaseIE): + _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<slug>[\w\d-]+/[\w\d-]+)/(?P<relation>albums|sets|recommended)' + IE_NAME = 'soundcloud:related' + _TESTS = [{ + 'url': 'https://soundcloud.com/wajang/sexapil-pingers-5/recommended', + 'info_dict': { + 'id': '1084577272', + 'title': 'Sexapil - Pingers 5 (Recommended)', + }, + 'playlist_mincount': 50, + }, { + 'url': 'https://soundcloud.com/wajang/sexapil-pingers-5/albums', + 'info_dict': { + 'id': '1084577272', + 'title': 'Sexapil - Pingers 5 (Albums)', + }, + 'playlist_mincount': 1, + }, { + 'url': 'https://soundcloud.com/wajang/sexapil-pingers-5/sets', + 'info_dict': { + 'id': '1084577272', + 'title': 'Sexapil - Pingers 5 (Sets)', + }, + 'playlist_mincount': 4, + }] + + _BASE_URL_MAP = { + 'albums': 'tracks/%s/albums', + 'sets': 'tracks/%s/playlists_without_albums', + 'recommended': 'tracks/%s/related', + } + + def _real_extract(self, url): + slug, relation = self._match_valid_url(url).group('slug', 'relation') + + track = self._download_json( + self._resolv_url(self._BASE_URL + slug), + slug, 'Downloading track info', headers=self._HEADERS) + + if track.get('errors'): + raise ExtractorError(f'{self.IE_NAME} said: %s' % ','.join( + str(err['error_message']) for err in track['errors']), expected=True) + + return self._extract_playlist( + self._API_V2_BASE + self._BASE_URL_MAP[relation] % track['id'], str(track['id']), + '%s (%s)' % (track.get('title') or slug, relation.capitalize())) + + class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE): _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$' IE_NAME = 'soundcloud:playlist' diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 8f64b6657..1f5009399 100644 --- a/yt_dlp/extractor/youtube.py +++ 
b/yt_dlp/extractor/youtube.py @@ -5,6 +5,7 @@ from __future__ import unicode_literals import calendar import copy import datetime +import functools import hashlib import itertools import json @@ -15,6 +16,7 @@ import re import sys import time import traceback +import threading from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( @@ -55,6 +57,7 @@ from ..utils import ( smuggle_url, str_or_none, str_to_int, + strftime_or_none, traverse_obj, try_get, unescapeHTML, @@ -358,7 +361,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor): consent_id = random.randint(100, 999) self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id) + def _initialize_pref(self): + cookies = self._get_cookies('https://www.youtube.com/') + pref_cookie = cookies.get('PREF') + pref = {} + if pref_cookie: + try: + pref = dict(compat_urlparse.parse_qsl(pref_cookie.value)) + except ValueError: + self.report_warning('Failed to parse user PREF cookie' + bug_reports_message()) + pref.update({'hl': 'en'}) + self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref)) + def _real_initialize(self): + self._initialize_pref() self._initialize_consent() self._login() @@ -391,23 +407,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client) def _extract_context(self, ytcfg=None, default_client='web'): - _get_context = lambda y: try_get(y, lambda x: x['INNERTUBE_CONTEXT'], dict) - context = _get_context(ytcfg) - if context: - return context - - context = _get_context(self._get_default_ytcfg(default_client)) - if not ytcfg: - return context - - # Recreate the client context (required) - context['client'].update({ - 'clientVersion': self._extract_client_version(ytcfg, default_client), - 'clientName': self._extract_client_name(ytcfg, default_client), - }) - visitor_data = try_get(ytcfg, lambda x: x['VISITOR_DATA'], compat_str) - if visitor_data: - context['client']['visitorData'] = visitor_data + context = get_first( + (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict) + # Enforce language for extraction + traverse_obj(context, 'client', expected_type=dict, default={})['hl'] = 'en' return context _SAPISID = None @@ -664,6 +667,29 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if text: return text + @staticmethod + def extract_relative_time(relative_time_text): + """ + Extracts a relative time from string and converts to dt object + e.g. 
'streamed 6 days ago', '5 seconds ago (edited)' + """ + mobj = re.search(r'(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text) + if mobj: + try: + return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit')), precision='auto') + except ValueError: + return None + + def _extract_time_text(self, renderer, *path_list): + text = self._get_text(renderer, *path_list) or '' + dt = self.extract_relative_time(text) + timestamp = None + if isinstance(dt, datetime.datetime): + timestamp = calendar.timegm(dt.timetuple()) + if text and timestamp is None: + self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True) + return timestamp, text + def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None, default_client='web'): @@ -750,7 +776,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'view count', default=None)) uploader = self._get_text(renderer, 'ownerText', 'shortBylineText') - + channel_id = traverse_obj( + renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False) + timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText') + scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)) + overlay_style = traverse_obj( + renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str) + badges = self._extract_badges(renderer) return { '_type': 'url', 'ie_key': YoutubeIE.ie_key(), @@ -761,6 +793,14 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'duration': duration, 'view_count': view_count, 'uploader': uploader, + 'channel_id': channel_id, + 'upload_date': strftime_or_none(timestamp, '%Y%m%d'), + 'live_status': ('is_upcoming' if scheduled_timestamp is not None + else 'was_live' if 'streamed' in time_text.lower() + else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges + else None), + 'release_timestamp': scheduled_timestamp, + 'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges) } @@ -1709,6 +1749,142 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._code_cache = {} self._player_cache = {} + def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data): + EXPIRATION_DURATION = 18_000 + lock = threading.Lock() + + is_live = True + expiration_time = time.time() + EXPIRATION_DURATION + formats = [f for f in formats if f.get('is_from_start')] + + def refetch_manifest(format_id): + nonlocal formats, expiration_time, is_live + if time.time() <= expiration_time: + return + + _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url) + video_details = traverse_obj( + prs, (..., 'videoDetails'), expected_type=dict, default=[]) + microformats = traverse_obj( + prs, (..., 'microformat', 'playerMicroformatRenderer'), + expected_type=dict, default=[]) + _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url) + expiration_time = time.time() + EXPIRATION_DURATION + + def mpd_feed(format_id): + """ + @returns (manifest_url, manifest_stream_number, is_live) or None + """ + with lock: + refetch_manifest(format_id) + + f = next((f for f in formats if f['format_id'] == 
@@ -1709,6 +1749,142 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         self._code_cache = {}
         self._player_cache = {}
 
+    def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
+        EXPIRATION_DURATION = 18_000
+        lock = threading.Lock()
+
+        is_live = True
+        expiration_time = time.time() + EXPIRATION_DURATION
+        formats = [f for f in formats if f.get('is_from_start')]
+
+        def refetch_manifest(format_id):
+            nonlocal formats, expiration_time, is_live
+            if time.time() <= expiration_time:
+                return
+
+            _, _, prs, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
+            video_details = traverse_obj(
+                prs, (..., 'videoDetails'), expected_type=dict, default=[])
+            microformats = traverse_obj(
+                prs, (..., 'microformat', 'playerMicroformatRenderer'),
+                expected_type=dict, default=[])
+            _, is_live, _, formats = self._list_formats(video_id, microformats, video_details, prs, player_url)
+            expiration_time = time.time() + EXPIRATION_DURATION
+
+        def mpd_feed(format_id):
+            """
+            @returns (manifest_url, manifest_stream_number, is_live) or None
+            """
+            with lock:
+                refetch_manifest(format_id)
+
+            f = next((f for f in formats if f['format_id'] == format_id), None)
+            if not f:
+                self.report_warning(
+                    f'Cannot find refreshed manifest for format {format_id}{bug_reports_message()}')
+                return None
+            return f['manifest_url'], f['manifest_stream_number'], is_live
+
+        for f in formats:
+            f['protocol'] = 'http_dash_segments_generator'
+            f['fragments'] = functools.partial(
+                self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
+
+    def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
+        FETCH_SPAN, MAX_DURATION = 5, 432000
+
+        mpd_url, stream_number, is_live = None, None, True
+
+        begin_index = 0
+        download_start_time = ctx.get('start') or time.time()
+
+        lack_early_segments = download_start_time - (live_start_time or download_start_time) > MAX_DURATION
+        if lack_early_segments:
+            self.report_warning(bug_reports_message(
+                'Starting download from the last 120 hours of the live stream since '
+                'YouTube does not have data before that. If you think this is wrong,'), only_once=True)
+            lack_early_segments = True
+
+        known_idx, no_fragment_score, last_segment_url = begin_index, 0, None
+        fragments, fragment_base_url = None, None
+
+        def _extract_sequence_from_mpd(refresh_sequence):
+            nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url
+            # Obtain from MPD's maximum seq value
+            old_mpd_url = mpd_url
+            mpd_url, stream_number, is_live = mpd_feed(format_id) or (mpd_url, stream_number, False)
+            if old_mpd_url == mpd_url and not refresh_sequence:
+                return True, last_seq
+            try:
+                fmts, _ = self._extract_mpd_formats_and_subtitles(
+                    mpd_url, None, note=False, errnote=False, fatal=False)
+            except ExtractorError:
+                fmts = None
+            if not fmts:
+                no_fragment_score += 1
+                return False, last_seq
+            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
+            fragments = fmt_info['fragments']
+            fragment_base_url = fmt_info['fragment_base_url']
+            assert fragment_base_url
+
+            _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
+            return True, _last_seq
+
+        while is_live:
+            fetch_time = time.time()
+            if no_fragment_score > 30:
+                return
+            if last_segment_url:
+                # Obtain from "X-Head-Seqnum" header value from each segment
+                try:
+                    urlh = self._request_webpage(
+                        last_segment_url, None, note=False, errnote=False, fatal=False)
+                except ExtractorError:
+                    urlh = None
+                last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum']))
+                if last_seq is None:
+                    no_fragment_score += 1
+                    last_segment_url = None
+                    continue
+            else:
+                should_retry, last_seq = _extract_sequence_from_mpd(True)
+                if not should_retry:
+                    continue
+
+            if known_idx > last_seq:
+                last_segment_url = None
+                continue
+
+            last_seq += 1
+
+            if begin_index < 0 and known_idx < 0:
+                # skip from the start when begin_index is negative
+                known_idx = last_seq + begin_index
+            if lack_early_segments:
+                known_idx = max(known_idx, last_seq - int(MAX_DURATION // fragments[-1]['duration']))
+            try:
+                for idx in range(known_idx, last_seq):
+                    # do not update the sequence number here, or parts of the stream may be skipped
+                    should_retry, _ = _extract_sequence_from_mpd(False)
+                    if not should_retry:
+                        # retry when the state becomes inconsistent
+                        known_idx = idx - 1
+                        raise ExtractorError('breaking out of outer loop')
+                    last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx)
+                    yield {
+                        'url': last_segment_url,
+                    }
+                if known_idx == last_seq:
+                    no_fragment_score += 5
+                else:
+                    no_fragment_score = 0
+                known_idx = last_seq
+            except ExtractorError:
+                continue
+
+            time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
+
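The http_dash_segments_generator protocol set up above means a format's 'fragments' entry is a callable that lazily yields fragment dicts as the live edge advances. A toy sketch of that contract (all names and URLs here are hypothetical, not yt-dlp internals):

    import functools
    import time

    def fake_live_fragments(format_id, poll_span, ctx, max_segments=5):
        # Stand-in for _live_dash_fragments: pretend the manifest gains two
        # segments per poll and yield each new fragment exactly once
        known_idx = 0
        while known_idx < max_segments:
            last_seq = min(known_idx + 2, max_segments)
            for idx in range(known_idx, last_seq):
                yield {'url': f'https://example.invalid/{format_id}/sq/{idx}'}
            known_idx = last_seq
            time.sleep(poll_span)  # FETCH_SPAN-style pacing between polls

    fmt = {'format_id': '299', 'protocol': 'http_dash_segments_generator'}
    fmt['fragments'] = functools.partial(fake_live_fragments, fmt['format_id'], 0.01)

    for fragment in fmt['fragments'](ctx={'start': time.time()}):
        print(fragment['url'])  # .../sq/0 through .../sq/4, in order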
     def _extract_player_url(self, *ytcfgs, webpage=None):
         player_url = traverse_obj(
             ytcfgs, (..., 'PLAYER_JS_URL'), (..., 'WEB_PLAYER_CONTEXT_CONFIGS', ..., 'jsUrl'),
@@ -2064,19 +2240,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 (r'%s\s*%s' % (regex, self._YT_INITIAL_BOUNDARY_RE), regex),
                 webpage, name, default='{}'), video_id, fatal=False)
 
-    @staticmethod
-    def parse_time_text(time_text):
-        """
-        Parse the comment time text
-        time_text is in the format 'X units ago (edited)'
-        """
-        time_text_split = time_text.split(' ')
-        if len(time_text_split) >= 3:
-            try:
-                return datetime_from_str('now-%s%s' % (time_text_split[0], time_text_split[1]), precision='auto')
-            except ValueError:
-                return None
-
     def _extract_comment(self, comment_renderer, parent=None):
         comment_id = comment_renderer.get('commentId')
         if not comment_id:
@@ -2085,10 +2248,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         text = self._get_text(comment_renderer, 'contentText')
 
         # note: timestamp is an estimate calculated from the current time and time_text
-        time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
-        time_text_dt = self.parse_time_text(time_text)
-        if isinstance(time_text_dt, datetime.datetime):
-            timestamp = calendar.timegm(time_text_dt.timetuple())
+        timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
 
         author = self._get_text(comment_renderer, 'authorText')
         author_id = try_get(comment_renderer, lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], compat_str)
@@ -2261,11 +2421,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             yield from self._comment_entries(renderer, ytcfg, video_id)
 
         max_comments = int_or_none(self._configuration_arg('max_comments', [''])[0])
-        # Force English regardless of account setting to prevent parsing issues
-        # See: https://github.com/yt-dlp/yt-dlp/issues/532
-        ytcfg = copy.deepcopy(ytcfg)
-        traverse_obj(
-            ytcfg, ('INNERTUBE_CONTEXT', 'client'), expected_type=dict, default={})['hl'] = 'en'
         return itertools.islice(_real_comment_extract(contents), 0, max_comments)
 
     @staticmethod
@@ -2531,11 +2686,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     dct['container'] = dct['ext'] + '_dash'
                 yield dct
 
+        live_from_start = is_live and self.get_param('live_from_start')
         skip_manifests = self._configuration_arg('skip')
-        get_dash = (
-            (not is_live or self._configuration_arg('include_live_dash'))
-            and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
-        get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
+        if not self.get_param('youtube_include_hls_manifest', True):
+            skip_manifests.append('hls')
+        get_dash = 'dash' not in skip_manifests and (
+            not is_live or live_from_start or self._configuration_arg('include_live_dash'))
+        get_hls = not live_from_start and 'hls' not in skip_manifests
 
         def process_manifest_format(f, proto, itag):
             if itag in itags:
@@ -2566,6 +2723,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 if process_manifest_format(f, 'dash', f['format_id']):
                     f['filesize'] = int_or_none(self._search_regex(
                         r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
+                    if live_from_start:
+                        f['is_from_start'] = True
+
                 yield f
 
     def _extract_storyboard(self, player_responses, duration):
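To see what the reworked gating above does: DASH is now kept for live streams when downloading from the start, while HLS is dropped in that mode (the generator protocol only exists for DASH). A small truth-table sketch of the same boolean logic (hypothetical free function, not part of the extractor):

    def choose_manifests(is_live, live_from_start, skip_manifests, include_live_dash=False):
        # Mirrors the get_dash/get_hls expressions in the hunk above
        get_dash = 'dash' not in skip_manifests and (
            not is_live or live_from_start or include_live_dash)
        get_hls = not live_from_start and 'hls' not in skip_manifests
        return get_dash, get_hls

    assert choose_manifests(is_live=True, live_from_start=True, skip_manifests=[]) == (True, False)
    assert choose_manifests(is_live=True, live_from_start=False, skip_manifests=[]) == (False, True)
    assert choose_manifests(is_live=False, live_from_start=False, skip_manifests=['dash']) == (False, True)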
@@ -2603,12 +2763,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             } for j in range(math.ceil(fragment_count))],
         }
 
-    def _real_extract(self, url):
-        url, smuggled_data = unsmuggle_url(url, {})
-        video_id = self._match_id(url)
-
-        base_url = self.http_scheme() + '//www.youtube.com/'
-        webpage_url = base_url + 'watch?v=' + video_id
+    def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
         webpage = None
         if 'webpage' not in self._configuration_arg('player_skip'):
             webpage = self._download_webpage(
@@ -2620,6 +2775,28 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             self._get_requested_clients(url, smuggled_data), video_id, webpage, master_ytcfg)
 
+        return webpage, master_ytcfg, player_responses, player_url
+
+    def _list_formats(self, video_id, microformats, video_details, player_responses, player_url):
+        live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
+        is_live = get_first(video_details, 'isLive')
+        if is_live is None:
+            is_live = get_first(live_broadcast_details, 'isLiveNow')
+
+        streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
+        formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
+
+        return live_broadcast_details, is_live, streaming_data, formats
+
+    def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+        video_id = self._match_id(url)
+
+        base_url = self.http_scheme() + '//www.youtube.com/'
+        webpage_url = base_url + 'watch?v=' + video_id
+
+        webpage, master_ytcfg, player_responses, player_url = self._download_player_responses(url, smuggled_data, video_id, webpage_url)
+
         playability_statuses = traverse_obj(
             player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[])
 
@@ -2688,13 +2865,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return self.playlist_result(
                 entries, video_id, video_title, video_description)
 
-        live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
-        is_live = get_first(video_details, 'isLive')
-        if is_live is None:
-            is_live = get_first(live_broadcast_details, 'isLiveNow')
-
-        streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
-        formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live))
+        live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url)
 
         if not formats:
             if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
@@ -2797,10 +2968,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 is_live = False
             if is_upcoming is None and (live_content or is_live):
                 is_upcoming = False
-        live_starttime = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
-        live_endtime = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
-        if not duration and live_endtime and live_starttime:
-            duration = live_endtime - live_starttime
+        live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
+        live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
+        if not duration and live_end_time and live_start_time:
+            duration = live_end_time - live_start_time
+
+        if is_live and self.get_param('live_from_start'):
+            self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
 
         formats.extend(self._extract_storyboard(player_responses, duration))
 
@@ -2843,7 +3017,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                      else None if is_live is None or is_upcoming is None
                      else live_content),
             'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
-            'release_timestamp': live_starttime,
+            'release_timestamp': live_start_time,
         }
 
         pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
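The duration fallback above is simply the broadcast window, end minus start. With standard-library parsing in place of yt-dlp's parse_iso8601 (the timestamps below are invented):

    from datetime import datetime

    start = datetime.fromisoformat('2021-12-01T18:00:00+00:00')
    end = datetime.fromisoformat('2021-12-01T20:30:00+00:00')

    duration = int((end - start).total_seconds())
    assert duration == 9000  # a 2.5 hour broadcast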
@@ -4223,7 +4397,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
             info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data)
         return info_dict
 
-    _url_re = re.compile(r'(?P<pre>%s)(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$' % _VALID_URL)
+    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')
 
     def __real_extract(self, url, smuggled_data):
         item_id = self._match_id(url)
@@ -4232,36 +4406,33 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
         compat_opts = self.get_param('compat_opts', [])
 
         def get_mobj(url):
-            mobj = self._url_re.match(url).groupdict()
+            mobj = self._URL_RE.match(url).groupdict()
             mobj.update((k, '') for k, v in mobj.items() if v is None)
             return mobj
 
-        mobj = get_mobj(url)
+        mobj, redirect_warning = get_mobj(url), None
         # Youtube returns incomplete data if tabname is not lower case
         pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
         if is_channel:
             if smuggled_data.get('is_music_url'):
-                if item_id[:2] == 'VL':
-                    # Youtube music VL channels have an equivalent playlist
+                if item_id[:2] == 'VL':  # Youtube music VL channels have an equivalent playlist
                     item_id = item_id[2:]
-                    pre, tab, post, is_channel = 'https://www.youtube.com/playlist?list=%s' % item_id, '', '', False
-                elif item_id[:2] == 'MP':
-                    # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
+                    pre, tab, post, is_channel = f'https://www.youtube.com/playlist?list={item_id}', '', '', False
+                elif item_id[:2] == 'MP':  # Resolve albums (/[channel/browse]/MP...) to their equivalent playlist
                     mdata = self._extract_tab_endpoint(
-                        'https://music.youtube.com/channel/%s' % item_id, item_id, default_client='web_music')
-                    murl = traverse_obj(
-                        mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'), get_all=False, expected_type=compat_str)
+                        f'https://music.youtube.com/channel/{item_id}', item_id, default_client='web_music')
+                    murl = traverse_obj(mdata, ('microformat', 'microformatDataRenderer', 'urlCanonical'),
+                                        get_all=False, expected_type=compat_str)
                     if not murl:
-                        raise ExtractorError('Failed to resolve album to playlist.')
+                        raise ExtractorError('Failed to resolve album to playlist')
                     return self.url_result(murl, ie=YoutubeTabIE.ie_key())
-                elif mobj['channel_type'] == 'browse':
-                    # Youtube music /browse/ should be changed to /channel/
-                    pre = 'https://www.youtube.com/channel/%s' % item_id
+                elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
+                    pre = f'https://www.youtube.com/channel/{item_id}'
+
         if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
             # Home URLs should redirect to /videos/
-            self.report_warning(
-                'A channel/user page was given. All the channel\'s videos will be downloaded. '
-                'To download only the videos in the home page, add a "/featured" to the URL')
+            redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
                                'To download only the videos in the home page, add a "/featured" to the URL')
             tab = '/videos'
 
         url = ''.join((pre, tab, post))
@@ -4269,28 +4440,27 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
 
         # Handle both video/playlist URLs
         qs = parse_qs(url)
-        video_id = qs.get('v', [None])[0]
-        playlist_id = qs.get('list', [None])[0]
+        video_id, playlist_id = [qs.get(key, [None])[0] for key in ('v', 'list')]
 
         if not video_id and mobj['not_channel'].startswith('watch'):
             if not playlist_id:
                 # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable
                 raise ExtractorError('Unable to recognize tab page')
             # Common mistake: https://www.youtube.com/watch?list=playlist_id
-            self.report_warning('A video URL was given without video ID. Trying to download playlist %s' % playlist_id)
-            url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
+            self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
+            url = f'https://www.youtube.com/playlist?list={playlist_id}'
             mobj = get_mobj(url)
 
         if video_id and playlist_id:
             if self.get_param('noplaylist'):
-                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
-                return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
-            self.to_screen('Downloading playlist %s; add --no-playlist to just download video %s' % (playlist_id, video_id))
+                self.to_screen(f'Downloading just video {video_id} because of --no-playlist')
+                return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
+                                       ie=YoutubeIE.ie_key(), video_id=video_id)
+            self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
 
         data, ytcfg = self._extract_data(url, item_id)
 
-        tabs = try_get(
-            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
+        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
         if tabs:
             selected_tab = self._extract_selected_tab(tabs)
             tab_name = selected_tab.get('title', '')
@@ -4299,41 +4469,45 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
                     # Live tab should have redirected to the video
                     raise ExtractorError('The channel is not currently live', expected=True)
                 if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
+                    redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
                     if not mobj['not_channel'] and item_id[:2] == 'UC':
                         # Topic channels don't have /videos. Use the equivalent playlist instead
-                        self.report_warning('The URL does not have a %s tab. Trying to redirect to playlist UU%s instead' % (mobj['tab'][1:], item_id[2:]))
-                        pl_id = 'UU%s' % item_id[2:]
-                        pl_url = 'https://www.youtube.com/playlist?list=%s%s' % (pl_id, mobj['post'])
+                        pl_id = f'UU{item_id[2:]}'
+                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
                         try:
-                            data, ytcfg, item_id, url = *self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True), pl_id, pl_url
+                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
                         except ExtractorError:
-                            self.report_warning('The playlist gave error. Falling back to channel URL')
-                    else:
-                        self.report_warning('The URL does not have a %s tab. %s is being downloaded instead' % (mobj['tab'][1:], tab_name))
+                            redirect_warning += ' and the playlist redirect gave error'
+                        else:
+                            item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
+                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
+                    if tab_name.lower() != mobj['tab'][1:]:
+                        redirect_warning += f'. {tab_name} tab is being downloaded instead'
 
-        self.write_debug('Final URL: %s' % url)
+        if redirect_warning:
+            self.report_warning(redirect_warning)
+        self.write_debug(f'Final URL: {url}')
 
         # YouTube sometimes provides a button to reload playlist with unavailable videos.
         if 'no-youtube-unavailable-videos' not in compat_opts:
             data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
         self._extract_and_report_alerts(data, only_once=True)
-        tabs = try_get(
-            data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
+        tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
         if tabs:
             return self._extract_from_tabs(item_id, ytcfg, data, tabs)
 
-        playlist = try_get(
-            data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
+        playlist = traverse_obj(
+            data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
         if playlist:
             return self._extract_from_playlist(item_id, url, data, playlist, ytcfg)
 
-        video_id = try_get(
-            data, lambda x: x['currentVideoEndpoint']['watchEndpoint']['videoId'],
-            compat_str) or video_id
+        video_id = traverse_obj(
+            data, ('currentVideoEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) or video_id
         if video_id:
             if mobj['tab'] != '/live':  # live tab is expected to redirect to video
-                self.report_warning('Unable to recognize playlist. Downloading just video %s' % video_id)
-                return self.url_result(f'https://www.youtube.com/watch?v={video_id}', ie=YoutubeIE.ie_key(), video_id=video_id)
+                self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
+                return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
                                       ie=YoutubeIE.ie_key(), video_id=video_id)
 
         raise ExtractorError('Unable to recognize tab page')
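_URL_RE above leans on a regex conditional: (?(channel_type)...) only attempts the /tab part when the channel_type group took part in the match. A minimal illustration with a toy pattern standing in for the real _VALID_URL:

    import re

    # Toy stand-in for _VALID_URL; only channel-style URLs set 'channel_type'
    valid_url = r'https?://www\.youtube\.com/(?:(?P<channel_type>channel|user|c)/)?(?P<id>[\w-]+)'
    url_re = re.compile(rf'(?P<pre>{valid_url})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')

    m = url_re.match('https://www.youtube.com/channel/UC123/videos')
    assert m.group('tab') == '/videos'  # channel URL: the tab group is active

    m = url_re.match('https://www.youtube.com/playlist?list=PL123')
    assert m.group('tab') is None       # no channel_type, so no tab is parsed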
diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py
index 462bc4efe..5a5eebd30 100644
--- a/yt_dlp/extractor/zee5.py
+++ b/yt_dlp/extractor/zee5.py
@@ -177,7 +177,7 @@ class Zee5SeriesIE(InfoExtractor):
                         https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                         (?:tvshows|kids|zee5originals)(?:/[^#/?]+){2}/
                      )
-                     (?P<id>[^#/?]+)/?(?:$|[?#])
+                     (?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#])
                      '''
     _TESTS = [{
         'url': 'https://www.zee5.com/kids/kids-shows/krishna-balram/0-6-1871',
@@ -209,8 +209,10 @@ class Zee5SeriesIE(InfoExtractor):
         'info_dict': {
             'id': '0-6-270',
         },
-    }
-    ]
+    }, {
+        'url': 'https://www.zee5.com/tvshows/details/chala-hawa-yeu-dya-ladies-zindabaad/0-6-2943/episodes',
+        'only_matching': True,
+    }]
 
     def _entries(self, show_id):
         access_token_request = self._download_json(
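The widened tail above keeps series URLs matching with or without a trailing /episodes segment. A quick check against just that tail of the pattern (assuming, as the full regex does, that the id contains no '#', '/' or '?'):

    import re

    tail = re.compile(r'(?P<id>[^#/?]+)(?:/episodes)?/?(?:$|[?#])')

    for candidate in ('0-6-2943', '0-6-2943/episodes', '0-6-2943/episodes/'):
        assert tail.match(candidate).group('id') == '0-6-2943'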
diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py
index 0af891bd3..350b44dd0 100644
--- a/yt_dlp/jsinterp.py
+++ b/yt_dlp/jsinterp.py
@@ -87,7 +87,7 @@ class JSInterpreter(object):
         return name
 
     @staticmethod
-    def _seperate(expr, delim=',', max_split=None):
+    def _separate(expr, delim=',', max_split=None):
         if not expr:
             return
         counters = {k: 0 for k in _MATCHING_PARENS.values()}
@@ -111,17 +111,17 @@ class JSInterpreter(object):
         yield expr[start:]
 
     @staticmethod
-    def _seperate_at_paren(expr, delim):
-        seperated = list(JSInterpreter._seperate(expr, delim, 1))
-        if len(seperated) < 2:
+    def _separate_at_paren(expr, delim):
+        separated = list(JSInterpreter._separate(expr, delim, 1))
+        if len(separated) < 2:
             raise ExtractorError(f'No terminating paren {delim} in {expr}')
-        return seperated[0][1:].strip(), seperated[1].strip()
+        return separated[0][1:].strip(), separated[1].strip()
 
     def interpret_statement(self, stmt, local_vars, allow_recursion=100):
         if allow_recursion < 0:
             raise ExtractorError('Recursion limit reached')
 
-        sub_statements = list(self._seperate(stmt, ';'))
+        sub_statements = list(self._separate(stmt, ';'))
         stmt = (sub_statements or ['']).pop()
         for sub_stmt in sub_statements:
             ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1)
@@ -151,7 +151,7 @@ class JSInterpreter(object):
             return None
 
         if expr.startswith('{'):
-            inner, outer = self._seperate_at_paren(expr, '}')
+            inner, outer = self._separate_at_paren(expr, '}')
             inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion - 1)
             if not outer or should_abort:
                 return inner
@@ -159,7 +159,7 @@ class JSInterpreter(object):
                 expr = json.dumps(inner) + outer
 
         if expr.startswith('('):
-            inner, outer = self._seperate_at_paren(expr, ')')
+            inner, outer = self._separate_at_paren(expr, ')')
             inner = self.interpret_expression(inner, local_vars, allow_recursion)
             if not outer:
                 return inner
@@ -167,16 +167,16 @@ class JSInterpreter(object):
                 expr = json.dumps(inner) + outer
 
         if expr.startswith('['):
-            inner, outer = self._seperate_at_paren(expr, ']')
+            inner, outer = self._separate_at_paren(expr, ']')
             name = self._named_object(local_vars, [
                 self.interpret_expression(item, local_vars, allow_recursion)
-                for item in self._seperate(inner)])
+                for item in self._separate(inner)])
             expr = name + outer
 
         m = re.match(r'try\s*', expr)
         if m:
             if expr[m.end()] == '{':
-                try_expr, expr = self._seperate_at_paren(expr[m.end():], '}')
+                try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
             else:
                 try_expr, expr = expr[m.end() - 1:], ''
             ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion - 1)
@@ -187,23 +187,23 @@ class JSInterpreter(object):
         m = re.match(r'catch\s*\(', expr)
         if m:
             # We ignore the catch block
-            _, expr = self._seperate_at_paren(expr, '}')
+            _, expr = self._separate_at_paren(expr, '}')
             return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
 
         m = re.match(r'for\s*\(', expr)
         if m:
-            constructor, remaining = self._seperate_at_paren(expr[m.end() - 1:], ')')
+            constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
             if remaining.startswith('{'):
-                body, expr = self._seperate_at_paren(remaining, '}')
+                body, expr = self._separate_at_paren(remaining, '}')
             else:
                 m = re.match(r'switch\s*\(', remaining)  # FIXME
                 if m:
-                    switch_val, remaining = self._seperate_at_paren(remaining[m.end() - 1:], ')')
-                    body, expr = self._seperate_at_paren(remaining, '}')
+                    switch_val, remaining = self._separate_at_paren(remaining[m.end() - 1:], ')')
+                    body, expr = self._separate_at_paren(remaining, '}')
                     body = 'switch(%s){%s}' % (switch_val, body)
                 else:
                     body, expr = remaining, ''
-            start, cndn, increment = self._seperate(constructor, ';')
+            start, cndn, increment = self._separate(constructor, ';')
             if self.interpret_statement(start, local_vars, allow_recursion - 1)[1]:
                 raise ExtractorError(
                     f'Premature return in the initialization of a for loop in {constructor!r}')
@@ -225,14 +225,14 @@ class JSInterpreter(object):
         m = re.match(r'switch\s*\(', expr)
         if m:
-            switch_val, remaining = self._seperate_at_paren(expr[m.end() - 1:], ')')
+            switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
             switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
-            body, expr = self._seperate_at_paren(remaining, '}')
+            body, expr = self._separate_at_paren(remaining, '}')
             items = body.replace('default:', 'case default:').split('case ')[1:]
             for default in (False, True):
                 matched = False
                 for item in items:
-                    case, stmt = [i.strip() for i in self._seperate(item, ':', 1)]
+                    case, stmt = [i.strip() for i in self._separate(item, ':', 1)]
                     if default:
                         matched = matched or case == 'default'
                     elif not matched:
@@ -249,8 +249,8 @@ class JSInterpreter(object):
                 break
             return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0]
 
-        # Comma seperated statements
-        sub_expressions = list(self._seperate(expr))
+        # Comma separated statements
+        sub_expressions = list(self._separate(expr))
         expr = sub_expressions.pop().strip() if sub_expressions else ''
         for sub_expr in sub_expressions:
             self.interpret_expression(sub_expr, local_vars, allow_recursion)
@@ -318,11 +318,11 @@ class JSInterpreter(object):
             return val[idx]
 
         for op, opfunc in _OPERATORS:
-            seperated = list(self._seperate(expr, op))
-            if len(seperated) < 2:
+            separated = list(self._separate(expr, op))
+            if len(separated) < 2:
                 continue
-            right_val = seperated.pop()
-            left_val = op.join(seperated)
+            right_val = separated.pop()
+            left_val = op.join(separated)
             left_val, should_abort = self.interpret_statement(
                 left_val, local_vars, allow_recursion - 1)
             if should_abort:
@@ -341,7 +341,7 @@ class JSInterpreter(object):
             member = remove_quotes(m.group('member') or m.group('member2'))
             arg_str = expr[m.end():]
             if arg_str.startswith('('):
-                arg_str, remaining = self._seperate_at_paren(arg_str, ')')
+                arg_str, remaining = self._separate_at_paren(arg_str, ')')
             else:
                 arg_str, remaining = None, arg_str
 
@@ -370,7 +370,7 @@ class JSInterpreter(object):
                 # Function call
                 argvals = [
                     self.interpret_expression(v, local_vars, allow_recursion)
-                    for v in self._seperate(arg_str)]
+                    for v in self._separate(arg_str)]
 
                 if obj == str:
                     if member == 'fromCharCode':
@@ -453,7 +453,7 @@ class JSInterpreter(object):
             fname = m.group('func')
             argvals = tuple([
                 int(v) if v.isdigit() else local_vars[v]
-                for v in self._seperate(m.group('args'))])
+                for v in self._separate(m.group('args'))])
             if fname in local_vars:
                 return local_vars[fname](argvals)
             elif fname not in self._functions:
@@ -495,7 +495,7 @@ class JSInterpreter(object):
                 (?P<code>\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % (
                 re.escape(funcname), re.escape(funcname), re.escape(funcname)),
             self.code)
-        code, _ = self._seperate_at_paren(func_m.group('code'), '}')  # refine the match
+        code, _ = self._separate_at_paren(func_m.group('code'), '}')  # refine the match
         if func_m is None:
             raise ExtractorError('Could not find JS function %r' % funcname)
         return func_m.group('args').split(','), code
@@ -510,7 +510,7 @@ class JSInterpreter(object):
             if mobj is None:
                 break
             start, body_start = mobj.span()
-            body, remaining = self._seperate_at_paren(code[body_start - 1:], '}')
+            body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
             name = self._named_object(
                 local_vars,
                 self.extract_function_from_code(
@@ -532,7 +532,7 @@ class JSInterpreter(object):
                 **kwargs
             })
         var_stack = LocalNameSpace(local_vars, *global_stack)
-        for stmt in self._seperate(code.replace('\n', ''), ';'):
+        for stmt in self._separate(code.replace('\n', ''), ';'):
             ret, should_abort = self.interpret_statement(stmt, var_stack)
             if should_abort:
                 break
diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py
index c81153c1e..f9f99e390 100644
--- a/yt_dlp/minicurses.py
+++ b/yt_dlp/minicurses.py
@@ -147,6 +147,7 @@ class MultilinePrinter(MultilinePrinterBase):
     def print_at_line(self, text, pos):
         if self._HAVE_FULLCAP:
             self.write(*self._move_cursor(pos), CONTROL_SEQUENCES['ERASE_LINE'], text)
+            return
 
         text = self._add_line_number(text, pos)
         textlen = len(text)
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 85c7c8cda..168821a68 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -255,6 +255,14 @@ def parseOpts(overrideArguments=None):
         action='store_false', dest='extract_flat',
         help='Extract the videos of a playlist')
     general.add_option(
+        '--live-from-start',
+        action='store_true', dest='live_from_start',
+        help='Download livestreams from the start. Currently only supported for YouTube')
+    general.add_option(
+        '--no-live-from-start',
+        action='store_false', dest='live_from_start',
+        help='Download livestreams from the current time (default)')
+    general.add_option(
         '--wait-for-video',
         dest='wait_for_video', metavar='MIN[-MAX]', default=None,
         help=(
@@ -987,8 +995,9 @@ def parseOpts(overrideArguments=None):
     filesystem.add_option(
         '-a', '--batch-file',
         dest='batchfile', metavar='FILE',
-        help="File containing URLs to download ('-' for stdin), one URL per line. "
-             "Lines starting with '#', ';' or ']' are considered as comments and ignored")
+        help=(
+            'File containing URLs to download ("-" for stdin), one URL per line. '
+            'Lines starting with "#", ";" or "]" are considered as comments and ignored'))
     filesystem.add_option(
         '--no-batch-file',
         dest='batchfile', action='store_const', const=None,
@@ -1429,7 +1438,7 @@ def parseOpts(overrideArguments=None):
         action='store_true', dest='force_keyframes_at_cuts', default=False,
         help=(
             'Force keyframes around the chapters before removing/splitting them. '
-            'Requires a reencode and thus is very slow, but the resulting video '
+            'Requires a re-encode and thus is very slow, but the resulting video '
             'may have fewer artifacts around the cuts'))
     postproc.add_option(
         '--no-force-keyframes-at-cuts',
@@ -1447,7 +1456,7 @@ def parseOpts(overrideArguments=None):
             'process': lambda val: dict(_postprocessor_opts_parser(*val.split(':', 1)))
         }, help=(
             'The (case sensitive) name of plugin postprocessors to be enabled, '
-            'and (optionally) arguments to be passed to it, seperated by a colon ":". '
+            'and (optionally) arguments to be passed to it, separated by a colon ":". '
             'ARGS are a semicolon ";" delimited list of NAME=VALUE. '
             'The "when" argument determines when the postprocessor is invoked. '
             'It can be one of "pre_process" (after extraction), '
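The two options added above follow the usual yt-dlp pattern of paired flags writing to one destination, so the last flag on the command line wins and an unset option stays None. A self-contained optparse sketch of that pattern (parser and flags reduced to the minimum):

    import optparse

    parser = optparse.OptionParser()
    parser.add_option('--live-from-start', action='store_true', dest='live_from_start')
    parser.add_option('--no-live-from-start', action='store_false', dest='live_from_start')

    opts, _ = parser.parse_args(['--live-from-start'])
    assert opts.live_from_start is True
    opts, _ = parser.parse_args(['--live-from-start', '--no-live-from-start'])
    assert opts.live_from_start is False  # last one wins
    opts, _ = parser.parse_args([])
    assert opts.live_from_start is None   # unset: download from the live edge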
' + 'and (optionally) arguments to be passed to it, separated by a colon ":". ' 'ARGS are a semicolon ";" delimited list of NAME=VALUE. ' 'The "when" argument determines when the postprocessor is invoked. ' 'It can be one of "pre_process" (after extraction), ' diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py index 4ae230d2f..7f8adb368 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -9,6 +9,7 @@ from .ffmpeg import ( FFmpegPostProcessor, FFmpegEmbedSubtitlePP, FFmpegExtractAudioPP, + FFmpegFixupDuplicateMoovPP, FFmpegFixupDurationPP, FFmpegFixupStretchedPP, FFmpegFixupTimestampPP, diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index ab9eb6acf..f2467c542 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import copy import functools import os @@ -18,7 +17,7 @@ class PostProcessorMetaClass(type): def run_wrapper(func): @functools.wraps(func) def run(self, info, *args, **kwargs): - info_copy = copy.deepcopy(self._copy_infodict(info)) + info_copy = self._copy_infodict(info) self._hook_progress({'status': 'started'}, info_copy) ret = func(self, info, *args, **kwargs) if ret is not None: diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 73bbf7fb0..594762974 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -294,7 +294,9 @@ class FFmpegPostProcessor(PostProcessor): def make_args(file, args, name, number): keys = ['_%s%d' % (name, number), '_%s' % name] - if name == 'o' and number == 1: + if name == 'o': + args += ['-movflags', '+faststart'] + elif number == 1: keys.append('') args += self._configuration_args(self.basename, keys) if name == 'i': @@ -368,7 +370,7 @@ class FFmpegPostProcessor(PostProcessor): out_flags = ['-c', 'copy'] if out_file.rpartition('.')[-1] in ('mp4', 'mov'): # For some reason, '-c copy' is not enough to copy subtitles - out_flags.extend(['-c:s', 'mov_text', '-movflags', '+faststart']) + out_flags.extend(['-c:s', 'mov_text']) try: self.real_run_ffmpeg( @@ -571,10 +573,7 @@ class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP): @staticmethod def _options(target_ext): - options = ['-c', 'copy', '-map', '0', '-dn'] - if target_ext in ['mp4', 'm4a', 'mov']: - options.extend(['-movflags', '+faststart']) - return options + return ['-c', 'copy', '-map', '0', '-dn'] class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): @@ -909,13 +908,23 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor): return [], info -class FFmpegFixupDurationPP(FFmpegFixupPostProcessor): +class FFmpegCopyStreamPostProcessor(FFmpegFixupPostProcessor): + MESSAGE = 'Copying stream' + @PostProcessor._restrict_to(images=False) def run(self, info): - self._fixup('Fixing video duration', info['filepath'], ['-c', 'copy', '-map', '0', '-dn']) + self._fixup(self.MESSAGE, info['filepath'], ['-c', 'copy', '-map', '0', '-dn']) return [], info +class FFmpegFixupDurationPP(FFmpegCopyStreamPostProcessor): + MESSAGE = 'Fixing video duration' + + +class FFmpegFixupDuplicateMoovPP(FFmpegCopyStreamPostProcessor): + MESSAGE = 'Fixing duplicate MOOV atoms' + + class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 10c35cbb9..d34e5b545 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -97,1589 +97,50 @@ compiled_regex_type = type(re.compile('')) def random_user_agent(): 
_USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36' _CHROME_VERSIONS = ( - '74.0.3729.129', - '76.0.3780.3', - '76.0.3780.2', - '74.0.3729.128', - '76.0.3780.1', - '76.0.3780.0', - '75.0.3770.15', - '74.0.3729.127', - '74.0.3729.126', - '76.0.3779.1', - '76.0.3779.0', - '75.0.3770.14', - '74.0.3729.125', - '76.0.3778.1', - '76.0.3778.0', - '75.0.3770.13', - '74.0.3729.124', - '74.0.3729.123', - '73.0.3683.121', - '76.0.3777.1', - '76.0.3777.0', - '75.0.3770.12', - '74.0.3729.122', - '76.0.3776.4', - '75.0.3770.11', - '74.0.3729.121', - '76.0.3776.3', - '76.0.3776.2', - '73.0.3683.120', - '74.0.3729.120', - '74.0.3729.119', - '74.0.3729.118', - '76.0.3776.1', - '76.0.3776.0', - '76.0.3775.5', - '75.0.3770.10', - '74.0.3729.117', - '76.0.3775.4', - '76.0.3775.3', - '74.0.3729.116', - '75.0.3770.9', - '76.0.3775.2', - '76.0.3775.1', - '76.0.3775.0', - '75.0.3770.8', - '74.0.3729.115', - '74.0.3729.114', - '76.0.3774.1', - '76.0.3774.0', - '75.0.3770.7', - '74.0.3729.113', - '74.0.3729.112', - '74.0.3729.111', - '76.0.3773.1', - '76.0.3773.0', - '75.0.3770.6', - '74.0.3729.110', - '74.0.3729.109', - '76.0.3772.1', - '76.0.3772.0', - '75.0.3770.5', - '74.0.3729.108', - '74.0.3729.107', - '76.0.3771.1', - '76.0.3771.0', - '75.0.3770.4', - '74.0.3729.106', - '74.0.3729.105', - '75.0.3770.3', - '74.0.3729.104', - '74.0.3729.103', - '74.0.3729.102', - '75.0.3770.2', - '74.0.3729.101', - '75.0.3770.1', - '75.0.3770.0', - '74.0.3729.100', - '75.0.3769.5', - '75.0.3769.4', - '74.0.3729.99', - '75.0.3769.3', - '75.0.3769.2', - '75.0.3768.6', - '74.0.3729.98', - '75.0.3769.1', - '75.0.3769.0', - '74.0.3729.97', - '73.0.3683.119', - '73.0.3683.118', - '74.0.3729.96', - '75.0.3768.5', - '75.0.3768.4', - '75.0.3768.3', - '75.0.3768.2', - '74.0.3729.95', - '74.0.3729.94', - '75.0.3768.1', - '75.0.3768.0', - '74.0.3729.93', - '74.0.3729.92', - '73.0.3683.117', - '74.0.3729.91', - '75.0.3766.3', - '74.0.3729.90', - '75.0.3767.2', - '75.0.3767.1', - '75.0.3767.0', - '74.0.3729.89', - '73.0.3683.116', - '75.0.3766.2', - '74.0.3729.88', - '75.0.3766.1', - '75.0.3766.0', - '74.0.3729.87', - '73.0.3683.115', - '74.0.3729.86', - '75.0.3765.1', - '75.0.3765.0', - '74.0.3729.85', - '73.0.3683.114', - '74.0.3729.84', - '75.0.3764.1', - '75.0.3764.0', - '74.0.3729.83', - '73.0.3683.113', - '75.0.3763.2', - '75.0.3761.4', - '74.0.3729.82', - '75.0.3763.1', - '75.0.3763.0', - '74.0.3729.81', - '73.0.3683.112', - '75.0.3762.1', - '75.0.3762.0', - '74.0.3729.80', - '75.0.3761.3', - '74.0.3729.79', - '73.0.3683.111', - '75.0.3761.2', - '74.0.3729.78', - '74.0.3729.77', - '75.0.3761.1', - '75.0.3761.0', - '73.0.3683.110', - '74.0.3729.76', - '74.0.3729.75', - '75.0.3760.0', - '74.0.3729.74', - '75.0.3759.8', - '75.0.3759.7', - '75.0.3759.6', - '74.0.3729.73', - '75.0.3759.5', - '74.0.3729.72', - '73.0.3683.109', - '75.0.3759.4', - '75.0.3759.3', - '74.0.3729.71', - '75.0.3759.2', - '74.0.3729.70', - '73.0.3683.108', - '74.0.3729.69', - '75.0.3759.1', - '75.0.3759.0', - '74.0.3729.68', - '73.0.3683.107', - '74.0.3729.67', - '75.0.3758.1', - '75.0.3758.0', - '74.0.3729.66', - '73.0.3683.106', - '74.0.3729.65', - '75.0.3757.1', - '75.0.3757.0', - '74.0.3729.64', - '73.0.3683.105', - '74.0.3729.63', - '75.0.3756.1', - '75.0.3756.0', - '74.0.3729.62', - '73.0.3683.104', - '75.0.3755.3', - '75.0.3755.2', - '73.0.3683.103', - '75.0.3755.1', - '75.0.3755.0', - '74.0.3729.61', - '73.0.3683.102', - '74.0.3729.60', - '75.0.3754.2', - '74.0.3729.59', - 
'75.0.3753.4', - '74.0.3729.58', - '75.0.3754.1', - '75.0.3754.0', - '74.0.3729.57', - '73.0.3683.101', - '75.0.3753.3', - '75.0.3752.2', - '75.0.3753.2', - '74.0.3729.56', - '75.0.3753.1', - '75.0.3753.0', - '74.0.3729.55', - '73.0.3683.100', - '74.0.3729.54', - '75.0.3752.1', - '75.0.3752.0', - '74.0.3729.53', - '73.0.3683.99', - '74.0.3729.52', - '75.0.3751.1', - '75.0.3751.0', - '74.0.3729.51', - '73.0.3683.98', - '74.0.3729.50', - '75.0.3750.0', - '74.0.3729.49', - '74.0.3729.48', - '74.0.3729.47', - '75.0.3749.3', - '74.0.3729.46', - '73.0.3683.97', - '75.0.3749.2', - '74.0.3729.45', - '75.0.3749.1', - '75.0.3749.0', - '74.0.3729.44', - '73.0.3683.96', - '74.0.3729.43', - '74.0.3729.42', - '75.0.3748.1', - '75.0.3748.0', - '74.0.3729.41', - '75.0.3747.1', - '73.0.3683.95', - '75.0.3746.4', - '74.0.3729.40', - '74.0.3729.39', - '75.0.3747.0', - '75.0.3746.3', - '75.0.3746.2', - '74.0.3729.38', - '75.0.3746.1', - '75.0.3746.0', - '74.0.3729.37', - '73.0.3683.94', - '75.0.3745.5', - '75.0.3745.4', - '75.0.3745.3', - '75.0.3745.2', - '74.0.3729.36', - '75.0.3745.1', - '75.0.3745.0', - '75.0.3744.2', - '74.0.3729.35', - '73.0.3683.93', - '74.0.3729.34', - '75.0.3744.1', - '75.0.3744.0', - '74.0.3729.33', - '73.0.3683.92', - '74.0.3729.32', - '74.0.3729.31', - '73.0.3683.91', - '75.0.3741.2', - '75.0.3740.5', - '74.0.3729.30', - '75.0.3741.1', - '75.0.3741.0', - '74.0.3729.29', - '75.0.3740.4', - '73.0.3683.90', - '74.0.3729.28', - '75.0.3740.3', - '73.0.3683.89', - '75.0.3740.2', - '74.0.3729.27', - '75.0.3740.1', - '75.0.3740.0', - '74.0.3729.26', - '73.0.3683.88', - '73.0.3683.87', - '74.0.3729.25', - '75.0.3739.1', - '75.0.3739.0', - '73.0.3683.86', - '74.0.3729.24', - '73.0.3683.85', - '75.0.3738.4', - '75.0.3738.3', - '75.0.3738.2', - '75.0.3738.1', - '75.0.3738.0', - '74.0.3729.23', - '73.0.3683.84', - '74.0.3729.22', - '74.0.3729.21', - '75.0.3737.1', - '75.0.3737.0', - '74.0.3729.20', - '73.0.3683.83', - '74.0.3729.19', - '75.0.3736.1', - '75.0.3736.0', - '74.0.3729.18', - '73.0.3683.82', - '74.0.3729.17', - '75.0.3735.1', - '75.0.3735.0', - '74.0.3729.16', - '73.0.3683.81', - '75.0.3734.1', - '75.0.3734.0', - '74.0.3729.15', - '73.0.3683.80', - '74.0.3729.14', - '75.0.3733.1', - '75.0.3733.0', - '75.0.3732.1', - '74.0.3729.13', - '74.0.3729.12', - '73.0.3683.79', - '74.0.3729.11', - '75.0.3732.0', - '74.0.3729.10', - '73.0.3683.78', - '74.0.3729.9', - '74.0.3729.8', - '74.0.3729.7', - '75.0.3731.3', - '75.0.3731.2', - '75.0.3731.0', - '74.0.3729.6', - '73.0.3683.77', - '73.0.3683.76', - '75.0.3730.5', - '75.0.3730.4', - '73.0.3683.75', - '74.0.3729.5', - '73.0.3683.74', - '75.0.3730.3', - '75.0.3730.2', - '74.0.3729.4', - '73.0.3683.73', - '73.0.3683.72', - '75.0.3730.1', - '75.0.3730.0', - '74.0.3729.3', - '73.0.3683.71', - '74.0.3729.2', - '73.0.3683.70', - '74.0.3729.1', - '74.0.3729.0', - '74.0.3726.4', - '73.0.3683.69', - '74.0.3726.3', - '74.0.3728.0', - '74.0.3726.2', - '73.0.3683.68', - '74.0.3726.1', - '74.0.3726.0', - '74.0.3725.4', - '73.0.3683.67', - '73.0.3683.66', - '74.0.3725.3', - '74.0.3725.2', - '74.0.3725.1', - '74.0.3724.8', - '74.0.3725.0', - '73.0.3683.65', - '74.0.3724.7', - '74.0.3724.6', - '74.0.3724.5', - '74.0.3724.4', - '74.0.3724.3', - '74.0.3724.2', - '74.0.3724.1', - '74.0.3724.0', - '73.0.3683.64', - '74.0.3723.1', - '74.0.3723.0', - '73.0.3683.63', - '74.0.3722.1', - '74.0.3722.0', - '73.0.3683.62', - '74.0.3718.9', - '74.0.3702.3', - '74.0.3721.3', - '74.0.3721.2', - '74.0.3721.1', - '74.0.3721.0', - '74.0.3720.6', - '73.0.3683.61', - 
'72.0.3626.122', - '73.0.3683.60', - '74.0.3720.5', - '72.0.3626.121', - '74.0.3718.8', - '74.0.3720.4', - '74.0.3720.3', - '74.0.3718.7', - '74.0.3720.2', - '74.0.3720.1', - '74.0.3720.0', - '74.0.3718.6', - '74.0.3719.5', - '73.0.3683.59', - '74.0.3718.5', - '74.0.3718.4', - '74.0.3719.4', - '74.0.3719.3', - '74.0.3719.2', - '74.0.3719.1', - '73.0.3683.58', - '74.0.3719.0', - '73.0.3683.57', - '73.0.3683.56', - '74.0.3718.3', - '73.0.3683.55', - '74.0.3718.2', - '74.0.3718.1', - '74.0.3718.0', - '73.0.3683.54', - '74.0.3717.2', - '73.0.3683.53', - '74.0.3717.1', - '74.0.3717.0', - '73.0.3683.52', - '74.0.3716.1', - '74.0.3716.0', - '73.0.3683.51', - '74.0.3715.1', - '74.0.3715.0', - '73.0.3683.50', - '74.0.3711.2', - '74.0.3714.2', - '74.0.3713.3', - '74.0.3714.1', - '74.0.3714.0', - '73.0.3683.49', - '74.0.3713.1', - '74.0.3713.0', - '72.0.3626.120', - '73.0.3683.48', - '74.0.3712.2', - '74.0.3712.1', - '74.0.3712.0', - '73.0.3683.47', - '72.0.3626.119', - '73.0.3683.46', - '74.0.3710.2', - '72.0.3626.118', - '74.0.3711.1', - '74.0.3711.0', - '73.0.3683.45', - '72.0.3626.117', - '74.0.3710.1', - '74.0.3710.0', - '73.0.3683.44', - '72.0.3626.116', - '74.0.3709.1', - '74.0.3709.0', - '74.0.3704.9', - '73.0.3683.43', - '72.0.3626.115', - '74.0.3704.8', - '74.0.3704.7', - '74.0.3708.0', - '74.0.3706.7', - '74.0.3704.6', - '73.0.3683.42', - '72.0.3626.114', - '74.0.3706.6', - '72.0.3626.113', - '74.0.3704.5', - '74.0.3706.5', - '74.0.3706.4', - '74.0.3706.3', - '74.0.3706.2', - '74.0.3706.1', - '74.0.3706.0', - '73.0.3683.41', - '72.0.3626.112', - '74.0.3705.1', - '74.0.3705.0', - '73.0.3683.40', - '72.0.3626.111', - '73.0.3683.39', - '74.0.3704.4', - '73.0.3683.38', - '74.0.3704.3', - '74.0.3704.2', - '74.0.3704.1', - '74.0.3704.0', - '73.0.3683.37', - '72.0.3626.110', - '72.0.3626.109', - '74.0.3703.3', - '74.0.3703.2', - '73.0.3683.36', - '74.0.3703.1', - '74.0.3703.0', - '73.0.3683.35', - '72.0.3626.108', - '74.0.3702.2', - '74.0.3699.3', - '74.0.3702.1', - '74.0.3702.0', - '73.0.3683.34', - '72.0.3626.107', - '73.0.3683.33', - '74.0.3701.1', - '74.0.3701.0', - '73.0.3683.32', - '73.0.3683.31', - '72.0.3626.105', - '74.0.3700.1', - '74.0.3700.0', - '73.0.3683.29', - '72.0.3626.103', - '74.0.3699.2', - '74.0.3699.1', - '74.0.3699.0', - '73.0.3683.28', - '72.0.3626.102', - '73.0.3683.27', - '73.0.3683.26', - '74.0.3698.0', - '74.0.3696.2', - '72.0.3626.101', - '73.0.3683.25', - '74.0.3696.1', - '74.0.3696.0', - '74.0.3694.8', - '72.0.3626.100', - '74.0.3694.7', - '74.0.3694.6', - '74.0.3694.5', - '74.0.3694.4', - '72.0.3626.99', - '72.0.3626.98', - '74.0.3694.3', - '73.0.3683.24', - '72.0.3626.97', - '72.0.3626.96', - '72.0.3626.95', - '73.0.3683.23', - '72.0.3626.94', - '73.0.3683.22', - '73.0.3683.21', - '72.0.3626.93', - '74.0.3694.2', - '72.0.3626.92', - '74.0.3694.1', - '74.0.3694.0', - '74.0.3693.6', - '73.0.3683.20', - '72.0.3626.91', - '74.0.3693.5', - '74.0.3693.4', - '74.0.3693.3', - '74.0.3693.2', - '73.0.3683.19', - '74.0.3693.1', - '74.0.3693.0', - '73.0.3683.18', - '72.0.3626.90', - '74.0.3692.1', - '74.0.3692.0', - '73.0.3683.17', - '72.0.3626.89', - '74.0.3687.3', - '74.0.3691.1', - '74.0.3691.0', - '73.0.3683.16', - '72.0.3626.88', - '72.0.3626.87', - '73.0.3683.15', - '74.0.3690.1', - '74.0.3690.0', - '73.0.3683.14', - '72.0.3626.86', - '73.0.3683.13', - '73.0.3683.12', - '74.0.3689.1', - '74.0.3689.0', - '73.0.3683.11', - '72.0.3626.85', - '73.0.3683.10', - '72.0.3626.84', - '73.0.3683.9', - '74.0.3688.1', - '74.0.3688.0', - '73.0.3683.8', - '72.0.3626.83', - 
'74.0.3687.2', - '74.0.3687.1', - '74.0.3687.0', - '73.0.3683.7', - '72.0.3626.82', - '74.0.3686.4', - '72.0.3626.81', - '74.0.3686.3', - '74.0.3686.2', - '74.0.3686.1', - '74.0.3686.0', - '73.0.3683.6', - '72.0.3626.80', - '74.0.3685.1', - '74.0.3685.0', - '73.0.3683.5', - '72.0.3626.79', - '74.0.3684.1', - '74.0.3684.0', - '73.0.3683.4', - '72.0.3626.78', - '72.0.3626.77', - '73.0.3683.3', - '73.0.3683.2', - '72.0.3626.76', - '73.0.3683.1', - '73.0.3683.0', - '72.0.3626.75', - '71.0.3578.141', - '73.0.3682.1', - '73.0.3682.0', - '72.0.3626.74', - '71.0.3578.140', - '73.0.3681.4', - '73.0.3681.3', - '73.0.3681.2', - '73.0.3681.1', - '73.0.3681.0', - '72.0.3626.73', - '71.0.3578.139', - '72.0.3626.72', - '72.0.3626.71', - '73.0.3680.1', - '73.0.3680.0', - '72.0.3626.70', - '71.0.3578.138', - '73.0.3678.2', - '73.0.3679.1', - '73.0.3679.0', - '72.0.3626.69', - '71.0.3578.137', - '73.0.3678.1', - '73.0.3678.0', - '71.0.3578.136', - '73.0.3677.1', - '73.0.3677.0', - '72.0.3626.68', - '72.0.3626.67', - '71.0.3578.135', - '73.0.3676.1', - '73.0.3676.0', - '73.0.3674.2', - '72.0.3626.66', - '71.0.3578.134', - '73.0.3674.1', - '73.0.3674.0', - '72.0.3626.65', - '71.0.3578.133', - '73.0.3673.2', - '73.0.3673.1', - '73.0.3673.0', - '72.0.3626.64', - '71.0.3578.132', - '72.0.3626.63', - '72.0.3626.62', - '72.0.3626.61', - '72.0.3626.60', - '73.0.3672.1', - '73.0.3672.0', - '72.0.3626.59', - '71.0.3578.131', - '73.0.3671.3', - '73.0.3671.2', - '73.0.3671.1', - '73.0.3671.0', - '72.0.3626.58', - '71.0.3578.130', - '73.0.3670.1', - '73.0.3670.0', - '72.0.3626.57', - '71.0.3578.129', - '73.0.3669.1', - '73.0.3669.0', - '72.0.3626.56', - '71.0.3578.128', - '73.0.3668.2', - '73.0.3668.1', - '73.0.3668.0', - '72.0.3626.55', - '71.0.3578.127', - '73.0.3667.2', - '73.0.3667.1', - '73.0.3667.0', - '72.0.3626.54', - '71.0.3578.126', - '73.0.3666.1', - '73.0.3666.0', - '72.0.3626.53', - '71.0.3578.125', - '73.0.3665.4', - '73.0.3665.3', - '72.0.3626.52', - '73.0.3665.2', - '73.0.3664.4', - '73.0.3665.1', - '73.0.3665.0', - '72.0.3626.51', - '71.0.3578.124', - '72.0.3626.50', - '73.0.3664.3', - '73.0.3664.2', - '73.0.3664.1', - '73.0.3664.0', - '73.0.3663.2', - '72.0.3626.49', - '71.0.3578.123', - '73.0.3663.1', - '73.0.3663.0', - '72.0.3626.48', - '71.0.3578.122', - '73.0.3662.1', - '73.0.3662.0', - '72.0.3626.47', - '71.0.3578.121', - '73.0.3661.1', - '72.0.3626.46', - '73.0.3661.0', - '72.0.3626.45', - '71.0.3578.120', - '73.0.3660.2', - '73.0.3660.1', - '73.0.3660.0', - '72.0.3626.44', - '71.0.3578.119', - '73.0.3659.1', - '73.0.3659.0', - '72.0.3626.43', - '71.0.3578.118', - '73.0.3658.1', - '73.0.3658.0', - '72.0.3626.42', - '71.0.3578.117', - '73.0.3657.1', - '73.0.3657.0', - '72.0.3626.41', - '71.0.3578.116', - '73.0.3656.1', - '73.0.3656.0', - '72.0.3626.40', - '71.0.3578.115', - '73.0.3655.1', - '73.0.3655.0', - '72.0.3626.39', - '71.0.3578.114', - '73.0.3654.1', - '73.0.3654.0', - '72.0.3626.38', - '71.0.3578.113', - '73.0.3653.1', - '73.0.3653.0', - '72.0.3626.37', - '71.0.3578.112', - '73.0.3652.1', - '73.0.3652.0', - '72.0.3626.36', - '71.0.3578.111', - '73.0.3651.1', - '73.0.3651.0', - '72.0.3626.35', - '71.0.3578.110', - '73.0.3650.1', - '73.0.3650.0', - '72.0.3626.34', - '71.0.3578.109', - '73.0.3649.1', - '73.0.3649.0', - '72.0.3626.33', - '71.0.3578.108', - '73.0.3648.2', - '73.0.3648.1', - '73.0.3648.0', - '72.0.3626.32', - '71.0.3578.107', - '73.0.3647.2', - '73.0.3647.1', - '73.0.3647.0', - '72.0.3626.31', - '71.0.3578.106', - '73.0.3635.3', - '73.0.3646.2', - '73.0.3646.1', - 
'73.0.3646.0', - '72.0.3626.30', - '71.0.3578.105', - '72.0.3626.29', - '73.0.3645.2', - '73.0.3645.1', - '73.0.3645.0', - '72.0.3626.28', - '71.0.3578.104', - '72.0.3626.27', - '72.0.3626.26', - '72.0.3626.25', - '72.0.3626.24', - '73.0.3644.0', - '73.0.3643.2', - '72.0.3626.23', - '71.0.3578.103', - '73.0.3643.1', - '73.0.3643.0', - '72.0.3626.22', - '71.0.3578.102', - '73.0.3642.1', - '73.0.3642.0', - '72.0.3626.21', - '71.0.3578.101', - '73.0.3641.1', - '73.0.3641.0', - '72.0.3626.20', - '71.0.3578.100', - '72.0.3626.19', - '73.0.3640.1', - '73.0.3640.0', - '72.0.3626.18', - '73.0.3639.1', - '71.0.3578.99', - '73.0.3639.0', - '72.0.3626.17', - '73.0.3638.2', - '72.0.3626.16', - '73.0.3638.1', - '73.0.3638.0', - '72.0.3626.15', - '71.0.3578.98', - '73.0.3635.2', - '71.0.3578.97', - '73.0.3637.1', - '73.0.3637.0', - '72.0.3626.14', - '71.0.3578.96', - '71.0.3578.95', - '72.0.3626.13', - '71.0.3578.94', - '73.0.3636.2', - '71.0.3578.93', - '73.0.3636.1', - '73.0.3636.0', - '72.0.3626.12', - '71.0.3578.92', - '73.0.3635.1', - '73.0.3635.0', - '72.0.3626.11', - '71.0.3578.91', - '73.0.3634.2', - '73.0.3634.1', - '73.0.3634.0', - '72.0.3626.10', - '71.0.3578.90', - '71.0.3578.89', - '73.0.3633.2', - '73.0.3633.1', - '73.0.3633.0', - '72.0.3610.4', - '72.0.3626.9', - '71.0.3578.88', - '73.0.3632.5', - '73.0.3632.4', - '73.0.3632.3', - '73.0.3632.2', - '73.0.3632.1', - '73.0.3632.0', - '72.0.3626.8', - '71.0.3578.87', - '73.0.3631.2', - '73.0.3631.1', - '73.0.3631.0', - '72.0.3626.7', - '71.0.3578.86', - '72.0.3626.6', - '73.0.3630.1', - '73.0.3630.0', - '72.0.3626.5', - '71.0.3578.85', - '72.0.3626.4', - '73.0.3628.3', - '73.0.3628.2', - '73.0.3629.1', - '73.0.3629.0', - '72.0.3626.3', - '71.0.3578.84', - '73.0.3628.1', - '73.0.3628.0', - '71.0.3578.83', - '73.0.3627.1', - '73.0.3627.0', - '72.0.3626.2', - '71.0.3578.82', - '71.0.3578.81', - '71.0.3578.80', - '72.0.3626.1', - '72.0.3626.0', - '71.0.3578.79', - '70.0.3538.124', - '71.0.3578.78', - '72.0.3623.4', - '72.0.3625.2', - '72.0.3625.1', - '72.0.3625.0', - '71.0.3578.77', - '70.0.3538.123', - '72.0.3624.4', - '72.0.3624.3', - '72.0.3624.2', - '71.0.3578.76', - '72.0.3624.1', - '72.0.3624.0', - '72.0.3623.3', - '71.0.3578.75', - '70.0.3538.122', - '71.0.3578.74', - '72.0.3623.2', - '72.0.3610.3', - '72.0.3623.1', - '72.0.3623.0', - '72.0.3622.3', - '72.0.3622.2', - '71.0.3578.73', - '70.0.3538.121', - '72.0.3622.1', - '72.0.3622.0', - '71.0.3578.72', - '70.0.3538.120', - '72.0.3621.1', - '72.0.3621.0', - '71.0.3578.71', - '70.0.3538.119', - '72.0.3620.1', - '72.0.3620.0', - '71.0.3578.70', - '70.0.3538.118', - '71.0.3578.69', - '72.0.3619.1', - '72.0.3619.0', - '71.0.3578.68', - '70.0.3538.117', - '71.0.3578.67', - '72.0.3618.1', - '72.0.3618.0', - '71.0.3578.66', - '70.0.3538.116', - '72.0.3617.1', - '72.0.3617.0', - '71.0.3578.65', - '70.0.3538.115', - '72.0.3602.3', - '71.0.3578.64', - '72.0.3616.1', - '72.0.3616.0', - '71.0.3578.63', - '70.0.3538.114', - '71.0.3578.62', - '72.0.3615.1', - '72.0.3615.0', - '71.0.3578.61', - '70.0.3538.113', - '72.0.3614.1', - '72.0.3614.0', - '71.0.3578.60', - '70.0.3538.112', - '72.0.3613.1', - '72.0.3613.0', - '71.0.3578.59', - '70.0.3538.111', - '72.0.3612.2', - '72.0.3612.1', - '72.0.3612.0', - '70.0.3538.110', - '71.0.3578.58', - '70.0.3538.109', - '72.0.3611.2', - '72.0.3611.1', - '72.0.3611.0', - '71.0.3578.57', - '70.0.3538.108', - '72.0.3610.2', - '71.0.3578.56', - '71.0.3578.55', - '72.0.3610.1', - '72.0.3610.0', - '71.0.3578.54', - '70.0.3538.107', - '71.0.3578.53', - '72.0.3609.3', - 
-        '71.0.3578.52',
-        '72.0.3609.2',
-        '71.0.3578.51',
-        '72.0.3608.5',
-        '72.0.3609.1',
-        '72.0.3609.0',
-        '71.0.3578.50',
-        '70.0.3538.106',
-        '72.0.3608.4',
-        '72.0.3608.3',
-        '72.0.3608.2',
-        '71.0.3578.49',
-        '72.0.3608.1',
-        '72.0.3608.0',
-        '70.0.3538.105',
-        '71.0.3578.48',
-        '72.0.3607.1',
-        '72.0.3607.0',
-        '71.0.3578.47',
-        '70.0.3538.104',
-        '72.0.3606.2',
-        '72.0.3606.1',
-        '72.0.3606.0',
-        '71.0.3578.46',
-        '70.0.3538.103',
-        '70.0.3538.102',
-        '72.0.3605.3',
-        '72.0.3605.2',
-        '72.0.3605.1',
-        '72.0.3605.0',
-        '71.0.3578.45',
-        '70.0.3538.101',
-        '71.0.3578.44',
-        '71.0.3578.43',
-        '70.0.3538.100',
-        '70.0.3538.99',
-        '71.0.3578.42',
-        '72.0.3604.1',
-        '72.0.3604.0',
-        '71.0.3578.41',
-        '70.0.3538.98',
-        '71.0.3578.40',
-        '72.0.3603.2',
-        '72.0.3603.1',
-        '72.0.3603.0',
-        '71.0.3578.39',
-        '70.0.3538.97',
-        '72.0.3602.2',
-        '71.0.3578.38',
-        '71.0.3578.37',
-        '72.0.3602.1',
-        '72.0.3602.0',
-        '71.0.3578.36',
-        '70.0.3538.96',
-        '72.0.3601.1',
-        '72.0.3601.0',
-        '71.0.3578.35',
-        '70.0.3538.95',
-        '72.0.3600.1',
-        '72.0.3600.0',
-        '71.0.3578.34',
-        '70.0.3538.94',
-        '72.0.3599.3',
-        '72.0.3599.2',
-        '72.0.3599.1',
-        '72.0.3599.0',
-        '71.0.3578.33',
-        '70.0.3538.93',
-        '72.0.3598.1',
-        '72.0.3598.0',
-        '71.0.3578.32',
-        '70.0.3538.87',
-        '72.0.3597.1',
-        '72.0.3597.0',
-        '72.0.3596.2',
-        '71.0.3578.31',
-        '70.0.3538.86',
-        '71.0.3578.30',
-        '71.0.3578.29',
-        '72.0.3596.1',
-        '72.0.3596.0',
-        '71.0.3578.28',
-        '70.0.3538.85',
-        '72.0.3595.2',
-        '72.0.3591.3',
-        '72.0.3595.1',
-        '72.0.3595.0',
-        '71.0.3578.27',
-        '70.0.3538.84',
-        '72.0.3594.1',
-        '72.0.3594.0',
-        '71.0.3578.26',
-        '70.0.3538.83',
-        '72.0.3593.2',
-        '72.0.3593.1',
-        '72.0.3593.0',
-        '71.0.3578.25',
-        '70.0.3538.82',
-        '72.0.3589.3',
-        '72.0.3592.2',
-        '72.0.3592.1',
-        '72.0.3592.0',
-        '71.0.3578.24',
-        '72.0.3589.2',
-        '70.0.3538.81',
-        '70.0.3538.80',
-        '72.0.3591.2',
-        '72.0.3591.1',
-        '72.0.3591.0',
-        '71.0.3578.23',
-        '70.0.3538.79',
-        '71.0.3578.22',
-        '72.0.3590.1',
-        '72.0.3590.0',
-        '71.0.3578.21',
-        '70.0.3538.78',
-        '70.0.3538.77',
-        '72.0.3589.1',
-        '72.0.3589.0',
-        '71.0.3578.20',
-        '70.0.3538.76',
-        '71.0.3578.19',
-        '70.0.3538.75',
-        '72.0.3588.1',
-        '72.0.3588.0',
-        '71.0.3578.18',
-        '70.0.3538.74',
-        '72.0.3586.2',
-        '72.0.3587.0',
-        '71.0.3578.17',
-        '70.0.3538.73',
-        '72.0.3586.1',
-        '72.0.3586.0',
-        '71.0.3578.16',
-        '70.0.3538.72',
-        '72.0.3585.1',
-        '72.0.3585.0',
-        '71.0.3578.15',
-        '70.0.3538.71',
-        '71.0.3578.14',
-        '72.0.3584.1',
-        '72.0.3584.0',
-        '71.0.3578.13',
-        '70.0.3538.70',
-        '72.0.3583.2',
-        '71.0.3578.12',
-        '72.0.3583.1',
-        '72.0.3583.0',
-        '71.0.3578.11',
-        '70.0.3538.69',
-        '71.0.3578.10',
-        '72.0.3582.0',
-        '72.0.3581.4',
-        '71.0.3578.9',
-        '70.0.3538.67',
-        '72.0.3581.3',
-        '72.0.3581.2',
-        '72.0.3581.1',
-        '72.0.3581.0',
-        '71.0.3578.8',
-        '70.0.3538.66',
-        '72.0.3580.1',
-        '72.0.3580.0',
-        '71.0.3578.7',
-        '70.0.3538.65',
-        '71.0.3578.6',
-        '72.0.3579.1',
-        '72.0.3579.0',
-        '71.0.3578.5',
-        '70.0.3538.64',
-        '71.0.3578.4',
-        '71.0.3578.3',
-        '71.0.3578.2',
-        '71.0.3578.1',
-        '71.0.3578.0',
-        '70.0.3538.63',
-        '69.0.3497.128',
-        '70.0.3538.62',
-        '70.0.3538.61',
-        '70.0.3538.60',
-        '70.0.3538.59',
-        '71.0.3577.1',
-        '71.0.3577.0',
-        '70.0.3538.58',
-        '69.0.3497.127',
-        '71.0.3576.2',
-        '71.0.3576.1',
-        '71.0.3576.0',
-        '70.0.3538.57',
-        '70.0.3538.56',
-        '71.0.3575.2',
-        '70.0.3538.55',
-        '69.0.3497.126',
-        '70.0.3538.54',
-        '71.0.3575.1',
-        '71.0.3575.0',
-        '71.0.3574.1',
-        '71.0.3574.0',
-        '70.0.3538.53',
-        '69.0.3497.125',
-        '70.0.3538.52',
-        '71.0.3573.1',
-        '71.0.3573.0',
-        '70.0.3538.51',
-        '69.0.3497.124',
-        '71.0.3572.1',
-        '71.0.3572.0',
-        '70.0.3538.50',
-        '69.0.3497.123',
-        '71.0.3571.2',
-        '70.0.3538.49',
-        '69.0.3497.122',
-        '71.0.3571.1',
-        '71.0.3571.0',
-        '70.0.3538.48',
-        '69.0.3497.121',
-        '71.0.3570.1',
-        '71.0.3570.0',
-        '70.0.3538.47',
-        '69.0.3497.120',
-        '71.0.3568.2',
-        '71.0.3569.1',
-        '71.0.3569.0',
-        '70.0.3538.46',
-        '69.0.3497.119',
-        '70.0.3538.45',
-        '71.0.3568.1',
-        '71.0.3568.0',
-        '70.0.3538.44',
-        '69.0.3497.118',
-        '70.0.3538.43',
-        '70.0.3538.42',
-        '71.0.3567.1',
-        '71.0.3567.0',
-        '70.0.3538.41',
-        '69.0.3497.117',
-        '71.0.3566.1',
-        '71.0.3566.0',
-        '70.0.3538.40',
-        '69.0.3497.116',
-        '71.0.3565.1',
-        '71.0.3565.0',
-        '70.0.3538.39',
-        '69.0.3497.115',
-        '71.0.3564.1',
-        '71.0.3564.0',
-        '70.0.3538.38',
-        '69.0.3497.114',
-        '71.0.3563.0',
-        '71.0.3562.2',
-        '70.0.3538.37',
-        '69.0.3497.113',
-        '70.0.3538.36',
-        '70.0.3538.35',
-        '71.0.3562.1',
-        '71.0.3562.0',
-        '70.0.3538.34',
-        '69.0.3497.112',
-        '70.0.3538.33',
-        '71.0.3561.1',
-        '71.0.3561.0',
-        '70.0.3538.32',
-        '69.0.3497.111',
-        '71.0.3559.6',
-        '71.0.3560.1',
-        '71.0.3560.0',
-        '71.0.3559.5',
-        '71.0.3559.4',
-        '70.0.3538.31',
-        '69.0.3497.110',
-        '71.0.3559.3',
-        '70.0.3538.30',
-        '69.0.3497.109',
-        '71.0.3559.2',
-        '71.0.3559.1',
-        '71.0.3559.0',
-        '70.0.3538.29',
-        '69.0.3497.108',
-        '71.0.3558.2',
-        '71.0.3558.1',
-        '71.0.3558.0',
-        '70.0.3538.28',
-        '69.0.3497.107',
-        '71.0.3557.2',
-        '71.0.3557.1',
-        '71.0.3557.0',
-        '70.0.3538.27',
-        '69.0.3497.106',
-        '71.0.3554.4',
-        '70.0.3538.26',
-        '71.0.3556.1',
-        '71.0.3556.0',
-        '70.0.3538.25',
-        '71.0.3554.3',
-        '69.0.3497.105',
-        '71.0.3554.2',
-        '70.0.3538.24',
-        '69.0.3497.104',
-        '71.0.3555.2',
-        '70.0.3538.23',
-        '71.0.3555.1',
-        '71.0.3555.0',
-        '70.0.3538.22',
-        '69.0.3497.103',
-        '71.0.3554.1',
-        '71.0.3554.0',
-        '70.0.3538.21',
-        '69.0.3497.102',
-        '71.0.3553.3',
-        '70.0.3538.20',
-        '69.0.3497.101',
-        '71.0.3553.2',
-        '69.0.3497.100',
-        '71.0.3553.1',
-        '71.0.3553.0',
-        '70.0.3538.19',
-        '69.0.3497.99',
-        '69.0.3497.98',
-        '69.0.3497.97',
-        '71.0.3552.6',
-        '71.0.3552.5',
-        '71.0.3552.4',
-        '71.0.3552.3',
-        '71.0.3552.2',
-        '71.0.3552.1',
-        '71.0.3552.0',
-        '70.0.3538.18',
-        '69.0.3497.96',
-        '71.0.3551.3',
-        '71.0.3551.2',
-        '71.0.3551.1',
-        '71.0.3551.0',
-        '70.0.3538.17',
-        '69.0.3497.95',
-        '71.0.3550.3',
-        '71.0.3550.2',
-        '71.0.3550.1',
-        '71.0.3550.0',
-        '70.0.3538.16',
-        '69.0.3497.94',
-        '71.0.3549.1',
-        '71.0.3549.0',
-        '70.0.3538.15',
-        '69.0.3497.93',
-        '69.0.3497.92',
-        '71.0.3548.1',
-        '71.0.3548.0',
-        '70.0.3538.14',
-        '69.0.3497.91',
-        '71.0.3547.1',
-        '71.0.3547.0',
-        '70.0.3538.13',
-        '69.0.3497.90',
-        '71.0.3546.2',
-        '69.0.3497.89',
-        '71.0.3546.1',
-        '71.0.3546.0',
-        '70.0.3538.12',
-        '69.0.3497.88',
-        '71.0.3545.4',
-        '71.0.3545.3',
-        '71.0.3545.2',
-        '71.0.3545.1',
-        '71.0.3545.0',
-        '70.0.3538.11',
-        '69.0.3497.87',
-        '71.0.3544.5',
-        '71.0.3544.4',
-        '71.0.3544.3',
-        '71.0.3544.2',
-        '71.0.3544.1',
-        '71.0.3544.0',
-        '69.0.3497.86',
-        '70.0.3538.10',
-        '69.0.3497.85',
-        '70.0.3538.9',
-        '69.0.3497.84',
-        '71.0.3543.4',
-        '70.0.3538.8',
-        '71.0.3543.3',
-        '71.0.3543.2',
-        '71.0.3543.1',
-        '71.0.3543.0',
-        '70.0.3538.7',
-        '69.0.3497.83',
-        '71.0.3542.2',
-        '71.0.3542.1',
-        '71.0.3542.0',
-        '70.0.3538.6',
-        '69.0.3497.82',
-        '69.0.3497.81',
-        '71.0.3541.1',
-        '71.0.3541.0',
-        '70.0.3538.5',
-        '69.0.3497.80',
-        '71.0.3540.1',
-        '71.0.3540.0',
-        '70.0.3538.4',
-        '69.0.3497.79',
-        '70.0.3538.3',
-        '71.0.3539.1',
-        '71.0.3539.0',
-        '69.0.3497.78',
-        '68.0.3440.134',
-        '69.0.3497.77',
-        '70.0.3538.2',
-        '70.0.3538.1',
-        '70.0.3538.0',
-        '69.0.3497.76',
-        '68.0.3440.133',
-        '69.0.3497.75',
-        '70.0.3537.2',
-        '70.0.3537.1',
-        '70.0.3537.0',
-        '69.0.3497.74',
-        '68.0.3440.132',
-        '70.0.3536.0',
-        '70.0.3535.5',
-        '70.0.3535.4',
-        '70.0.3535.3',
-        '69.0.3497.73',
-        '68.0.3440.131',
-        '70.0.3532.8',
-        '70.0.3532.7',
-        '69.0.3497.72',
-        '69.0.3497.71',
-        '70.0.3535.2',
-        '70.0.3535.1',
-        '70.0.3535.0',
-        '69.0.3497.70',
-        '68.0.3440.130',
-        '69.0.3497.69',
-        '68.0.3440.129',
-        '70.0.3534.4',
-        '70.0.3534.3',
-        '70.0.3534.2',
-        '70.0.3534.1',
-        '70.0.3534.0',
-        '69.0.3497.68',
-        '68.0.3440.128',
-        '70.0.3533.2',
-        '70.0.3533.1',
-        '70.0.3533.0',
-        '69.0.3497.67',
-        '68.0.3440.127',
-        '70.0.3532.6',
-        '70.0.3532.5',
-        '70.0.3532.4',
-        '69.0.3497.66',
-        '68.0.3440.126',
-        '70.0.3532.3',
-        '70.0.3532.2',
-        '70.0.3532.1',
-        '69.0.3497.60',
-        '69.0.3497.65',
-        '69.0.3497.64',
-        '70.0.3532.0',
-        '70.0.3531.0',
-        '70.0.3530.4',
-        '70.0.3530.3',
-        '70.0.3530.2',
-        '69.0.3497.58',
-        '68.0.3440.125',
-        '69.0.3497.57',
-        '69.0.3497.56',
-        '69.0.3497.55',
-        '69.0.3497.54',
-        '70.0.3530.1',
-        '70.0.3530.0',
-        '69.0.3497.53',
-        '68.0.3440.124',
-        '69.0.3497.52',
-        '70.0.3529.3',
-        '70.0.3529.2',
-        '70.0.3529.1',
-        '70.0.3529.0',
-        '69.0.3497.51',
-        '70.0.3528.4',
-        '68.0.3440.123',
-        '70.0.3528.3',
-        '70.0.3528.2',
-        '70.0.3528.1',
-        '70.0.3528.0',
-        '69.0.3497.50',
-        '68.0.3440.122',
-        '70.0.3527.1',
-        '70.0.3527.0',
-        '69.0.3497.49',
-        '68.0.3440.121',
-        '70.0.3526.1',
-        '70.0.3526.0',
-        '68.0.3440.120',
-        '69.0.3497.48',
-        '69.0.3497.47',
-        '68.0.3440.119',
-        '68.0.3440.118',
-        '70.0.3525.5',
-        '70.0.3525.4',
-        '70.0.3525.3',
-        '68.0.3440.117',
-        '69.0.3497.46',
-        '70.0.3525.2',
-        '70.0.3525.1',
-        '70.0.3525.0',
-        '69.0.3497.45',
-        '68.0.3440.116',
-        '70.0.3524.4',
-        '70.0.3524.3',
-        '69.0.3497.44',
-        '70.0.3524.2',
-        '70.0.3524.1',
-        '70.0.3524.0',
-        '70.0.3523.2',
-        '69.0.3497.43',
-        '68.0.3440.115',
-        '70.0.3505.9',
-        '69.0.3497.42',
-        '70.0.3505.8',
-        '70.0.3523.1',
-        '70.0.3523.0',
-        '69.0.3497.41',
-        '68.0.3440.114',
-        '70.0.3505.7',
-        '69.0.3497.40',
-        '70.0.3522.1',
-        '70.0.3522.0',
-        '70.0.3521.2',
-        '69.0.3497.39',
-        '68.0.3440.113',
-        '70.0.3505.6',
-        '70.0.3521.1',
-        '70.0.3521.0',
-        '69.0.3497.38',
-        '68.0.3440.112',
-        '70.0.3520.1',
-        '70.0.3520.0',
-        '69.0.3497.37',
-        '68.0.3440.111',
-        '70.0.3519.3',
-        '70.0.3519.2',
-        '70.0.3519.1',
-        '70.0.3519.0',
-        '69.0.3497.36',
-        '68.0.3440.110',
-        '70.0.3518.1',
-        '70.0.3518.0',
-        '69.0.3497.35',
-        '69.0.3497.34',
-        '68.0.3440.109',
-        '70.0.3517.1',
-        '70.0.3517.0',
-        '69.0.3497.33',
-        '68.0.3440.108',
-        '69.0.3497.32',
-        '70.0.3516.3',
-        '70.0.3516.2',
-        '70.0.3516.1',
-        '70.0.3516.0',
-        '69.0.3497.31',
-        '68.0.3440.107',
-        '70.0.3515.4',
-        '68.0.3440.106',
-        '70.0.3515.3',
-        '70.0.3515.2',
-        '70.0.3515.1',
-        '70.0.3515.0',
-        '69.0.3497.30',
-        '68.0.3440.105',
-        '68.0.3440.104',
-        '70.0.3514.2',
-        '70.0.3514.1',
-        '70.0.3514.0',
-        '69.0.3497.29',
-        '68.0.3440.103',
-        '70.0.3513.1',
-        '70.0.3513.0',
-        '69.0.3497.28',
+        '90.0.4430.212',
+        '90.0.4430.24',
+        '90.0.4430.70',
+        '90.0.4430.72',
+        '90.0.4430.85',
+        '90.0.4430.93',
+        '91.0.4472.101',
+        '91.0.4472.106',
+        '91.0.4472.114',
+        '91.0.4472.124',
+        '91.0.4472.164',
+        '91.0.4472.19',
+        '91.0.4472.77',
+        '92.0.4515.107',
+        '92.0.4515.115',
+        '92.0.4515.131',
+        '92.0.4515.159',
+        '92.0.4515.43',
+        '93.0.4556.0',
+        '93.0.4577.15',
+        '93.0.4577.63',
+        '93.0.4577.82',
+        '94.0.4606.41',
+        '94.0.4606.54',
+        '94.0.4606.61',
+        '94.0.4606.71',
+        '94.0.4606.81',
+        '94.0.4606.85',
+        '95.0.4638.17',
+        '95.0.4638.50',
+        '95.0.4638.54',
+        '95.0.4638.69',
+        '95.0.4638.74',
+        '96.0.4664.18',
+        '96.0.4664.45',
+        '96.0.4664.55',
+        '96.0.4664.93',
+        '97.0.4692.20',
     )
     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
 
 
 std_headers = {
     'User-Agent': random_user_agent(),
-    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     'Accept-Encoding': 'gzip, deflate',
     'Accept-Language': 'en-us,en;q=0.5',
@@ -4170,12 +2631,6 @@ class LazyList(collections.abc.Sequence):
     def __copy__(self):
         return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)
 
-    def __deepcopy__(self, memo):
-        # FIXME: This is actually just a shallow copy
-        id_ = id(self)
-        memo[id_] = self.__copy__()
-        return memo[id_]
-
     def __repr__(self):
         # repr and str should mimic a list. So we exhaust the iterable
         return repr(self.exhaust())
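
Two of the hunks above are worth a brief note. First, `random_user_agent()` simply interpolates a randomly chosen Chrome version into a fixed User-Agent template, so trimming `_CHROME_VERSIONS` to recent releases keeps the generated strings plausible. A minimal sketch of the pattern; the template string and version pool here are illustrative stand-ins, not the exact values from yt_dlp/utils.py:

    import random

    # Hypothetical stand-ins for the template and version pool used above
    _USER_AGENT_TPL = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/%s Safari/537.36')
    _CHROME_VERSIONS = ('96.0.4664.93', '97.0.4692.20')  # illustrative subset

    # Each call yields a UA string claiming a random recent Chrome build
    print(_USER_AGENT_TPL % random.choice(_CHROME_VERSIONS))

Second, the `LazyList.__deepcopy__` removal fixes a subtler bug: the deleted method registered the result of `__copy__` in the memo dict, so `copy.deepcopy` silently returned an object sharing state with the original (as its own FIXME admitted). With the method gone, `copy.deepcopy` falls back to Python's default recursive protocol. A sketch of the difference, using a hypothetical `Box` wrapper rather than `LazyList` itself:

    import copy

    class Box:
        # Hypothetical stand-in for a LazyList-like wrapper
        def __init__(self, items):
            self.items = items

        def __copy__(self):
            return type(self)(self.items)

    class ShallowBox(Box):
        # Mimics the removed __deepcopy__: delegates to __copy__,
        # so nested state is shared rather than duplicated
        def __deepcopy__(self, memo):
            memo[id(self)] = self.__copy__()
            return memo[id(self)]

    a = ShallowBox([[1, 2]])
    assert copy.deepcopy(a).items is a.items      # inner list shared: shallow

    b = Box([[1, 2]])
    assert copy.deepcopy(b).items is not b.items  # default protocol: truly deep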