diff options
author | Jesús <heckyel@hyperbola.info> | 2021-11-30 17:19:55 -0500 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2021-11-30 17:19:55 -0500 |
commit | ccf02e63e53e481824b56b4e05ab1c2a9558c9a7 (patch) | |
tree | 41ad503be3ca01b0d83d0585f577bcf871b3f159 | |
parent | 54288332f1d8ec2974c65281e6a712e56b5cd24f (diff) | |
parent | 1bad50eced921126ea6587d9ae99e98164da500b (diff) | |
download | hypervideo-pre-ccf02e63e53e481824b56b4e05ab1c2a9558c9a7.tar.lz hypervideo-pre-ccf02e63e53e481824b56b4e05ab1c2a9558c9a7.tar.xz hypervideo-pre-ccf02e63e53e481824b56b4e05ab1c2a9558c9a7.zip |
updated from upstream | 30/11/2021 at 17:19
52 files changed, 953 insertions, 527 deletions
diff --git a/.gitignore b/.gitignore index 1b79afe62..8a72e3ea9 100644 --- a/.gitignore +++ b/.gitignore @@ -6,41 +6,47 @@ cookies .netrc # Downloaded -*.srt -*.ttml -*.sbv -*.vtt -*.flv -*.mp4 -*.m4a -*.m4v -*.mp3 *.3gp -*.webm -*.wav +*.annotations.xml *.ape -*.mkv -*.flac +*.aria2 *.avi -*.swf -*.part -*.part-* -*.ytdl +*.description +*.desktop *.dump +*.flac +*.flv *.frag *.frag.urls -*.aria2 -*.swp -*.ogg -*.opus *.info.json -*.live_chat.json -*.jpg *.jpeg +*.jpg +*.live_chat.json +*.m4a +*.m4v +*.mhtml +*.mkv +*.mov +*.mp3 +*.mp4 +*.ogg +*.opus +*.part +*.part-* *.png +*.sbv +*.srt +*.swf +*.swp +*.ttml +*.unknown_video +*.url +*.vtt +*.wav +*.webloc +*.webm *.webp -*.annotations.xml -*.description +*.ytdl .cache/ # Allow config/media files in testdata diff --git a/Changelog.md b/Changelog.md index 072dc336d..c33691969 100644 --- a/Changelog.md +++ b/Changelog.md @@ -7,10 +7,8 @@ * Update Changelog.md and CONTRIBUTORS * Change "Merged with ytdl" version in Readme.md if needed * Add new/fixed extractors in "new features" section of Readme.md -* Commit as `Release <version>` -* Push to origin/release using `git push origin master:release` - build task will now run - +* Commit as `Release <version>` and push to master +* Dispatch the workflow https://github.com/yt-dlp/yt-dlp/actions/workflows/build.yml on master --> diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 15995bd3d..524994ab5 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -93,6 +93,7 @@ from .utils import ( PostProcessingError, preferredencoding, prepend_extension, + ReExtractInfo, register_socks_protocols, RejectedVideoReached, render_table, @@ -109,7 +110,7 @@ from .utils import ( strftime_or_none, subtitles_filename, supports_terminal_sequences, - ThrottledDownload, + timetuple_from_msec, to_high_limit_path, traverse_obj, try_get, @@ -309,6 +310,8 @@ class YoutubeDL(object): file that is in the archive. break_on_reject: Stop the download process when encountering a video that has been filtered out. + break_per_url: Whether break_on_reject and break_on_existing + should act on each input URL as opposed to for the entire queue cookiefile: File name where cookies should be read from and dumped to cookiesfrombrowser: A tuple containing the name of the browser and the profile name/path from where cookies are loaded. @@ -330,6 +333,9 @@ class YoutubeDL(object): extract_flat: Do not resolve URLs, return the immediate result. Pass in 'in_playlist' to only show this behavior for playlist items. + wait_for_video: If given, wait for scheduled streams to become available. + The value should be a tuple containing the range + (min_secs, max_secs) to wait between retries postprocessors: A list of dictionaries, each with an entry * key: The name of the postprocessor. See yt_dlp/postprocessor/__init__.py for a list. @@ -559,6 +565,8 @@ class YoutubeDL(object): for msg in self.params.get('_warnings', []): self.report_warning(msg) + for msg in self.params.get('_deprecation_warnings', []): + self.deprecation_warning(msg) if 'list-formats' in self.params.get('compat_opts', []): self.params['listformats_table'] = False @@ -841,31 +849,31 @@ class YoutubeDL(object): class Styles(Enum): HEADERS = 'yellow' - EMPHASIS = 'blue' + EMPHASIS = 'light blue' ID = 'green' DELIM = 'blue' ERROR = 'red' WARNING = 'yellow' SUPPRESS = 'light black' - def __format_text(self, out, text, f, fallback=None, *, test_encoding=False): - assert out in ('screen', 'err') + def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False): if test_encoding: original_text = text - handle = self._screen_file if out == 'screen' else self._err_file encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii') text = text.encode(encoding, 'ignore').decode(encoding) if fallback is not None and text != original_text: text = fallback if isinstance(f, self.Styles): - f = f._value_ - return format_text(text, f) if self._allow_colors[out] else text if fallback is None else fallback + f = f.value + return format_text(text, f) if allow_colors else text if fallback is None else fallback def _format_screen(self, *args, **kwargs): - return self.__format_text('screen', *args, **kwargs) + return self._format_text( + self._screen_file, self._allow_colors['screen'], *args, **kwargs) def _format_err(self, *args, **kwargs): - return self.__format_text('err', *args, **kwargs) + return self._format_text( + self._err_file, self._allow_colors['err'], *args, **kwargs) def report_warning(self, message, only_once=False): ''' @@ -879,6 +887,12 @@ class YoutubeDL(object): return self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once) + def deprecation_warning(self, message): + if self.params.get('logger') is not None: + self.params['logger'].warning('DeprecationWarning: {message}') + else: + self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True) + def report_error(self, message, tb=None): ''' Do the same as trouble, but prefixes the message with 'ERROR:', colored @@ -1171,12 +1185,8 @@ class YoutubeDL(object): # https://github.com/blackjack4494/youtube-dlc/issues/85 trim_file_name = self.params.get('trim_file_name', False) if trim_file_name: - fn_groups = filename.rsplit('.') - ext = fn_groups[-1] - sub_ext = '' - if len(fn_groups) > 2: - sub_ext = fn_groups[-2] - filename = join_nonempty(fn_groups[0][:trim_file_name], sub_ext, ext, delim='.') + no_ext, *ext = filename.rsplit('.', 2) + filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.') return filename except ValueError as err: @@ -1303,8 +1313,9 @@ class YoutubeDL(object): temp_id = ie.get_temp_id(url) if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}): - self.to_screen("[%s] %s: has already been recorded in archive" % ( - ie_key, temp_id)) + self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive') + if self.params.get('break_on_existing', False): + raise ExistingVideoReached() break return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process) else: @@ -1324,9 +1335,12 @@ class YoutubeDL(object): self.report_error(msg) except ExtractorError as e: # An error we somewhat expected self.report_error(compat_str(e), e.format_traceback()) - except ThrottledDownload as e: - self.to_stderr('\r') - self.report_warning(f'{e}; Re-extracting data') + except ReExtractInfo as e: + if e.expected: + self.to_screen(f'{e}; Re-extracting data') + else: + self.to_stderr('\r') + self.report_warning(f'{e}; Re-extracting data') return wrapper(self, *args, **kwargs) except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError): raise @@ -1337,6 +1351,47 @@ class YoutubeDL(object): raise return wrapper + def _wait_for_video(self, ie_result): + if (not self.params.get('wait_for_video') + or ie_result.get('_type', 'video') != 'video' + or ie_result.get('formats') or ie_result.get('url')): + return + + format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1] + last_msg = '' + + def progress(msg): + nonlocal last_msg + self.to_screen(msg + ' ' * (len(last_msg) - len(msg)) + '\r', skip_eol=True) + last_msg = msg + + min_wait, max_wait = self.params.get('wait_for_video') + diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time()) + if diff is None and ie_result.get('live_status') == 'is_upcoming': + diff = random.randrange(min_wait or 0, max_wait) if max_wait else min_wait + self.report_warning('Release time of video is not known') + elif (diff or 0) <= 0: + self.report_warning('Video should already be available according to extracted info') + diff = min(max(diff, min_wait or 0), max_wait or float('inf')) + self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now') + + wait_till = time.time() + diff + try: + while True: + diff = wait_till - time.time() + if diff <= 0: + progress('') + raise ReExtractInfo('[wait] Wait period ended', expected=True) + progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}') + time.sleep(1) + except KeyboardInterrupt: + progress('') + raise ReExtractInfo('[wait] Interrupted by user', expected=True) + except BaseException as e: + if not isinstance(e, ReExtractInfo): + self.to_screen('') + raise + @__handle_extraction_exceptions def __extract_info(self, url, ie, download, extra_info, process): ie_result = ie.extract(url) @@ -1352,6 +1407,7 @@ class YoutubeDL(object): ie_result.setdefault('original_url', extra_info['original_url']) self.add_default_extra_info(ie_result, ie, url) if process: + self._wait_for_video(ie_result) return self.process_ie_result(ie_result, download, extra_info) else: return ie_result @@ -2966,9 +3022,13 @@ class YoutubeDL(object): res = func(*args, **kwargs) except UnavailableVideoError as e: self.report_error(e) - except DownloadCancelled as e: + except MaxDownloadsReached as e: self.to_screen(f'[info] {e}') raise + except DownloadCancelled as e: + self.to_screen(f'[info] {e}') + if not self.params.get('break_per_url'): + raise else: if self.params.get('dump_single_json', False): self.post_extract(res) @@ -2999,7 +3059,7 @@ class YoutubeDL(object): info = self.sanitize_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True)) try: self.__download_wrapper(self.process_ie_result)(info, download=True) - except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e: + except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: if not isinstance(e, EntryNotInPlaylist): self.to_stderr('\r') webpage_url = info.get('webpage_url') @@ -3166,15 +3226,19 @@ class YoutubeDL(object): def _format_note(self, fdict): res = '' if fdict.get('ext') in ['f4f', 'f4m']: - res += '(unsupported) ' + res += '(unsupported)' if fdict.get('language'): if res: res += ' ' - res += '[%s] ' % fdict['language'] + res += '[%s]' % fdict['language'] if fdict.get('format_note') is not None: - res += fdict['format_note'] + ' ' + if res: + res += ' ' + res += fdict['format_note'] if fdict.get('tbr') is not None: - res += '%4dk ' % fdict['tbr'] + if res: + res += ', ' + res += '%4dk' % fdict['tbr'] if fdict.get('container') is not None: if res: res += ', ' @@ -3344,7 +3408,11 @@ class YoutubeDL(object): write_debug = lambda msg: self._write_string(f'[debug] {msg}\n') source = detect_variant() - write_debug('yt-dlp version %s%s' % (__version__, '' if source == 'unknown' else f' ({source})')) + write_debug(join_nonempty( + 'yt-dlp version', __version__, + f'[{RELEASE_GIT_HEAD}]' if RELEASE_GIT_HEAD else '', + '' if source == 'unknown' else f'({source})', + delim=' ')) if not _LAZY_LOADER: if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): write_debug('Lazy loading extractors is forcibly disabled') @@ -3356,20 +3424,22 @@ class YoutubeDL(object): for name, klass in itertools.chain(plugin_extractors.items(), plugin_postprocessors.items())]) if self.params.get('compat_opts'): write_debug('Compatibility options: %s' % ', '.join(self.params.get('compat_opts'))) - try: - sp = Popen( - ['git', 'rev-parse', '--short', 'HEAD'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE, - cwd=os.path.dirname(os.path.abspath(__file__))) - out, err = sp.communicate_or_kill() - out = out.decode().strip() - if re.match('[0-9a-f]+', out): - write_debug('Git HEAD: %s' % out) - except Exception: + + if source == 'source': try: - sys.exc_clear() + sp = Popen( + ['git', 'rev-parse', '--short', 'HEAD'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, + cwd=os.path.dirname(os.path.abspath(__file__))) + out, err = sp.communicate_or_kill() + out = out.decode().strip() + if re.match('[0-9a-f]+', out): + write_debug('Git HEAD: %s' % out) except Exception: - pass + try: + sys.exc_clear() + except Exception: + pass def python_implementation(): impl_name = platform.python_implementation() diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 88f5bbae2..005136e20 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -23,18 +23,17 @@ from .cookies import SUPPORTED_BROWSERS from .utils import ( DateRange, decodeOption, + DownloadCancelled, DownloadError, error_to_compat_str, - ExistingVideoReached, expand_path, + GeoUtils, float_or_none, int_or_none, match_filter_func, - MaxDownloadsReached, parse_duration, preferredencoding, read_batch_urls, - RejectedVideoReached, render_table, SameFileError, setproctitle, @@ -70,7 +69,7 @@ def _real_main(argv=None): setproctitle('yt-dlp') parser, opts, args = parseOpts(argv) - warnings = [] + warnings, deprecation_warnings = [], [] # Set user agent if opts.user_agent is not None: @@ -192,7 +191,15 @@ def _real_main(argv=None): if opts.overwrites: # --yes-overwrites implies --no-continue opts.continue_dl = False if opts.concurrent_fragment_downloads <= 0: - raise ValueError('Concurrent fragments must be positive') + parser.error('Concurrent fragments must be positive') + if opts.wait_for_video is not None: + mobj = re.match(r'(?P<min>\d+)(?:-(?P<max>\d+))?$', opts.wait_for_video) + if not mobj: + parser.error('Invalid time range to wait') + min_wait, max_wait = map(int_or_none, mobj.group('min', 'max')) + if max_wait is not None and max_wait < min_wait: + parser.error('Invalid time range to wait') + opts.wait_for_video = (min_wait, max_wait) def parse_retries(retries, name=''): if retries in ('inf', 'infinite'): @@ -220,9 +227,9 @@ def _real_main(argv=None): parser.error('invalid http chunk size specified') opts.http_chunk_size = numeric_chunksize if opts.playliststart <= 0: - raise ValueError('Playlist start must be positive') + raise parser.error('Playlist start must be positive') if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: - raise ValueError('Playlist end must be greater than playlist start') + raise parser.error('Playlist end must be greater than playlist start') if opts.extractaudio: opts.audioformat = opts.audioformat.lower() if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS): @@ -246,12 +253,17 @@ def _real_main(argv=None): if opts.convertthumbnails is not None: if opts.convertthumbnails not in FFmpegThumbnailsConvertorPP.SUPPORTED_EXTS: parser.error('invalid thumbnail format specified') - if opts.cookiesfrombrowser is not None: opts.cookiesfrombrowser = [ part.strip() or None for part in opts.cookiesfrombrowser.split(':', 1)] if opts.cookiesfrombrowser[0].lower() not in SUPPORTED_BROWSERS: parser.error('unsupported browser specified for cookies') + geo_bypass_code = opts.geo_bypass_ip_block or opts.geo_bypass_country + if geo_bypass_code is not None: + try: + GeoUtils.random_ipv4(geo_bypass_code) + except Exception: + parser.error('unsupported geo-bypass country or ip-block') if opts.date is not None: date = DateRange.day(opts.date) @@ -527,7 +539,7 @@ def _real_main(argv=None): 'add_metadata': opts.addmetadata, 'add_infojson': opts.embed_infojson, }) - # Note: Deprecated + # Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment # but must be below EmbedSubtitle and FFmpegMetadata # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 @@ -540,6 +552,7 @@ def _real_main(argv=None): 'cut': opts.sponskrub_cut, 'force': opts.sponskrub_force, 'ignoreerror': opts.sponskrub is None, + '_from_cli': True, }) if opts.embedthumbnail: already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails @@ -579,6 +592,19 @@ def _real_main(argv=None): opts.postprocessor_args.setdefault('sponskrub', []) opts.postprocessor_args['default'] = opts.postprocessor_args['default-compat'] + def report_deprecation(val, old, new=None): + if not val: + return + deprecation_warnings.append( + f'{old} is deprecated and may be removed in a future version. Use {new} instead' if new + else f'{old} is deprecated and may not work as expected') + + report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove') + report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg') + report_deprecation(opts.include_ads, '--include-ads') + # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future + # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it + final_ext = ( opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS @@ -698,6 +724,7 @@ def _real_main(argv=None): 'download_archive': download_archive_fn, 'break_on_existing': opts.break_on_existing, 'break_on_reject': opts.break_on_reject, + 'break_per_url': opts.break_per_url, 'skip_playlist_after_errors': opts.skip_playlist_after_errors, 'cookiefile': opts.cookiefile, 'cookiesfrombrowser': opts.cookiesfrombrowser, @@ -716,6 +743,7 @@ def _real_main(argv=None): 'youtube_include_hls_manifest': opts.youtube_include_hls_manifest, 'encoding': opts.encoding, 'extract_flat': opts.extract_flat, + 'wait_for_video': opts.wait_for_video, 'mark_watched': opts.mark_watched, 'merge_output_format': opts.merge_output_format, 'final_ext': final_ext, @@ -745,11 +773,12 @@ def _real_main(argv=None): 'geo_bypass_country': opts.geo_bypass_country, 'geo_bypass_ip_block': opts.geo_bypass_ip_block, '_warnings': warnings, + '_deprecation_warnings': deprecation_warnings, 'compat_opts': compat_opts, } with YoutubeDL(ydl_opts) as ydl: - actual_use = len(all_urls) or opts.load_info_filename + actual_use = all_urls or opts.load_info_filename # Remove cache dir if opts.rm_cachedir: @@ -767,7 +796,7 @@ def _real_main(argv=None): retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename)) else: retcode = ydl.download(all_urls) - except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached): + except DownloadCancelled: ydl.to_screen('Aborting remaining downloads') retcode = 101 diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py index 2449c7411..5270e8081 100644 --- a/yt_dlp/downloader/__init__.py +++ b/yt_dlp/downloader/__init__.py @@ -41,6 +41,7 @@ from .external import ( PROTOCOL_MAP = { 'rtmp': RtmpFD, + 'rtmpe': RtmpFD, 'rtmp_ffmpeg': FFmpegFD, 'm3u8_native': HlsFD, 'm3u8': FFmpegFD, diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 4528f3be5..d0c9c223f 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -93,6 +93,8 @@ class FileDownloader(object): def format_percent(percent): if percent is None: return '---.-%' + elif percent == 100: + return '100%' return '%6s' % ('%3.1f%%' % percent) @staticmethod @@ -247,11 +249,29 @@ class FileDownloader(object): self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines) else: self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet')) + self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color') def _finish_multiline_status(self): self._multiline.end() - def _report_progress_status(self, s): + _progress_styles = { + 'downloaded_bytes': 'light blue', + 'percent': 'light blue', + 'eta': 'yellow', + 'speed': 'green', + 'elapsed': 'bold white', + 'total_bytes': '', + 'total_bytes_estimate': '', + } + + def _report_progress_status(self, s, default_template): + for name, style in self._progress_styles.items(): + name = f'_{name}_str' + if name not in s: + continue + s[name] = self._format_progress(s[name], style) + s['_default_template'] = default_template % s + progress_dict = s.copy() progress_dict.pop('info_dict') progress_dict = {'info': s['info_dict'], 'progress': progress_dict} @@ -264,6 +284,10 @@ class FileDownloader(object): progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', progress_dict)) + def _format_progress(self, *args, **kwargs): + return self.ydl._format_text( + self._multiline.stream, self._multiline.allow_colors, *args, **kwargs) + def report_progress(self, s): if s['status'] == 'finished': if self.params.get('noprogress'): @@ -276,8 +300,7 @@ class FileDownloader(object): s['_elapsed_str'] = self.format_seconds(s['elapsed']) msg_template += ' in %(_elapsed_str)s' s['_percent_str'] = self.format_percent(100) - s['_default_template'] = msg_template % s - self._report_progress_status(s) + self._report_progress_status(s, msg_template) return if s['status'] != 'downloading': @@ -286,7 +309,7 @@ class FileDownloader(object): if s.get('eta') is not None: s['_eta_str'] = self.format_eta(s['eta']) else: - s['_eta_str'] = 'Unknown ETA' + s['_eta_str'] = 'Unknown' if s.get('total_bytes') and s.get('downloaded_bytes') is not None: s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) @@ -318,13 +341,12 @@ class FileDownloader(object): else: msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' else: - msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' + msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s' if s.get('fragment_index') and s.get('fragment_count'): msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)' elif s.get('fragment_index'): msg_template += ' (frag %(fragment_index)s)' - s['_default_template'] = msg_template % s - self._report_progress_status(s) + self._report_progress_status(s, msg_template) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 7c5d35f47..07b1b1861 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -4,7 +4,7 @@ from ..utils import int_or_none class AmazonStoreIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)' + _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)' _TESTS = [{ 'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/', diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 672ed1ffe..85ab478a6 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -472,8 +472,7 @@ class BBCCoUkIE(InfoExtractor): f['language_preference'] = -10 formats += version_formats for tag, subformats in (version_subtitles or {}).items(): - subtitles.setdefault(tag, []) - subtitles[tag] += subformats + subtitles.setdefault(tag, []).extend(subformats) return programme_id, title, description, duration, formats, subtitles except ExtractorError as ee: diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 483f93d67..e019ec6a8 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -346,7 +346,8 @@ class BiliBiliIE(InfoExtractor): def _extract_anthology_entries(self, bv_id, video_id, webpage): title = self._html_search_regex( (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1', - r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title', + r'(?s)<h1[^>]*>(?P<title>.+?)</h1>', + r'<title>(?P<title>.+?)</title>'), webpage, 'title', group='title') json_data = self._download_json( f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp', diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py new file mode 100644 index 000000000..77efdf45a --- /dev/null +++ b/yt_dlp/extractor/cableav.py @@ -0,0 +1,34 @@ +# coding: utf-8 +from .common import InfoExtractor + + +class CableAVIE(InfoExtractor): + _VALID_URL = r'https://cableav\.tv/(?P<id>[a-zA-Z0-9]+)' + _TESTS = [{ + 'url': 'https://cableav.tv/lS4iR9lWjN8/', + 'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18', + 'info_dict': { + 'id': 'lS4iR9lWjN8', + 'ext': 'mp4', + 'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV', + 'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家', + 'thumbnail': r're:^https?://.*\.jpg$', + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_url = self._og_search_video_url(webpage, secure=False) + + formats = self._extract_m3u8_formats(video_url, video_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'formats': formats, + } diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py index 7287677c1..51d30a321 100644 --- a/yt_dlp/extractor/canalalpha.py +++ b/yt_dlp/extractor/canalalpha.py @@ -11,7 +11,7 @@ from ..utils import ( class CanalAlphaIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*' + _VALID_URL = r'https?://(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*' _TESTS = [{ 'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021', diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py index 6bdc4f6bb..e6841fb8b 100644 --- a/yt_dlp/extractor/chingari.py +++ b/yt_dlp/extractor/chingari.py @@ -67,7 +67,7 @@ class ChingariBaseIE(InfoExtractor): class ChingariIE(ChingariBaseIE): - _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)' + _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)' _TESTS = [{ 'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb', 'info_dict': { @@ -102,7 +102,7 @@ class ChingariIE(ChingariBaseIE): class ChingariUserIE(ChingariBaseIE): - _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)' + _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)' _TESTS = [{ 'url': 'https://chingari.io/dada1023', 'playlist_mincount': 3, diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index fc28bca2e..37e69d409 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1079,7 +1079,8 @@ class InfoExtractor(object): def raise_login_required( self, msg='This video is only available for registered users', metadata_available=False, method='any'): - if metadata_available and self.get_param('ignore_no_formats_error'): + if metadata_available and ( + self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')): self.report_warning(msg) if method is not None: msg = '%s. %s' % (msg, self._LOGIN_HINTS[method]) @@ -1088,13 +1089,15 @@ class InfoExtractor(object): def raise_geo_restricted( self, msg='This video is not available from your location due to geo restriction', countries=None, metadata_available=False): - if metadata_available and self.get_param('ignore_no_formats_error'): + if metadata_available and ( + self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')): self.report_warning(msg) else: raise GeoRestrictedError(msg, countries=countries) def raise_no_formats(self, msg, expected=False, video_id=None): - if expected and self.get_param('ignore_no_formats_error'): + if expected and ( + self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')): self.report_warning(msg, video_id) elif isinstance(msg, ExtractorError): raise msg @@ -1535,10 +1538,10 @@ class InfoExtractor(object): default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality', 'res', 'fps', 'hdr:12', 'codec:vp9.2', 'size', 'br', 'asr', - 'proto', 'ext', 'hasaud', 'source', 'format_id') # These must not be aliases + 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr', 'height', 'width', 'proto', 'vext', 'abr', 'aext', - 'fps', 'fs_approx', 'source', 'format_id') + 'fps', 'fs_approx', 'source', 'id') settings = { 'vcodec': {'type': 'ordered', 'regex': True, @@ -1548,7 +1551,7 @@ class InfoExtractor(object): 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range', 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]}, 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol', - 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', '.*dash', 'ws|websocket', '', 'mms|rtsp', 'none', 'f4']}, + 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']}, 'vext': {'type': 'ordered', 'field': 'video_ext', 'order': ('mp4', 'webm', 'flv', '', 'none'), 'order_free': ('webm', 'mp4', 'flv', '', 'none')}, @@ -1583,7 +1586,7 @@ class InfoExtractor(object): 'res': {'type': 'multiple', 'field': ('height', 'width'), 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))}, - # Most of these exist only for compatibility reasons + # Deprecated 'dimension': {'type': 'alias', 'field': 'res'}, 'resolution': {'type': 'alias', 'field': 'res'}, 'extension': {'type': 'alias', 'field': 'ext'}, @@ -1592,7 +1595,7 @@ class InfoExtractor(object): 'video_bitrate': {'type': 'alias', 'field': 'vbr'}, 'audio_bitrate': {'type': 'alias', 'field': 'abr'}, 'framerate': {'type': 'alias', 'field': 'fps'}, - 'language_preference': {'type': 'alias', 'field': 'lang'}, # not named as 'language' because such a field exists + 'language_preference': {'type': 'alias', 'field': 'lang'}, 'protocol': {'type': 'alias', 'field': 'proto'}, 'source_preference': {'type': 'alias', 'field': 'source'}, 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}, @@ -1612,10 +1615,20 @@ class InfoExtractor(object): 'format_id': {'type': 'alias', 'field': 'id'}, } - _order = [] + def __init__(self, ie, field_preference): + self._order = [] + self.ydl = ie._downloader + self.evaluate_params(self.ydl.params, field_preference) + if ie.get_param('verbose'): + self.print_verbose_info(self.ydl.write_debug) def _get_field_setting(self, field, key): if field not in self.settings: + if key in ('forced', 'priority'): + return False + self.ydl.deprecation_warning( + f'Using arbitrary fields ({field}) for format sorting is deprecated ' + 'and may be removed in a future version') self.settings[field] = {} propObj = self.settings[field] if key not in propObj: @@ -1698,7 +1711,10 @@ class InfoExtractor(object): if field is None: continue if self._get_field_setting(field, 'type') == 'alias': - field = self._get_field_setting(field, 'field') + alias, field = field, self._get_field_setting(field, 'field') + self.ydl.deprecation_warning( + f'Format sorting alias {alias} is deprecated ' + f'and may be removed in a future version. Please use {field} instead') reverse = match.group('reverse') is not None closest = match.group('separator') == '~' limit_text = match.group('limit') @@ -1802,10 +1818,7 @@ class InfoExtractor(object): def _sort_formats(self, formats, field_preference=[]): if not formats: return - format_sort = self.FormatSort() # params and to_screen are taken from the downloader - format_sort.evaluate_params(self._downloader.params, field_preference) - if self.get_param('verbose', False): - format_sort.print_verbose_info(self._downloader.write_debug) + format_sort = self.FormatSort(self, field_preference) formats.sort(key=lambda f: format_sort.calculate_preference(f)) def _check_formats(self, formats, video_id): diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py index 868d8d27d..d49f1ca74 100644 --- a/yt_dlp/extractor/cozytv.py +++ b/yt_dlp/extractor/cozytv.py @@ -6,7 +6,7 @@ from ..utils import unified_strdate class CozyTVIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)' + _VALID_URL = r'https?://(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)' _TESTS = [{ 'url': 'https://cozy.tv/beardson/replays/2021-11-19_1', diff --git a/yt_dlp/extractor/discoverynetworks.py b/yt_dlp/extractor/discoverynetworks.py deleted file mode 100644 index 4f8bdf0b9..000000000 --- a/yt_dlp/extractor/discoverynetworks.py +++ /dev/null @@ -1,41 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - - -from .dplay import DPlayIE - - -class DiscoveryNetworksDeIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)' - - _TESTS = [{ - 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', - 'info_dict': { - 'id': '78867', - 'ext': 'mp4', - 'title': 'Die Welt da draußen', - 'description': 'md5:61033c12b73286e409d99a41742ef608', - 'timestamp': 1554069600, - 'upload_date': '20190331', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', - 'only_matching': True, - }, { - 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', - 'only_matching': True, - }, { - 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/', - 'only_matching': True, - }] - - def _real_extract(self, url): - domain, programme, alternate_id = self._match_valid_url(url).groups() - country = 'GB' if domain == 'dplay.co.uk' else 'DE' - realm = 'questuk' if country == 'GB' else domain.replace('.', '') - return self._get_disco_api_info( - url, '%s/%s' % (programme, alternate_id), - 'sonic-eu1-prod.disco-api.com', realm, country) diff --git a/yt_dlp/extractor/discoveryplusindia.py b/yt_dlp/extractor/discoveryplusindia.py deleted file mode 100644 index 8ec418a97..000000000 --- a/yt_dlp/extractor/discoveryplusindia.py +++ /dev/null @@ -1,97 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json - -from ..compat import compat_str -from ..utils import try_get -from .common import InfoExtractor -from .dplay import DPlayIE - - -class DiscoveryPlusIndiaIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayIE._PATH_REGEX - _TESTS = [{ - 'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE', - 'info_dict': { - 'id': '27104', - 'ext': 'mp4', - 'display_id': 'how-do-they-do-it/fugu-and-more', - 'title': 'Fugu and More', - 'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.', - 'duration': 1319, - 'timestamp': 1582309800, - 'upload_date': '20200221', - 'series': 'How Do They Do It?', - 'season_number': 8, - 'episode_number': 2, - 'creator': 'Discovery Channel', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Cookies (not necessarily logged in) are needed' - }] - - def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers['x-disco-params'] = 'realm=%s' % realm - headers['x-disco-client'] = 'WEB:UNKNOWN:dplus-india:17.0.0' - - def _download_video_playback_info(self, disco_base, video_id, headers): - return self._download_json( - disco_base + 'playback/v3/videoPlaybackInfo', - video_id, headers=headers, data=json.dumps({ - 'deviceInfo': { - 'adBlocker': False, - }, - 'videoId': video_id, - }).encode('utf-8'))['data']['attributes']['streaming'] - - def _real_extract(self, url): - display_id = self._match_id(url) - return self._get_disco_api_info( - url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in') - - -class DiscoveryPlusIndiaShowIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)' - _TESTS = [{ - 'url': 'https://www.discoveryplus.in/show/how-do-they-do-it', - 'playlist_mincount': 140, - 'info_dict': { - 'id': 'how-do-they-do-it', - }, - }] - - def _entries(self, show_name): - headers = { - 'x-disco-client': 'WEB:UNKNOWN:dplus-india:prod', - 'x-disco-params': 'realm=dplusindia', - 'referer': 'https://www.discoveryplus.in/', - } - show_url = 'https://ap2-prod-direct.discoveryplus.in/cms/routes/show/{}?include=default'.format(show_name) - show_json = self._download_json(show_url, - video_id=show_name, - headers=headers)['included'][4]['attributes']['component'] - show_id = show_json['mandatoryParams'].split('=')[-1] - season_url = 'https://ap2-prod-direct.discoveryplus.in/content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}' - for season in show_json['filters'][0]['options']: - season_id = season['id'] - total_pages, page_num = 1, 0 - while page_num < total_pages: - season_json = self._download_json(season_url.format(season_id, show_id, compat_str(page_num + 1)), - video_id=show_id, headers=headers, - note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else '')) - if page_num == 0: - total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1 - episodes_json = season_json['data'] - for episode in episodes_json: - video_id = episode['attributes']['path'] - yield self.url_result( - 'https://discoveryplus.in/videos/%s' % video_id, - ie=DiscoveryPlusIndiaIE.ie_key(), video_id=video_id) - page_num += 1 - - def _real_extract(self, url): - show_name = self._match_valid_url(url).group('show_name') - return self.playlist_result(self._entries(show_name), playlist_id=show_name) diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index 525c8e243..f5d6540c0 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import uuid from .common import InfoExtractor from ..compat import compat_HTTPError @@ -11,12 +12,172 @@ from ..utils import ( float_or_none, int_or_none, strip_or_none, + try_get, unified_timestamp, ) -class DPlayIE(InfoExtractor): +class DPlayBaseIE(InfoExtractor): _PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)' + _auth_token_cache = {} + + def _get_auth(self, disco_base, display_id, realm, needs_device_id=True): + key = (disco_base, realm) + st = self._get_cookies(disco_base).get('st') + token = (st and st.value) or self._auth_token_cache.get(key) + + if not token: + query = {'realm': realm} + if needs_device_id: + query['deviceId'] = uuid.uuid4().hex + token = self._download_json( + disco_base + 'token', display_id, 'Downloading token', + query=query)['data']['attributes']['token'] + + # Save cache only if cookies are not being set + if not self._get_cookies(disco_base).get('st'): + self._auth_token_cache[key] = token + + return f'Bearer {token}' + + def _process_errors(self, e, geo_countries): + info = self._parse_json(e.cause.read().decode('utf-8'), None) + error = info['errors'][0] + error_code = error.get('code') + if error_code == 'access.denied.geoblocked': + self.raise_geo_restricted(countries=geo_countries) + elif error_code in ('access.denied.missingpackage', 'invalid.token'): + raise ExtractorError( + 'This video is only available for registered users. You may want to use --cookies.', expected=True) + raise ExtractorError(info['errors'][0]['detail'], expected=True) + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers['Authorization'] = self._get_auth(disco_base, display_id, realm, False) + + def _download_video_playback_info(self, disco_base, video_id, headers): + streaming = self._download_json( + disco_base + 'playback/videoPlaybackInfo/' + video_id, + video_id, headers=headers)['data']['attributes']['streaming'] + streaming_list = [] + for format_id, format_dict in streaming.items(): + streaming_list.append({ + 'type': format_id, + 'url': format_dict.get('url'), + }) + return streaming_list + + def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''): + geo_countries = [country.upper()] + self._initialize_geo_bypass({ + 'countries': geo_countries, + }) + disco_base = 'https://%s/' % disco_host + headers = { + 'Referer': url, + } + self._update_disco_api_headers(headers, disco_base, display_id, realm) + try: + video = self._download_json( + disco_base + 'content/videos/' + display_id, display_id, + headers=headers, query={ + 'fields[channel]': 'name', + 'fields[image]': 'height,src,width', + 'fields[show]': 'name', + 'fields[tag]': 'name', + 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration', + 'include': 'images,primaryChannel,show,tags' + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + self._process_errors(e, geo_countries) + raise + video_id = video['data']['id'] + info = video['data']['attributes'] + title = info['name'].strip() + formats = [] + subtitles = {} + try: + streaming = self._download_video_playback_info( + disco_base, video_id, headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self._process_errors(e, geo_countries) + raise + for format_dict in streaming: + if not isinstance(format_dict, dict): + continue + format_url = format_dict.get('url') + if not format_url: + continue + format_id = format_dict.get('type') + ext = determine_ext(format_url) + if format_id == 'dash' or ext == 'mpd': + dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles( + format_url, display_id, mpd_id='dash', fatal=False) + formats.extend(dash_fmts) + subtitles = self._merge_subtitles(subtitles, dash_subs) + elif format_id == 'hls' or ext == 'm3u8': + m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles( + format_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False) + formats.extend(m3u8_fmts) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + self._sort_formats(formats) + + creator = series = None + tags = [] + thumbnails = [] + included = video.get('included') or [] + if isinstance(included, list): + for e in included: + attributes = e.get('attributes') + if not attributes: + continue + e_type = e.get('type') + if e_type == 'channel': + creator = attributes.get('name') + elif e_type == 'image': + src = attributes.get('src') + if src: + thumbnails.append({ + 'url': src, + 'width': int_or_none(attributes.get('width')), + 'height': int_or_none(attributes.get('height')), + }) + if e_type == 'show': + series = attributes.get('name') + elif e_type == 'tag': + name = attributes.get('name') + if name: + tags.append(name) + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': strip_or_none(info.get('description')), + 'duration': float_or_none(info.get('videoDuration'), 1000), + 'timestamp': unified_timestamp(info.get('publishStart')), + 'series': series, + 'season_number': int_or_none(info.get('seasonNumber')), + 'episode_number': int_or_none(info.get('episodeNumber')), + 'creator': creator, + 'tags': tags, + 'thumbnails': thumbnails, + 'formats': formats, + 'subtitles': subtitles, + 'http_headers': { + 'referer': domain, + }, + } + + +class DPlayIE(DPlayBaseIE): _VALID_URL = r'''(?x)https?:// (?P<domain> (?:www\.)?(?P<host>d @@ -26,7 +187,7 @@ class DPlayIE(InfoExtractor): ) )| (?P<subdomain_country>es|it)\.dplay\.com - )/[^/]+''' + _PATH_REGEX + )/[^/]+''' + DPlayBaseIE._PATH_REGEX _TESTS = [{ # non geo restricted, via secure api, unsigned download hls URL @@ -150,138 +311,6 @@ class DPlayIE(InfoExtractor): 'only_matching': True, }] - def _process_errors(self, e, geo_countries): - info = self._parse_json(e.cause.read().decode('utf-8'), None) - error = info['errors'][0] - error_code = error.get('code') - if error_code == 'access.denied.geoblocked': - self.raise_geo_restricted(countries=geo_countries) - elif error_code in ('access.denied.missingpackage', 'invalid.token'): - raise ExtractorError( - 'This video is only available for registered users. You may want to use --cookies.', expected=True) - raise ExtractorError(info['errors'][0]['detail'], expected=True) - - def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers['Authorization'] = 'Bearer ' + self._download_json( - disco_base + 'token', display_id, 'Downloading token', - query={ - 'realm': realm, - })['data']['attributes']['token'] - - def _download_video_playback_info(self, disco_base, video_id, headers): - streaming = self._download_json( - disco_base + 'playback/videoPlaybackInfo/' + video_id, - video_id, headers=headers)['data']['attributes']['streaming'] - streaming_list = [] - for format_id, format_dict in streaming.items(): - streaming_list.append({ - 'type': format_id, - 'url': format_dict.get('url'), - }) - return streaming_list - - def _get_disco_api_info(self, url, display_id, disco_host, realm, country): - geo_countries = [country.upper()] - self._initialize_geo_bypass({ - 'countries': geo_countries, - }) - disco_base = 'https://%s/' % disco_host - headers = { - 'Referer': url, - } - self._update_disco_api_headers(headers, disco_base, display_id, realm) - try: - video = self._download_json( - disco_base + 'content/videos/' + display_id, display_id, - headers=headers, query={ - 'fields[channel]': 'name', - 'fields[image]': 'height,src,width', - 'fields[show]': 'name', - 'fields[tag]': 'name', - 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration', - 'include': 'images,primaryChannel,show,tags' - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - self._process_errors(e, geo_countries) - raise - video_id = video['data']['id'] - info = video['data']['attributes'] - title = info['name'].strip() - formats = [] - try: - streaming = self._download_video_playback_info( - disco_base, video_id, headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - self._process_errors(e, geo_countries) - raise - for format_dict in streaming: - if not isinstance(format_dict, dict): - continue - format_url = format_dict.get('url') - if not format_url: - continue - format_id = format_dict.get('type') - ext = determine_ext(format_url) - if format_id == 'dash' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - format_url, display_id, mpd_id='dash', fatal=False)) - elif format_id == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, display_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - }) - self._sort_formats(formats) - - creator = series = None - tags = [] - thumbnails = [] - included = video.get('included') or [] - if isinstance(included, list): - for e in included: - attributes = e.get('attributes') - if not attributes: - continue - e_type = e.get('type') - if e_type == 'channel': - creator = attributes.get('name') - elif e_type == 'image': - src = attributes.get('src') - if src: - thumbnails.append({ - 'url': src, - 'width': int_or_none(attributes.get('width')), - 'height': int_or_none(attributes.get('height')), - }) - if e_type == 'show': - series = attributes.get('name') - elif e_type == 'tag': - name = attributes.get('name') - if name: - tags.append(name) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': strip_or_none(info.get('description')), - 'duration': float_or_none(info.get('videoDuration'), 1000), - 'timestamp': unified_timestamp(info.get('publishStart')), - 'series': series, - 'season_number': int_or_none(info.get('seasonNumber')), - 'episode_number': int_or_none(info.get('episodeNumber')), - 'creator': creator, - 'tags': tags, - 'thumbnails': thumbnails, - 'formats': formats, - } - def _real_extract(self, url): mobj = self._match_valid_url(url) display_id = mobj.group('id') @@ -289,11 +318,11 @@ class DPlayIE(InfoExtractor): country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country') host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com' return self._get_disco_api_info( - url, display_id, host, 'dplay' + country, country) + url, display_id, host, 'dplay' + country, country, domain) -class HGTVDeIE(DPlayIE): - _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX +class HGTVDeIE(DPlayBaseIE): + _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/', 'info_dict': { @@ -318,8 +347,8 @@ class HGTVDeIE(DPlayIE): url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de') -class DiscoveryPlusIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?:\w{2}/)?video' + DPlayIE._PATH_REGEX +class DiscoveryPlusIE(DPlayBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family', 'info_dict': { @@ -369,7 +398,7 @@ class DiscoveryPlusIE(DPlayIE): class ScienceChannelIE(DiscoveryPlusIE): - _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayIE._PATH_REGEX + _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine', 'info_dict': { @@ -389,7 +418,7 @@ class ScienceChannelIE(DiscoveryPlusIE): class DIYNetworkIE(DiscoveryPlusIE): - _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayIE._PATH_REGEX + _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas', 'info_dict': { @@ -409,7 +438,7 @@ class DIYNetworkIE(DiscoveryPlusIE): class AnimalPlanetIE(DiscoveryPlusIE): - _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayIE._PATH_REGEX + _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown', 'info_dict': { @@ -426,3 +455,159 @@ class AnimalPlanetIE(DiscoveryPlusIE): _PRODUCT = 'apl' _API_URL = 'us1-prod-direct.animalplanet.com' + + +class DiscoveryPlusIndiaIE(DPlayBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE', + 'info_dict': { + 'id': '27104', + 'ext': 'mp4', + 'display_id': 'how-do-they-do-it/fugu-and-more', + 'title': 'Fugu and More', + 'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.', + 'duration': 1319, + 'timestamp': 1582309800, + 'upload_date': '20200221', + 'series': 'How Do They Do It?', + 'season_number': 8, + 'episode_number': 2, + 'creator': 'Discovery Channel', + }, + 'params': { + 'skip_download': True, + } + }] + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': 'realm=%s' % realm, + 'x-disco-client': 'WEB:UNKNOWN:dplus-india:17.0.0', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) + + def _download_video_playback_info(self, disco_base, video_id, headers): + return self._download_json( + disco_base + 'playback/v3/videoPlaybackInfo', + video_id, headers=headers, data=json.dumps({ + 'deviceInfo': { + 'adBlocker': False, + }, + 'videoId': video_id, + }).encode('utf-8'))['data']['attributes']['streaming'] + + def _real_extract(self, url): + display_id = self._match_id(url) + return self._get_disco_api_info( + url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in', 'https://www.discoveryplus.in/') + + +class DiscoveryNetworksDeIE(DPlayBaseIE): + _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)' + + _TESTS = [{ + 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', + 'info_dict': { + 'id': '78867', + 'ext': 'mp4', + 'title': 'Die Welt da draußen', + 'description': 'md5:61033c12b73286e409d99a41742ef608', + 'timestamp': 1554069600, + 'upload_date': '20190331', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', + 'only_matching': True, + }, { + 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', + 'only_matching': True, + }, { + 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/', + 'only_matching': True, + }] + + def _real_extract(self, url): + domain, programme, alternate_id = self._match_valid_url(url).groups() + country = 'GB' if domain == 'dplay.co.uk' else 'DE' + realm = 'questuk' if country == 'GB' else domain.replace('.', '') + return self._get_disco_api_info( + url, '%s/%s' % (programme, alternate_id), + 'sonic-eu1-prod.disco-api.com', realm, country) + + +class DiscoveryPlusShowBaseIE(DPlayBaseIE): + + def _entries(self, show_name): + headers = { + 'x-disco-client': self._X_CLIENT, + 'x-disco-params': f'realm={self._REALM}', + 'referer': self._DOMAIN, + 'Authentication': self._get_auth(self._BASE_API, None, self._REALM), + } + show_json = self._download_json( + f'{self._BASE_API}cms/routes/{self._SHOW_STR}/{show_name}?include=default', + video_id=show_name, headers=headers)['included'][self._INDEX]['attributes']['component'] + show_id = show_json['mandatoryParams'].split('=')[-1] + season_url = self._BASE_API + 'content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}' + for season in show_json['filters'][0]['options']: + season_id = season['id'] + total_pages, page_num = 1, 0 + while page_num < total_pages: + season_json = self._download_json( + season_url.format(season_id, show_id, str(page_num + 1)), show_name, headers=headers, + note='Downloading season %s JSON metadata%s' % (season_id, ' page %d' % page_num if page_num else '')) + if page_num == 0: + total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1 + episodes_json = season_json['data'] + for episode in episodes_json: + video_id = episode['attributes']['path'] + yield self.url_result( + '%svideos/%s' % (self._DOMAIN, video_id), + ie=self._VIDEO_IE.ie_key(), video_id=video_id) + page_num += 1 + + def _real_extract(self, url): + show_name = self._match_valid_url(url).group('show_name') + return self.playlist_result(self._entries(show_name), playlist_id=show_name) + + +class DiscoveryPlusItalyShowIE(DiscoveryPlusShowBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.it/programmi/(?P<show_name>[^/]+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.discoveryplus.it/programmi/deal-with-it-stai-al-gioco', + 'playlist_mincount': 168, + 'info_dict': { + 'id': 'deal-with-it-stai-al-gioco', + }, + }] + + _BASE_API = 'https://disco-api.discoveryplus.it/' + _DOMAIN = 'https://www.discoveryplus.it/' + _X_CLIENT = 'WEB:UNKNOWN:dplay-client:2.6.0' + _REALM = 'dplayit' + _SHOW_STR = 'programmi' + _INDEX = 1 + _VIDEO_IE = DPlayIE + + +class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.discoveryplus.in/show/how-do-they-do-it', + 'playlist_mincount': 140, + 'info_dict': { + 'id': 'how-do-they-do-it', + }, + }] + + _BASE_API = 'https://ap2-prod-direct.discoveryplus.in/' + _DOMAIN = 'https://www.discoveryplus.in/' + _X_CLIENT = 'WEB:UNKNOWN:dplus-india:prod' + _REALM = 'dplusindia' + _SHOW_STR = 'show' + _INDEX = 4 + _VIDEO_IE = DiscoveryPlusIndiaIE diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py index b4e544d4f..cd19325bc 100644 --- a/yt_dlp/extractor/epicon.py +++ b/yt_dlp/extractor/epicon.py @@ -8,7 +8,7 @@ from ..utils import ExtractorError class EpiconIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.epicon.in/documentaries/air-battle-of-srinagar', 'info_dict': { @@ -84,7 +84,7 @@ class EpiconIE(InfoExtractor): class EpiconSeriesIE(InfoExtractor): - _VALID_URL = r'(?!.*season)(?:https?://)(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)' + _VALID_URL = r'(?!.*season)https?://(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.epicon.in/tv-shows/1-of-something', 'playlist_mincount': 5, diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py index 3980c2349..2759e7436 100644 --- a/yt_dlp/extractor/euscreen.py +++ b/yt_dlp/extractor/euscreen.py @@ -10,7 +10,7 @@ from ..utils import ( class EUScreenIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)' + _VALID_URL = r'https?://(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)' _TESTS = [{ 'url': 'https://euscreen.eu/item.html?id=EUS_0EBCBF356BFC4E12A014023BA41BD98C', diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a4baad2da..0741a728f 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -188,6 +188,7 @@ from .businessinsider import BusinessInsiderIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE +from .cableav import CableAVIE from .cam4 import CAM4IE from .camdemy import ( CamdemyIE, @@ -341,10 +342,6 @@ from .democracynow import DemocracynowIE from .dfb import DFBIE from .dhm import DHMIE from .digg import DiggIE -from .discoveryplusindia import ( - DiscoveryPlusIndiaIE, - DiscoveryPlusIndiaShowIE, -) from .dotsub import DotsubIE from .douyutv import ( DouyuShowIE, @@ -356,7 +353,11 @@ from .dplay import ( HGTVDeIE, ScienceChannelIE, DIYNetworkIE, - AnimalPlanetIE + AnimalPlanetIE, + DiscoveryPlusIndiaIE, + DiscoveryNetworksDeIE, + DiscoveryPlusItalyShowIE, + DiscoveryPlusIndiaShowIE, ) from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE @@ -378,7 +379,6 @@ from .discoverygo import ( DiscoveryGoIE, DiscoveryGoPlaylistIE, ) -from .discoverynetworks import DiscoveryNetworksDeIE from .discoveryvr import DiscoveryVRIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE @@ -1216,7 +1216,11 @@ from .redbulltv import ( RedBullIE, ) from .reddit import RedditIE -from .redgifs import RedGifsIE +from .redgifs import ( + RedGifsIE, + RedGifsSearchIE, + RedGifsUserIE, +) from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py index bde6e8624..9ba0b1ca1 100644 --- a/yt_dlp/extractor/gab.py +++ b/yt_dlp/extractor/gab.py @@ -15,7 +15,7 @@ from ..utils import ( class GabTVIE(InfoExtractor): - _VALID_URL = r'(?:https?://)tv.gab.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)' + _VALID_URL = r'https?://tv\.gab\.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)' _TESTS = [{ 'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488', 'info_dict': { diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py index a7792a5e0..58cd59511 100644 --- a/yt_dlp/extractor/gronkh.py +++ b/yt_dlp/extractor/gronkh.py @@ -6,7 +6,7 @@ from ..utils import unified_strdate class GronkhIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)' _TESTS = [{ 'url': 'https://gronkh.tv/stream/536', diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 0bdf772a1..de2b30cf7 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -296,7 +296,7 @@ class HotStarPlaylistIE(HotStarBaseIE): class HotStarSeriesIE(HotStarBaseIE): IE_NAME = 'hotstar:series' - _VALID_URL = r'(?P<url>(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))' + _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))' _TESTS = [{ 'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646', 'info_dict': { diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 1fcf97a19..2ec24f3e7 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -63,6 +63,10 @@ class InstagramBaseIE(InfoExtractor): if not login.get('authenticated'): if login.get('message'): raise ExtractorError(f'Unable to login: {login["message"]}') + elif login.get('user'): + raise ExtractorError('Unable to login: Sorry, your password was incorrect. Please double-check your password.', expected=True) + elif login.get('user') is False: + raise ExtractorError('Unable to login: The username you entered doesn\'t belong to an account. Please check your username and try again.', expected=True) raise ExtractorError('Unable to login') InstagramBaseIE._IS_LOGGED_IN = True @@ -495,7 +499,7 @@ class InstagramUserIE(InstagramPlaylistBaseIE): class InstagramTagIE(InstagramPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)' - IE_DESC = 'Instagram hashtag search' + IE_DESC = 'Instagram hashtag search URLs' IE_NAME = 'instagram:tag' _TESTS = [{ 'url': 'https://instagram.com/explore/tags/lolcats', diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py index 1706b28a0..088db1cb0 100644 --- a/yt_dlp/extractor/koo.py +++ b/yt_dlp/extractor/koo.py @@ -8,7 +8,7 @@ from ..utils import ( class KooIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)' + _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)' _TESTS = [{ # Test for video in the comments 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde', 'info_dict': { diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py index 2d65787e2..1d6d4b804 100644 --- a/yt_dlp/extractor/mlssoccer.py +++ b/yt_dlp/extractor/mlssoccer.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class MLSSoccerIE(InfoExtractor): _VALID_DOMAINS = r'(?:(?:cfmontreal|intermiamicf|lagalaxy|lafc|houstondynamofc|dcunited|atlutd|mlssoccer|fcdallas|columbuscrew|coloradorapids|fccincinnati|chicagofirefc|austinfc|nashvillesc|whitecapsfc|sportingkc|soundersfc|sjearthquakes|rsl|timbers|philadelphiaunion|orlandocitysc|newyorkredbulls|nycfc)\.com|(?:torontofc)\.ca|(?:revolutionsoccer)\.net)' - _VALID_URL = r'(?:https?://)(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS + _VALID_URL = r'https?://(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS _TESTS = [{ 'url': 'https://www.mlssoccer.com/video/the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986#the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986', @@ -21,7 +21,6 @@ class MLSSoccerIE(InfoExtractor): 'uploader_id': '5530036772001', 'tags': ['club/canada'], 'is_live': False, - 'duration_string': '5:50', 'upload_date': '20211007', 'filesize_approx': 255193528.83200002 }, diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py index dcd26388a..09fadf8d9 100644 --- a/yt_dlp/extractor/musescore.py +++ b/yt_dlp/extractor/musescore.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class MuseScoreIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)' + _VALID_URL = r'https?://(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)' _TESTS = [{ 'url': 'https://musescore.com/user/73797/scores/142975', 'info_dict': { @@ -13,7 +13,7 @@ class MuseScoreIE(InfoExtractor): 'ext': 'mp3', 'title': 'WA Mozart Marche Turque (Turkish March fingered)', 'description': 'md5:7ede08230e4eaabd67a4a98bb54d07be', - 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'PapyPiano', 'creator': 'Wolfgang Amadeus Mozart', } @@ -24,7 +24,7 @@ class MuseScoreIE(InfoExtractor): 'ext': 'mp3', 'title': 'Sweet Child O\' Mine – Guns N\' Roses sweet child', 'description': 'md5:4dca71191c14abc312a0a4192492eace', - 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'roxbelviolin', 'creator': 'Guns N´Roses Arr. Roxbel Violin', } @@ -35,7 +35,7 @@ class MuseScoreIE(InfoExtractor): 'ext': 'mp3', 'title': 'Für Elise – Beethoven', 'description': 'md5:49515a3556d5ecaf9fa4b2514064ac34', - 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'ClassicMan', 'creator': 'Ludwig van Beethoven (1770–1827)', } diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py index 5874556e3..3c2afd838 100644 --- a/yt_dlp/extractor/mxplayer.py +++ b/yt_dlp/extractor/mxplayer.py @@ -180,7 +180,7 @@ class MxplayerIE(InfoExtractor): class MxplayerShowIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])' + _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])' _TESTS = [{ 'url': 'https://www.mxplayer.in/show/watch-chakravartin-ashoka-samrat-series-online-a8f44e3cc0814b5601d17772cedf5417', 'playlist_mincount': 440, diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 4bcea33d5..b46ca293f 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -703,7 +703,7 @@ class NicovideoSearchURLIE(InfoExtractor): class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE): - IE_DESC = 'Nico video searches' + IE_DESC = 'Nico video search' IE_NAME = NicovideoSearchIE_NAME _SEARCH_KEY = 'nicosearch' _TESTS = [] @@ -714,7 +714,7 @@ class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE): class NicovideoSearchDateIE(NicovideoSearchIE): - IE_DESC = 'Nico video searches, newest first' + IE_DESC = 'Nico video search, newest first' IE_NAME = f'{NicovideoSearchIE_NAME}:date' _SEARCH_KEY = 'nicosearchdate' _TESTS = [{ diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py index 79501003d..826faadd2 100644 --- a/yt_dlp/extractor/onefootball.py +++ b/yt_dlp/extractor/onefootball.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class OneFootballIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)' _TESTS = [{ 'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334', diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py index d1d9911f7..07ac15b54 100644 --- a/yt_dlp/extractor/planetmarathi.py +++ b/yt_dlp/extractor/planetmarathi.py @@ -9,7 +9,7 @@ from ..utils import ( class PlanetMarathiIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)' + _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)' _TESTS = [{ 'url': 'https://www.planetmarathi.com/titles/ek-unad-divas', 'playlist_mincount': 2, diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py index 1d832a679..9e9867ba5 100644 --- a/yt_dlp/extractor/projectveritas.py +++ b/yt_dlp/extractor/projectveritas.py @@ -10,7 +10,7 @@ from ..utils import ( class ProjectVeritasIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/', 'info_dict': { diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index 1257d1344..55196b768 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -1,21 +1,94 @@ # coding: utf-8 +import functools from .common import InfoExtractor +from ..compat import compat_parse_qs from ..utils import ( ExtractorError, int_or_none, qualities, try_get, + OnDemandPagedList, ) -class RedGifsIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|thumbs2?)\.)?redgifs\.com/(?:watch/)?(?P<id>[^-/?#\.]+)' +class RedGifsBaseInfoExtractor(InfoExtractor): _FORMATS = { 'gif': 250, 'sd': 480, 'hd': None, } + + def _parse_gif_data(self, gif_data): + video_id = gif_data.get('id') + quality = qualities(tuple(self._FORMATS.keys())) + + orig_height = int_or_none(gif_data.get('height')) + aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width']) + + formats = [] + for format_id, height in self._FORMATS.items(): + video_url = gif_data['urls'].get(format_id) + if not video_url: + continue + height = min(orig_height, height or orig_height) + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'width': height * aspect_ratio if aspect_ratio else None, + 'height': height, + 'quality': quality(format_id), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'webpage_url': f'https://redgifs.com/watch/{video_id}', + 'ie_key': RedGifsIE.ie_key(), + 'extractor': 'RedGifs', + 'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs', + 'timestamp': int_or_none(gif_data.get('createDate')), + 'uploader': gif_data.get('userName'), + 'duration': int_or_none(gif_data.get('duration')), + 'view_count': int_or_none(gif_data.get('views')), + 'like_count': int_or_none(gif_data.get('likes')), + 'categories': gif_data.get('tags') or [], + 'tags': gif_data.get('tags'), + 'age_limit': 18, + 'formats': formats, + } + + def _call_api(self, ep, video_id, *args, **kwargs): + data = self._download_json( + f'https://api.redgifs.com/v2/{ep}', video_id, *args, **kwargs) + if 'error' in data: + raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id) + return data + + def _fetch_page(self, ep, video_id, query, page): + query['page'] = page + 1 + data = self._call_api( + ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}') + + for entry in data['gifs']: + yield self._parse_gif_data(entry) + + def _prepare_api_query(self, query, fields): + api_query = [ + (field_name, query.get(field_name, (default,))[0]) + for field_name, default in fields.items()] + + return {key: val for key, val in api_query if val is not None} + + def _paged_entries(self, ep, item_id, query, fields): + page = int_or_none(query.get('page', (None,))[0]) + page_fetcher = functools.partial( + self._fetch_page, ep, item_id, self._prepare_api_query(query, fields)) + return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE) + + +class RedGifsIE(RedGifsBaseInfoExtractor): + _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)' _TESTS = [{ 'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent', 'info_dict': { @@ -50,45 +123,110 @@ class RedGifsIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url).lower() + video_info = self._call_api( + f'gifs/{video_id}', video_id, note='Downloading video info') + return self._parse_gif_data(video_info['gif']) - video_info = self._download_json( - 'https://api.redgifs.com/v2/gifs/%s' % video_id, - video_id, 'Downloading video info') - if 'error' in video_info: - raise ExtractorError(f'RedGifs said: {video_info["error"]}', expected=True) - gif = video_info['gif'] - urls = gif['urls'] +class RedGifsSearchIE(RedGifsBaseInfoExtractor): + IE_DESC = 'Redgifs search' + _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)' + _PAGE_SIZE = 80 + _TESTS = [ + { + 'url': 'https://www.redgifs.com/browse?tags=Lesbian', + 'info_dict': { + 'id': 'tags=Lesbian', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by trending' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian', + 'info_dict': { + 'id': 'type=g&order=latest&tags=Lesbian', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by latest' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2', + 'info_dict': { + 'id': 'type=g&order=latest&tags=Lesbian&page=2', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by latest' + }, + 'playlist_count': 80, + } + ] - quality = qualities(tuple(self._FORMATS.keys())) + def _real_extract(self, url): + query_str = self._match_valid_url(url).group('query') + query = compat_parse_qs(query_str) + if not query.get('tags'): + raise ExtractorError('Invalid query tags', expected=True) - orig_height = int_or_none(gif.get('height')) - aspect_ratio = try_get(gif, lambda x: orig_height / x['width']) + tags = query.get('tags')[0] + order = query.get('order', ('trending',))[0] - formats = [] - for format_id, height in self._FORMATS.items(): - video_url = urls.get(format_id) - if not video_url: - continue - height = min(orig_height, height or orig_height) - formats.append({ - 'url': video_url, - 'format_id': format_id, - 'width': height * aspect_ratio if aspect_ratio else None, - 'height': height, - 'quality': quality(format_id), - }) - self._sort_formats(formats) + query['search_text'] = [tags] + entries = self._paged_entries('gifs/search', query_str, query, { + 'search_text': None, + 'order': 'trending', + 'type': None, + }) - return { - 'id': video_id, - 'title': ' '.join(gif.get('tags') or []) or 'RedGifs', - 'timestamp': int_or_none(gif.get('createDate')), - 'uploader': gif.get('userName'), - 'duration': int_or_none(gif.get('duration')), - 'view_count': int_or_none(gif.get('views')), - 'like_count': int_or_none(gif.get('likes')), - 'categories': gif.get('tags') or [], - 'age_limit': 18, - 'formats': formats, + return self.playlist_result( + entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}') + + +class RedGifsUserIE(RedGifsBaseInfoExtractor): + IE_DESC = 'Redgifs user' + _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?' + _PAGE_SIZE = 30 + _TESTS = [ + { + 'url': 'https://www.redgifs.com/users/lamsinka89', + 'info_dict': { + 'id': 'lamsinka89', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by recent' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/users/lamsinka89?page=3', + 'info_dict': { + 'id': 'lamsinka89?page=3', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by recent' + }, + 'playlist_count': 30, + }, + { + 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g', + 'info_dict': { + 'id': 'lamsinka89?order=best&type=g', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by best' + }, + 'playlist_mincount': 100, } + ] + + def _real_extract(self, url): + username, query_str = self._match_valid_url(url).group('username', 'query') + playlist_id = f'{username}?{query_str}' if query_str else username + + query = compat_parse_qs(query_str) + order = query.get('order', ('recent',))[0] + + entries = self._paged_entries(f'users/{username}/search', playlist_id, query, { + 'order': 'recent', + 'type': None, + }) + + return self.playlist_result( + entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}') diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py index 142d5dc3a..00a5b00cd 100644 --- a/yt_dlp/extractor/shemaroome.py +++ b/yt_dlp/extractor/shemaroome.py @@ -16,7 +16,7 @@ from ..utils import ( class ShemarooMeIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)' _TESTS = [{ 'url': 'https://www.shemaroome.com/movies/dil-hai-tumhaara', 'info_dict': { @@ -78,7 +78,7 @@ class ShemarooMeIE(InfoExtractor): iv = [0] * 16 m3u8_url = intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv)) m3u8_url = m3u8_url[:-compat_ord((m3u8_url[-1]))].decode('ascii') - formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']}) + formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']}) self._sort_formats(formats) release_date = self._html_search_regex( @@ -91,6 +91,7 @@ class ShemarooMeIE(InfoExtractor): subtitles.setdefault('EN', []).append({ 'url': self._proto_relative_url(sub_url), }) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) description = self._html_search_regex(r'(?s)>Synopsis(</.+?)</', webpage, 'description', fatal=False) return { diff --git a/yt_dlp/extractor/skynewsau.py b/yt_dlp/extractor/skynewsau.py index b1d77951e..8e079ee31 100644 --- a/yt_dlp/extractor/skynewsau.py +++ b/yt_dlp/extractor/skynewsau.py @@ -9,7 +9,7 @@ from ..utils import ( class SkyNewsAUIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)' _TESTS = [{ 'url': 'https://www.skynews.com.au/world-news/united-states/incredible-vision-shows-lava-overflowing-from-spains-la-palma-volcano/video/0f4c6243d6903502c01251f228b91a71', diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py index 60e84529d..fe6a9554a 100644 --- a/yt_dlp/extractor/threespeak.py +++ b/yt_dlp/extractor/threespeak.py @@ -11,7 +11,7 @@ from ..utils import ( class ThreeSpeakIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)' + _VALID_URL = r'https?://(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)' _TESTS = [{ 'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy', @@ -75,7 +75,7 @@ class ThreeSpeakIE(InfoExtractor): class ThreeSpeakUserIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)' + _VALID_URL = r'https?://(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)' _TESTS = [{ 'url': 'https://3speak.tv/user/theycallmedan', diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index a0f0cc31c..9d49840a5 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -17,6 +17,11 @@ class TrovoBaseIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/' _HEADERS = {'Origin': 'https://trovo.live'} + def _call_api(self, video_id, query=None, data=None): + return self._download_json( + 'https://gql.trovo.live/', video_id, query=query, data=data, + headers={'Accept': 'application/json'}) + def _extract_streamer_info(self, data): streamer_info = data.get('streamerInfo') or {} username = streamer_info.get('userName') @@ -32,9 +37,8 @@ class TrovoIE(TrovoBaseIE): def _real_extract(self, url): username = self._match_id(url) - live_info = self._download_json( - 'https://gql.trovo.live/', username, query={ - 'query': '''{ + live_info = self._call_api(username, query={ + 'query': '''{ getLiveInfo(params: {userName: "%s"}) { isLive programInfo { @@ -53,7 +57,7 @@ class TrovoIE(TrovoBaseIE): } } }''' % username, - })['data']['getLiveInfo'] + })['data']['getLiveInfo'] if live_info.get('isLive') == 0: raise ExtractorError('%s is offline' % username, expected=True) program_info = live_info['programInfo'] @@ -111,15 +115,14 @@ class TrovoVodIE(TrovoBaseIE): def _real_extract(self, url): vid = self._match_id(url) - resp = self._download_json( - 'https://gql.trovo.live/', vid, data=json.dumps([{ - 'query': '''{ + resp = self._call_api(vid, data=json.dumps([{ + 'query': '''{ batchGetVodDetailInfo(params: {vids: ["%s"]}) { VodDetailInfos } }''' % vid, - }, { - 'query': '''{ + }, { + 'query': '''{ getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) { commentList { author { @@ -133,9 +136,7 @@ class TrovoVodIE(TrovoBaseIE): } } }''' % vid, - }]).encode(), headers={ - 'Content-Type': 'application/json', - }) + }]).encode()) vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid] vod_info = vod_detail_info['vodInfo'] title = vod_info['title'] @@ -215,7 +216,7 @@ class TrovoChannelBaseIE(InfoExtractor): def _real_extract(self, url): id = self._match_id(url) - uid = str(self._download_json('https://gql.trovo.live/', id, query={ + uid = str(self._call_api(id, query={ 'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id })['data']['getLiveInfo']['streamerInfo']['uid']) return self.playlist_result(self._entries(uid), playlist_id=uid) @@ -237,7 +238,7 @@ class TrovoChannelVodIE(TrovoChannelBaseIE): _TYPE = 'video' def _get_vod_json(self, page, uid): - return self._download_json('https://gql.trovo.live/', uid, query={ + return self._call_api(uid, query={ 'query': self._QUERY % (page, uid) })['data']['getChannelLtvVideoInfos'] @@ -258,6 +259,6 @@ class TrovoChannelClipIE(TrovoChannelBaseIE): _TYPE = 'clip' def _get_vod_json(self, page, uid): - return self._download_json('https://gql.trovo.live/', uid, query={ + return self._call_api(uid, query={ 'query': self._QUERY % (page, uid) })['data']['getChannelClipVideoInfos'] diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py index 4a25f0c55..4986635f2 100644 --- a/yt_dlp/extractor/utreon.py +++ b/yt_dlp/extractor/utreon.py @@ -13,7 +13,7 @@ from ..utils import ( class UtreonIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?utreon.com/v/(?P<id>[a-zA-Z0-9_-]+)' + _VALID_URL = r'https?://(?:www\.)?utreon.com/v/(?P<id>[a-zA-Z0-9_-]+)' _TESTS = [{ 'url': 'https://utreon.com/v/z_I7ikQbuDw', 'info_dict': { diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index e2b86662b..27d5c969d 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -119,10 +119,9 @@ class VimeoBaseInfoExtractor(InfoExtractor): self._set_cookie('vimeo.com', name, value) def _vimeo_sort_formats(self, formats): - # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps - # at the same time without actual units specified. This lead to wrong sorting. - # But since yt-dlp prefers 'res,fps' anyway, 'field_preference' is not needed - self._sort_formats(formats) + # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps + # at the same time without actual units specified. + self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source')) def _parse_config(self, config, video_id): video_data = config['video'] @@ -140,6 +139,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): formats.append({ 'url': video_url, 'format_id': 'http-%s' % f.get('quality'), + 'source_preference': 10, 'width': int_or_none(f.get('width')), 'height': int_or_none(f.get('height')), 'fps': int_or_none(f.get('fps')), diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py index e2944ec63..a9b66b95c 100644 --- a/yt_dlp/extractor/voot.py +++ b/yt_dlp/extractor/voot.py @@ -15,7 +15,7 @@ class VootIE(InfoExtractor): _VALID_URL = r'''(?x) (?: voot:| - (?:https?://)(?:www\.)?voot\.com/? + https?://(?:www\.)?voot\.com/? (?: movies/[^/]+/| (?:shows|kids)/(?:[^/]+/){4} diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index ba135613b..e4854bead 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2514,7 +2514,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): query = parse_qs(fmt_url) throttled = False - if query.get('ratebypass') != ['yes'] and query.get('n'): + if query.get('n'): try: fmt_url = update_url_query(fmt_url, { 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)}) diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py index 536604167..462bc4efe 100644 --- a/yt_dlp/extractor/zee5.py +++ b/yt_dlp/extractor/zee5.py @@ -21,7 +21,7 @@ class Zee5IE(InfoExtractor): _VALID_URL = r'''(?x) (?: zee5:| - (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)? + https?://(?:www\.)?zee5\.com/(?:[^#?]+/)? (?: (?:tvshows|kids|zee5originals)(?:/[^#/?]+){3} |movies/[^#/?]+ @@ -174,7 +174,7 @@ class Zee5SeriesIE(InfoExtractor): _VALID_URL = r'''(?x) (?: zee5:series:| - (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)? + https?://(?:www\.)?zee5\.com/(?:[^#?]+/)? (?:tvshows|kids|zee5originals)(?:/[^#/?]+){2}/ ) (?P<id>[^#/?]+)/?(?:$|[?#]) diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index a6084ab82..0af891bd3 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -25,6 +25,8 @@ _ASSIGN_OPERATORS.append(('=', (lambda cur, right: right))) _NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*' +_MATCHING_PARENS = dict(zip('({[', ')}]')) + class JS_Break(ExtractorError): def __init__(self): @@ -88,26 +90,24 @@ class JSInterpreter(object): def _seperate(expr, delim=',', max_split=None): if not expr: return - parens = {'(': 0, '{': 0, '[': 0, ']': 0, '}': 0, ')': 0} - start, splits, pos, max_pos = 0, 0, 0, len(delim) - 1 + counters = {k: 0 for k in _MATCHING_PARENS.values()} + start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1 for idx, char in enumerate(expr): - if char in parens: - parens[char] += 1 - is_in_parens = (parens['['] - parens[']'] - or parens['('] - parens[')'] - or parens['{'] - parens['}']) - if char == delim[pos] and not is_in_parens: - if pos == max_pos: - pos = 0 - yield expr[start: idx - max_pos] - start = idx + 1 - splits += 1 - if max_split and splits >= max_split: - break - else: - pos += 1 - else: + if char in _MATCHING_PARENS: + counters[_MATCHING_PARENS[char]] += 1 + elif char in counters: + counters[char] -= 1 + if char != delim[pos] or any(counters.values()): pos = 0 + continue + elif pos != delim_len: + pos += 1 + continue + yield expr[start: idx - delim_len] + start, pos = idx + 1, 0 + splits += 1 + if max_split and splits >= max_split: + break yield expr[start:] @staticmethod diff --git a/yt_dlp/minicurses.py b/yt_dlp/minicurses.py index 699b1158a..c81153c1e 100644 --- a/yt_dlp/minicurses.py +++ b/yt_dlp/minicurses.py @@ -78,6 +78,7 @@ class MultilinePrinterBase: def __init__(self, stream=None, lines=1): self.stream = stream self.maximum = lines - 1 + self._HAVE_FULLCAP = supports_terminal_sequences(stream) def __enter__(self): return self @@ -124,7 +125,6 @@ class MultilinePrinter(MultilinePrinterBase): self.preserve_output = preserve_output self._lastline = self._lastlength = 0 self._movelock = Lock() - self._HAVE_FULLCAP = supports_terminal_sequences(self.stream) def lock(func): @functools.wraps(func) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 9da37af28..2750a0f7b 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -255,6 +255,16 @@ def parseOpts(overrideArguments=None): action='store_false', dest='extract_flat', help='Extract the videos of a playlist') general.add_option( + '--wait-for-video', + dest='wait_for_video', metavar='MIN[-MAX]', default=None, + help=( + 'Wait for scheduled streams to become available. ' + 'Pass the minimum number of seconds (or range) to wait between retries')) + general.add_option( + '--no-wait-for-video', + dest='wait_for_video', action='store_const', const=None, + help='Do not wait for scheduled streams (default)') + general.add_option( '--mark-watched', action='store_true', dest='mark_watched', default=False, help='Mark videos watched (even with --simulate). Currently only supported for YouTube') @@ -364,10 +374,6 @@ def parseOpts(overrideArguments=None): dest='rejecttitle', metavar='REGEX', help=optparse.SUPPRESS_HELP) selection.add_option( - '--max-downloads', - dest='max_downloads', metavar='NUMBER', type=int, default=None, - help='Abort after downloading NUMBER files') - selection.add_option( '--min-filesize', metavar='SIZE', dest='min_filesize', default=None, help='Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)') @@ -438,6 +444,14 @@ def parseOpts(overrideArguments=None): dest='download_archive', help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it') selection.add_option( + '--no-download-archive', + dest='download_archive', action="store_const", const=None, + help='Do not use archive file (default)') + selection.add_option( + '--max-downloads', + dest='max_downloads', metavar='NUMBER', type=int, default=None, + help='Abort after downloading NUMBER files') + selection.add_option( '--break-on-existing', action='store_true', dest='break_on_existing', default=False, help='Stop the download process when encountering a file that is in the archive') @@ -446,14 +460,18 @@ def parseOpts(overrideArguments=None): action='store_true', dest='break_on_reject', default=False, help='Stop the download process when encountering a file that has been filtered out') selection.add_option( + '--break-per-input', + action='store_true', dest='break_per_url', default=False, + help='Make --break-on-existing and --break-on-reject act only on the current input URL') + selection.add_option( + '--no-break-per-input', + action='store_false', dest='break_per_url', + help='--break-on-existing and --break-on-reject terminates the entire download queue') + selection.add_option( '--skip-playlist-after-errors', metavar='N', dest='skip_playlist_after_errors', default=None, type=int, help='Number of allowed failures until the rest of the playlist is skipped') selection.add_option( - '--no-download-archive', - dest='download_archive', action="store_const", const=None, - help='Do not use archive file (default)') - selection.add_option( '--include-ads', dest='include_ads', action='store_true', help=optparse.SUPPRESS_HELP) @@ -1132,7 +1150,7 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--cookies', dest='cookiefile', metavar='FILE', - help='File to read cookies from and dump cookie jar in') + help='Netscape formatted file to read cookies from and dump cookie jar in') filesystem.add_option( '--no-cookies', action='store_const', const=None, dest='cookiefile', metavar='FILE', @@ -1332,7 +1350,7 @@ def parseOpts(overrideArguments=None): 'Automatically correct known faults of the file. ' 'One of never (do nothing), warn (only emit a warning), ' 'detect_or_warn (the default; fix file if we can, warn otherwise), ' - 'force (try fixing even if file already exists')) + 'force (try fixing even if file already exists)')) postproc.add_option( '--prefer-avconv', '--no-prefer-ffmpeg', action='store_false', dest='prefer_ffmpeg', diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index b36716743..ab9eb6acf 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -9,6 +9,7 @@ from ..utils import ( _configuration_args, encodeFilename, PostProcessingError, + write_string, ) @@ -74,6 +75,11 @@ class PostProcessor(metaclass=PostProcessorMetaClass): if self._downloader: return self._downloader.report_warning(text, *args, **kwargs) + def deprecation_warning(self, text): + if self._downloader: + return self._downloader.deprecation_warning(text) + write_string(f'DeprecationWarning: {text}') + def report_error(self, text, *args, **kwargs): # Exists only for compatibility. Do not use if self._downloader: diff --git a/yt_dlp/postprocessor/exec.py b/yt_dlp/postprocessor/exec.py index 7a3cb4999..28a7c3d70 100644 --- a/yt_dlp/postprocessor/exec.py +++ b/yt_dlp/postprocessor/exec.py @@ -38,5 +38,10 @@ class ExecPP(PostProcessor): return [], info -class ExecAfterDownloadPP(ExecPP): # for backward compatibility - pass +# Deprecated +class ExecAfterDownloadPP(ExecPP): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.deprecation_warning( + 'yt_dlp.postprocessor.ExecAfterDownloadPP is deprecated ' + 'and may be removed in a future version. Use yt_dlp.postprocessor.ExecPP instead') diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index f712547a8..609f97e47 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -167,6 +167,13 @@ class FFmpegPostProcessor(PostProcessor): self.probe_basename = p break + if self.basename == 'avconv': + self.deprecation_warning( + 'Support for avconv is deprecated and may be removed in a future version. Use ffmpeg instead') + if self.probe_basename == 'avprobe': + self.deprecation_warning( + 'Support for avprobe is deprecated and may be removed in a future version. Use ffprobe instead') + @property def available(self): return self.basename is not None diff --git a/yt_dlp/postprocessor/metadataparser.py b/yt_dlp/postprocessor/metadataparser.py index 96aac9beb..54b2c5627 100644 --- a/yt_dlp/postprocessor/metadataparser.py +++ b/yt_dlp/postprocessor/metadataparser.py @@ -16,7 +16,7 @@ class MetadataParserPP(PostProcessor): for f in actions: action = f[0] assert isinstance(action, self.Actions) - self._actions.append(getattr(self, action._value_)(*f[1:])) + self._actions.append(getattr(self, action.value)(*f[1:])) @classmethod def validate_action(cls, action, *data): @@ -26,7 +26,7 @@ class MetadataParserPP(PostProcessor): ''' if not isinstance(action, cls.Actions): raise ValueError(f'{action!r} is not a valid action') - getattr(cls, action._value_)(cls, *data) + getattr(cls, action.value)(cls, *data) @staticmethod def field_to_template(tmpl): @@ -96,6 +96,7 @@ class MetadataParserPP(PostProcessor): return f +# Deprecated class MetadataFromFieldPP(MetadataParserPP): @classmethod def to_action(cls, f): @@ -108,9 +109,16 @@ class MetadataFromFieldPP(MetadataParserPP): match.group('out')) def __init__(self, downloader, formats): - MetadataParserPP.__init__(self, downloader, [self.to_action(f) for f in formats]) + super().__init__(self, downloader, [self.to_action(f) for f in formats]) + self.deprecation_warning( + 'yt_dlp.postprocessor.MetadataFromFieldPP is deprecated ' + 'and may be removed in a future version. Use yt_dlp.postprocessor.MetadataParserPP instead') -class MetadataFromTitlePP(MetadataParserPP): # for backward compatibility +# Deprecated +class MetadataFromTitlePP(MetadataParserPP): def __init__(self, downloader, titleformat): - MetadataParserPP.__init__(self, downloader, [(self.Actions.INTERPRET, 'title', titleformat)]) + super().__init__(self, downloader, [(self.Actions.INTERPRET, 'title', titleformat)]) + self.deprecation_warning( + 'yt_dlp.postprocessor.MetadataFromTitlePP is deprecated ' + 'and may be removed in a future version. Use yt_dlp.postprocessor.MetadataParserPP instead') diff --git a/yt_dlp/postprocessor/sponskrub.py b/yt_dlp/postprocessor/sponskrub.py index 37e7411e4..86149aeef 100644 --- a/yt_dlp/postprocessor/sponskrub.py +++ b/yt_dlp/postprocessor/sponskrub.py @@ -22,13 +22,18 @@ class SponSkrubPP(PostProcessor): _temp_ext = 'spons' _exe_name = 'sponskrub' - def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False, force=False): + def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False, force=False, _from_cli=False): PostProcessor.__init__(self, downloader) self.force = force self.cutout = cut self.args = str_or_none(args) or '' # For backward compatibility self.path = self.get_exe(path) + if not _from_cli: + self.deprecation_warning( + 'yt_dlp.postprocessor.SponSkrubPP support is deprecated and may be removed in a future version. ' + 'Use yt_dlp.postprocessor.SponsorBlock and yt_dlp.postprocessor.ModifyChaptersPP instead') + if not ignoreerror and self.path is None: if path: raise PostProcessingError('sponskrub not found in "%s"' % path) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 36597d41a..5537d63be 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2575,10 +2575,6 @@ class PostProcessingError(YoutubeDLError): indicate an error in the postprocessing task. """ - def __init__(self, msg): - super(PostProcessingError, self).__init__(msg) - self.msg = msg - class DownloadCancelled(YoutubeDLError): """ Exception raised when the download queue should be interrupted """ @@ -2600,10 +2596,21 @@ class MaxDownloadsReached(DownloadCancelled): msg = 'Maximum number of downloads reached, stopping due to --max-downloads' -class ThrottledDownload(YoutubeDLError): +class ReExtractInfo(YoutubeDLError): + """ Video info needs to be re-extracted. """ + + def __init__(self, msg, expected=False): + super().__init__(msg) + self.expected = expected + + +class ThrottledDownload(ReExtractInfo): """ Download speed below --throttled-rate. """ msg = 'The download speed is below throttle limit' + def __init__(self, msg): + super().__init__(msg, expected=False) + class UnavailableVideoError(YoutubeDLError): """Unavailable Format exception. @@ -6545,10 +6552,11 @@ def traverse_obj( return default +# Deprecated def traverse_dict(dictn, keys, casesense=True): - ''' For backward compatibility. Do not use ''' - return traverse_obj(dictn, keys, casesense=casesense, - is_user_input=True, traverse_string=True) + write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated ' + 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead') + return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True) def variadic(x, allowed_types=(str, bytes)): diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 5290afa2d..aa8fd80a3 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,5 @@ -from __future__ import unicode_literals +# Autogenerated by devscripts/update-version.py __version__ = '2021.11.10.1' + +RELEASE_GIT_HEAD = '7144b697f' |