diff options
author | Jesús <heckyel@hyperbola.info> | 2021-12-15 13:01:17 -0500 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2021-12-15 13:01:17 -0500 |
commit | 97eb14c3689ac0589c41948161bc3ca23be31d95 (patch) | |
tree | 1b4ccb2b7243fc09ca9baf1f7fe907436cc4af4a | |
parent | 8389a1296991bbb0b36c61dce7d0691c69d865bf (diff) | |
parent | 3116be32b404cbeca066d73a14079a11b8072faf (diff) | |
download | hypervideo-pre-97eb14c3689ac0589c41948161bc3ca23be31d95.tar.lz hypervideo-pre-97eb14c3689ac0589c41948161bc3ca23be31d95.tar.xz hypervideo-pre-97eb14c3689ac0589c41948161bc3ca23be31d95.zip |
updated from upstream | 15/12/2021 at 13:01
69 files changed, 255 insertions, 225 deletions
diff --git a/test/helper.py b/test/helper.py index 5c0e645f9..aef78c79d 100644 --- a/test/helper.py +++ b/test/helper.py @@ -207,10 +207,47 @@ def expect_info_dict(self, got_dict, expected_dict): for key in ['webpage_url', 'extractor', 'extractor_key']: self.assertTrue(got_dict.get(key), 'Missing field: %s' % key) - # Are checkable fields missing from the test case definition? - test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value)) - for key, value in got_dict.items() - if value and key in ('id', 'title', 'description', 'uploader', 'upload_date', 'timestamp', 'uploader_id', 'location', 'age_limit')) + ignored_fields = ( + # Format keys + 'url', 'manifest_url', 'format', 'format_id', 'format_note', 'width', 'height', 'resolution', + 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'vbr', 'fps', 'vcodec', 'container', 'filesize', + 'filesize_approx', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'preference', + 'language', 'language_preference', 'quality', 'source_preference', 'http_headers', + 'stretched_ratio', 'no_resume', 'has_drm', 'downloader_options', + + # RTMP formats + 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time', + + # Lists + 'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries', + + # Auto-generated + 'playlist', 'format_index', 'webpage_url', 'video_ext', 'audio_ext', 'duration_string', 'epoch', 'fulltitle', + 'extractor', 'extractor_key', 'original_url', 'webpage_url_basename', 'filepath', 'infojson_filename', + + # Only live_status needs to be checked + 'is_live', 'was_live', + ) + + ignored_prefixes = ('', 'playlist', 'requested') + + def sanitize(key, value): + if isinstance(value, str) and len(value) > 100: + return f'md5:{md5(value)}' + elif isinstance(value, list) and len(value) > 10: + return f'count:{len(value)}' + return value + + test_info_dict = { + key: sanitize(key, value) for key, value in got_dict.items() + if value is not None and key not in ignored_fields and not any( + key.startswith(f'{prefix}_') for prefix in ignored_prefixes) + } + + # display_id may be generated from id + if test_info_dict.get('display_id') == test_info_dict['id']: + test_info_dict.pop('display_id') + missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys()) if missing_keys: def _repr(v): diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index bd1d01584..0a5a2611b 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -70,6 +70,7 @@ from .utils import ( format_field, formatSeconds, GeoRestrictedError, + get_domain, HEADRequest, int_or_none, iri_to_uri, @@ -633,13 +634,6 @@ class YoutubeDL(object): self.print_debug_header() self.add_default_info_extractors() - for pp_def_raw in self.params.get('postprocessors', []): - pp_def = dict(pp_def_raw) - when = pp_def.pop('when', 'post_process') - pp_class = get_postprocessor(pp_def.pop('key')) - pp = pp_class(self, **compat_kwargs(pp_def)) - self.add_post_processor(pp, when=when) - hooks = { 'post_hooks': self.add_post_hook, 'progress_hooks': self.add_progress_hook, @@ -649,6 +643,13 @@ class YoutubeDL(object): for ph in self.params.get(opt, []): fn(ph) + for pp_def_raw in self.params.get('postprocessors', []): + pp_def = dict(pp_def_raw) + when = pp_def.pop('when', 'post_process') + self.add_post_processor( + get_postprocessor(pp_def.pop('key'))(self, **compat_kwargs(pp_def)), + when=when) + register_socks_protocols() def preload_download_archive(fn): @@ -735,6 +736,9 @@ class YoutubeDL(object): def add_postprocessor_hook(self, ph): """Add the postprocessing progress hook""" self._postprocessor_hooks.append(ph) + for pps in self._pps.values(): + for pp in pps: + pp.add_progress_hook(ph) def _bidi_workaround(self, message): if not hasattr(self, '_output_channel'): @@ -1420,6 +1424,7 @@ class YoutubeDL(object): 'webpage_url': url, 'original_url': url, 'webpage_url_basename': url_basename(url), + 'webpage_url_domain': get_domain(url), }) if ie is not None: self.add_extra_info(ie_result, { @@ -1544,6 +1549,7 @@ class YoutubeDL(object): 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], 'webpage_url_basename': url_basename(ie_result['webpage_url']), + 'webpage_url_domain': get_domain(ie_result['webpage_url']), 'extractor_key': ie_result['extractor_key'], }) return r @@ -1705,6 +1711,7 @@ class YoutubeDL(object): 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], 'webpage_url_basename': url_basename(ie_result['webpage_url']), + 'webpage_url_domain': get_domain(ie_result['webpage_url']), 'extractor_key': ie_result['extractor_key'], } @@ -2665,6 +2672,9 @@ class YoutubeDL(object): if self._num_downloads >= int(max_downloads): raise MaxDownloadsReached() + if info_dict.get('is_live'): + info_dict['title'] += ' ' + datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + # TODO: backward compatibility, to be removed info_dict['fulltitle'] = info_dict['title'] diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index 3e202168e..e3369306c 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -233,8 +233,6 @@ class ABCIViewIE(InfoExtractor): }] is_live = video_params.get('livestream') == '1' - if is_live: - title = self._live_title(title) return { 'id': video_id, diff --git a/yt_dlp/extractor/adobeconnect.py b/yt_dlp/extractor/adobeconnect.py index 728549eb9..e688dddcb 100644 --- a/yt_dlp/extractor/adobeconnect.py +++ b/yt_dlp/extractor/adobeconnect.py @@ -31,7 +31,7 @@ class AdobeConnectIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'formats': formats, 'is_live': is_live, } diff --git a/yt_dlp/extractor/arcpublishing.py b/yt_dlp/extractor/arcpublishing.py index 5a9b8181a..1943fd5f8 100644 --- a/yt_dlp/extractor/arcpublishing.py +++ b/yt_dlp/extractor/arcpublishing.py @@ -158,7 +158,7 @@ class ArcPublishingIE(InfoExtractor): return { 'id': uuid, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'thumbnail': try_get(video, lambda x: x['promo_image']['url']), 'description': try_get(video, lambda x: x['subheadlines']['basic']), 'formats': formats, diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index f8d57109e..1aff0361c 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -280,7 +280,7 @@ class ARDMediathekIE(ARDMediathekBaseIE): info.update({ 'id': video_id, - 'title': self._live_title(title) if info.get('is_live') else title, + 'title': title, 'description': description, 'thumbnail': thumbnail, }) diff --git a/yt_dlp/extractor/awaan.py b/yt_dlp/extractor/awaan.py index 22cc10d98..b5d1b57af 100644 --- a/yt_dlp/extractor/awaan.py +++ b/yt_dlp/extractor/awaan.py @@ -41,7 +41,7 @@ class AWAANBaseIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': video_data.get('description_en') or video_data.get('description_ar'), 'thumbnail': 'http://admin.mangomolo.com/analytics/%s' % img if img else None, 'duration': int_or_none(video_data.get('duration')), diff --git a/yt_dlp/extractor/bitwave.py b/yt_dlp/extractor/bitwave.py index eb16c469d..e6e093f59 100644 --- a/yt_dlp/extractor/bitwave.py +++ b/yt_dlp/extractor/bitwave.py @@ -51,7 +51,7 @@ class BitwaveStreamIE(InfoExtractor): return { 'id': username, - 'title': self._live_title(channel['data']['title']), + 'title': channel['data']['title'], 'uploader': username, 'uploader_id': username, 'formats': formats, diff --git a/yt_dlp/extractor/bongacams.py b/yt_dlp/extractor/bongacams.py index 9e7551136..4e346e7b6 100644 --- a/yt_dlp/extractor/bongacams.py +++ b/yt_dlp/extractor/bongacams.py @@ -49,7 +49,7 @@ class BongaCamsIE(InfoExtractor): return { 'id': channel_id, - 'title': self._live_title(uploader or uploader_id), + 'title': uploader or uploader_id, 'uploader': uploader, 'uploader_id': uploader_id, 'like_count': like_count, diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py index 82bb76f29..dcd332b43 100644 --- a/yt_dlp/extractor/brightcove.py +++ b/yt_dlp/extractor/brightcove.py @@ -581,10 +581,9 @@ class BrightcoveNewIE(AdobePassIE): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': clean_html(json_data.get('description')), - 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), - 'thumbnials': thumbnails, + 'thumbnails': thumbnails, 'duration': duration, 'timestamp': parse_iso8601(json_data.get('published_at')), 'uploader_id': json_data.get('account_id'), diff --git a/yt_dlp/extractor/cam4.py b/yt_dlp/extractor/cam4.py index 30daf2be9..f47de9176 100644 --- a/yt_dlp/extractor/cam4.py +++ b/yt_dlp/extractor/cam4.py @@ -25,7 +25,7 @@ class CAM4IE(InfoExtractor): return { 'id': channel_id, - 'title': self._live_title(channel_id), + 'title': channel_id, 'is_live': True, 'age_limit': 18, 'formats': formats, diff --git a/yt_dlp/extractor/cammodels.py b/yt_dlp/extractor/cammodels.py index eb2a8b4c6..3dc19377b 100644 --- a/yt_dlp/extractor/cammodels.py +++ b/yt_dlp/extractor/cammodels.py @@ -91,7 +91,7 @@ class CamModelsIE(InfoExtractor): return { 'id': user_id, - 'title': self._live_title(user_id), + 'title': user_id, 'is_live': True, 'formats': formats, 'age_limit': 18 diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index 6ca2f38b5..6c90b247e 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -212,8 +212,6 @@ class CeskaTelevizeIE(InfoExtractor): if playlist_len == 1: final_title = playlist_title or title - if is_live: - final_title = self._live_title(final_title) else: final_title = '%s (%s)' % (playlist_title, title) diff --git a/yt_dlp/extractor/chaturbate.py b/yt_dlp/extractor/chaturbate.py index a459dcb8d..8da51f919 100644 --- a/yt_dlp/extractor/chaturbate.py +++ b/yt_dlp/extractor/chaturbate.py @@ -101,7 +101,7 @@ class ChaturbateIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(video_id), + 'title': video_id, 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id, 'age_limit': self._rta_search(webpage), 'is_live': True, diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index d8fc5272c..ebf2e3cea 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import base64 import collections -import datetime import hashlib import itertools import json @@ -1606,6 +1605,11 @@ class InfoExtractor(object): 'res': {'type': 'multiple', 'field': ('height', 'width'), 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))}, + # For compatibility with youtube-dl + 'format_id': {'type': 'alias', 'field': 'id'}, + 'preference': {'type': 'alias', 'field': 'ie_pref'}, + 'language_preference': {'type': 'alias', 'field': 'lang'}, + # Deprecated 'dimension': {'type': 'alias', 'field': 'res'}, 'resolution': {'type': 'alias', 'field': 'res'}, @@ -1615,7 +1619,6 @@ class InfoExtractor(object): 'video_bitrate': {'type': 'alias', 'field': 'vbr'}, 'audio_bitrate': {'type': 'alias', 'field': 'abr'}, 'framerate': {'type': 'alias', 'field': 'fps'}, - 'language_preference': {'type': 'alias', 'field': 'lang'}, 'protocol': {'type': 'alias', 'field': 'proto'}, 'source_preference': {'type': 'alias', 'field': 'source'}, 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}, @@ -1630,9 +1633,7 @@ class InfoExtractor(object): 'audio': {'type': 'alias', 'field': 'hasaud'}, 'has_audio': {'type': 'alias', 'field': 'hasaud'}, 'extractor': {'type': 'alias', 'field': 'ie_pref'}, - 'preference': {'type': 'alias', 'field': 'ie_pref'}, 'extractor_preference': {'type': 'alias', 'field': 'ie_pref'}, - 'format_id': {'type': 'alias', 'field': 'id'}, } def __init__(self, ie, field_preference): @@ -1732,9 +1733,10 @@ class InfoExtractor(object): continue if self._get_field_setting(field, 'type') == 'alias': alias, field = field, self._get_field_setting(field, 'field') - self.ydl.deprecation_warning( - f'Format sorting alias {alias} is deprecated ' - f'and may be removed in a future version. Please use {field} instead') + if alias not in ('format_id', 'preference', 'language_preference'): + self.ydl.deprecation_warning( + f'Format sorting alias {alias} is deprecated ' + f'and may be removed in a future version. Please use {field} instead') reverse = match.group('reverse') is not None closest = match.group('separator') == '~' limit_text = match.group('limit') @@ -3453,10 +3455,8 @@ class InfoExtractor(object): return formats def _live_title(self, name): - """ Generate the title for a live video """ - now = datetime.datetime.now() - now_str = now.strftime('%Y-%m-%d %H:%M') - return name + ' ' + now_str + self._downloader.deprecation_warning('yt_dlp.InfoExtractor._live_title is deprecated and does not work as expected') + return name def _int(self, v, name, fatal=False, **kwargs): res = int_or_none(v, **kwargs) diff --git a/yt_dlp/extractor/dailymotion.py b/yt_dlp/extractor/dailymotion.py index e04e10b86..b4211e1e4 100644 --- a/yt_dlp/extractor/dailymotion.py +++ b/yt_dlp/extractor/dailymotion.py @@ -305,7 +305,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': clean_html(media.get('description')), 'thumbnails': thumbnails, 'duration': int_or_none(metadata.get('duration')) or None, diff --git a/yt_dlp/extractor/dlive.py b/yt_dlp/extractor/dlive.py index 90462c0ab..7410eb6c8 100644 --- a/yt_dlp/extractor/dlive.py +++ b/yt_dlp/extractor/dlive.py @@ -84,7 +84,7 @@ class DLiveStreamIE(InfoExtractor): self._sort_formats(formats) return { 'id': display_name, - 'title': self._live_title(title), + 'title': title, 'uploader': display_name, 'uploader_id': username, 'formats': formats, diff --git a/yt_dlp/extractor/douyutv.py b/yt_dlp/extractor/douyutv.py index 9757f4422..26a8d645c 100644 --- a/yt_dlp/extractor/douyutv.py +++ b/yt_dlp/extractor/douyutv.py @@ -105,7 +105,7 @@ class DouyuTVIE(InfoExtractor): 'aid': 'pcclient' })['data']['live_url'] - title = self._live_title(unescapeHTML(room['room_name'])) + title = unescapeHTML(room['room_name']) description = room.get('show_details') thumbnail = room.get('room_src') uploader = room.get('nickname') diff --git a/yt_dlp/extractor/drtv.py b/yt_dlp/extractor/drtv.py index 7bb15f8d4..70134204c 100644 --- a/yt_dlp/extractor/drtv.py +++ b/yt_dlp/extractor/drtv.py @@ -321,7 +321,7 @@ class DRTVLiveIE(InfoExtractor): channel_data = self._download_json( 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id, channel_id) - title = self._live_title(channel_data['Title']) + title = channel_data['Title'] formats = [] for streaming_server in channel_data.get('StreamingServers', []): diff --git a/yt_dlp/extractor/filmon.py b/yt_dlp/extractor/filmon.py index f775fe0ba..7b43ecc0f 100644 --- a/yt_dlp/extractor/filmon.py +++ b/yt_dlp/extractor/filmon.py @@ -170,7 +170,7 @@ class FilmOnChannelIE(InfoExtractor): return { 'id': channel_id, 'display_id': channel_data.get('alias'), - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': channel_data.get('description'), 'thumbnails': thumbnails, 'formats': formats, diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index bc5ef4df9..877c5c055 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -203,7 +203,7 @@ class FranceTVIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'thumbnail': image, 'duration': duration, 'timestamp': timestamp, diff --git a/yt_dlp/extractor/freshlive.py b/yt_dlp/extractor/freshlive.py index 72a845945..ad19b8109 100644 --- a/yt_dlp/extractor/freshlive.py +++ b/yt_dlp/extractor/freshlive.py @@ -59,9 +59,6 @@ class FreshLiveIE(InfoExtractor): stream_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') - if is_live: - title = self._live_title(title) - return { 'id': video_id, 'formats': formats, diff --git a/yt_dlp/extractor/hitbox.py b/yt_dlp/extractor/hitbox.py index 3e5ff2685..0470d0a99 100644 --- a/yt_dlp/extractor/hitbox.py +++ b/yt_dlp/extractor/hitbox.py @@ -209,6 +209,6 @@ class HitboxLiveIE(HitboxIE): 'https://www.smashcast.tv/api/media/live', video_id) metadata['formats'] = formats metadata['is_live'] = True - metadata['title'] = self._live_title(metadata.get('title')) + metadata['title'] = metadata.get('title') return metadata diff --git a/yt_dlp/extractor/imggaming.py b/yt_dlp/extractor/imggaming.py index ef20a4b9e..14d3fad55 100644 --- a/yt_dlp/extractor/imggaming.py +++ b/yt_dlp/extractor/imggaming.py @@ -88,7 +88,7 @@ class ImgGamingBaseIE(InfoExtractor): video_data = self._download_json(dve_api_url, media_id) is_live = media_type == 'live' if is_live: - title = self._live_title(self._call_api('event/', media_id)['title']) + title = self._call_api('event/', media_id)['title'] else: title = video_data['name'] diff --git a/yt_dlp/extractor/ivideon.py b/yt_dlp/extractor/ivideon.py index 01e7b22d4..44b220846 100644 --- a/yt_dlp/extractor/ivideon.py +++ b/yt_dlp/extractor/ivideon.py @@ -75,7 +75,7 @@ class IvideonIE(InfoExtractor): return { 'id': server_id, - 'title': self._live_title(camera_name or server_id), + 'title': camera_name or server_id, 'description': description, 'is_live': True, 'formats': formats, diff --git a/yt_dlp/extractor/laola1tv.py b/yt_dlp/extractor/laola1tv.py index fa217365a..b5d27c2f0 100644 --- a/yt_dlp/extractor/laola1tv.py +++ b/yt_dlp/extractor/laola1tv.py @@ -112,7 +112,7 @@ class Laola1TvEmbedIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'upload_date': unified_strdate(_v('time_date')), 'uploader': _v('meta_organisation'), 'categories': categories, @@ -161,7 +161,7 @@ class Laola1TvBaseIE(Laola1TvEmbedIE): return { 'id': video_id, 'display_id': display_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': video_data.get('description'), 'thumbnail': video_data.get('image'), 'categories': categories, diff --git a/yt_dlp/extractor/line.py b/yt_dlp/extractor/line.py index d4bcae6c1..e1d5f21e1 100644 --- a/yt_dlp/extractor/line.py +++ b/yt_dlp/extractor/line.py @@ -116,7 +116,7 @@ class LineLiveBaseIE(InfoExtractor): return { 'id': broadcast_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'thumbnails': thumbnails, 'timestamp': int_or_none(item.get('createdAt')), 'channel': channel.get('name'), diff --git a/yt_dlp/extractor/livestream.py b/yt_dlp/extractor/livestream.py index f591289ec..45bf26d26 100644 --- a/yt_dlp/extractor/livestream.py +++ b/yt_dlp/extractor/livestream.py @@ -176,7 +176,7 @@ class LivestreamIE(InfoExtractor): return { 'id': broadcast_id, 'formats': formats, - 'title': self._live_title(stream_info['stream_title']) if is_live else stream_info['stream_title'], + 'title': stream_info['stream_title'], 'thumbnail': stream_info.get('thumbnail_url'), 'is_live': is_live, } @@ -344,7 +344,7 @@ class LivestreamOriginalIE(InfoExtractor): is_live = video_data.get('isLive') info.update({ 'id': content_id, - 'title': self._live_title(info['title']) if is_live else info['title'], + 'title': info['title'], 'formats': self._extract_video_formats(video_data, content_id), 'is_live': is_live, }) diff --git a/yt_dlp/extractor/mangomolo.py b/yt_dlp/extractor/mangomolo.py index acee370e9..68ce138b3 100644 --- a/yt_dlp/extractor/mangomolo.py +++ b/yt_dlp/extractor/mangomolo.py @@ -33,7 +33,7 @@ class MangomoloBaseIE(InfoExtractor): return { 'id': page_id, - 'title': self._live_title(page_id) if self._IS_LIVE else page_id, + 'title': page_id, 'uploader_id': hidden_inputs.get('userid'), 'duration': int_or_none(hidden_inputs.get('duration')), 'is_live': self._IS_LIVE, diff --git a/yt_dlp/extractor/matchtv.py b/yt_dlp/extractor/matchtv.py index bc9933a81..e003b8d25 100644 --- a/yt_dlp/extractor/matchtv.py +++ b/yt_dlp/extractor/matchtv.py @@ -49,7 +49,7 @@ class MatchTVIE(InfoExtractor): self._sort_formats(formats) return { 'id': video_id, - 'title': self._live_title('Матч ТВ - Прямой эфир'), + 'title': 'Матч ТВ - Прямой эфир', 'is_live': True, 'formats': formats, } diff --git a/yt_dlp/extractor/muenchentv.py b/yt_dlp/extractor/muenchentv.py index d256236d1..a53929e1b 100644 --- a/yt_dlp/extractor/muenchentv.py +++ b/yt_dlp/extractor/muenchentv.py @@ -33,7 +33,7 @@ class MuenchenTVIE(InfoExtractor): display_id = 'live' webpage = self._download_webpage(url, display_id) - title = self._live_title(self._og_search_title(webpage)) + title = self._og_search_title(webpage) data_js = self._search_regex( r'(?s)\nplaylist:\s*(\[.*?}\]),', diff --git a/yt_dlp/extractor/nbc.py b/yt_dlp/extractor/nbc.py index f304f191a..cd573690b 100644 --- a/yt_dlp/extractor/nbc.py +++ b/yt_dlp/extractor/nbc.py @@ -305,7 +305,7 @@ class NBCSportsStreamIE(AdobePassIE): self._sort_formats(formats) return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': live_source.get('description'), 'formats': formats, 'is_live': is_live, @@ -545,8 +545,6 @@ class NBCOlympicsStreamIE(AdobePassIE): title = event_config['eventTitle'] is_live = {'live': True, 'replay': False}.get(event_config.get('eventStatus')) - if is_live: - title = self._live_title(title) source_url = self._download_json( f'https://api-leap.nbcsports.com/feeds/assets/{pid}?application=NBCOlympics&platform=desktop&format=nbc-player&env=staging', diff --git a/yt_dlp/extractor/ndr.py b/yt_dlp/extractor/ndr.py index f2bae2c1a..1917254b8 100644 --- a/yt_dlp/extractor/ndr.py +++ b/yt_dlp/extractor/ndr.py @@ -245,8 +245,6 @@ class NDREmbedBaseIE(InfoExtractor): live = playlist.get('config', {}).get('streamType') in ['httpVideoLive', 'httpAudioLive'] title = config['title'] - if live: - title = self._live_title(title) uploader = ppjson.get('config', {}).get('branding') upload_date = ppjson.get('config', {}).get('publicationDate') duration = int_or_none(config.get('duration')) diff --git a/yt_dlp/extractor/nfl.py b/yt_dlp/extractor/nfl.py index 871923e4c..821276a31 100644 --- a/yt_dlp/extractor/nfl.py +++ b/yt_dlp/extractor/nfl.py @@ -89,7 +89,7 @@ class NFLBaseIE(InfoExtractor): 'ext': determine_ext(image_url, 'jpg'), }] info.update({ - 'title': self._live_title(title) if is_live else title, + 'title': title, 'is_live': is_live, 'description': clean_html(item.get('description')), 'thumbnails': thumbnails, diff --git a/yt_dlp/extractor/npo.py b/yt_dlp/extractor/npo.py index ed547d04b..a8aaef6f1 100644 --- a/yt_dlp/extractor/npo.py +++ b/yt_dlp/extractor/npo.py @@ -467,7 +467,7 @@ class NPOIE(NPOBaseIE): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': metadata.get('info'), 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], 'upload_date': unified_strdate(metadata.get('gidsdatum')), @@ -561,7 +561,7 @@ class NPORadioIE(InfoExtractor): return { 'id': video_id, 'url': stream['url'], - 'title': self._live_title(title), + 'title': title, 'acodec': codec, 'ext': codec, 'is_live': True, diff --git a/yt_dlp/extractor/odnoklassniki.py b/yt_dlp/extractor/odnoklassniki.py index 9cacd3815..42f210a9b 100644 --- a/yt_dlp/extractor/odnoklassniki.py +++ b/yt_dlp/extractor/odnoklassniki.py @@ -12,6 +12,7 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + float_or_none, unified_strdate, int_or_none, qualities, @@ -97,6 +98,14 @@ class OdnoklassnikiIE(InfoExtractor): }, 'skip': 'Video has not been found', }, { + 'note': 'Only available in mobile webpage', + 'url': 'https://m.ok.ru/video/2361249957145', + 'info_dict': { + 'id': '2361249957145', + 'title': 'Быковское крещение', + 'duration': 3038.181, + }, + }, { 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452', 'only_matching': True, }, { @@ -131,13 +140,24 @@ class OdnoklassnikiIE(InfoExtractor): return mobj.group('url') def _real_extract(self, url): + try: + return self._extract_desktop(url) + except ExtractorError as e: + try: + return self._extract_mobile(url) + except ExtractorError: + # error message of desktop webpage is in English + raise e + + def _extract_desktop(self, url): start_time = int_or_none(compat_parse_qs( compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0]) video_id = self._match_id(url) webpage = self._download_webpage( - 'http://ok.ru/video/%s' % video_id, video_id) + 'http://ok.ru/video/%s' % video_id, video_id, + note='Downloading desktop webpage') error = self._search_regex( r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<', @@ -215,7 +235,7 @@ class OdnoklassnikiIE(InfoExtractor): assert title if provider == 'LIVE_TV_APP': - info['title'] = self._live_title(title) + info['title'] = title quality = qualities(('4', '0', '1', '2', '3', '5')) @@ -265,3 +285,32 @@ class OdnoklassnikiIE(InfoExtractor): info['formats'] = formats return info + + def _extract_mobile(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage( + 'http://m.ok.ru/video/%s' % video_id, video_id, + note='Downloading mobile webpage') + + error = self._search_regex( + r'видео</a>\s*<div\s+class="empty">(.+?)</div>', + webpage, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + + json_data = self._search_regex( + r'data-video="(.+?)"', webpage, 'json data') + json_data = self._parse_json(unescapeHTML(json_data), video_id) or {} + + return { + 'id': video_id, + 'title': json_data.get('videoName'), + 'duration': float_or_none(json_data.get('videoDuration'), scale=1000), + 'thumbnail': json_data.get('videoPosterSrc'), + 'formats': [{ + 'format_id': 'mobile', + 'url': json_data.get('videoSrc'), + 'ext': 'mp4', + }] + } diff --git a/yt_dlp/extractor/oktoberfesttv.py b/yt_dlp/extractor/oktoberfesttv.py index a914068f9..276567436 100644 --- a/yt_dlp/extractor/oktoberfesttv.py +++ b/yt_dlp/extractor/oktoberfesttv.py @@ -25,8 +25,8 @@ class OktoberfestTVIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._live_title(self._html_search_regex( - r'<h1><strong>.*?</strong>(.*?)</h1>', webpage, 'title')) + title = self._html_search_regex( + r'<h1><strong>.*?</strong>(.*?)</h1>', webpage, 'title') clip = self._search_regex( r"clip:\s*\{\s*url:\s*'([^']+)'", webpage, 'clip') diff --git a/yt_dlp/extractor/picarto.py b/yt_dlp/extractor/picarto.py index 17d08d69e..adf21fda8 100644 --- a/yt_dlp/extractor/picarto.py +++ b/yt_dlp/extractor/picarto.py @@ -77,7 +77,7 @@ class PicartoIE(InfoExtractor): return { 'id': channel_id, - 'title': self._live_title(title.strip()), + 'title': title.strip(), 'is_live': True, 'channel': channel_id, 'channel_id': metadata.get('id'), diff --git a/yt_dlp/extractor/playtvak.py b/yt_dlp/extractor/playtvak.py index 84e92dda4..30c8a599e 100644 --- a/yt_dlp/extractor/playtvak.py +++ b/yt_dlp/extractor/playtvak.py @@ -167,8 +167,6 @@ class PlaytvakIE(InfoExtractor): title = item['title'] is_live = item['type'] == 'stream' - if is_live: - title = self._live_title(title) description = self._og_search_description(webpage, default=None) or self._html_search_meta( 'description', webpage, 'description', default=None) timestamp = None diff --git a/yt_dlp/extractor/radiode.py b/yt_dlp/extractor/radiode.py index 2c06c8b1e..038287363 100644 --- a/yt_dlp/extractor/radiode.py +++ b/yt_dlp/extractor/radiode.py @@ -29,7 +29,7 @@ class RadioDeIE(InfoExtractor): webpage, 'broadcast') broadcast = self._parse_json(jscode, radio_id) - title = self._live_title(broadcast['name']) + title = broadcast['name'] description = broadcast.get('description') or broadcast.get('shortDescription') thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100') diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index 4699fe17e..39e57decd 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -318,8 +318,7 @@ class RaiPlayIE(RaiBaseIE): info = { 'id': remove_start(media.get('id'), 'ContentItem-') or video_id, 'display_id': video_id, - 'title': self._live_title(title) if relinker_info.get( - 'is_live') else title, + 'title': title, 'alt_title': strip_or_none(alt_title), 'description': media.get('description'), 'uploader': strip_or_none(media.get('channel')), diff --git a/yt_dlp/extractor/rtbf.py b/yt_dlp/extractor/rtbf.py index f9979d0a4..4b61fdb17 100644 --- a/yt_dlp/extractor/rtbf.py +++ b/yt_dlp/extractor/rtbf.py @@ -85,8 +85,6 @@ class RTBFIE(InfoExtractor): title = data['title'] is_live = data.get('isLive') - if is_live: - title = self._live_title(title) height_re = r'-(\d+)p\.' formats = [] diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py index 59832eeac..0654fb08b 100644 --- a/yt_dlp/extractor/rtve.py +++ b/yt_dlp/extractor/rtve.py @@ -160,7 +160,7 @@ class RTVEALaCartaIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'formats': formats, 'thumbnail': info.get('image'), 'subtitles': subtitles, @@ -230,7 +230,7 @@ class RTVELiveIE(RTVEALaCartaIE): return { 'id': video_id, - 'title': self._live_title(title), + 'title': title, 'formats': self._extract_png_formats(vidplayer_id), 'is_live': True, } diff --git a/yt_dlp/extractor/rutv.py b/yt_dlp/extractor/rutv.py index 7e0de994a..3de86b232 100644 --- a/yt_dlp/extractor/rutv.py +++ b/yt_dlp/extractor/rutv.py @@ -201,7 +201,7 @@ class RUTVIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': description, 'thumbnail': thumbnail, 'view_count': view_count, diff --git a/yt_dlp/extractor/showroomlive.py b/yt_dlp/extractor/showroomlive.py index efd9d561f..1aada69ac 100644 --- a/yt_dlp/extractor/showroomlive.py +++ b/yt_dlp/extractor/showroomlive.py @@ -73,7 +73,7 @@ class ShowRoomLiveIE(InfoExtractor): return { 'id': compat_str(room.get('live_id') or broadcaster_id), - 'title': self._live_title(title), + 'title': title, 'description': room.get('description'), 'timestamp': int_or_none(room.get('current_live_started_at')), 'uploader': uploader, diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py index 14a4d8d4c..496bb42a2 100644 --- a/yt_dlp/extractor/skyit.py +++ b/yt_dlp/extractor/skyit.py @@ -55,7 +55,7 @@ class SkyItPlayerIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'formats': formats, 'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')), 'description': video.get('short_desc') or None, diff --git a/yt_dlp/extractor/skylinewebcams.py b/yt_dlp/extractor/skylinewebcams.py index b7f8ac736..47bbb7632 100644 --- a/yt_dlp/extractor/skylinewebcams.py +++ b/yt_dlp/extractor/skylinewebcams.py @@ -36,7 +36,7 @@ class SkylineWebcamsIE(InfoExtractor): 'id': video_id, 'url': stream_url, 'ext': 'mp4', - 'title': self._live_title(title), + 'title': title, 'description': description, 'is_live': True, } diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py index efd0afc75..0d4a0ce4c 100644 --- a/yt_dlp/extractor/stripchat.py +++ b/yt_dlp/extractor/stripchat.py @@ -57,7 +57,7 @@ class StripchatIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(video_id), + 'title': video_id, 'description': self._og_search_description(webpage), 'is_live': True, 'formats': formats, diff --git a/yt_dlp/extractor/svt.py b/yt_dlp/extractor/svt.py index 489f197fe..6ad01a912 100644 --- a/yt_dlp/extractor/svt.py +++ b/yt_dlp/extractor/svt.py @@ -203,10 +203,6 @@ class SVTPlayIE(SVTPlayBaseIE): 'only_matching': True, }] - def _adjust_title(self, info): - if info['is_live']: - info['title'] = self._live_title(info['title']) - def _extract_by_video_id(self, video_id, webpage=None): data = self._download_json( 'https://api.svt.se/videoplayer-api/video/%s' % video_id, @@ -220,7 +216,6 @@ class SVTPlayIE(SVTPlayBaseIE): if not title: title = video_id info_dict['title'] = title - self._adjust_title(info_dict) return info_dict def _real_extract(self, url): @@ -251,7 +246,6 @@ class SVTPlayIE(SVTPlayBaseIE): 'title': data['context']['dispatcher']['stores']['MetaStore']['title'], 'thumbnail': thumbnail, }) - self._adjust_title(info_dict) return info_dict svt_id = try_get( diff --git a/yt_dlp/extractor/telebruxelles.py b/yt_dlp/extractor/telebruxelles.py index a0353fe3a..9e8c89bd6 100644 --- a/yt_dlp/extractor/telebruxelles.py +++ b/yt_dlp/extractor/telebruxelles.py @@ -69,7 +69,7 @@ class TeleBruxellesIE(InfoExtractor): return { 'id': article_id or display_id, 'display_id': display_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': description, 'formats': formats, 'is_live': is_live, diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py index e5c6a6de1..22b4fe7c8 100644 --- a/yt_dlp/extractor/threeqsdn.py +++ b/yt_dlp/extractor/threeqsdn.py @@ -148,7 +148,7 @@ class ThreeQSDNIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(title) if live else title, + 'title': title, 'thumbnail': config.get('poster') or None, 'description': config.get('description') or None, 'timestamp': parse_iso8601(config.get('upload_date')), diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index a3079151a..2cd7ba02e 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -180,7 +180,7 @@ class TikTokBaseIE(InfoExtractor): user_url = self._UPLOADER_URL_FORMAT % (traverse_obj(author_info, 'sec_uid', 'id', 'uid', 'unique_id', expected_type=str_or_none, get_all=False)) - labels = traverse_obj(aweme_detail, ('hybrid_label', ..., 'text'), expected_type=str) + labels = traverse_obj(aweme_detail, ('hybrid_label', ..., 'text'), expected_type=str, default=[]) contained_music_track = traverse_obj( music_info, ('matched_song', 'title'), ('matched_pgc_sound', 'title'), expected_type=str) diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index 127a5d2dc..580cb533b 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -62,7 +62,7 @@ class TrovoIE(TrovoBaseIE): raise ExtractorError('%s is offline' % username, expected=True) program_info = live_info['programInfo'] program_id = program_info['id'] - title = self._live_title(program_info['title']) + title = program_info['title'] formats = [] for stream_info in (program_info.get('streamInfo') or []): diff --git a/yt_dlp/extractor/tunein.py b/yt_dlp/extractor/tunein.py index c7a5f5a63..7e51de89e 100644 --- a/yt_dlp/extractor/tunein.py +++ b/yt_dlp/extractor/tunein.py @@ -62,7 +62,7 @@ class TuneInBaseIE(InfoExtractor): return { 'id': content_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'formats': formats, 'thumbnail': thumbnail, 'location': location, diff --git a/yt_dlp/extractor/turner.py b/yt_dlp/extractor/turner.py index 32125bc79..519dc323c 100644 --- a/yt_dlp/extractor/turner.py +++ b/yt_dlp/extractor/turner.py @@ -205,7 +205,7 @@ class TurnerBaseIE(AdobePassIE): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'formats': formats, 'subtitles': subtitles, 'thumbnails': thumbnails, diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index da351eeb0..b48dfe389 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -106,7 +106,7 @@ class TV2IE(InfoExtractor): return { 'id': video_id, 'url': video_url, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': strip_or_none(asset.get('description')), 'thumbnails': thumbnails, 'timestamp': parse_iso8601(asset.get('live_broadcast_time') or asset.get('update_time')), @@ -271,7 +271,7 @@ class KatsomoIE(InfoExtractor): return { 'id': video_id, 'url': video_url, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'description': strip_or_none(asset.get('description')), 'thumbnails': thumbnails, 'timestamp': parse_iso8601(asset.get('createTime')), diff --git a/yt_dlp/extractor/tvnet.py b/yt_dlp/extractor/tvnet.py index 4222ff9ee..4fe8dfb6c 100644 --- a/yt_dlp/extractor/tvnet.py +++ b/yt_dlp/extractor/tvnet.py @@ -130,9 +130,6 @@ class TVNetIE(InfoExtractor): r'data-image=(["\'])(?P<url>(?:https?:)?//.+?)\1', webpage, 'thumbnail', default=None, group='url')) - if is_live: - title = self._live_title(title) - view_count = int_or_none(self._search_regex( r'(?s)<div[^>]+\bclass=["\'].*?view-count[^>]+>.*?(\d+).*?</div>', webpage, 'view count', default=None)) diff --git a/yt_dlp/extractor/tvplayer.py b/yt_dlp/extractor/tvplayer.py index 8f8686a65..5970596b2 100644 --- a/yt_dlp/extractor/tvplayer.py +++ b/yt_dlp/extractor/tvplayer.py @@ -80,7 +80,7 @@ class TVPlayerIE(InfoExtractor): return { 'id': resource_id, 'display_id': display_id, - 'title': self._live_title(title), + 'title': title, 'formats': formats, 'is_live': True, } diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index cd97f0a24..54e500edd 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -903,7 +903,7 @@ class TwitchStreamIE(TwitchBaseIE): return { 'id': stream_id, 'display_id': channel_name, - 'title': self._live_title(title), + 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, diff --git a/yt_dlp/extractor/vgtv.py b/yt_dlp/extractor/vgtv.py index b6131ff82..10083cd24 100644 --- a/yt_dlp/extractor/vgtv.py +++ b/yt_dlp/extractor/vgtv.py @@ -242,7 +242,7 @@ class VGTVIE(XstreamIE): info.update({ 'id': video_id, - 'title': self._live_title(data['title']) if is_live else data['title'], + 'title': data['title'], 'description': data['description'], 'thumbnail': data['images']['main'] + '?t[]=900x506q80', 'timestamp': data['published'], diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 8c3b6af65..57391d766 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -224,7 +224,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): return { 'id': str_or_none(video_data.get('id')) or video_id, - 'title': self._live_title(video_title) if is_live else video_title, + 'title': video_title, 'uploader': owner.get('name'), 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None, 'uploader_url': video_uploader_url, diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index 9a5c9ee6b..5cdb1542d 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -434,8 +434,6 @@ class VKIE(VKBaseIE): # 2 = live # 3 = post live (finished live) is_live = data.get('live') == 2 - if is_live: - title = self._live_title(title) timestamp = unified_timestamp(self._html_search_regex( r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page, diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py index 8fccf1b63..547bdd323 100644 --- a/yt_dlp/extractor/vlive.py +++ b/yt_dlp/extractor/vlive.py @@ -210,7 +210,7 @@ class VLiveIE(VLiveBaseIE): self._sort_formats(formats) info = get_common_fields() info.update({ - 'title': self._live_title(video['title']), + 'title': video['title'], 'id': video_id, 'formats': formats, 'is_live': True, diff --git a/yt_dlp/extractor/vyborymos.py b/yt_dlp/extractor/vyborymos.py index 9e703c4b6..4d93666c5 100644 --- a/yt_dlp/extractor/vyborymos.py +++ b/yt_dlp/extractor/vyborymos.py @@ -44,11 +44,11 @@ class VyboryMosIE(InfoExtractor): info = self._download_json( 'http://vybory.mos.ru/json/voting_stations/%s/%s.json' % (compat_str(station_id)[:3], station_id), - station_id, 'Downloading station JSON', fatal=False) + station_id, 'Downloading station JSON', fatal=False) or {} return { 'id': station_id, - 'title': self._live_title(info['name'] if info else station_id), + 'title': info.get('name') or station_id, 'description': info.get('address'), 'is_live': True, 'formats': formats, diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index d3229d8af..45bfe5f3a 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -117,7 +117,7 @@ class WDRIE(InfoExtractor): return { 'id': tracker_data.get('trackerClipId', video_id), - 'title': self._live_title(title) if is_live else title, + 'title': title, 'alt_title': tracker_data.get('trackerClipSubcategory'), 'formats': formats, 'subtitles': subtitles, diff --git a/yt_dlp/extractor/yahoo.py b/yt_dlp/extractor/yahoo.py index 53556de00..313e596f5 100644 --- a/yt_dlp/extractor/yahoo.py +++ b/yt_dlp/extractor/yahoo.py @@ -264,7 +264,7 @@ class YahooIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'formats': formats, 'thumbnails': thumbnails, 'description': clean_html(video.get('description')), diff --git a/yt_dlp/extractor/younow.py b/yt_dlp/extractor/younow.py index 04dbc87fc..128faa30d 100644 --- a/yt_dlp/extractor/younow.py +++ b/yt_dlp/extractor/younow.py @@ -58,7 +58,7 @@ class YouNowLiveIE(InfoExtractor): return { 'id': uploader, 'is_live': True, - 'title': self._live_title(uploader), + 'title': uploader, 'thumbnail': data.get('awsUrl'), 'tags': data.get('tags'), 'categories': data.get('tags'), diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 566edb38f..8f64b6657 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals -import base64 import calendar import copy import datetime @@ -13,6 +12,7 @@ import math import os.path import random import re +import sys import time import traceback @@ -30,7 +30,6 @@ from ..compat import ( from ..jsinterp import JSInterpreter from ..utils import ( bug_reports_message, - bytes_to_intlist, clean_html, datetime_from_str, dict_get, @@ -39,7 +38,6 @@ from ..utils import ( float_or_none, format_field, int_or_none, - intlist_to_bytes, is_html, join_nonempty, mimetype2ext, @@ -932,16 +930,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag', + 'channel': 'Philipp Hagemeister', 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q', 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q', 'upload_date': '20121002', - 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', + 'description': 'md5:8fb536f4877b8a7455c2ec23794dbc22', 'categories': ['Science & Technology'], 'tags': ['youtube-dl'], 'duration': 10, 'view_count': int, 'like_count': int, - 'dislike_count': int, + # 'dislike_count': int, + 'availability': 'public', + 'playable_in_embed': True, + 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg', + 'live_status': 'not_live', + 'age_limit': 0, 'start_time': 1, 'end_time': 9, } @@ -2111,20 +2115,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'parent': parent or 'root' } - def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, comment_counts=None): + def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None): + + get_single_config_arg = lambda c: self._configuration_arg(c, [''])[0] def extract_header(contents): _continuation = None for content in contents: - comments_header_renderer = try_get(content, lambda x: x['commentsHeaderRenderer']) + comments_header_renderer = traverse_obj(content, 'commentsHeaderRenderer') expected_comment_count = parse_count(self._get_text( comments_header_renderer, 'countText', 'commentsCount', max_runs=1)) if expected_comment_count: - comment_counts[1] = expected_comment_count - self.to_screen('Downloading ~%d comments' % expected_comment_count) - sort_mode_str = self._configuration_arg('comment_sort', [''])[0] - comment_sort_index = int(sort_mode_str != 'top') # 1 = new, 0 = top + tracker['est_total'] = expected_comment_count + self.to_screen(f'Downloading ~{expected_comment_count} comments') + comment_sort_index = int(get_single_config_arg('comment_sort') != 'top') # 1 = new, 0 = top sort_menu_item = try_get( comments_header_renderer, @@ -2135,76 +2140,84 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not _continuation: continue - sort_text = sort_menu_item.get('title') - if isinstance(sort_text, compat_str): - sort_text = sort_text.lower() - else: + sort_text = str_or_none(sort_menu_item.get('title')) + if not sort_text: sort_text = 'top comments' if comment_sort_index == 0 else 'newest first' - self.to_screen('Sorting comments by %s' % sort_text) + self.to_screen('Sorting comments by %s' % sort_text.lower()) break return _continuation def extract_thread(contents): if not parent: - comment_counts[2] = 0 + tracker['current_page_thread'] = 0 for content in contents: + if not parent and tracker['total_parent_comments'] >= max_parents: + yield comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer']) - comment_renderer = try_get( - comment_thread_renderer, (lambda x: x['comment']['commentRenderer'], dict)) or try_get( - content, (lambda x: x['commentRenderer'], dict)) + comment_renderer = get_first( + (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]], + expected_type=dict, default={}) - if not comment_renderer: - continue comment = self._extract_comment(comment_renderer, parent) if not comment: continue - comment_counts[0] += 1 + + tracker['running_total'] += 1 + tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1 yield comment + # Attempt to get the replies comment_replies_renderer = try_get( comment_thread_renderer, lambda x: x['replies']['commentRepliesRenderer'], dict) if comment_replies_renderer: - comment_counts[2] += 1 + tracker['current_page_thread'] += 1 comment_entries_iter = self._comment_entries( comment_replies_renderer, ytcfg, video_id, - parent=comment.get('id'), comment_counts=comment_counts) - - for reply_comment in comment_entries_iter: + parent=comment.get('id'), tracker=tracker) + for reply_comment in itertools.islice(comment_entries_iter, min(max_replies_per_thread, max(0, max_replies - tracker['total_reply_comments']))): yield reply_comment + # Keeps track of counts across recursive calls + if not tracker: + tracker = dict( + running_total=0, + est_total=0, + current_page_thread=0, + total_parent_comments=0, + total_reply_comments=0) + + # TODO: Deprecated # YouTube comments have a max depth of 2 - max_depth = int_or_none(self._configuration_arg('max_comment_depth', [''])[0]) or float('inf') + max_depth = int_or_none(get_single_config_arg('max_comment_depth')) + if max_depth: + self._downloader.deprecation_warning( + '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.') if max_depth == 1 and parent: return - if not comment_counts: - # comment so far, est. total comments, current comment thread # - comment_counts = [0, 0, 0] - continuation = self._extract_continuation(root_continuation_data) - if continuation and len(continuation['continuation']) < 27: - self.write_debug('Detected old API continuation token. Generating new API compatible token.') - continuation_token = self._generate_comment_continuation(video_id) - continuation = self._build_api_continuation_query(continuation_token, None) + max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map( + lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4) + continuation = self._extract_continuation(root_continuation_data) message = self._get_text(root_continuation_data, ('contents', ..., 'messageRenderer', 'text'), max_runs=1) if message and not parent: self.report_warning(message, video_id=video_id) - visitor_data = None + response = None is_first_continuation = parent is None for page_num in itertools.count(0): if not continuation: break - headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) - comment_prog_str = '(%d/%d)' % (comment_counts[0], comment_counts[1]) + headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response)) + comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})" if page_num == 0: if is_first_continuation: note_prefix = 'Downloading comment section API JSON' else: note_prefix = ' Downloading comment API JSON reply thread %d %s' % ( - comment_counts[2], comment_prog_str) + tracker['current_page_thread'], comment_prog_str) else: note_prefix = '%sDownloading comment%s API JSON page %d %s' % ( ' ' if parent else '', ' replies' if parent else '', @@ -2213,83 +2226,32 @@ class YoutubeIE(YoutubeBaseInfoExtractor): response = self._extract_response( item_id=None, query=continuation, ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix, - check_get_keys=('onResponseReceivedEndpoints', 'continuationContents')) - if not response: - break - visitor_data = try_get( - response, - lambda x: x['responseContext']['webResponseContextExtensionData']['ytConfigData']['visitorData'], - compat_str) or visitor_data + check_get_keys='onResponseReceivedEndpoints') - continuation_contents = dict_get(response, ('onResponseReceivedEndpoints', 'continuationContents')) + continuation_contents = traverse_obj( + response, 'onResponseReceivedEndpoints', expected_type=list, default=[]) continuation = None - if isinstance(continuation_contents, list): - for continuation_section in continuation_contents: - if not isinstance(continuation_section, dict): - continue - continuation_items = try_get( - continuation_section, - (lambda x: x['reloadContinuationItemsCommand']['continuationItems'], - lambda x: x['appendContinuationItemsAction']['continuationItems']), - list) or [] - if is_first_continuation: - continuation = extract_header(continuation_items) - is_first_continuation = False - if continuation: - break - continue - count = 0 - for count, entry in enumerate(extract_thread(continuation_items)): - yield entry - continuation = self._extract_continuation({'contents': continuation_items}) + for continuation_section in continuation_contents: + continuation_items = traverse_obj( + continuation_section, + (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'), + get_all=False, expected_type=list) or [] + if is_first_continuation: + continuation = extract_header(continuation_items) + is_first_continuation = False if continuation: - # Sometimes YouTube provides a continuation without any comments - # In most cases we end up just downloading these with very little comments to come. - if count == 0: - if not parent: - self.report_warning('No comments received - assuming end of comments') - continuation = None break + continue - # Deprecated response structure - elif isinstance(continuation_contents, dict): - known_continuation_renderers = ('itemSectionContinuation', 'commentRepliesContinuation') - for key, continuation_renderer in continuation_contents.items(): - if key not in known_continuation_renderers: - continue - if not isinstance(continuation_renderer, dict): - continue - if is_first_continuation: - header_continuation_items = [continuation_renderer.get('header') or {}] - continuation = extract_header(header_continuation_items) - is_first_continuation = False - if continuation: - break - - # Sometimes YouTube provides a continuation without any comments - # In most cases we end up just downloading these with very little comments to come. - count = 0 - for count, entry in enumerate(extract_thread(continuation_renderer.get('contents') or {})): - yield entry - continuation = self._extract_continuation(continuation_renderer) - if count == 0: - if not parent: - self.report_warning('No comments received - assuming end of comments') - continuation = None + for entry in extract_thread(continuation_items): + if not entry: + return + yield entry + continuation = self._extract_continuation({'contents': continuation_items}) + if continuation: break - @staticmethod - def _generate_comment_continuation(video_id): - """ - Generates initial comment section continuation token from given video id - """ - b64_vid_id = base64.b64encode(bytes(video_id.encode('utf-8'))) - parts = ('Eg0SCw==', b64_vid_id, 'GAYyJyIRIgs=', b64_vid_id, 'MAB4AjAAQhBjb21tZW50cy1zZWN0aW9u') - new_continuation_intlist = list(itertools.chain.from_iterable( - [bytes_to_intlist(base64.b64decode(part)) for part in parts])) - return base64.b64encode(intlist_to_bytes(new_continuation_intlist)).decode('utf-8') - def _get_comments(self, ytcfg, video_id, contents, webpage): """Entry for comment extraction""" def _real_comment_extract(contents): @@ -2848,7 +2810,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): info = { 'id': video_id, - 'title': self._live_title(video_title) if is_live else video_title, + 'title': video_title, 'formats': formats, 'thumbnails': thumbnails, # The best thumbnail that we are sure exists. Prevents unnecessary diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py index 98d15604d..9435920b2 100644 --- a/yt_dlp/extractor/zattoo.py +++ b/yt_dlp/extractor/zattoo.py @@ -187,7 +187,7 @@ class ZattooPlatformBaseIE(InfoExtractor): cid = self._extract_cid(video_id, channel_name) info_dict = { 'id': channel_name, - 'title': self._live_title(channel_name), + 'title': channel_name, 'is_live': True, } else: |