diff options
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r-- | yt_dlp/extractor/bilibili.py | 6 | ||||
-rw-r--r-- | yt_dlp/extractor/canvas.py | 8 | ||||
-rw-r--r-- | yt_dlp/extractor/common.py | 8 | ||||
-rw-r--r-- | yt_dlp/extractor/dropout.py | 4 | ||||
-rw-r--r-- | yt_dlp/extractor/facebook.py | 6 | ||||
-rw-r--r-- | yt_dlp/extractor/generic.py | 6 | ||||
-rw-r--r-- | yt_dlp/extractor/limelight.py | 2 | ||||
-rw-r--r-- | yt_dlp/extractor/niconico.py | 2 | ||||
-rw-r--r-- | yt_dlp/extractor/tiktok.py | 11 | ||||
-rw-r--r-- | yt_dlp/extractor/yandexvideo.py | 1 | ||||
-rw-r--r-- | yt_dlp/extractor/youtube.py | 15 |
11 files changed, 37 insertions, 32 deletions
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index dd1ff512e..3212f3328 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -926,9 +926,9 @@ class BiliIntlIE(BiliIntlBaseIE): if season_id and not video_data: # Non-Bstation layout, read through episode list season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id) - video_data = next( - episode for episode in traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict) - if str(episode.get('episode_id')) == ep_id) + video_data = traverse_obj(season_json, + ('sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == ep_id), + expected_type=dict, get_all=False) return self._extract_video_info(video_data, ep_id=ep_id, aid=aid) diff --git a/yt_dlp/extractor/canvas.py b/yt_dlp/extractor/canvas.py index 31e7d7de6..8b9903774 100644 --- a/yt_dlp/extractor/canvas.py +++ b/yt_dlp/extractor/canvas.py @@ -245,10 +245,6 @@ class VrtNUIE(GigyaBaseIE): 'upload_date': '20200727', }, 'skip': 'This video is only available for registered users', - 'params': { - 'username': '<snip>', - 'password': '<snip>', - }, 'expected_warnings': ['is not a supported codec'], }, { # Only available via new API endpoint @@ -264,10 +260,6 @@ class VrtNUIE(GigyaBaseIE): 'episode_number': 5, }, 'skip': 'This video is only available for registered users', - 'params': { - 'username': '<snip>', - 'password': '<snip>', - }, 'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'], }] _NETRC_MACHINE = 'vrtnu' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 81688eb54..e2605c1f4 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -139,6 +139,8 @@ class InfoExtractor(object): for HDS - URL of the F4M manifest, for DASH - URL of the MPD manifest, for MSS - URL of the ISM manifest. + * manifest_stream_number (For internal use only) + The index of the stream in the manifest file * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). @@ -215,7 +217,7 @@ class InfoExtractor(object): (HTTP or RTMP) download. Boolean. * has_drm The format has DRM and cannot be downloaded. Boolean * downloader_options A dictionary of downloader options as - described in FileDownloader + described in FileDownloader (For internal use only) RTMP formats can also have the additional fields: page_url, app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn, rtmp_protocol, rtmp_real_time @@ -3684,9 +3686,9 @@ class InfoExtractor(object): def _merge_subtitle_items(subtitle_list1, subtitle_list2): """ Merge subtitle items for one language. Items with duplicated URLs/data will be dropped. """ - list1_data = set([item.get('url') or item['data'] for item in subtitle_list1]) + list1_data = set((item.get('url'), item.get('data')) for item in subtitle_list1) ret = list(subtitle_list1) - ret.extend([item for item in subtitle_list2 if (item.get('url') or item['data']) not in list1_data]) + ret.extend(item for item in subtitle_list2 if (item.get('url'), item.get('data')) not in list1_data) return ret @classmethod diff --git a/yt_dlp/extractor/dropout.py b/yt_dlp/extractor/dropout.py index a7442d8f0..2fa61950c 100644 --- a/yt_dlp/extractor/dropout.py +++ b/yt_dlp/extractor/dropout.py @@ -123,7 +123,7 @@ class DropoutIE(InfoExtractor): self._login(display_id) webpage = self._download_webpage(url, display_id, note='Downloading video webpage') finally: - self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out') + self._download_webpage('https://www.dropout.tv/logout', display_id, note='Logging out', fatal=False) embed_url = self._search_regex(r'embed_url:\s*["\'](.+?)["\']', webpage, 'embed url') thumbnail = self._og_search_thumbnail(webpage) @@ -139,7 +139,7 @@ class DropoutIE(InfoExtractor): '_type': 'url_transparent', 'ie_key': VHXEmbedIE.ie_key(), 'url': embed_url, - 'id': self._search_regex(r'embed.vhx.tv/videos/(.+?)\?', embed_url, 'id'), + 'id': self._search_regex(r'embed\.vhx\.tv/videos/(.+?)\?', embed_url, 'id'), 'display_id': display_id, 'title': title, 'description': self._html_search_meta('description', webpage, fatal=False), diff --git a/yt_dlp/extractor/facebook.py b/yt_dlp/extractor/facebook.py index 2deed585f..5e0e2facf 100644 --- a/yt_dlp/extractor/facebook.py +++ b/yt_dlp/extractor/facebook.py @@ -397,8 +397,10 @@ class FacebookIE(InfoExtractor): r'handleWithCustomApplyEach\(\s*ScheduledApplyEach\s*,\s*(\{.+?\})\s*\);', webpage)] post = traverse_obj(post_data, ( ..., 'require', ..., ..., ..., '__bbox', 'result', 'data'), expected_type=dict) or [] - media = [m for m in traverse_obj(post, (..., 'attachments', ..., 'media'), expected_type=dict) or [] - if str(m.get('id')) == video_id and m.get('__typename') == 'Video'] + media = traverse_obj( + post, + (..., 'attachments', ..., 'media', lambda _, m: str(m['id']) == video_id and m['__typename'] == 'Video'), + expected_type=dict) title = get_first(media, ('title', 'text')) description = get_first(media, ('creation_story', 'comet_sections', 'message', 'story', 'message', 'text')) uploader_data = get_first(media, 'owner') or get_first(post, ('node', 'actors', ...)) or {} diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2c503e581..bd56ad289 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2523,7 +2523,7 @@ class GenericIE(InfoExtractor): 'title': 'Riku ja Tunna lähtevät peurajahtiin tv:stä tutun biologin kanssa – metsästysreissu huipentuu kasvissyöjän painajaiseen!', 'thumbnail': r're:^https?://.+\.jpg$', 'duration': 108, - 'series' : 'Madventures Suomi', + 'series': 'Madventures Suomi', 'description': 'md5:aa55b44bd06a1e337a6f1d0b46507381', 'categories': ['Matkailu', 'Elämäntyyli'], 'age_limit': 0, @@ -3886,8 +3886,8 @@ class GenericIE(InfoExtractor): if RtmpIE.suitable(vurl): return True vpath = compat_urlparse.urlparse(vurl).path - vext = determine_ext(vpath) - return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml') + vext = determine_ext(vpath, None) + return vext not in (None, 'swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml') def filter_video(urls): return list(filter(check_video, urls)) diff --git a/yt_dlp/extractor/limelight.py b/yt_dlp/extractor/limelight.py index 369141d67..b20681ad1 100644 --- a/yt_dlp/extractor/limelight.py +++ b/yt_dlp/extractor/limelight.py @@ -194,7 +194,7 @@ class LimelightBaseIE(InfoExtractor): cc_url = cc.get('webvttFileUrl') if not cc_url: continue - lang = cc.get('languageCode') or self._search_regex(r'/[a-z]{2}\.vtt', cc_url, 'lang', default='en') + lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en') subtitles.setdefault(lang, []).append({ 'url': cc_url, }) diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index a5a1a01e0..4eb6ed070 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -469,7 +469,7 @@ class NiconicoIE(InfoExtractor): comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey')) user_id_str = session_api_data.get('serviceUserId') - thread_ids = [x for x in traverse_obj(api_data, ('comment', 'threads')) or [] if x['isActive']] + thread_ids = traverse_obj(api_data, ('comment', 'threads', lambda _, v: v['isActive'])) raw_danmaku = self._extract_all_comments(video_id, thread_ids, user_id_str, comment_user_key) if not raw_danmaku: self.report_warning(f'Failed to get comments. {bug_reports_message()}') diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 6f8c32882..c1d6c5477 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -264,7 +264,7 @@ class TikTokBaseIE(InfoExtractor): return { 'id': aweme_id, 'title': aweme_detail.get('desc'), - 'description': aweme_detail['desc'], + 'description': aweme_detail.get('desc'), 'view_count': int_or_none(stats_info.get('play_count')), 'like_count': int_or_none(stats_info.get('digg_count')), 'repost_count': int_or_none(stats_info.get('share_count')), @@ -387,6 +387,9 @@ class TikTokIE(TikTokBaseIE): 'like_count': int, 'repost_count': int, 'comment_count': int, + 'artist': 'Ysrbeats', + 'album': 'Lehanga', + 'track': 'Lehanga', } }, { 'url': 'https://www.tiktok.com/@patroxofficial/video/6742501081818877190?langCountry=en', @@ -410,6 +413,8 @@ class TikTokIE(TikTokBaseIE): 'like_count': int, 'repost_count': int, 'comment_count': int, + 'artist': 'Evan Todd, Jessica Keenan Wynn, Alice Lee, Barrett Wilbert Weed & Jon Eidson', + 'track': 'Big Fun', } }, { # Banned audio, only available on the app @@ -463,7 +468,7 @@ class TikTokIE(TikTokBaseIE): 'info_dict': { 'id': '7059698374567611694', 'ext': 'mp4', - 'title': 'N/A', + 'title': 'tiktok video #7059698374567611694', 'description': '', 'uploader': 'pokemonlife22', 'creator': 'Pokemon', @@ -480,7 +485,7 @@ class TikTokIE(TikTokBaseIE): 'repost_count': int, 'comment_count': int, }, - 'expected_warnings': ['Video not available'] + 'expected_warnings': ['Video not available', 'Creating a generic title'] }, { # Auto-captions available 'url': 'https://www.tiktok.com/@hankgreen1/video/7047596209028074758', diff --git a/yt_dlp/extractor/yandexvideo.py b/yt_dlp/extractor/yandexvideo.py index a101af67e..7d3966bf1 100644 --- a/yt_dlp/extractor/yandexvideo.py +++ b/yt_dlp/extractor/yandexvideo.py @@ -163,7 +163,6 @@ class YandexVideoPreviewIE(InfoExtractor): 'thumbnail': 'https://i.mycdn.me/videoPreview?id=544866765315&type=37&idx=13&tkn=TY5qjLYZHxpmcnK8U2LgzYkgmaU&fn=external_8', 'uploader_id': '481054701571', 'title': 'LOFT - summer, summer, summer HD', - 'manifest_stream_number': 0, 'uploader': 'АРТЁМ КУДРОВ', }, }, { # youtube diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 485849ba9..017554c88 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -837,17 +837,20 @@ class YoutubeBaseInfoExtractor(InfoExtractor): uploader = self._get_text(renderer, 'ownerText', 'shortBylineText') channel_id = traverse_obj( - renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False) + renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), + expected_type=str, get_all=False) timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText') scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)) overlay_style = traverse_obj( - renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str) + renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), + get_all=False, expected_type=str) badges = self._extract_badges(renderer) thumbnails = self._extract_thumbnails(renderer, 'thumbnail') navigation_url = urljoin('https://www.youtube.com/', traverse_obj( - renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), expected_type=str)) + renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), + expected_type=str)) or '' url = f'https://www.youtube.com/watch?v={video_id}' - if overlay_style == 'SHORTS' or (navigation_url and '/shorts/' in navigation_url): + if overlay_style == 'SHORTS' or '/shorts/' in navigation_url: url = f'https://www.youtube.com/shorts/{video_id}' return { @@ -862,7 +865,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'uploader': uploader, 'channel_id': channel_id, 'thumbnails': thumbnails, - 'upload_date': strftime_or_none(timestamp, '%Y%m%d') if self._configuration_arg('approximate_date', ie_key='youtubetab') else None, + 'upload_date': (strftime_or_none(timestamp, '%Y%m%d') + if self._configuration_arg('approximate_date', ie_key='youtubetab') + else None), 'live_status': ('is_upcoming' if scheduled_timestamp is not None else 'was_live' if 'streamed' in time_text.lower() else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges |