about | summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp/extractor/bilibili.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor/bilibili.py')
-rw-r--r-- yt_dlp/extractor/bilibili.py | 86
1 file changed, 73 insertions, 13 deletions
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 91d436dd8..faa2218ce 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -134,7 +134,7 @@ class BilibiliBaseIE(InfoExtractor):
class BiliBiliIE(BilibiliBaseIE):
- _VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P<id>[^/?#&]+)'
+ _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.bilibili.com/video/BV13x41117TL',
@@ -282,19 +282,60 @@ class BiliBiliIE(BilibiliBaseIE):
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
},
'params': {'skip_download': True},
+ }, {
+ 'note': 'video redirects to festival page',
+ 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
+ 'info_dict': {
+ 'id': 'BV1wP4y1P72h',
+ 'ext': 'mp4',
+ 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
+ 'timestamp': 1643947497,
+ 'upload_date': '20220204',
+ 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
+ 'uploader': '叨叨冯聊音乐',
+ 'duration': 246.719,
+ 'uploader_id': '528182630',
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ },
+ 'params': {'skip_download': True},
+ }, {
+ 'note': 'newer festival video',
+ 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
+ 'info_dict': {
+ 'id': 'BV1ay4y1d77f',
+ 'ext': 'mp4',
+ 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
+ 'timestamp': 1674273600,
+ 'upload_date': '20230121',
+ 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
+ 'uploader': '果蝇轰',
+ 'duration': 1111.722,
+ 'uploader_id': '8469526',
+ 'view_count': int,
+ 'like_count': int,
+ 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+ },
+ 'params': {'skip_download': True},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
- play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
- video_data = initial_state['videoData']
+ is_festival = 'videoData' not in initial_state
+ if is_festival:
+ video_data = initial_state['videoInfo']
+ else:
+ play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
+ video_data = initial_state['videoData']
+
video_id, title = video_data['bvid'], video_data.get('title')
# Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
- page_list_json = traverse_obj(
+ page_list_json = not is_festival and traverse_obj(
self._download_json(
'https://api.bilibili.com/x/player/pagelist', video_id,
fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
@@ -317,20 +358,39 @@ class BiliBiliIE(BilibiliBaseIE):
cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
+ festival_info = {}
+ if is_festival:
+ play_info = self._download_json(
+ 'https://api.bilibili.com/x/player/playurl', video_id,
+ query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
+ note='Extracting festival video formats')['data']
+
+ festival_info = traverse_obj(initial_state, {
+ 'uploader': ('videoInfo', 'upName'),
+ 'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
+ 'like_count': ('videoStatus', 'like', {int_or_none}),
+ 'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
+ }, get_all=False)
+
return {
+ **traverse_obj(initial_state, {
+ 'uploader': ('upData', 'name'),
+ 'uploader_id': ('upData', 'mid', {str_or_none}),
+ 'like_count': ('videoData', 'stat', 'like', {int_or_none}),
+ 'tags': ('tags', ..., 'tag_name'),
+ 'thumbnail': ('videoData', 'pic', {url_or_none}),
+ }),
+ **festival_info,
+ **traverse_obj(video_data, {
+ 'description': 'desc',
+ 'timestamp': ('pubdate', {int_or_none}),
+ 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
+ 'comment_count': ('stat', 'reply', {int_or_none}),
+ }, get_all=False),
'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
'formats': self.extract_formats(play_info),
'_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
'title': title,
- 'description': traverse_obj(initial_state, ('videoData', 'desc')),
- 'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')),
- 'uploader': traverse_obj(initial_state, ('upData', 'name')),
- 'uploader_id': traverse_obj(initial_state, ('upData', 'mid')),
- 'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')),
- 'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')),
- 'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')),
- 'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')),
- 'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')),
'duration': float_or_none(play_info.get('timelength'), scale=1000),
'chapters': self._get_chapters(aid, cid),
'subtitles': self.extract_subtitles(video_id, aid, cid),