diff options
author | Locke <hamannsun@gmail.com> | 2022-09-16 23:59:02 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-16 21:29:02 +0530 |
commit | 2b9d02167fdf2fbe5bd8306144ab45027da263c1 (patch) | |
tree | a2f01329beaef68390174e7a4d27ebc860954e0d | |
parent | 2314b4d89fc111ddfcb25937210f1f1c2390cc4a (diff) | |
download | hypervideo-pre-2b9d02167fdf2fbe5bd8306144ab45027da263c1.tar.lz hypervideo-pre-2b9d02167fdf2fbe5bd8306144ab45027da263c1.tar.xz hypervideo-pre-2b9d02167fdf2fbe5bd8306144ab45027da263c1.zip |
[extractor/bilibili] Add space.bilibili extractors (#4468)
Authored by: lockmatrix
-rw-r--r-- | yt_dlp/extractor/_extractors.py | 4 | ||||
-rw-r--r-- | yt_dlp/extractor/bilibili.py | 144 |
2 files changed, 119 insertions, 29 deletions
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index aedf063f6..6bf769a9e 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -190,7 +190,9 @@ from .bilibili import ( BilibiliAudioIE, BilibiliAudioAlbumIE, BiliBiliPlayerIE, - BilibiliChannelIE, + BilibiliSpaceVideoIE, + BilibiliSpaceAudioIE, + BilibiliSpacePlaylistIE, BiliIntlIE, BiliIntlSeriesIE, BiliLiveIE, diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 2c29bf3ce..2e03aee85 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -2,8 +2,8 @@ import base64 import hashlib import itertools import functools -import re import math +import re from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( @@ -13,23 +13,24 @@ from ..compat import ( ) from ..utils import ( ExtractorError, + InAdvancePagedList, + OnDemandPagedList, filter_dict, - int_or_none, float_or_none, + int_or_none, mimetype2ext, + parse_count, parse_iso8601, qualities, - traverse_obj, - parse_count, smuggle_url, srt_subtitles_timecode, str_or_none, strip_jsonp, + traverse_obj, unified_timestamp, unsmuggle_url, urlencode_postdata, url_or_none, - OnDemandPagedList ) @@ -505,39 +506,126 @@ class BiliBiliBangumiIE(InfoExtractor): season_info.get('bangumi_title'), season_info.get('evaluate')) -class BilibiliChannelIE(InfoExtractor): - _VALID_URL = r'https?://space.bilibili\.com/(?P<id>\d+)' - _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp" +class BilibiliSpaceBaseIE(InfoExtractor): + def _extract_playlist(self, fetch_page, get_metadata, get_entries): + first_page = fetch_page(1) + metadata = get_metadata(first_page) + + paged_list = InAdvancePagedList( + lambda idx: get_entries(fetch_page(idx) if idx > 1 else first_page), + metadata['page_count'], metadata['page_size']) + + return metadata, paged_list + + +class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE): + _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)' _TESTS = [{ 'url': 'https://space.bilibili.com/3985676/video', - 'info_dict': {}, - 'playlist_mincount': 112, + 'info_dict': { + 'id': '3985676', + }, + 'playlist_mincount': 178, }] - def _entries(self, list_id): - count, max_count = 0, None + def _real_extract(self, url): + playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video') + if not is_video_url: + self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. ' + 'To download audios, add a "/audio" to the URL') + + def fetch_page(page_idx): + return self._download_json( + 'https://api.bilibili.com/x/space/arc/search', playlist_id, + note=f'Downloading page {page_idx}', + query={'mid': playlist_id, 'pn': page_idx, 'jsonp': 'jsonp'})['data'] + + def get_metadata(page_data): + page_size = page_data['page']['ps'] + entry_count = page_data['page']['count'] + return { + 'page_count': math.ceil(entry_count / page_size), + 'page_size': page_size, + } - for page_num in itertools.count(1): - data = self._download_json( - self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}')['data'] + def get_entries(page_data): + for entry in traverse_obj(page_data, ('list', 'vlist')) or []: + yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid']) - max_count = max_count or traverse_obj(data, ('page', 'count')) + metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries) + return self.playlist_result(paged_list, playlist_id) - entries = traverse_obj(data, ('list', 'vlist')) - if not entries: - return - for entry in entries: - yield self.url_result( - 'https://www.bilibili.com/video/%s' % entry['bvid'], - BiliBiliIE.ie_key(), entry['bvid']) - count += len(entries) - if max_count and count >= max_count: - return +class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE): + _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio' + _TESTS = [{ + 'url': 'https://space.bilibili.com/3985676/audio', + 'info_dict': { + 'id': '3985676', + }, + 'playlist_mincount': 1, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + def fetch_page(page_idx): + return self._download_json( + 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id, + note=f'Downloading page {page_idx}', + query={'uid': playlist_id, 'pn': page_idx, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data'] + + def get_metadata(page_data): + return { + 'page_count': page_data['pageCount'], + 'page_size': page_data['pageSize'], + } + + def get_entries(page_data): + for entry in page_data.get('data', []): + yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id']) + + metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries) + return self.playlist_result(paged_list, playlist_id) + + +class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE): + _VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)' + _TESTS = [{ + 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445', + 'info_dict': { + 'id': '2142762_57445', + 'title': '《底特律 变人》' + }, + 'playlist_mincount': 31, + }] def _real_extract(self, url): - list_id = self._match_id(url) - return self.playlist_result(self._entries(list_id), list_id) + mid, sid = self._match_valid_url(url).group('mid', 'sid') + playlist_id = f'{mid}_{sid}' + + def fetch_page(page_idx): + return self._download_json( + 'https://api.bilibili.com/x/polymer/space/seasons_archives_list', + playlist_id, note=f'Downloading page {page_idx}', + query={'mid': mid, 'season_id': sid, 'page_num': page_idx, 'page_size': 30})['data'] + + def get_metadata(page_data): + page_size = page_data['page']['page_size'] + entry_count = page_data['page']['total'] + return { + 'page_count': math.ceil(entry_count / page_size), + 'page_size': page_size, + 'title': traverse_obj(page_data, ('meta', 'name')) + } + + def get_entries(page_data): + for entry in page_data.get('archives', []): + yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', + BiliBiliIE, entry['bvid']) + + metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries) + return self.playlist_result(paged_list, playlist_id, metadata['title']) class BilibiliCategoryIE(InfoExtractor): |