aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/bilibili.py
diff options
context:
space:
mode:
authorAshish Gupta <39122144+Ashish0804@users.noreply.github.com>2021-09-11 18:59:48 +0530
committerGitHub <noreply@github.com>2021-09-11 18:59:48 +0530
commit16f7e6be3a9d38352c630544b91c1e86b8cf2332 (patch)
treeab21c1fc5233dc77430668c89d6b34af77e6257e /yt_dlp/extractor/bilibili.py
parentffecd3034b00671dc9438ff70474dcc57220e558 (diff)
downloadhypervideo-pre-16f7e6be3a9d38352c630544b91c1e86b8cf2332.tar.lz
hypervideo-pre-16f7e6be3a9d38352c630544b91c1e86b8cf2332.tar.xz
hypervideo-pre-16f7e6be3a9d38352c630544b91c1e86b8cf2332.zip
[bilibili]Add BiliIntlIE and BiliIntlSeriesIE (#907)
Closes #611 Authored by: Ashish0804
Diffstat (limited to 'yt_dlp/extractor/bilibili.py')
-rw-r--r--yt_dlp/extractor/bilibili.py140
1 files changed, 140 insertions, 0 deletions
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 8aab6a01b..0a81452c3 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -23,6 +23,7 @@ from ..utils import (
try_get,
smuggle_url,
str_or_none,
+ str_to_int,
strip_jsonp,
unified_timestamp,
unsmuggle_url,
@@ -774,3 +775,142 @@ class BiliBiliPlayerIE(InfoExtractor):
return self.url_result(
'http://www.bilibili.tv/video/av%s/' % video_id,
ie=BiliBiliIE.ie_key(), video_id=video_id)
+
+
+class BiliIntlBaseIE(InfoExtractor):
+ _API_URL = 'https://api.bili{}/intl/gateway{}'
+
+ def _call_api(self, type, endpoint, id):
+ return self._download_json(self._API_URL.format(type, endpoint), id)['data']
+
+ def _get_subtitles(self, type, ep_id):
+ sub_json = self._call_api(type, f'/m/subtitle?ep_id={ep_id}&platform=web', ep_id)
+ subtitles = {}
+ for sub in sub_json.get('subtitles', []):
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subtitles.setdefault(sub.get('key', 'en'), []).append({
+ 'url': sub_url,
+ })
+ return subtitles
+
+ def _get_formats(self, type, ep_id):
+ video_json = self._call_api(type, f'/web/playurl?ep_id={ep_id}&platform=web', ep_id)
+ if not video_json:
+ self.raise_login_required(method='cookies')
+ video_json = video_json['playurl']
+ formats = []
+ for vid in video_json.get('video', []):
+ video_res = vid.get('video_resource') or {}
+ video_info = vid.get('stream_info') or {}
+ if not video_res.get('url'):
+ continue
+ formats.append({
+ 'url': video_res['url'],
+ 'ext': 'mp4',
+ 'format_note': video_info.get('desc_words'),
+ 'width': video_res.get('width'),
+ 'height': video_res.get('height'),
+ 'vbr': video_res.get('bandwidth'),
+ 'acodec': 'none',
+ 'vcodec': video_res.get('codecs'),
+ 'filesize': video_res.get('size'),
+ })
+ for aud in video_json.get('audio_resource', []):
+ if not aud.get('url'):
+ continue
+ formats.append({
+ 'url': aud['url'],
+ 'ext': 'mp4',
+ 'abr': aud.get('bandwidth'),
+ 'acodec': aud.get('codecs'),
+ 'vcodec': 'none',
+ 'filesize': aud.get('size'),
+ })
+
+ self._sort_formats(formats)
+ return formats
+
+ def _extract_ep_info(self, type, episode_data, ep_id):
+ return {
+ 'id': ep_id,
+ 'title': episode_data.get('long_title') or episode_data['title'],
+ 'thumbnail': episode_data.get('cover'),
+ 'episode_number': str_to_int(episode_data.get('title')),
+ 'formats': self._get_formats(type, ep_id),
+ 'subtitles': self._get_subtitles(type, ep_id),
+ 'extractor_key': BiliIntlIE.ie_key(),
+ }
+
+
+class BiliIntlIE(BiliIntlBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bili(?P<type>bili\.tv|intl.com)/(?:[a-z]{2}/)?play/(?P<season_id>\d+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.tv/en/play/34613/341736',
+ 'info_dict': {
+ 'id': '341736',
+ 'ext': 'mp4',
+ 'title': 'The First Night',
+ 'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png',
+ 'episode_number': 2,
+ },
+ 'params': {
+ 'format': 'bv',
+ },
+ }, {
+ 'url': 'https://www.biliintl.com/en/play/34613/341736',
+ 'info_dict': {
+ 'id': '341736',
+ 'ext': 'mp4',
+ 'title': 'The First Night',
+ 'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png',
+ 'episode_number': 2,
+ },
+ 'params': {
+ 'format': 'bv',
+ },
+ }]
+
+ def _real_extract(self, url):
+ type, season_id, id = self._match_valid_url(url).groups()
+ data_json = self._call_api(type, f'/web/view/ogv_collection?season_id={season_id}', id)
+ episode_data = next(
+ episode for episode in data_json.get('episodes', [])
+ if str(episode.get('ep_id')) == id)
+ return self._extract_ep_info(type, episode_data, id)
+
+
+class BiliIntlSeriesIE(BiliIntlBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bili(?P<type>bili\.tv|intl.com)/(?:[a-z]{2}/)?play/(?P<id>\d+)$'
+ _TESTS = [{
+ 'url': 'https://www.bilibili.tv/en/play/34613',
+ 'playlist_mincount': 15,
+ 'info_dict': {
+ 'id': '34613',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'bv',
+ },
+ }, {
+ 'url': 'https://www.biliintl.com/en/play/34613',
+ 'playlist_mincount': 15,
+ 'info_dict': {
+ 'id': '34613',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'bv',
+ },
+ }]
+
+ def _entries(self, id, type):
+ data_json = self._call_api(type, f'/web/view/ogv_collection?season_id={id}', id)
+ for episode in data_json.get('episodes', []):
+ episode_id = str(episode.get('ep_id'))
+ yield self._extract_ep_info(type, episode, episode_id)
+
+ def _real_extract(self, url):
+ type, id = self._match_valid_url(url).groups()
+ return self.playlist_result(self._entries(id, type), playlist_id=id)