aboutsummaryrefslogtreecommitdiffstats
path: root/hypervideo_dl/extractor/bibeltv.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/bibeltv.py')
-rw-r--r--hypervideo_dl/extractor/bibeltv.py202
1 files changed, 186 insertions, 16 deletions
diff --git a/hypervideo_dl/extractor/bibeltv.py b/hypervideo_dl/extractor/bibeltv.py
index fd20aad..34464da 100644
--- a/hypervideo_dl/extractor/bibeltv.py
+++ b/hypervideo_dl/extractor/bibeltv.py
@@ -1,27 +1,197 @@
+from functools import partial
+
from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ clean_html,
+ determine_ext,
+ format_field,
+ int_or_none,
+ js_to_json,
+ orderedSet,
+ parse_iso8601,
+ traverse_obj,
+ url_or_none,
+)
+
+
class BibelTVBaseIE(InfoExtractor):
    """Shared helpers for the bibeltv.de extractors defined in this module."""

    _GEO_COUNTRIES = ['AT', 'CH', 'DE']
    _GEO_BYPASS = False

    # Base URL of the JSON API queried by _extract_video_info
    API_URL = 'https://www.bibeltv.de/mediathek/api'
    # Sent verbatim as the Authorization header of API requests
    AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm'

    def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False):
        """Collect formats and subtitles from a list of media source entries.

        @param data     iterable of entries; the URL of each is read from its 'src' key
        @param crn_id   video id passed on to the manifest downloaders
        @param is_live  forwarded as `live` to the m3u8 extraction
        @returns        (formats, subtitles) tuple
        """
        formats = []
        subtitles = {}
        for media_url in traverse_obj(data, (..., 'src', {url_or_none})):
            media_ext = determine_ext(media_url)
            if media_ext == 'm3u8':
                m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                    media_url, crn_id, live=is_live)
                formats.extend(m3u8_formats)
                # Merge per language; dict.update() would replace the tracks
                # already collected for a language from an earlier manifest
                self._merge_subtitles(m3u8_subs, target=subtitles)
            elif media_ext == 'mpd':
                mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id)
                formats.extend(mpd_formats)
                self._merge_subtitles(mpd_subs, target=subtitles)
            elif media_ext == 'mp4':
                formats.append({'url': media_url})
            else:
                self.report_warning(f'Unknown format {media_ext!r}')

        return formats, subtitles

    @staticmethod
    def _extract_base_info(data):
        """Map a video metadata dict to the info-dict fields common to all results.

        Raises KeyError if `data` has no 'crn' entry; every other field is optional.
        """
        return {
            'id': data['crn'],
            **traverse_obj(data, {
                'title': 'title',
                'description': 'description',
                # 'duration' is divided by 1000 (presumably milliseconds -> seconds)
                'duration': ('duration', {partial(int_or_none, scale=1000)}),
                'timestamp': ('schedulingStart', {parse_iso8601}),
                'season_number': 'seasonNumber',
                'episode_number': 'episodeNumber',
                'view_count': 'viewCount',
                'like_count': 'likeCount',
            }),
            # orderedSet drops duplicate thumbnail entries while keeping their order
            'thumbnails': orderedSet(traverse_obj(data, ('images', ..., {
                'url': ('url', {url_or_none}),
            }))),
        }

    def _extract_url_info(self, data):
        """Build a 'url' type result pointing at the video page built from the 'slug'."""
        return {
            '_type': 'url',
            'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'),
            **self._extract_base_info(data),
        }

    def _extract_video_info(self, data):
        """Build a full 'video' type result for the given video metadata dict.

        Reports DRM when the 'drm' flag is set, then fetches the media sources
        from the API. The API request is non-fatal, so a failed request yields
        a result with no formats.
        """
        crn_id = data['crn']
        if data.get('drm'):
            self.report_drm(crn_id)

        json_data = self._download_json(
            format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id,
            headers={'Authorization': self.AUTH_TOKEN}, fatal=False,
            errnote='No formats available') or {}

        formats, subtitles = self._extract_formats_and_subtitles(
            traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id)

        return {
            '_type': 'video',
            **self._extract_base_info(data),
            'formats': formats,
            'subtitles': subtitles,
        }
+
+
class BibelTVVideoIE(BibelTVBaseIE):
    """Extractor for a single video page of the bibeltv.de Mediathek."""

    IE_NAME = 'bibeltv:video'
    IE_DESC = 'BibelTV single video'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P<id>\d+)[\w-]+'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege',
        'md5': 'ec1c07efe54353780512e8a4103b612e',
        'info_dict': {
            'id': '344436',
            'ext': 'mp4',
            'title': 'Alte Wege',
            'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9',
            'timestamp': 1677877071,
            'duration': 150.0,
            'upload_date': '20230303',
            'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg',
            'episode': 'Episode 1',
            'episode_number': 1,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'format': '6',
        },
    }]

    def _real_extract(self, url):
        """Read the first video entry from the page's Next.js data and extract it."""
        crn_id = self._match_id(url)
        page = self._download_webpage(url, crn_id)
        page_state = self._search_nextjs_data(page, crn_id)

        # Only the first entry of the 'videos' list is used, and only if it is a dict
        first_video = traverse_obj(
            page_state, ('props', 'pageProps', 'videoPageData', 'videos', 0, {dict}))
        if first_video:
            return self._extract_video_info(first_video)

        raise ExtractorError('Missing video data.')
+
+
class BibelTVSeriesIE(BibelTVBaseIE):
    """Extractor turning a bibeltv.de series page into a playlist of video URLs."""

    IE_NAME = 'bibeltv:series'
    IE_DESC = 'BibelTV series playlist'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P<id>\d+)[\w-]+'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag',
        'playlist_mincount': 400,
        'info_dict': {
            'id': '333485',
            'title': 'Ein Wunder für jeden Tag',
            'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist with one 'url' entry per video listed in the series data."""
        crn_id = self._match_id(url)
        page = self._download_webpage(url, crn_id)
        page_state = self._search_nextjs_data(page, crn_id)

        serie = traverse_obj(page_state, ('props', 'pageProps', 'seriePageData', {dict}))
        if not serie:
            raise ExtractorError('Missing series data.')

        # Each dict under 'videos' is converted to a 'url' type result
        entries = traverse_obj(serie, ('videos', ..., {dict}, {self._extract_url_info}))
        playlist_title = serie.get('title')
        playlist_description = clean_html(serie.get('description'))

        return self.playlist_result(entries, crn_id, playlist_title, playlist_description)
+
+
class BibelTVLiveIE(BibelTVBaseIE):
    """Extractor for the bibeltv.de livestream pages."""

    IE_NAME = 'bibeltv:live'
    IE_DESC = 'BibelTV live program'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/livestreams/bibeltv/',
        'info_dict': {
            'id': 'bibeltv',
            'ext': 'mp4',
            'title': 're:Bibel TV',
            'live_status': 'is_live',
            'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.bibeltv.de/livestreams/impuls/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Parse the stream description embedded in the page and extract its sources."""
        stream_id = self._match_id(url)
        webpage = self._download_webpage(url, stream_id)

        def _unescape_and_convert(jstring):
            # The matched object has its double quotes backslash-escaped in the
            # page; undo that before converting it to JSON
            return js_to_json(jstring.replace('\\"', '"'))

        stream_data = self._search_json(
            r'\\"video\\":', webpage, 'bibeltvData', stream_id,
            transform_source=_unescape_and_convert)

        sources = traverse_obj(stream_data, ('src', ...))
        formats, subtitles = self._extract_formats_and_subtitles(
            sources, stream_id, is_live=True)

        info = {
            'id': stream_id,
            'title': stream_data.get('title'),
            'thumbnail': stream_data.get('poster'),
            'is_live': True,
        }
        info['formats'] = formats
        info['subtitles'] = subtitles
        return info