aboutsummaryrefslogtreecommitdiffstats
path: root/hypervideo_dl/extractor/tvp.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/tvp.py')
-rw-r--r--hypervideo_dl/extractor/tvp.py130
1 files changed, 86 insertions, 44 deletions
diff --git a/hypervideo_dl/extractor/tvp.py b/hypervideo_dl/extractor/tvp.py
index 8483564..2aa0dd8 100644
--- a/hypervideo_dl/extractor/tvp.py
+++ b/hypervideo_dl/extractor/tvp.py
@@ -268,8 +268,11 @@ class TVPIE(InfoExtractor):
class TVPStreamIE(InfoExtractor):
IE_NAME = 'tvp:stream'
- _VALID_URL = r'(?:tvpstream:|https?://tvpstream\.vod\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
+ _VALID_URL = r'(?:tvpstream:|https?://(?:tvpstream\.vod|stream)\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
_TESTS = [{
+ 'url': 'https://stream.tvp.pl/?channel_id=56969941',
+ 'only_matching': True,
+ }, {
# untestable as "video" id changes many times across a day
'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
'only_matching': True,
@@ -285,28 +288,21 @@ class TVPStreamIE(InfoExtractor):
'only_matching': True,
}]
- _PLAYER_BOX_RE = r'<div\s[^>]*id\s*=\s*["\']?tvp_player_box["\']?[^>]+data-%s-id\s*=\s*["\']?(\d+)'
- _BUTTON_RE = r'<div\s[^>]*data-channel-id=["\']?%s["\']?[^>]*\sdata-title=(?:"([^"]*)"|\'([^\']*)\')[^>]*\sdata-stationname=(?:"([^"]*)"|\'([^\']*)\')'
-
def _real_extract(self, url):
channel_id = self._match_id(url)
- channel_url = self._proto_relative_url('//tvpstream.vod.tvp.pl/?channel_id=%s' % channel_id or 'default')
- webpage = self._download_webpage(channel_url, channel_id, 'Downloading channel webpage')
- if not channel_id:
- channel_id = self._search_regex(self._PLAYER_BOX_RE % 'channel',
- webpage, 'default channel id')
- video_id = self._search_regex(self._PLAYER_BOX_RE % 'video',
- webpage, 'video id')
- audition_title, station_name = self._search_regex(
- self._BUTTON_RE % (re.escape(channel_id)), webpage,
- 'audition title and station name',
- group=(1, 2))
+ channel_url = self._proto_relative_url('//stream.tvp.pl/?channel_id=%s' % channel_id or 'default')
+ webpage = self._download_webpage(channel_url, channel_id or 'default', 'Downloading channel webpage')
+ channels = self._search_json(
+ r'window\.__channels\s*=', webpage, 'channel list', channel_id,
+ contains_pattern=r'\[\s*{(?s:.+)}\s*]')
+ channel = traverse_obj(channels, (lambda _, v: channel_id == str(v['id'])), get_all=False) if channel_id else channels[0]
+ audition = traverse_obj(channel, ('items', lambda _, v: v['is_live'] is True), get_all=False)
return {
'_type': 'url_transparent',
- 'id': channel_id,
- 'url': 'tvp:%s' % video_id,
- 'title': audition_title,
- 'alt_title': station_name,
+ 'id': channel_id or channel['id'],
+ 'url': 'tvp:%s' % audition['video_id'],
+ 'title': audition.get('title'),
+ 'alt_title': channel.get('title'),
'is_live': True,
'ie_key': 'TVPEmbed',
}
@@ -486,21 +482,34 @@ class TVPEmbedIE(InfoExtractor):
class TVPVODBaseIE(InfoExtractor):
_API_BASE_URL = 'https://vod.tvp.pl/api/products'
- def _call_api(self, resource, video_id, **kwargs):
- return self._download_json(
+ def _call_api(self, resource, video_id, query={}, **kwargs):
+ is_valid = lambda x: 200 <= x < 300
+ document, urlh = self._download_json_handle(
f'{self._API_BASE_URL}/{resource}', video_id,
- query={'lang': 'pl', 'platform': 'BROWSER'}, **kwargs)
-
- def _parse_video(self, video):
- return {
- '_type': 'url',
- 'url': 'tvp:' + video['externalUid'],
- 'ie_key': TVPEmbedIE.ie_key(),
- 'title': video.get('title'),
- 'description': traverse_obj(video, ('lead', 'description')),
- 'age_limit': int_or_none(video.get('rating')),
- 'duration': int_or_none(video.get('duration')),
- }
+ query={'lang': 'pl', 'platform': 'BROWSER', **query},
+ expected_status=lambda x: is_valid(x) or 400 <= x < 500, **kwargs)
+ if is_valid(urlh.status):
+ return document
+ raise ExtractorError(f'Woronicza said: {document.get("code")} (HTTP {urlh.status})')
+
+ def _parse_video(self, video, with_url=True):
+ info_dict = traverse_obj(video, {
+ 'id': ('id', {str_or_none}),
+ 'title': 'title',
+ 'age_limit': ('rating', {int_or_none}),
+ 'duration': ('duration', {int_or_none}),
+ 'episode_number': ('number', {int_or_none}),
+ 'series': ('season', 'serial', 'title', {str_or_none}),
+ 'thumbnails': ('images', ..., ..., {'url': ('url', {url_or_none})}),
+ })
+ info_dict['description'] = clean_html(dict_get(video, ('lead', 'description')))
+ if with_url:
+ info_dict.update({
+ '_type': 'url',
+ 'url': video['webUrl'],
+ 'ie_key': TVPVODVideoIE.ie_key(),
+ })
+ return info_dict
class TVPVODVideoIE(TVPVODBaseIE):
@@ -510,37 +519,70 @@ class TVPVODVideoIE(TVPVODBaseIE):
_TESTS = [{
'url': 'https://vod.tvp.pl/dla-dzieci,24/laboratorium-alchemika-odcinki,309338/odcinek-24,S01E24,311357',
'info_dict': {
- 'id': '60468609',
+ 'id': '311357',
'ext': 'mp4',
- 'title': 'Laboratorium alchemika, Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
+ 'title': 'Tusze termiczne. Jak zobaczyć niewidoczne. Odcinek 24',
'description': 'md5:1d4098d3e537092ccbac1abf49b7cd4c',
'duration': 300,
'episode_number': 24,
'episode': 'Episode 24',
'age_limit': 0,
'series': 'Laboratorium alchemika',
- 'thumbnail': 're:https://.+',
+ 'thumbnail': 're:https?://.+',
},
+ 'params': {'skip_download': 'm3u8'},
}, {
'url': 'https://vod.tvp.pl/filmy-dokumentalne,163/ukrainski-sluga-narodu,339667',
'info_dict': {
- 'id': '51640077',
+ 'id': '339667',
'ext': 'mp4',
- 'title': 'Ukraiński sługa narodu, Ukraiński sługa narodu',
- 'series': 'Ukraiński sługa narodu',
+ 'title': 'Ukraiński sługa narodu',
'description': 'md5:b7940c0a8e439b0c81653a986f544ef3',
'age_limit': 12,
- 'episode': 'Episode 0',
- 'episode_number': 0,
'duration': 3051,
- 'thumbnail': 're:https://.+',
+ 'thumbnail': 're:https?://.+',
+ 'subtitles': 'count:2',
+ },
+ 'params': {'skip_download': 'm3u8'},
+ }, {
+ 'note': 'embed fails with "payment required"',
+ 'url': 'https://vod.tvp.pl/seriale,18/polowanie-na-cmy-odcinki,390116/odcinek-7,S01E07,398869',
+ 'info_dict': {
+ 'id': '398869',
+ 'ext': 'mp4',
+ 'title': 'odc. 7',
+ 'description': 'md5:dd2bb33f023dc5c2fbaddfbe4cb5dba0',
+ 'duration': 2750,
+ 'age_limit': 16,
+ 'series': 'Polowanie na ćmy',
+ 'episode_number': 7,
+ 'episode': 'Episode 7',
+ 'thumbnail': 're:https?://.+',
},
+ 'params': {'skip_download': 'm3u8'},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
- return self._parse_video(self._call_api(f'vods/{video_id}', video_id))
+ info_dict = self._parse_video(self._call_api(f'vods/{video_id}', video_id), with_url=False)
+
+ playlist = self._call_api(f'{video_id}/videos/playlist', video_id, query={'videoType': 'MOVIE'})
+
+ info_dict['formats'] = []
+ for manifest_url in traverse_obj(playlist, ('sources', 'HLS', ..., 'src')):
+ info_dict['formats'].extend(self._extract_m3u8_formats(manifest_url, video_id, fatal=False))
+ for manifest_url in traverse_obj(playlist, ('sources', 'DASH', ..., 'src')):
+ info_dict['formats'].extend(self._extract_mpd_formats(manifest_url, video_id, fatal=False))
+
+ info_dict['subtitles'] = {}
+ for sub in playlist.get('subtitles') or []:
+ info_dict['subtitles'].setdefault(sub.get('language') or 'und', []).append({
+ 'url': sub['url'],
+ 'ext': 'ttml',
+ })
+
+ return info_dict
class TVPVODSeriesIE(TVPVODBaseIE):
@@ -555,7 +597,7 @@ class TVPVODSeriesIE(TVPVODBaseIE):
'age_limit': 12,
'categories': ['seriale'],
},
- 'playlist_count': 129,
+ 'playlist_count': 130,
}, {
'url': 'https://vod.tvp.pl/programy,88/rolnik-szuka-zony-odcinki,284514',
'only_matching': True,