diff options
Diffstat (limited to 'hypervideo_dl/extractor/ceskatelevize.py')
-rw-r--r-- | hypervideo_dl/extractor/ceskatelevize.py | 77 |
1 files changed, 49 insertions, 28 deletions
diff --git a/hypervideo_dl/extractor/ceskatelevize.py b/hypervideo_dl/extractor/ceskatelevize.py index ddf66b2..be2b0bb 100644 --- a/hypervideo_dl/extractor/ceskatelevize.py +++ b/hypervideo_dl/extractor/ceskatelevize.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -12,6 +9,7 @@ from ..utils import ( ExtractorError, float_or_none, sanitized_Request, + str_or_none, traverse_obj, urlencode_postdata, USER_AGENTS, @@ -19,13 +17,13 @@ from ..utils import ( class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' _TESTS = [{ 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 'info_dict': { 'id': '61924494877028507', 'ext': 'mp4', - 'title': 'Hyde Park Civilizace: Bonus 01 - En', + 'title': 'Bonus 01 - En - Hyde Park Civilizace', 'description': 'English Subtittles', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 81.3, @@ -36,18 +34,29 @@ class CeskaTelevizeIE(InfoExtractor): }, }, { # live stream - 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', + 'url': 'http://www.ceskatelevize.cz/zive/ct1/', 'info_dict': { - 'id': 402, + 'id': '102', 'ext': 'mp4', - 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'title': r'ČT1 - živé vysílání online', + 'description': 'Sledujte živé vysílání kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.', 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, - 'skip': 'Georestricted to Czech Republic', + }, { + # another + 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', + 'only_matching': True, + 'info_dict': { + 'id': 402, + 'ext': 'mp4', + 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', + 'is_live': True, + }, + # 'skip': 'Georestricted to Czech Republic', }, { 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', 'only_matching': True, @@ -56,21 +65,21 @@ class CeskaTelevizeIE(InfoExtractor): 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', 'info_dict': { 'id': '215562210900007-bogotart', - 'title': 'Queer: Bogotart', - 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. Připravil Peter Serge Butko', + 'title': 'Bogotart - Queer', + 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti', }, 'playlist': [{ 'info_dict': { 'id': '61924494877311053', 'ext': 'mp4', - 'title': 'Queer: Bogotart (Varování 18+)', + 'title': 'Bogotart - Queer (Varování 18+)', 'duration': 11.9, }, }, { 'info_dict': { 'id': '61924494877068022', 'ext': 'mp4', - 'title': 'Queer: Bogotart (Queer)', + 'title': 'Bogotart - Queer (Queer)', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 1558.3, }, @@ -87,28 +96,42 @@ class CeskaTelevizeIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - parsed_url = compat_urllib_parse_urlparse(url) - webpage = self._download_webpage(url, playlist_id) - site_name = self._og_search_property('site_name', webpage, fatal=False, default=None) + webpage, urlh = self._download_webpage_handle(url, playlist_id) + parsed_url = compat_urllib_parse_urlparse(urlh.geturl()) + site_name = self._og_search_property('site_name', webpage, fatal=False, default='Česká televize') playlist_title = self._og_search_title(webpage, default=None) if site_name and playlist_title: - playlist_title = playlist_title.replace(f' — {site_name}', '', 1) + playlist_title = re.split(r'\s*[—|]\s*%s' % (site_name, ), playlist_title, 1)[0] playlist_description = self._og_search_description(webpage, default=None) if playlist_description: playlist_description = playlist_description.replace('\xa0', ' ') - if parsed_url.path.startswith('/porady/'): + type_ = 'IDEC' + if re.search(r'(^/porady|/zive)/', parsed_url.path): next_data = self._search_nextjs_data(webpage, playlist_id) - idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) + if '/zive/' in parsed_url.path: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False) + else: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) + if not idec: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False) + if idec: + type_ = 'bonus' if not idec: raise ExtractorError('Failed to find IDEC id') - iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id) - webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id, - query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec}) + iframe_hash = self._download_webpage( + 'https://www.ceskatelevize.cz/v-api/iframe-hash/', + playlist_id, note='Getting IFRAME hash') + query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, } + webpage = self._download_webpage( + 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', + playlist_id, note='Downloading player', query=query) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' if '%s</p>' % NOT_AVAILABLE_STRING in webpage: - raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + self.raise_geo_restricted(NOT_AVAILABLE_STRING) + if any(not_found in webpage for not_found in ('Neplatný parametr pro videopřehrávač', 'IDEC nebyl nalezen', )): + raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) type_ = None episode_id = None @@ -177,7 +200,6 @@ class CeskaTelevizeIE(InfoExtractor): is_live = item.get('type') == 'LIVE' formats = [] for format_id, stream_url in item.get('streamUrls', {}).items(): - stream_url = stream_url.replace('https://', 'http://') if 'playerType=flash' in stream_url: stream_formats = self._extract_m3u8_formats( stream_url, playlist_id, 'mp4', 'm3u8_native', @@ -199,7 +221,7 @@ class CeskaTelevizeIE(InfoExtractor): entries[num]['formats'].extend(formats) continue - item_id = item.get('id') or item['assetId'] + item_id = str_or_none(item.get('id') or item['assetId']) title = item['title'] duration = float_or_none(item.get('duration')) @@ -227,9 +249,8 @@ class CeskaTelevizeIE(InfoExtractor): 'is_live': is_live, }) - for e in entries: - self._sort_formats(e['formats']) - + if len(entries) == 1: + return entries[0] return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) def _get_subtitles(self, episode_id, subs): |