diff options
author | Jesús <heckyel@hyperbola.info> | 2021-10-18 15:24:21 -0500 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2021-10-18 15:24:21 -0500 |
commit | 5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e (patch) | |
tree | 65209bc739db35e31f1c9b5b868eb5df4fe12ae3 /hypervideo_dl/extractor/tvnow.py | |
parent | 27fe903c511691c078942bef5ee9a05a43b15c8f (diff) | |
download | hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.lz hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.tar.xz hypervideo-5122028a4bcac4ae577ef7fbd55ccad5cb34ef5e.zip |
update from upstream
Diffstat (limited to 'hypervideo_dl/extractor/tvnow.py')
-rw-r--r-- | hypervideo_dl/extractor/tvnow.py | 172 |
1 files changed, 165 insertions, 7 deletions
diff --git a/hypervideo_dl/extractor/tvnow.py b/hypervideo_dl/extractor/tvnow.py index 9c8a8a0..b318184 100644 --- a/hypervideo_dl/extractor/tvnow.py +++ b/hypervideo_dl/extractor/tvnow.py @@ -7,10 +7,12 @@ from .common import InfoExtractor from ..compat import compat_str from ..utils import ( ExtractorError, + get_element_by_id, int_or_none, parse_iso8601, parse_duration, str_or_none, + try_get, update_url_query, urljoin, ) @@ -67,7 +69,7 @@ class TVNowBaseIE(InfoExtractor): if formats: break else: - if info.get('isDrm'): + if not self.get_param('allow_unplayable_formats') and info.get('isDrm'): raise ExtractorError( 'Video %s is DRM protected' % video_id, expected=True) if info.get('geoblocked'): @@ -167,7 +169,7 @@ class TVNowIE(TVNowBaseIE): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) + mobj = self._match_valid_url(url) display_id = '%s/%s' % mobj.group(2, 3) info = self._call_api( @@ -194,7 +196,7 @@ class TVNowNewIE(InfoExtractor): }] def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) + mobj = self._match_valid_url(url) base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url')) show, episode = mobj.group('show', 'episode') return self.url_result( @@ -204,6 +206,86 @@ class TVNowNewIE(InfoExtractor): ie=TVNowIE.ie_key(), video_id=mobj.group('id')) +class TVNowFilmIE(TVNowBaseIE): + _VALID_URL = r'''(?x) + (?P<base_url>https?:// + (?:www\.)?tvnow\.(?:de|at|ch)/ + (?:filme))/ + (?P<title>[^/?$&]+)-(?P<id>\d+) + ''' + _TESTS = [{ + 'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959', + 'info_dict': { + 'id': '1426690', + 'display_id': 'lord-of-war-haendler-des-todes', + 'ext': 'mp4', + 'title': 'Lord of War', + 'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9', + 'timestamp': 1550010000, + 'upload_date': '20190212', + 'duration': 7016, + }, + }, { + 'url': 'https://www.tvnow.de/filme/the-machinist-12157', + 'info_dict': { + 'id': '328160', + 'display_id': 'the-machinist', + 'ext': 'mp4', + 'title': 'The Machinist', + 'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28', + 'timestamp': 1496469720, + 'upload_date': '20170603', + 'duration': 5836, + }, + }, { + 'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777', + 'only_matching': True, # DRM protected + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + display_id = mobj.group('title') + + webpage = self._download_webpage(url, display_id, fatal=False) + if not webpage: + raise ExtractorError('Cannot download "%s"' % url, expected=True) + + json_text = get_element_by_id('now-web-state', webpage) + if not json_text: + raise ExtractorError('Cannot read video data', expected=True) + + json_data = self._parse_json( + json_text, + display_id, + transform_source=lambda x: x.replace('&q;', '"'), + fatal=False) + if not json_data: + raise ExtractorError('Cannot read video data', expected=True) + + player_key = next( + (key for key in json_data.keys() if 'module/player' in key), + None) + page_key = next( + (key for key in json_data.keys() if 'page/filme' in key), + None) + movie_id = try_get( + json_data, + [ + lambda x: x[player_key]['body']['id'], + lambda x: x[page_key]['body']['modules'][0]['id'], + lambda x: x[page_key]['body']['modules'][1]['id']], + int) + if not movie_id: + raise ExtractorError('Cannot extract movie ID', expected=True) + + info = self._call_api( + 'movies/%d' % movie_id, + display_id, + query={'fields': ','.join(self._VIDEO_FIELDS)}) + + return self._extract_video(info, display_id) + + class TVNowNewBaseIE(InfoExtractor): def _call_api(self, path, video_id, query={}): result = self._download_json( @@ -342,9 +424,85 @@ class TVNowIE(TVNowNewBaseIE): } def _real_extract(self, url): - display_id, video_id = re.match(self._VALID_URL, url).groups() + display_id, video_id = self._match_valid_url(url).groups() info = self._call_api('player/' + video_id, video_id) return self._extract_video(info, video_id, display_id) + + +class TVNowFilmIE(TVNowIE): + _VALID_URL = r'''(?x) + (?P<base_url>https?:// + (?:www\.)?tvnow\.(?:de|at|ch)/ + (?:filme))/ + (?P<title>[^/?$&]+)-(?P<id>\d+) + ''' + _TESTS = [{ + 'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959', + 'info_dict': { + 'id': '1426690', + 'display_id': 'lord-of-war-haendler-des-todes', + 'ext': 'mp4', + 'title': 'Lord of War', + 'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9', + 'timestamp': 1550010000, + 'upload_date': '20190212', + 'duration': 7016, + }, + }, { + 'url': 'https://www.tvnow.de/filme/the-machinist-12157', + 'info_dict': { + 'id': '328160', + 'display_id': 'the-machinist', + 'ext': 'mp4', + 'title': 'The Machinist', + 'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28', + 'timestamp': 1496469720, + 'upload_date': '20170603', + 'duration': 5836, + }, + }, { + 'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777', + 'only_matching': True, # DRM protected + }] + + def _real_extract(self, url): + mobj = self._match_valid_url(url) + display_id = mobj.group('title') + + webpage = self._download_webpage(url, display_id, fatal=False) + if not webpage: + raise ExtractorError('Cannot download "%s"' % url, expected=True) + + json_text = get_element_by_id('now-web-state', webpage) + if not json_text: + raise ExtractorError('Cannot read video data', expected=True) + + json_data = self._parse_json( + json_text, + display_id, + transform_source=lambda x: x.replace('&q;', '"'), + fatal=False) + if not json_data: + raise ExtractorError('Cannot read video data', expected=True) + + player_key = next( + (key for key in json_data.keys() if 'module/player' in key), + None) + page_key = next( + (key for key in json_data.keys() if 'page/filme' in key), + None) + movie_id = try_get( + json_data, + [ + lambda x: x[player_key]['body']['id'], + lambda x: x[page_key]['body']['modules'][0]['id'], + lambda x: x[page_key]['body']['modules'][1]['id']], + int) + if not movie_id: + raise ExtractorError('Cannot extract movie ID', expected=True) + + info = self._call_api('player/%d' % movie_id, display_id) + return self._extract_video(info, url, display_id) """ @@ -394,7 +552,7 @@ class TVNowSeasonIE(TVNowListBaseIE): }] def _real_extract(self, url): - _, show_id, season_id = re.match(self._VALID_URL, url).groups() + _, show_id, season_id = self._match_valid_url(url).groups() return self._extract_items( url, show_id, season_id, {'season': season_id}) @@ -410,7 +568,7 @@ class TVNowAnnualIE(TVNowListBaseIE): }] def _real_extract(self, url): - _, show_id, year, month = re.match(self._VALID_URL, url).groups() + _, show_id, year, month = self._match_valid_url(url).groups() return self._extract_items( url, show_id, '%s-%s' % (year, month), { 'year': int(year), @@ -442,7 +600,7 @@ class TVNowShowIE(TVNowListBaseIE): else super(TVNowShowIE, cls).suitable(url)) def _real_extract(self, url): - base_url, show_id = re.match(self._VALID_URL, url).groups() + base_url, show_id = self._match_valid_url(url).groups() result = self._call_api( 'teaserrow/format/navigation/' + show_id, show_id) |