diff options
Diffstat (limited to 'hypervideo_dl/extractor/nexx.py')
-rw-r--r-- | hypervideo_dl/extractor/nexx.py | 147 |
1 files changed, 118 insertions, 29 deletions
diff --git a/hypervideo_dl/extractor/nexx.py b/hypervideo_dl/extractor/nexx.py index 860d636..a521bb6 100644 --- a/hypervideo_dl/extractor/nexx.py +++ b/hypervideo_dl/extractor/nexx.py @@ -12,6 +12,8 @@ from ..utils import ( ExtractorError, int_or_none, parse_duration, + srt_subtitles_timecode, + traverse_obj, try_get, urlencode_postdata, ) @@ -20,7 +22,7 @@ from ..utils import ( class NexxIE(InfoExtractor): _VALID_URL = r'''(?x) (?: - https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/| + https?://api\.nexx(?:\.cloud|cdn\.com)/v3(?:\.\d)?/(?P<domain_id>\d+)/videos/byid/| nexx:(?:(?P<domain_id_s>\d+):)?| https?://arc\.nexx\.cloud/api/video/ ) @@ -42,35 +44,37 @@ class NexxIE(InfoExtractor): 'timestamp': 1384264416, 'upload_date': '20131112', }, + 'skip': 'Spiegel nexx CDNs are now disabled' }, { - # episode - 'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858', + # episode with captions + 'url': 'https://api.nexx.cloud/v3.1/741/videos/byid/1701834', 'info_dict': { - 'id': '247858', + 'id': '1701834', 'ext': 'mp4', - 'title': 'Return of the Golden Child (OV)', - 'description': 'md5:5d969537509a92b733de21bae249dc63', - 'release_year': 2017, + 'title': 'Mein Leben mit \'nem TikTok E-Boy 😤', + 'alt_title': 'Mein Leben mit \'nem TikTok E-Boy 😤', + 'description': 'md5:f84f395a881fd143f952c892deab528d', 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 1397, - 'timestamp': 1495033267, - 'upload_date': '20170517', + 'duration': 770, + 'timestamp': 1595600027, + 'upload_date': '20200724', 'episode_number': 2, 'season_number': 2, + 'episode': 'Episode 2', + 'season': 'Season 2', }, 'params': { 'skip_download': True, }, - 'skip': 'HTTP Error 404: Not Found', }, { - # does not work via arc 'url': 'nexx:741:1269984', - 'md5': 'c714b5b238b2958dc8d5642addba6886', + 'md5': 'd5f14e14b592501e51addd5abef95a7f', 'info_dict': { 'id': '1269984', 'ext': 'mp4', - 'title': '1 TAG ohne KLO... wortwörtlich! 😑', - 'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑', + 'title': '1 TAG ohne KLO... wortwörtlich! ?', + 'alt_title': '1 TAG ohne KLO... wortwörtlich! ?', + 'description': 'md5:2016393a31991a900946432ccdd09a6f', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 607, 'timestamp': 1518614955, @@ -91,6 +95,7 @@ class NexxIE(InfoExtractor): 'timestamp': 1527874460, 'upload_date': '20180601', }, + 'skip': 'Spiegel nexx CDNs are now disabled' }, { 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907', 'only_matching': True, @@ -138,6 +143,8 @@ class NexxIE(InfoExtractor): return NexxIE._extract_urls(webpage)[0] def _handle_error(self, response): + if traverse_obj(response, ('metadata', 'notice'), expected_type=str): + self.report_warning('%s said: %s' % (self.IE_NAME, response['metadata']['notice'])) status = int_or_none(try_get( response, lambda x: x['metadata']['status']) or 200) if 200 <= status < 300: @@ -220,6 +227,65 @@ class NexxIE(InfoExtractor): return formats + def _extract_3q_formats(self, video, video_id): + stream_data = video['streamdata'] + cdn = stream_data['cdnType'] + assert cdn == '3q' + + q_acc, q_prefix, q_locator, q_hash = stream_data['qAccount'], stream_data['qPrefix'], stream_data['qLocator'], stream_data['qHash'] + protection_key = traverse_obj( + video, ('protectiondata', 'key'), expected_type=str) + + def get_cdn_shield_base(shield_type=''): + for secure in ('', 's'): + cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper())) + if cdn_shield: + return 'http%s://%s' % (secure, cdn_shield) + return f'http://sdn-global-{"prog" if shield_type.lower() == "prog" else "streaming"}-cache.3qsdn.com/' + (f's/{protection_key}/' if protection_key else '') + + stream_base = get_cdn_shield_base() + + formats = [] + formats.extend(self._extract_m3u8_formats( + f'{stream_base}{q_acc}/files/{q_prefix}/{q_locator}/{q_acc}-{stream_data.get("qHEVCHash") or q_hash}.ism/manifest.m3u8', + video_id, 'mp4', m3u8_id=f'{cdn}-hls', fatal=False)) + formats.extend(self._extract_mpd_formats( + f'{stream_base}{q_acc}/files/{q_prefix}/{q_locator}/{q_acc}-{q_hash}.ism/manifest.mpd', + video_id, mpd_id=f'{cdn}-dash', fatal=False)) + + progressive_base = get_cdn_shield_base('Prog') + q_references = stream_data.get('qReferences') or '' + fds = q_references.split(',') + for fd in fds: + ss = fd.split(':') + if len(ss) != 3: + continue + tbr = int_or_none(ss[1], scale=1000) + formats.append({ + 'url': f'{progressive_base}{q_acc}/uploads/{q_acc}-{ss[2]}.webm', + 'format_id': f'{cdn}-{ss[0]}{"-%s" % tbr if tbr else ""}', + 'tbr': tbr, + }) + + azure_file_distribution = stream_data.get('azureFileDistribution') or '' + fds = azure_file_distribution.split(',') + for fd in fds: + ss = fd.split(':') + if len(ss) != 3: + continue + tbr = int_or_none(ss[0]) + width, height = ss[1].split('x') if len(ss[1].split('x')) == 2 else (None, None) + f = { + 'url': f'{progressive_base}{q_acc}/files/{q_prefix}/{q_locator}/{ss[2]}.mp4', + 'format_id': f'{cdn}-http-{"-%s" % tbr if tbr else ""}', + 'tbr': tbr, + 'width': int_or_none(width), + 'height': int_or_none(height), + } + formats.append(f) + + return formats + def _extract_azure_formats(self, video, video_id): stream_data = video['streamdata'] cdn = stream_data['cdnType'] @@ -345,10 +411,11 @@ class NexxIE(InfoExtractor): # md5( operation + domain_id + domain_secret ) # where domain_secret is a static value that will be given by nexx.tv # as per [1]. Here is how this "secret" is generated (reversed - # from _play.api.init function, search for clienttoken). So it's - # actually not static and not that much of a secret. + # from _play._factory.data.getDomainData function, search for + # domaintoken or enableAPIAccess). So it's actually not static + # and not that much of a secret. # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf - secret = result['device']['clienttoken'][int(device_id[0]):] + secret = result['device']['domaintoken'][int(device_id[0]):] secret = secret[0:len(secret) - int(device_id[-1])] op = 'byid' @@ -360,15 +427,18 @@ class NexxIE(InfoExtractor): result = self._call_api( domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={ - 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description', + 'additionalfields': 'language,channel,format,licenseby,slug,fileversion,episode,season', 'addInteractionOptions': '1', 'addStatusDetails': '1', 'addStreamDetails': '1', - 'addCaptions': '1', + 'addFeatures': '1', + # Caption format selection doesn't seem to be enforced? + 'addCaptions': 'vtt', 'addScenes': '1', + 'addChapters': '1', 'addHotSpots': '1', + 'addConnectedMedia': 'persons', 'addBumpers': '1', - 'captionFormat': 'data', }, headers={ 'X-Request-CID': cid, 'X-Request-Token': request_token, @@ -384,28 +454,48 @@ class NexxIE(InfoExtractor): formats = self._extract_azure_formats(video, video_id) elif cdn == 'free': formats = self._extract_free_formats(video, video_id) + elif cdn == '3q': + formats = self._extract_3q_formats(video, video_id) else: - # TODO: reverse more cdns - assert False + self.raise_no_formats(f'{cdn} formats are currently not supported', video_id) self._sort_formats(formats) + subtitles = {} + for sub in video.get('captiondata') or []: + if sub.get('data'): + subtitles.setdefault(sub.get('language', 'en'), []).append({ + 'ext': 'srt', + 'data': '\n\n'.join( + f'{i + 1}\n{srt_subtitles_timecode(line["fromms"] / 1000)} --> {srt_subtitles_timecode(line["toms"] / 1000)}\n{line["caption"]}' + for i, line in enumerate(sub['data'])), + 'name': sub.get('language_long') or sub.get('title') + }) + elif sub.get('url'): + subtitles.setdefault(sub.get('language', 'en'), []).append({ + 'url': sub['url'], + 'ext': sub.get('format'), + 'name': sub.get('language_long') or sub.get('title') + }) + return { 'id': video_id, 'title': title, 'alt_title': general.get('subtitle'), 'description': general.get('description'), 'release_year': int_or_none(general.get('year')), - 'creator': general.get('studio') or general.get('studio_adref'), + 'creator': general.get('studio') or general.get('studio_adref') or None, 'thumbnail': try_get( video, lambda x: x['imagedata']['thumb'], compat_str), 'duration': parse_duration(general.get('runtime')), 'timestamp': int_or_none(general.get('uploaded')), - 'episode_number': int_or_none(try_get( - video, lambda x: x['episodedata']['episode'])), - 'season_number': int_or_none(try_get( - video, lambda x: x['episodedata']['season'])), + 'episode_number': traverse_obj( + video, (('episodedata', 'general'), 'episode'), expected_type=int, get_all=False), + 'season_number': traverse_obj( + video, (('episodedata', 'general'), 'season'), expected_type=int, get_all=False), + 'cast': traverse_obj(video, ('connectedmedia', ..., 'title'), expected_type=str), 'formats': formats, + 'subtitles': subtitles, } @@ -427,7 +517,6 @@ class NexxEmbedIE(InfoExtractor): 'upload_date': '20140305', }, 'params': { - 'format': 'bestvideo', 'skip_download': True, }, }, { |