diff options
Diffstat (limited to 'hypervideo_dl/extractor/theplatform.py')
-rw-r--r-- | hypervideo_dl/extractor/theplatform.py | 30 |
1 files changed, 23 insertions, 7 deletions
diff --git a/hypervideo_dl/extractor/theplatform.py b/hypervideo_dl/extractor/theplatform.py index e659b8e..99caeb5 100644 --- a/hypervideo_dl/extractor/theplatform.py +++ b/hypervideo_dl/extractor/theplatform.py @@ -7,19 +7,23 @@ import hashlib from .once import OnceIE from .adobepass import AdobePassIE +from ..networking import Request from ..utils import ( determine_ext, ExtractorError, float_or_none, int_or_none, parse_qs, - sanitized_Request, unsmuggle_url, update_url_query, xpath_with_ns, mimetype2ext, find_xpath_attr, + traverse_obj, + update_url, + urlhandle_detect_ext, ) +from ..networking import HEADRequest default_ns = 'http://www.w3.org/2005/SMIL21/Language' _x = lambda p: xpath_with_ns(p, {'smil': default_ns}) @@ -45,7 +49,7 @@ class ThePlatformBaseIE(OnceIE): raise ExtractorError( error_element.attrib['abstract'], expected=True) - smil_formats = self._parse_smil_formats( + smil_formats, subtitles = self._parse_smil_formats_and_subtitles( meta, smil_url, video_id, namespace=default_ns, # the parameters are from syfy.com, other sites may use others, # they also work for nbc.com @@ -65,8 +69,6 @@ class ThePlatformBaseIE(OnceIE): formats.append(_format) - subtitles = self._parse_smil_subtitles(meta, default_ns) - return formats, subtitles def _download_theplatform_metadata(self, path, video_id): @@ -164,7 +166,8 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): 'params': { # rtmp download 'skip_download': True, - } + }, + 'skip': '404 Not Found', }, { 'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD', 'info_dict': { @@ -173,7 +176,8 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): 'description': 'md5:644ad9188d655b742f942bf2e06b002d', 'title': 'HIGHLIGHTS: USA bag first ever series Cup win', 'uploader': 'EGSM', - } + }, + 'skip': '404 Not Found', }, { 'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7', 'only_matching': True, @@ -191,6 +195,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): 'upload_date': '20150701', 'uploader': 'NBCU-NEWS', }, + 'skip': '404 Not Found', }, { # From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1 # geo-restricted (US), HLS encrypted with AES-128 @@ -270,7 +275,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): source_url = smuggled_data.get('source_url') if source_url: headers['Referer'] = source_url - request = sanitized_Request(url, headers=headers) + request = Request(url, headers=headers) webpage = self._download_webpage(request, video_id) smil_url = self._search_regex( r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml', @@ -297,6 +302,17 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE): formats, subtitles = self._extract_theplatform_smil(smil_url, video_id) + # With some sites, manifest URL must be forced to extract HLS formats + if not traverse_obj(formats, lambda _, v: v['format_id'].startswith('hls')): + m3u8_url = update_url(url, query='mbr=true&manifest=m3u', fragment=None) + urlh = self._request_webpage( + HEADRequest(m3u8_url), video_id, 'Checking for HLS formats', 'No HLS formats found', fatal=False) + if urlh and urlhandle_detect_ext(urlh) == 'm3u8': + m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles( + m3u8_url, video_id, m3u8_id='hls', fatal=False) + formats.extend(m3u8_fmts) + self._merge_subtitles(m3u8_subs, target=subtitles) + ret = self._extract_theplatform_metadata(path, video_id) combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles) ret.update({ |