about summary refs log tree commit diff stats
path: root/hypervideo_dl/extractor/theplatform.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/theplatform.py')
-rw-r--r-- hypervideo_dl/extractor/theplatform.py 30
1 files changed, 23 insertions, 7 deletions
diff --git a/hypervideo_dl/extractor/theplatform.py b/hypervideo_dl/extractor/theplatform.py
index e659b8e..99caeb5 100644
--- a/hypervideo_dl/extractor/theplatform.py
+++ b/hypervideo_dl/extractor/theplatform.py
@@ -7,19 +7,23 @@ import hashlib
from .once import OnceIE
from .adobepass import AdobePassIE
+from ..networking import Request
from ..utils import (
determine_ext,
ExtractorError,
float_or_none,
int_or_none,
parse_qs,
- sanitized_Request,
unsmuggle_url,
update_url_query,
xpath_with_ns,
mimetype2ext,
find_xpath_attr,
+ traverse_obj,
+ update_url,
+ urlhandle_detect_ext,
)
+from ..networking import HEADRequest
default_ns = 'http://www.w3.org/2005/SMIL21/Language'
_x = lambda p: xpath_with_ns(p, {'smil': default_ns})
@@ -45,7 +49,7 @@ class ThePlatformBaseIE(OnceIE):
raise ExtractorError(
error_element.attrib['abstract'], expected=True)
- smil_formats = self._parse_smil_formats(
+ smil_formats, subtitles = self._parse_smil_formats_and_subtitles(
meta, smil_url, video_id, namespace=default_ns,
# the parameters are from syfy.com, other sites may use others,
# they also work for nbc.com
@@ -65,8 +69,6 @@ class ThePlatformBaseIE(OnceIE):
formats.append(_format)
- subtitles = self._parse_smil_subtitles(meta, default_ns)
-
return formats, subtitles
def _download_theplatform_metadata(self, path, video_id):
@@ -164,7 +166,8 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
'params': {
# rtmp download
'skip_download': True,
- }
+ },
+ 'skip': '404 Not Found',
}, {
'url': 'https://player.theplatform.com/p/D6x-PC/pulse_preview/embed/select/media/yMBg9E8KFxZD',
'info_dict': {
@@ -173,7 +176,8 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
'description': 'md5:644ad9188d655b742f942bf2e06b002d',
'title': 'HIGHLIGHTS: USA bag first ever series Cup win',
'uploader': 'EGSM',
- }
+ },
+ 'skip': '404 Not Found',
}, {
'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
'only_matching': True,
@@ -191,6 +195,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
'upload_date': '20150701',
'uploader': 'NBCU-NEWS',
},
+ 'skip': '404 Not Found',
}, {
# From http://www.nbc.com/the-blacklist/video/sir-crispin-crandall/2928790?onid=137781#vc137781=1
# geo-restricted (US), HLS encrypted with AES-128
@@ -270,7 +275,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
source_url = smuggled_data.get('source_url')
if source_url:
headers['Referer'] = source_url
- request = sanitized_Request(url, headers=headers)
+ request = Request(url, headers=headers)
webpage = self._download_webpage(request, video_id)
smil_url = self._search_regex(
r'<link[^>]+href=(["\'])(?P<url>.+?)\1[^>]+type=["\']application/smil\+xml',
@@ -297,6 +302,17 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
formats, subtitles = self._extract_theplatform_smil(smil_url, video_id)
+ # With some sites, manifest URL must be forced to extract HLS formats
+ if not traverse_obj(formats, lambda _, v: v['format_id'].startswith('hls')):
+ m3u8_url = update_url(url, query='mbr=true&manifest=m3u', fragment=None)
+ urlh = self._request_webpage(
+ HEADRequest(m3u8_url), video_id, 'Checking for HLS formats', 'No HLS formats found', fatal=False)
+ if urlh and urlhandle_detect_ext(urlh) == 'm3u8':
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ m3u8_url, video_id, m3u8_id='hls', fatal=False)
+ formats.extend(m3u8_fmts)
+ self._merge_subtitles(m3u8_subs, target=subtitles)
+
ret = self._extract_theplatform_metadata(path, video_id)
combined_subtitles = self._merge_subtitles(ret.get('subtitles', {}), subtitles)
ret.update({