diff options
Diffstat (limited to 'youtube_dlc/extractor/mtv.py')
-rw-r--r-- | youtube_dlc/extractor/mtv.py | 46 |
1 files changed, 43 insertions, 3 deletions
diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py
index fedd5f46b..e545a9ef3 100644
--- a/youtube_dlc/extractor/mtv.py
+++ b/youtube_dlc/extractor/mtv.py
@@ -7,6 +7,7 @@ from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_xpath,
+    compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
@@ -22,6 +23,7 @@ from ..utils import (
     unescapeHTML,
     update_url_query,
     url_basename,
+    get_domain,
     xpath_text,
 )
 
@@ -253,7 +255,42 @@ class MTVServicesInfoExtractor(InfoExtractor):
 
         return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
 
-    def _extract_mgid(self, webpage):
+    def _extract_new_triforce_mgid(self, webpage, url='', video_id=None):
+        """Return an mgid built from the triforce v8 manifest of url, or None."""
+        if url == '':
+            return
+        domain = get_domain(url)
+        if domain is None:
+            raise ExtractorError(
+                '[%s] could not get domain' % self.IE_NAME,
+                expected=True)
+        url = url.replace("https://", "http://")
+        enc_url = compat_urlparse.quote(url, safe='')
+        _TRIFORCE_V8_TEMPLATE = 'https://%s/feeds/triforce/manifest/v8?url=%s'
+        triforce_manifest_url = _TRIFORCE_V8_TEMPLATE % (domain, enc_url)
+
+        manifest = self._download_json(triforce_manifest_url, video_id, fatal=False)
+        # go through try_get (as for itemId below) instead of chained .get() on a possibly-missing dict
+        new_loc = try_get(manifest, lambda x: x['manifest']['newLocation'], compat_str)
+        if new_loc and try_get(manifest, lambda x: x['manifest']['type']) == 'redirect':
+            self.to_screen('Found a redirect. Downloading manifest from new location')
+            new_loc = new_loc.replace("https://", "http://")
+            enc_new_loc = compat_urlparse.quote(new_loc, safe='')
+            triforce_manifest_new_loc = _TRIFORCE_V8_TEMPLATE % (domain, enc_new_loc)
+            manifest = self._download_json(triforce_manifest_new_loc, video_id, fatal=False)
+
+        item_id = try_get(manifest, lambda x: x['manifest']['reporting']['itemId'], compat_str)
+        if not item_id:
+            self.to_screen('Found no id!')
+            return
+
+        # 'episode' can be anything. 'content' is used often as well
+        _MGID_TEMPLATE = 'mgid:arc:episode:%s:%s'
+        mgid = _MGID_TEMPLATE % (domain, item_id)
+
+        return mgid
+
+    def _extract_mgid(self, webpage, url='', data_zone=None):
         try:
             # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
             # or http://media.mtvnservices.com/{mgid}
@@ -276,14 +313,17 @@ class MTVServicesInfoExtractor(InfoExtractor):
                 r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
 
         if not mgid:
-            mgid = self._extract_triforce_mgid(webpage)
+            mgid = self._extract_new_triforce_mgid(webpage, url)
+
+        if not mgid:
+            mgid = self._extract_triforce_mgid(webpage, data_zone)
 
         return mgid
 
     def _real_extract(self, url):
         title = url_basename(url)
         webpage = self._download_webpage(url, title)
-        mgid = self._extract_mgid(webpage)
+        mgid = self._extract_mgid(webpage, url)
         videos_info = self._get_videos_info(mgid)
         return videos_info
 