aboutsummaryrefslogtreecommitdiffstats
path: root/hypervideo_dl/extractor/mtv.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/mtv.py')
-rw-r--r--hypervideo_dl/extractor/mtv.py188
1 files changed, 181 insertions, 7 deletions
diff --git a/hypervideo_dl/extractor/mtv.py b/hypervideo_dl/extractor/mtv.py
index 5a5205c..e060884 100644
--- a/hypervideo_dl/extractor/mtv.py
+++ b/hypervideo_dl/extractor/mtv.py
@@ -14,6 +14,7 @@ from ..utils import (
fix_xml_ampersands,
float_or_none,
HEADRequest,
+ int_or_none,
RegexNotFoundError,
sanitized_Request,
strip_or_none,
@@ -43,7 +44,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
# Remove the templates, like &device={device}
return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
- def _get_feed_url(self, uri):
+ def _get_feed_url(self, uri, url=None):  # neither `uri` nor the new optional `url` is used here: the class-level _FEED_URL is returned as-is
return self._FEED_URL
def _get_thumbnail_url(self, uri, itemdoc):
@@ -176,6 +177,22 @@ class MTVServicesInfoExtractor(InfoExtractor):
raise ExtractorError('Could not find video title')
title = title.strip()
+ series = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:franchise')
+ season = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:seasonN')
+ episode = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:episodeN')
+ series = series.text if series is not None else None
+ season = season.text if season is not None else None
+ episode = episode.text if episode is not None else None
+ if season and episode:
+ # episode number includes season, so remove it
+ episode = re.sub(r'^%s' % season, '', episode)
+
# This a short id that's used in the webpage urls
mtvn_id = None
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
@@ -201,6 +218,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
'description': description,
'duration': float_or_none(content_el.attrib.get('duration')),
'timestamp': timestamp,
+ 'series': series,
+ 'season_number': int_or_none(season),
+ 'episode_number': int_or_none(episode),
}
def _get_feed_query(self, uri):
@@ -209,9 +229,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
data['lang'] = self._LANG
return data
- def _get_videos_info(self, uri, use_hls=True):
+ def _get_videos_info(self, uri, use_hls=True, url=None):  # optional `url` defaults to None and is only handed on to _get_feed_url
video_id = self._id_from_uri(uri)
- feed_url = self._get_feed_url(uri)
+ feed_url = self._get_feed_url(uri, url)  # the feed URL is then combined with the query from _get_feed_query(uri)
info_url = update_url_query(feed_url, self._get_feed_query(uri))
return self._get_videos_info_from_url(info_url, video_id, use_hls)
@@ -229,6 +249,7 @@ class MTVServicesInfoExtractor(InfoExtractor):
if info:
entries.append(info)
+ # TODO: should be multi-video
return self.playlist_result(
entries, playlist_title=title, playlist_description=description)
@@ -292,13 +313,17 @@ class MTVServicesInfoExtractor(InfoExtractor):
video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
mgid = video_player['props']['media']['video']['config']['uri']
+ if not mgid:
+ mgid = self._search_regex(
+ r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None)
+
return mgid
def _real_extract(self, url):
title = url_basename(url)
webpage = self._download_webpage(url, title)
mgid = self._extract_mgid(webpage)
- videos_info = self._get_videos_info(mgid)
+ videos_info = self._get_videos_info(mgid, url=url)  # pass the page URL along; _get_videos_info forwards it to _get_feed_url
return videos_info
@@ -327,14 +352,14 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
if mobj:
return mobj.group('url')
- def _get_feed_url(self, uri):
+ def _get_feed_url(self, uri, url=None):  # `url` is not used here: the feed URL is read from the downloaded config ('feedWithQueryParams')
video_id = self._id_from_uri(uri)
config = self._download_json(
'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
return self._remove_template_parameter(config['feedWithQueryParams'])
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)  # replaces the direct re.match on _VALID_URL; the 'mgid' group is read from the result below
mgid = mobj.group('mgid')
return self._get_videos_info(mgid)
@@ -416,7 +441,7 @@ class MTVVideoIE(MTVServicesInfoExtractor):
return 'http://mtv.mtvnimages.com/uri/' + uri
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
+ mobj = self._match_valid_url(url)
video_id = mobj.group('videoid')
uri = mobj.groupdict().get('mgid')
if uri is None:
@@ -486,3 +511,152 @@ class MTVDEIE(MTVServicesInfoExtractor):
'arcEp': 'mtv.de',
'mgid': uri,
}
+
+
+class MTVItaliaIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtv.it'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:episodi|video|musica)/(?P<id>[0-9a-z]+)'
+ _TESTS = [{
+ 'url': 'http://www.mtv.it/episodi/24bqab/mario-una-serie-di-maccio-capatonda-cavoli-amario-episodio-completo-S1-E1',
+ 'info_dict': {
+ 'id': '0f0fc78e-45fc-4cce-8f24-971c25477530',
+ 'ext': 'mp4',
+ 'title': 'Cavoli amario (episodio completo)',
+ 'description': 'md5:4962bccea8fed5b7c03b295ae1340660',
+ 'series': 'Mario - Una Serie Di Maccio Capatonda',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _GEO_COUNTRIES = ['IT']
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'mtv.it',
+ 'mgid': uri,
+ }
+
+
+class MTVItaliaProgrammaIE(MTVItaliaIE):
+ IE_NAME = 'mtv.it:programma'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P<id>[0-9a-z]+)'
+ _TESTS = [{
+ # program page: general
+ 'url': 'http://www.mtv.it/programmi/s2rppv/mario-una-serie-di-maccio-capatonda',
+ 'info_dict': {
+ 'id': 'a6f155bc-8220-4640-aa43-9b95f64ffa3d',
+ 'title': 'Mario - Una Serie Di Maccio Capatonda',
+ 'description': 'md5:72fbffe1f77ccf4e90757dd4e3216153',
+ },
+ 'playlist_count': 2,
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # program page: specific season
+ 'url': 'http://www.mtv.it/programmi/d9ncjf/mario-una-serie-di-maccio-capatonda-S2',
+ 'info_dict': {
+ 'id': '4deeb5d8-f272-490c-bde2-ff8d261c6dd1',
+ 'title': 'Mario - Una Serie Di Maccio Capatonda - Stagione 2',
+ },
+ 'playlist_count': 34,
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # playlist page + redirect
+ 'url': 'http://www.mtv.it/playlist/sexy-videos/ilctal',
+ 'info_dict': {
+ 'id': 'dee8f9ee-756d-493b-bf37-16d1d2783359',
+ 'title': 'Sexy Videos',
+ },
+ 'playlist_mincount': 145,
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _GEO_COUNTRIES = ['IT']
+ _FEED_URL = 'http://www.mtv.it/feeds/triforce/manifest/v8'
+
+ def _get_entries(self, title, url):
+ while True:
+ pg = self._search_regex(r'/(\d+)$', url, 'entries', '1')
+ entries = self._download_json(url, title, 'page %s' % pg)
+ url = try_get(
+ entries, lambda x: x['result']['nextPageURL'], compat_str)
+ entries = try_get(
+ entries, (
+ lambda x: x['result']['data']['items'],
+ lambda x: x['result']['data']['seasons']),
+ list)
+ for entry in entries or []:
+ if entry.get('canonicalURL'):
+ yield self.url_result(entry['canonicalURL'])
+ if not url:
+ break
+
+ def _real_extract(self, url):
+ query = {'url': url}
+ info_url = update_url_query(self._FEED_URL, query)
+ video_id = self._match_id(url)
+ info = self._download_json(info_url, video_id).get('manifest')
+
+ redirect = try_get(
+ info, lambda x: x['newLocation']['url'], compat_str)
+ if redirect:
+ return self.url_result(redirect)
+
+ title = info.get('title')
+ video_id = try_get(
+ info, lambda x: x['reporting']['itemId'], compat_str)
+ parent_id = try_get(
+ info, lambda x: x['reporting']['parentId'], compat_str)
+
+ playlist_url = current_url = None
+ for z in (info.get('zones') or {}).values():
+ if z.get('moduleName') in ('INTL_M304', 'INTL_M209'):
+ info_url = z.get('feed')
+ if z.get('moduleName') in ('INTL_M308', 'INTL_M317'):
+ playlist_url = playlist_url or z.get('feed')
+ if z.get('moduleName') in ('INTL_M300',):
+ current_url = current_url or z.get('feed')
+
+ if not info_url:
+ raise ExtractorError('No info found')
+
+ if video_id == parent_id:
+ video_id = self._search_regex(
+ r'([^\/]+)/[^\/]+$', info_url, 'video_id')
+
+ info = self._download_json(info_url, video_id, 'Show infos')
+ info = try_get(info, lambda x: x['result']['data'], dict)
+ title = title or try_get(
+ info, (
+ lambda x: x['title'],
+ lambda x: x['headline']),
+ compat_str)
+ description = try_get(info, lambda x: x['content'], compat_str)
+
+ if current_url:
+ season = try_get(
+ self._download_json(playlist_url, video_id, 'Seasons info'),
+ lambda x: x['result']['data'], dict)
+ current = try_get(
+ season, lambda x: x['currentSeason'], compat_str)
+ seasons = try_get(
+ season, lambda x: x['seasons'], list) or []
+
+ if current in [s.get('eTitle') for s in seasons]:
+ playlist_url = current_url
+
+ title = re.sub(
+ r'[-|]\s*(?:mtv\s*italia|programma|playlist)',
+ '', title, flags=re.IGNORECASE).strip()
+
+ return self.playlist_result(
+ self._get_entries(title, playlist_url),
+ video_id, title, description)