aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--yt_dlp/extractor/extractors.py2
-rw-r--r--yt_dlp/extractor/mtv.py169
2 files changed, 171 insertions, 0 deletions
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 580838d0f..3dc2c10f8 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -732,6 +732,8 @@ from .mtv import (
MTVServicesEmbeddedIE,
MTVDEIE,
MTVJapanIE,
+ MTVItaliaIE,
+ MTVItaliaProgrammaIE,
)
from .muenchentv import MuenchenTVIE
from .mwave import MwaveIE, MwaveMeetGreetIE
diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py
index f5e30d22d..f96226e56 100644
--- a/yt_dlp/extractor/mtv.py
+++ b/yt_dlp/extractor/mtv.py
@@ -14,6 +14,7 @@ from ..utils import (
fix_xml_ampersands,
float_or_none,
HEADRequest,
+ int_or_none,
RegexNotFoundError,
sanitized_Request,
strip_or_none,
@@ -176,6 +177,22 @@ class MTVServicesInfoExtractor(InfoExtractor):
raise ExtractorError('Could not find video title')
title = title.strip()
+ series = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:franchise')
+ season = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:seasonN')
+ episode = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:episodeN')
+ series = series.text if series is not None else None
+ season = season.text if season is not None else None
+ episode = episode.text if episode is not None else None
+ if season and episode:
+ # episode number includes season, so remove it
+ episode = re.sub(r'^%s' % season, '', episode)
+
# This is a short id that's used in the webpage urls
mtvn_id = None
mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
@@ -201,6 +218,9 @@ class MTVServicesInfoExtractor(InfoExtractor):
'description': description,
'duration': float_or_none(content_el.attrib.get('duration')),
'timestamp': timestamp,
+ 'series': series,
+ 'season_number': int_or_none(season),
+ 'episode_number': int_or_none(episode),
}
def _get_feed_query(self, uri):
@@ -483,3 +503,152 @@ class MTVDEIE(MTVServicesInfoExtractor):
'arcEp': 'mtv.de',
'mgid': uri,
}
+
+
class MTVItaliaIE(MTVServicesInfoExtractor):
    # Extractor for single mtv.it items: the URL pattern accepts the
    # "episodi", "video" and "musica" sections and captures the short
    # alphanumeric id that follows the section name.
    IE_NAME = 'mtv.it'
    _GEO_COUNTRIES = ['IT']
    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
    _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:episodi|video|musica)/(?P<id>[0-9a-z]+)'
    _TESTS = [{
        'url': 'http://www.mtv.it/episodi/24bqab/mario-una-serie-di-maccio-capatonda-cavoli-amario-episodio-completo-S1-E1',
        'info_dict': {
            'id': '0f0fc78e-45fc-4cce-8f24-971c25477530',
            'ext': 'mp4',
            'title': 'Cavoli amario (episodio completo)',
            'description': 'md5:4962bccea8fed5b7c03b295ae1340660',
            'series': 'Mario - Una Serie Di Maccio Capatonda',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _get_feed_query(self, uri):
        """Build the feed query for *uri*: the mgid plus the mtv.it arcEp."""
        feed_query = dict(arcEp='mtv.it', mgid=uri)
        return feed_query
+
+
class MTVItaliaProgrammaIE(MTVItaliaIE):
    # Playlist extractor for mtv.it pages under "programmi" and "playlist".
    # The page URL is resolved through the manifest feed at _FEED_URL; the
    # manifest's zones point at the JSON feeds holding the show details and
    # the (paginated) list of entries.
    IE_NAME = 'mtv.it:programma'
    _VALID_URL = r'https?://(?:www\.)?mtv\.it/(?:programmi|playlist)/(?P<id>[0-9a-z]+)'
    _TESTS = [{
        # program page: general
        'url': 'http://www.mtv.it/programmi/s2rppv/mario-una-serie-di-maccio-capatonda',
        'info_dict': {
            'id': 'a6f155bc-8220-4640-aa43-9b95f64ffa3d',
            'title': 'Mario - Una Serie Di Maccio Capatonda',
            'description': 'md5:72fbffe1f77ccf4e90757dd4e3216153',
        },
        'playlist_count': 2,
        'params': {
            'skip_download': True,
        },
    }, {
        # program page: specific season
        'url': 'http://www.mtv.it/programmi/d9ncjf/mario-una-serie-di-maccio-capatonda-S2',
        'info_dict': {
            'id': '4deeb5d8-f272-490c-bde2-ff8d261c6dd1',
            'title': 'Mario - Una Serie Di Maccio Capatonda - Stagione 2',
        },
        'playlist_count': 34,
        'params': {
            'skip_download': True,
        },
    }, {
        # playlist page + redirect
        'url': 'http://www.mtv.it/playlist/sexy-videos/ilctal',
        'info_dict': {
            'id': 'dee8f9ee-756d-493b-bf37-16d1d2783359',
            'title': 'Sexy Videos',
        },
        'playlist_mincount': 145,
        'params': {
            'skip_download': True,
        },
    }]
    _GEO_COUNTRIES = ['IT']
    _FEED_URL = 'http://www.mtv.it/feeds/triforce/manifest/v8'

    def _get_entries(self, title, url):
        """Yield a ``url_result`` for each item of a paginated JSON feed.

        Starting at *url*, every page is downloaded and the entries found
        under ``result.data.items`` (or ``result.data.seasons``) that carry
        a ``canonicalURL`` are yielded. Pagination follows
        ``result.nextPageURL`` until it is absent.

        *title* is only used as the id shown in the download messages.
        """
        # Remember fetched pages so a feed whose nextPageURL points back at
        # an already visited page cannot loop forever.
        visited = set()
        while url and url not in visited:
            visited.add(url)
            # The page number is the trailing numeric path component, if any.
            pg = self._search_regex(r'/(\d+)$', url, 'entries', '1')
            page = self._download_json(url, title, 'page %s' % pg)
            url = try_get(
                page, lambda x: x['result']['nextPageURL'], compat_str)
            items = try_get(
                page, (
                    lambda x: x['result']['data']['items'],
                    lambda x: x['result']['data']['seasons']),
                list)
            for entry in items or []:
                if not isinstance(entry, dict):
                    continue
                if entry.get('canonicalURL'):
                    yield self.url_result(entry['canonicalURL'])

    def _real_extract(self, url):
        """Resolve a programme/playlist page into a playlist result.

        Raises ExtractorError when the manifest, the show-info feed or the
        playlist feed cannot be located.
        """
        query = {'url': url}
        info_url = update_url_query(self._FEED_URL, query)
        video_id = self._match_id(url)
        info = try_get(
            self._download_json(info_url, video_id),
            lambda x: x['manifest'], dict)
        if not info:
            # Without a manifest nothing below can work; fail with a clear
            # extractor error instead of an AttributeError on None.
            raise ExtractorError('No info found')

        redirect = try_get(
            info, lambda x: x['newLocation']['url'], compat_str)
        if redirect:
            return self.url_result(redirect)

        title = info.get('title')
        video_id = try_get(
            info, lambda x: x['reporting']['itemId'], compat_str)
        parent_id = try_get(
            info, lambda x: x['reporting']['parentId'], compat_str)

        # Pick the feeds out of the manifest zones by module name:
        #   INTL_M304 / INTL_M209 -> show info feed (last match wins)
        #   INTL_M308 / INTL_M317 -> playlist feed (first match wins)
        #   INTL_M300             -> "current" feed (first match wins)
        playlist_url = current_url = None
        zones = try_get(info, lambda x: x['zones'], dict) or {}
        for z in zones.values():
            if not isinstance(z, dict):
                continue
            module = z.get('moduleName')
            if module in ('INTL_M304', 'INTL_M209'):
                info_url = z.get('feed')
            if module in ('INTL_M308', 'INTL_M317'):
                playlist_url = playlist_url or z.get('feed')
            if module in ('INTL_M300',):
                current_url = current_url or z.get('feed')

        # NOTE(review): info_url still holds the manifest URL when no info
        # zone matched, so this only fires for a matching zone without a
        # feed. Kept as-is for backward compatibility; confirm whether a
        # missing info zone should be an error as well.
        if not info_url:
            raise ExtractorError('No info found')

        if video_id == parent_id:
            # Take the id from the second-to-last path component of the
            # info feed URL instead.
            video_id = self._search_regex(
                r'([^\/]+)/[^\/]+$', info_url, 'video_id')

        info = self._download_json(info_url, video_id, 'Show infos')
        info = try_get(info, lambda x: x['result']['data'], dict)
        title = title or try_get(
            info, (
                lambda x: x['title'],
                lambda x: x['headline']),
            compat_str)
        description = try_get(info, lambda x: x['content'], compat_str)

        # The seasons lookup needs the playlist feed; skip it when that feed
        # is missing rather than requesting a None URL.
        if current_url and playlist_url:
            season = try_get(
                self._download_json(playlist_url, video_id, 'Seasons info'),
                lambda x: x['result']['data'], dict)
            current = try_get(
                season, lambda x: x['currentSeason'], compat_str)
            seasons = try_get(
                season, lambda x: x['seasons'], list) or []

            # When the current season is one of the listed seasons, list the
            # entries of the "current" feed instead of the seasons feed.
            if any(isinstance(s, dict) and s.get('eTitle') == current
                   for s in seasons):
                playlist_url = current_url

        if not playlist_url:
            raise ExtractorError('No playlist found')

        if title:
            # Strip the site/section suffix from the page title.
            title = re.sub(
                r'[-|]\s*(?:mtv\s*italia|programma|playlist)',
                '', title, flags=re.IGNORECASE).strip()

        return self.playlist_result(
            self._get_entries(title or video_id, playlist_url),
            video_id, title, description)