diff options
Diffstat (limited to 'hypervideo_dl/extractor/hotstar.py')
-rw-r--r-- | hypervideo_dl/extractor/hotstar.py | 138 |
1 files changed, 118 insertions, 20 deletions
diff --git a/hypervideo_dl/extractor/hotstar.py b/hypervideo_dl/extractor/hotstar.py index 61eec7b..02183ad 100644 --- a/hypervideo_dl/extractor/hotstar.py +++ b/hypervideo_dl/extractor/hotstar.py @@ -6,7 +6,8 @@ import time import uuid from .common import InfoExtractor -from ..compat import compat_HTTPError, compat_str +from ..compat import compat_str +from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, determine_ext, @@ -83,7 +84,7 @@ class HotStarIE(HotStarBaseIE): _VALID_URL = r'''(?x) https?://(?:www\.)?hotstar\.com(?:/in)?/(?!in/) (?: - (?P<type>movies|sports|episode|(?P<tv>tv))/ + (?P<type>movies|sports|clips|episode|(?P<tv>tv|shows))/ (?(tv)(?:[^/?#]+/){2}|[^?#]*) )? [^/?#]+/ @@ -123,6 +124,70 @@ class HotStarIE(HotStarBaseIE): 'episode_number': 8, } }, { + 'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/anupama-anuj-share-a-moment/1000282843', + 'info_dict': { + 'id': '1000282843', + 'ext': 'mp4', + 'title': 'Anupama, Anuj Share a Moment', + 'season': 'Chapter 1', + 'description': 'md5:8d74ed2248423b8b06d5c8add4d7a0c0', + 'timestamp': 1678149000, + 'channel': 'StarPlus', + 'series': 'Anupama', + 'season_number': 1, + 'season_id': 7399, + 'upload_date': '20230307', + 'episode': 'Anupama, Anuj Share a Moment', + 'episode_number': 853, + 'duration': 1272, + 'channel_id': 3, + }, + 'skip': 'HTTP Error 504: Gateway Time-out', # XXX: Investigate 504 errors on some episodes + }, { + 'url': 'https://www.hotstar.com/in/shows/kana-kaanum-kaalangal/1260097087/back-to-school/1260097320', + 'info_dict': { + 'id': '1260097320', + 'ext': 'mp4', + 'title': 'Back To School', + 'season': 'Chapter 1', + 'description': 'md5:b0d6a4c8a650681491e7405496fc7e13', + 'timestamp': 1650564000, + 'channel': 'Hotstar Specials', + 'series': 'Kana Kaanum Kaalangal', + 'season_number': 1, + 'season_id': 9441, + 'upload_date': '20220421', + 'episode': 'Back To School', + 'episode_number': 1, + 'duration': 1810, + 'channel_id': 54, + }, + }, { + 'url': 'https://www.hotstar.com/in/clips/e3-sairat-kahani-pyaar-ki/1000262286', + 'info_dict': { + 'id': '1000262286', + 'ext': 'mp4', + 'title': 'E3 - SaiRat, Kahani Pyaar Ki', + 'description': 'md5:e3b4b3203bc0c5396fe7d0e4948a6385', + 'episode': 'E3 - SaiRat, Kahani Pyaar Ki', + 'upload_date': '20210606', + 'timestamp': 1622943900, + 'duration': 5395, + }, + }, { + 'url': 'https://www.hotstar.com/in/movies/premam/1000091195', + 'info_dict': { + 'id': '1000091195', + 'ext': 'mp4', + 'title': 'Premam', + 'release_year': 2015, + 'description': 'md5:d833c654e4187b5e34757eafb5b72d7f', + 'timestamp': 1462149000, + 'upload_date': '20160502', + 'episode': 'Premam', + 'duration': 8994, + }, + }, { 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157', 'only_matching': True, }, { @@ -139,6 +204,8 @@ class HotStarIE(HotStarBaseIE): 'sports': 'match', 'episode': 'episode', 'tv': 'episode', + 'shows': 'episode', + 'clips': 'content', None: 'content', } @@ -148,6 +215,12 @@ class HotStarIE(HotStarBaseIE): 'dr': 'dynamic_range', } + _TAG_FIELDS = { + 'language': 'language', + 'acodec': 'audio_codec', + 'vcodec': 'video_codec', + } + @classmethod def _video_url(cls, video_id, video_type=None, *, slug='ignore_me', root=None): assert None in (video_type, root) @@ -160,8 +233,10 @@ class HotStarIE(HotStarBaseIE): video_type = self._TYPE.get(video_type, video_type) cookies = self._get_cookies(url) # Cookies before any request - video_data = self._call_api_v1(f'{video_type}/detail', video_id, - query={'tas': 10000, 'contentId': video_id})['body']['results']['item'] + video_data = traverse_obj( + self._call_api_v1( + f'{video_type}/detail', video_id, fatal=False, query={'tas': 10000, 'contentId': video_id}), + ('body', 'results', 'item', {dict})) or {} if not self.get_param('allow_unplayable_formats') and video_data.get('drmProtected'): self.report_drm(video_id) @@ -182,24 +257,22 @@ class HotStarIE(HotStarBaseIE): for key, prefix in self._IGNORE_MAP.items() for ignore in self._configuration_arg(key)): continue + tag_dict = dict((t.split(':', 1) + [None])[:2] for t in tags.split(';')) format_url = url_or_none(playback_set.get('playbackUrl')) if not format_url: continue format_url = re.sub(r'(?<=//staragvod)(\d)', r'web\1', format_url) - dr = re.search(r'dynamic_range:(?P<dr>[a-z]+)', playback_set.get('tagsCombination')).group('dr') ext = determine_ext(format_url) current_formats, current_subs = [], {} try: if 'package:hls' in tags or ext == 'm3u8': current_formats, current_subs = self._extract_m3u8_formats_and_subtitles( - format_url, video_id, 'mp4', - entry_protocol='m3u8_native', - m3u8_id=f'{dr}-hls', headers=headers) + format_url, video_id, ext='mp4', headers=headers) elif 'package:dash' in tags or ext == 'mpd': current_formats, current_subs = self._extract_mpd_formats_and_subtitles( - format_url, video_id, mpd_id=f'{dr}-dash', headers=headers) + format_url, video_id, headers=headers) elif ext == 'f4m': pass # XXX: produce broken files else: @@ -209,24 +282,36 @@ class HotStarIE(HotStarBaseIE): 'height': int_or_none(playback_set.get('height')), }] except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + if isinstance(e.cause, HTTPError) and e.cause.status == 403: geo_restricted = True continue - if tags and 'encryption:plain' not in tags: + if tag_dict.get('encryption') not in ('plain', None): for f in current_formats: f['has_drm'] = True - if tags and 'language' in tags: - lang = re.search(r'language:(?P<lang>[a-z]+)', tags).group('lang') - for f in current_formats: - if not f.get('langauge'): - f['language'] = lang + for f in current_formats: + for k, v in self._TAG_FIELDS.items(): + if not f.get(k): + f[k] = tag_dict.get(v) + if f.get('vcodec') != 'none' and not f.get('dynamic_range'): + f['dynamic_range'] = tag_dict.get('dynamic_range') + if f.get('acodec') != 'none' and not f.get('audio_channels'): + f['audio_channels'] = { + 'stereo': 2, + 'dolby51': 6, + }.get(tag_dict.get('audio_channel')) + f['format_note'] = join_nonempty( + tag_dict.get('ladder'), + tag_dict.get('audio_channel') if f.get('acodec') != 'none' else None, + f.get('format_note'), + delim=', ') formats.extend(current_formats) subs = self._merge_subtitles(subs, current_subs) if not formats and geo_restricted: self.raise_geo_restricted(countries=['IN'], metadata_available=True) + self._remove_duplicate_formats(formats) for f in formats: f.setdefault('http_headers', {}).update(headers) @@ -235,7 +320,8 @@ class HotStarIE(HotStarBaseIE): 'title': video_data.get('title'), 'description': video_data.get('description'), 'duration': int_or_none(video_data.get('duration')), - 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')), + 'timestamp': int_or_none(traverse_obj(video_data, 'broadcastDate', 'startDate')), + 'release_year': int_or_none(video_data.get('year')), 'formats': formats, 'subtitles': subs, 'channel': video_data.get('channelName'), @@ -288,7 +374,7 @@ class HotStarPrefixIE(InfoExtractor): class HotStarPlaylistIE(HotStarBaseIE): IE_NAME = 'hotstar:playlist' - _VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/tv(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)' + _VALID_URL = r'https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)(?:/[^/]+){2}/list/[^/]+/t-(?P<id>\w+)' _TESTS = [{ 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26', 'info_dict': { @@ -296,6 +382,9 @@ class HotStarPlaylistIE(HotStarBaseIE): }, 'playlist_mincount': 20, }, { + 'url': 'https://www.hotstar.com/shows/savdhaan-india/s-26/list/popular-clips/t-3_2_26', + 'only_matching': True, + }, { 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480', 'only_matching': True, }, { @@ -311,7 +400,7 @@ class HotStarPlaylistIE(HotStarBaseIE): class HotStarSeasonIE(HotStarBaseIE): IE_NAME = 'hotstar:season' - _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)' + _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/\w+)/seasons/[^/]+/ss-(?P<id>\w+)' _TESTS = [{ 'url': 'https://www.hotstar.com/tv/radhakrishn/1260000646/seasons/season-2/ss-8028', 'info_dict': { @@ -330,6 +419,9 @@ class HotStarSeasonIE(HotStarBaseIE): 'id': '8208', }, 'playlist_mincount': 19, + }, { + 'url': 'https://www.hotstar.com/in/shows/bigg-boss/14714/seasons/season-4/ss-8208/', + 'only_matching': True, }] def _real_extract(self, url): @@ -340,7 +432,7 @@ class HotStarSeasonIE(HotStarBaseIE): class HotStarSeriesIE(HotStarBaseIE): IE_NAME = 'hotstar:series' - _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))/?(?:[#?]|$)' + _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/(?:tv|shows)/[^/]+/(?P<id>\d+))/?(?:[#?]|$)' _TESTS = [{ 'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646', 'info_dict': { @@ -359,6 +451,12 @@ class HotStarSeriesIE(HotStarBaseIE): 'id': '435', }, 'playlist_mincount': 267, + }, { + 'url': 'https://www.hotstar.com/in/shows/anupama/1260022017/', + 'info_dict': { + 'id': '1260022017', + }, + 'playlist_mincount': 940, }] def _real_extract(self, url): |