about | summary | refs | log | tree | commit | diff | stats
path: root/hypervideo_dl/extractor/viewlift.py
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/extractor/viewlift.py')
-rw-r--r-- hypervideo_dl/extractor/viewlift.py | 189
1 files changed, 143 insertions, 46 deletions
diff --git a/hypervideo_dl/extractor/viewlift.py b/hypervideo_dl/extractor/viewlift.py
index c3b2e86..4627f66 100644
--- a/hypervideo_dl/extractor/viewlift.py
+++ b/hypervideo_dl/extractor/viewlift.py
@@ -9,6 +9,7 @@ from ..utils import (
ExtractorError,
int_or_none,
parse_age_limit,
+ traverse_obj,
)
@@ -32,26 +33,33 @@ class ViewLiftBaseIE(InfoExtractor):
}
_TOKENS = {}
- def _call_api(self, site, path, video_id, query):
- token = self._TOKENS.get(site)
- if not token:
- token_query = {'site': site}
- email, password = self._get_login_info(netrc_machine=site)
- if email:
- resp = self._download_json(
- self._API_BASE + 'identity/signin', video_id,
- 'Logging in', query=token_query, data=json.dumps({
- 'email': email,
- 'password': password,
- }).encode())
- else:
- resp = self._download_json(
- self._API_BASE + 'identity/anonymous-token', video_id,
- 'Downloading authorization token', query=token_query)
- self._TOKENS[site] = token = resp['authorizationToken']
- return self._download_json(
- self._API_BASE + path, video_id,
- headers={'Authorization': token}, query=query)
+ def _fetch_token(self, site, url):
+ if self._TOKENS.get(site):
+ return
+
+ cookies = self._get_cookies(url)
+ if cookies and cookies.get('token'):
+ self._TOKENS[site] = self._search_regex(r'22authorizationToken\%22:\%22([^\%]+)\%22', cookies['token'].value, 'token')
+ if not self._TOKENS.get(site):
+ self.raise_login_required('Cookies (not necessarily logged in) are needed to download from this website', method='cookies')
+
+ def _call_api(self, site, path, video_id, url, query):
+ self._fetch_token(site, url)
+ try:
+ return self._download_json(
+ self._API_BASE + path, video_id, headers={'Authorization': self._TOKENS.get(site)}, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ webpage = e.cause.read().decode()
+ try:
+ error_message = traverse_obj(json.loads(webpage), 'errorMessage', 'message')
+ except json.JSONDecodeError:
+ raise ExtractorError(f'{site} said: {webpage}', cause=e.cause)
+ if error_message:
+ if 'has not purchased' in error_message:
+ self.raise_login_required(method='cookies')
+ raise ExtractorError(error_message, expected=True)
+ raise
class ViewLiftEmbedIE(ViewLiftBaseIE):
@@ -96,27 +104,24 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
site = domain.split('.')[-2]
if site in self._SITE_MAP:
site = self._SITE_MAP[site]
- try:
- content_data = self._call_api(
- site, 'entitlement/video/status', film_id, {
- 'id': film_id
- })['video']
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- error_message = self._parse_json(e.cause.read().decode(), film_id).get('errorMessage')
- if error_message == 'User does not have a valid subscription or has not purchased this content.':
- self.raise_login_required()
- raise ExtractorError(error_message, expected=True)
- raise
+
+ content_data = self._call_api(
+ site, 'entitlement/video/status', film_id, url, {
+ 'id': film_id
+ })['video']
gist = content_data['gist']
title = gist['title']
video_assets = content_data['streamingInfo']['videoAssets']
- formats = []
- mpeg_video_assets = video_assets.get('mpeg') or []
- for video_asset in mpeg_video_assets:
+ hls_url = video_assets.get('hls')
+ formats, subtitles = [], {}
+ if hls_url:
+ formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+ hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
+
+ for video_asset in video_assets.get('mpeg') or []:
video_asset_url = video_asset.get('url')
- if not video_asset:
+ if not video_asset_url:
continue
bitrate = int_or_none(video_asset.get('bitrate'))
height = int_or_none(self._search_regex(
@@ -130,13 +135,17 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
'vcodec': video_asset.get('codec'),
})
- hls_url = video_assets.get('hls')
- if hls_url:
- formats.extend(self._extract_m3u8_formats(
- hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
+ subs = {}
+ for sub in traverse_obj(content_data, ('contentDetails', 'closedCaptions')) or []:
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subs.setdefault(sub.get('language', 'English'), []).append({
+ 'url': sub_url,
+ })
- info = {
+ self._sort_formats(formats)
+ return {
'id': film_id,
'title': title,
'description': gist.get('description'),
@@ -145,14 +154,15 @@ class ViewLiftEmbedIE(ViewLiftBaseIE):
'age_limit': parse_age_limit(content_data.get('parentalRating')),
'timestamp': int_or_none(gist.get('publishDate'), 1000),
'formats': formats,
+ 'subtitles': self._merge_subtitles(subs, subtitles),
+ 'categories': traverse_obj(content_data, ('categories', ..., 'title')),
+ 'tags': traverse_obj(content_data, ('tags', ..., 'title')),
}
- for k in ('categories', 'tags'):
- info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
- return info
class ViewLiftIE(ViewLiftBaseIE):
IE_NAME = 'viewlift'
+ _API_BASE = 'https://prod-api-cached-2.viewlift.com/'
_VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX
_TESTS = [{
'url': 'http://www.snagfilms.com/films/title/lost_for_life',
@@ -222,24 +232,111 @@ class ViewLiftIE(ViewLiftBaseIE):
}, {
'url': 'https://www.marquee.tv/watch/sadlerswells-sacredmonsters',
'only_matching': True,
+ }, { # Free film with language code
+ 'url': 'https://www.hoichoi.tv/bn/films/title/shuyopoka',
+ 'info_dict': {
+ 'id': '7a7a9d33-1f4c-4771-9173-ee4fb6dbf196',
+ 'ext': 'mp4',
+ 'title': 'Shuyopoka',
+ 'description': 'md5:e28f2fb8680096a69c944d37c1fa5ffc',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20211006',
+ 'series': None
+ },
+ 'params': {'skip_download': True},
+ }, { # Free film
+ 'url': 'https://www.hoichoi.tv/films/title/dadu-no1',
+ 'info_dict': {
+ 'id': '0000015b-b009-d126-a1db-b81ff3780000',
+ 'ext': 'mp4',
+ 'title': 'Dadu No.1',
+ 'description': 'md5:605cba408e51a79dafcb824bdeded51e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20210827',
+ 'series': None
+ },
+ 'params': {'skip_download': True},
+ }, { # Free episode
+ 'url': 'https://www.hoichoi.tv/webseries/case-jaundice-s01-e01',
+ 'info_dict': {
+ 'id': 'f779e07c-30c8-459c-8612-5a834ab5e5ba',
+ 'ext': 'mp4',
+ 'title': 'Humans Vs. Corona',
+ 'description': 'md5:ca30a682b4528d02a3eb6d0427dd0f87',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20210830',
+ 'series': 'Case Jaundice'
+ },
+ 'params': {'skip_download': True},
+ }, { # Free video
+ 'url': 'https://www.hoichoi.tv/videos/1549072415320-six-episode-02-hindi',
+ 'info_dict': {
+ 'id': 'b41fa1ce-aca6-47b6-b208-283ff0a2de30',
+ 'ext': 'mp4',
+ 'title': 'Woman in red - Hindi',
+ 'description': 'md5:9d21edc1827d32f8633eb67c2054fc31',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20211006',
+ 'series': 'Six (Hindi)'
+ },
+ 'params': {'skip_download': True},
+ }, { # Free episode
+ 'url': 'https://www.hoichoi.tv/shows/watch-asian-paints-moner-thikana-online-season-1-episode-1',
+ 'info_dict': {
+ 'id': '1f45d185-8500-455c-b88d-13252307c3eb',
+ 'ext': 'mp4',
+ 'title': 'Jisshu Sengupta',
+ 'description': 'md5:ef6ffae01a3d83438597367400f824ed',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20211004',
+ 'series': 'Asian Paints Moner Thikana'
+ },
+ 'params': {'skip_download': True},
+ }, { # Free series
+ 'url': 'https://www.hoichoi.tv/shows/watch-moner-thikana-bengali-web-series-online',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'watch-moner-thikana-bengali-web-series-online',
+ },
+ }, { # Premium series
+ 'url': 'https://www.hoichoi.tv/shows/watch-byomkesh-bengali-web-series-online',
+ 'playlist_mincount': 14,
+ 'info_dict': {
+ 'id': 'watch-byomkesh-bengali-web-series-online',
+ },
+ }, { # Premium movie
+ 'url': 'https://www.hoichoi.tv/movies/detective-2020',
+ 'only_matching': True
}]
@classmethod
def suitable(cls, url):
return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
+ def _show_entries(self, domain, seasons):
+ for season in seasons:
+ for episode in season.get('episodes') or []:
+ path = traverse_obj(episode, ('gist', 'permalink'))
+ if path:
+ yield self.url_result(f'https://www.{domain}{path}', ie=self.ie_key())
+
def _real_extract(self, url):
domain, path, display_id = self._match_valid_url(url).groups()
site = domain.split('.')[-2]
if site in self._SITE_MAP:
site = self._SITE_MAP[site]
modules = self._call_api(
- site, 'content/pages', display_id, {
+ site, 'content/pages', display_id, url, {
'includeContent': 'true',
'moduleOffset': 1,
'path': path,
'site': site,
})['modules']
+
+ seasons = next((m['contentData'][0]['seasons'] for m in modules if m.get('moduleType') == 'ShowDetailModule'), None)
+ if seasons:
+ return self.playlist_result(self._show_entries(domain, seasons), display_id)
+
film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule')
return {
'_type': 'url_transparent',