diff options
author | Jesús <heckyel@hyperbola.info> | 2022-04-10 07:57:14 +0800 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2022-04-10 07:57:14 +0800 |
commit | c8046abd97bae36fa50320c32843cf6141752f93 (patch) | |
tree | 1dd684a1936a08e7d5229dfb99f2d6d7fb604559 /yt_dlp/extractor | |
parent | 40748dd6d0bb053a1c90bcfe874d3cee4b306744 (diff) | |
parent | ca5300c7edadad46ede0249ad9fa8feaa4ccddd4 (diff) | |
download | hypervideo-pre-c8046abd97bae36fa50320c32843cf6141752f93.tar.lz hypervideo-pre-c8046abd97bae36fa50320c32843cf6141752f93.tar.xz hypervideo-pre-c8046abd97bae36fa50320c32843cf6141752f93.zip |
updated from upstream | 10/04/2022 at 07:57
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r-- | yt_dlp/extractor/afreecatv.py | 58 | ||||
-rw-r--r-- | yt_dlp/extractor/bilibili.py | 36 | ||||
-rw-r--r-- | yt_dlp/extractor/common.py | 6 | ||||
-rw-r--r-- | yt_dlp/extractor/extractors.py | 12 | ||||
-rw-r--r-- | yt_dlp/extractor/jable.py | 107 | ||||
-rw-r--r-- | yt_dlp/extractor/moviepilot.py | 115 | ||||
-rw-r--r-- | yt_dlp/extractor/nrk.py | 4 | ||||
-rw-r--r-- | yt_dlp/extractor/piapro.py | 14 | ||||
-rw-r--r-- | yt_dlp/extractor/rai.py | 1 | ||||
-rw-r--r-- | yt_dlp/extractor/tiktok.py | 8 | ||||
-rw-r--r-- | yt_dlp/extractor/tver.py | 111 | ||||
-rw-r--r-- | yt_dlp/extractor/twitcasting.py | 8 | ||||
-rw-r--r-- | yt_dlp/extractor/youtube.py | 187 | ||||
-rw-r--r-- | yt_dlp/extractor/zattoo.py | 283 | ||||
-rw-r--r-- | yt_dlp/extractor/zee5.py | 4 |
15 files changed, 745 insertions, 209 deletions
diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py index 77f0e3c10..28946e9dd 100644 --- a/yt_dlp/extractor/afreecatv.py +++ b/yt_dlp/extractor/afreecatv.py @@ -1,14 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals +import functools import re from .common import InfoExtractor from ..compat import compat_xpath from ..utils import ( + ExtractorError, + OnDemandPagedList, date_from_str, determine_ext, - ExtractorError, int_or_none, qualities, traverse_obj, @@ -482,3 +484,57 @@ class AfreecaTVLiveIE(AfreecaTVIE): 'formats': formats, 'is_live': True, } + + +class AfreecaTVUserIE(InfoExtractor): + IE_NAME = 'afreecatv:user' + _VALID_URL = r'https?://bj\.afreeca(?:tv)?\.com/(?P<id>[^/]+)/vods/?(?P<slug_type>[^/]+)?' + _TESTS = [{ + 'url': 'https://bj.afreecatv.com/ryuryu24/vods/review', + 'info_dict': { + '_type': 'playlist', + 'id': 'ryuryu24', + 'title': 'ryuryu24 - review', + }, + 'playlist_count': 218, + }, { + 'url': 'https://bj.afreecatv.com/parang1995/vods/highlight', + 'info_dict': { + '_type': 'playlist', + 'id': 'parang1995', + 'title': 'parang1995 - highlight', + }, + 'playlist_count': 997, + }, { + 'url': 'https://bj.afreecatv.com/ryuryu24/vods', + 'info_dict': { + '_type': 'playlist', + 'id': 'ryuryu24', + 'title': 'ryuryu24 - all', + }, + 'playlist_count': 221, + }, { + 'url': 'https://bj.afreecatv.com/ryuryu24/vods/balloonclip', + 'info_dict': { + '_type': 'playlist', + 'id': 'ryuryu24', + 'title': 'ryuryu24 - balloonclip', + }, + 'playlist_count': 0, + }] + _PER_PAGE = 60 + + def _fetch_page(self, user_id, user_type, page): + page += 1 + info = self._download_json(f'https://bjapi.afreecatv.com/api/{user_id}/vods/{user_type}', user_id, + query={'page': page, 'per_page': self._PER_PAGE, 'orderby': 'reg_date'}, + note=f'Downloading {user_type} video page {page}') + for item in info['data']: + yield self.url_result( + f'https://vod.afreecatv.com/player/{item["title_no"]}/', AfreecaTVIE, item['title_no']) + + def _real_extract(self, url): + user_id, user_type = self._match_valid_url(url).group('id', 'slug_type') + user_type = user_type or 'all' + entries = OnDemandPagedList(functools.partial(self._fetch_page, user_id, user_type), self._PER_PAGE) + return self.playlist_result(entries, user_id, f'{user_id} - {user_type}') diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py index 3212f3328..a9574758c 100644 --- a/yt_dlp/extractor/bilibili.py +++ b/yt_dlp/extractor/bilibili.py @@ -51,7 +51,7 @@ class BiliBiliIE(InfoExtractor): _TESTS = [{ 'url': 'http://www.bilibili.com/video/av1074402/', - 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788', + 'md5': '7ac275ec84a99a6552c5d229659a0fe1', 'info_dict': { 'id': '1074402_part1', 'ext': 'mp4', @@ -61,6 +61,11 @@ class BiliBiliIE(InfoExtractor): 'upload_date': '20140420', 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923', 'timestamp': 1398012678, + 'tags': ['顶上去报复社会', '该来的总会来的', '金克拉是检验歌曲的唯一标准', '坷垃教主', '金坷垃', '邓紫棋', '治愈系坷垃'], + 'bv_id': 'BV11x411K7CN', + 'cid': '1554319', + 'thumbnail': 'http://i2.hdslb.com/bfs/archive/c79a8cf0347cd7a897c53a2f756e96aead128e8c.jpg', + 'duration': 308.36, }, }, { # Tested in BiliBiliBangumiIE @@ -91,6 +96,11 @@ class BiliBiliIE(InfoExtractor): 'timestamp': 1488382634, 'uploader_id': '65880958', 'uploader': '阿滴英文', + 'thumbnail': 'http://i2.hdslb.com/bfs/archive/49267ce20bc246be6304bf369a3ded0256854c23.jpg', + 'cid': '14694589', + 'duration': 554.117, + 'bv_id': 'BV13x41117TL', + 'tags': ['人文', '英语', '文化', '公开课', '阿滴英文'], }, 'params': { 'skip_download': True, @@ -107,6 +117,27 @@ class BiliBiliIE(InfoExtractor): 'title': '物语中的人物是如何吐槽自己的OP的' }, 'playlist_count': 17, + }, { + # Correct matching of single and double quotes in title + 'url': 'https://www.bilibili.com/video/BV1NY411E7Rx/', + 'info_dict': { + 'id': '255513412_part1', + 'ext': 'mp4', + 'title': 'Vid"eo" Te\'st', + 'cid': '570602418', + 'thumbnail': 'http://i2.hdslb.com/bfs/archive/0c0de5a90b6d5b991b8dcc6cde0afbf71d564791.jpg', + 'upload_date': '20220408', + 'timestamp': 1649436552, + 'description': 'Vid"eo" Te\'st', + 'uploader_id': '1630758804', + 'bv_id': 'BV1NY411E7Rx', + 'duration': 60.394, + 'uploader': 'bili_31244483705', + 'tags': ['VLOG'], + }, + 'params': { + 'skip_download': True, + }, }] _APP_KEY = 'iVGUTjsxvpLeuDCf' @@ -258,7 +289,8 @@ class BiliBiliIE(InfoExtractor): self._sort_formats(formats) title = self._html_search_regex(( - r'<h1[^>]+title=(["\'])(?P<content>[^"\']+)', + r'<h1[^>]+title=(["])(?P<content>[^"]+)', + r'<h1[^>]+title=([\'])(?P<content>[^\']+)', r'(?s)<h1[^>]*>(?P<content>.+?)</h1>', self._meta_regex('title') ), webpage, 'title', group='content', fatal=False) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index e2605c1f4..9914910d0 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -23,6 +23,7 @@ from ..compat import ( compat_getpass, compat_http_client, compat_os_name, + compat_Pattern, compat_str, compat_urllib_error, compat_urllib_parse_unquote, @@ -41,7 +42,6 @@ from ..utils import ( base_url, bug_reports_message, clean_html, - compiled_regex_type, determine_ext, determine_protocol, dict_get, @@ -1203,7 +1203,9 @@ class InfoExtractor(object): In case of failure return a default value or raise a WARNING or a RegexNotFoundError, depending on fatal, specifying the field name. """ - if isinstance(pattern, (str, compat_str, compiled_regex_type)): + if string is None: + mobj = None + elif isinstance(pattern, (str, compat_Pattern)): mobj = re.search(pattern, string, flags) else: for p in pattern: diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 457f4c2aa..0cb686304 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -44,6 +44,7 @@ from .aenetworks import ( from .afreecatv import ( AfreecaTVIE, AfreecaTVLiveIE, + AfreecaTVUserIE, ) from .airmozilla import AirMozillaIE from .aljazeera import AlJazeeraIE @@ -700,6 +701,10 @@ from .ivi import ( from .ivideon import IvideonIE from .iwara import IwaraIE from .izlesene import IzleseneIE +from .jable import ( + JableIE, + JablePlaylistIE, +) from .jamendo import ( JamendoIE, JamendoAlbumIE, @@ -915,6 +920,7 @@ from .motherless import ( ) from .motorsport import MotorsportIE from .movieclips import MovieClipsIE +from .moviepilot import MoviepilotIE from .moviezine import MoviezineIE from .movingimage import MovingImageIE from .msn import MSNIE @@ -2094,6 +2100,7 @@ from .youtube import ( YoutubeIE, YoutubeClipIE, YoutubeFavouritesIE, + YoutubeNotificationsIE, YoutubeHistoryIE, YoutubeTabIE, YoutubeLivestreamEmbedIE, @@ -2117,18 +2124,17 @@ from .zattoo import ( EWETVIE, GlattvisionTVIE, MNetTVIE, - MyVisionTVIE, NetPlusIE, OsnatelTVIE, QuantumTVIE, - QuicklineIE, - QuicklineLiveIE, SaltTVIE, SAKTVIE, VTXTVIE, WalyTVIE, ZattooIE, ZattooLiveIE, + ZattooMoviesIE, + ZattooRecordingsIE, ) from .zdf import ZDFIE, ZDFChannelIE from .zee5 import ( diff --git a/yt_dlp/extractor/jable.py b/yt_dlp/extractor/jable.py new file mode 100644 index 000000000..b294aee70 --- /dev/null +++ b/yt_dlp/extractor/jable.py @@ -0,0 +1,107 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + InAdvancePagedList, + int_or_none, + orderedSet, + unified_strdate, +) + + +class JableIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?jable.tv/videos/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://jable.tv/videos/pppd-812/', + 'md5': 'f1537283a9bc073c31ff86ca35d9b2a6', + 'info_dict': { + 'id': 'pppd-812', + 'ext': 'mp4', + 'title': 'PPPD-812 只要表現好巨乳女教師吉根柚莉愛就獎勵學生們在白虎穴內射出精液', + 'description': 'md5:5b6d4199a854f62c5e56e26ccad19967', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + 'like_count': int, + 'view_count': int, + }, + }, { + 'url': 'https://jable.tv/videos/apak-220/', + 'md5': '71f9239d69ced58ab74a816908847cc1', + 'info_dict': { + 'id': 'apak-220', + 'ext': 'mp4', + 'title': 'md5:5c3861b7cf80112a6e2b70bccf170824', + 'description': '', + 'thumbnail': r're:^https?://.*\.jpg$', + 'age_limit': 18, + 'like_count': int, + 'view_count': int, + 'upload_date': '20220319', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + formats = self._extract_m3u8_formats( + self._search_regex(r'var\s+hlsUrl\s*=\s*\'([^\']+)', webpage, 'hls_url'), video_id, 'mp4', m3u8_id='hls') + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._og_search_title(webpage), + 'description': self._og_search_description(webpage, default=''), + 'thumbnail': self._og_search_thumbnail(webpage, default=None), + 'formats': formats, + 'age_limit': 18, + 'upload_date': unified_strdate(self._search_regex( + r'class="inactive-color">\D+\s+(\d{4}-\d+-\d+)', webpage, 'upload_date', default=None)), + 'view_count': int_or_none(self._search_regex( + r'#icon-eye"></use></svg>\n*<span class="mr-3">([\d ]+)', + webpage, 'view_count', default='').replace(' ', '')), + 'like_count': int_or_none(self._search_regex( + r'#icon-heart"></use></svg><span class="count">(\d+)', webpage, 'link_count', default=None)), + } + + +class JablePlaylistIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?jable.tv/(?:categories|models|tags)/(?P<id>[\w-]+)' + _TESTS = [{ + 'url': 'https://jable.tv/models/kaede-karen/', + 'info_dict': { + 'id': 'kaede-karen', + 'title': '楓カレン', + }, + 'playlist_count': 34, + }, { + 'url': 'https://jable.tv/categories/roleplay/', + 'only_matching': True, + }, { + 'url': 'https://jable.tv/tags/girl/', + 'only_matching': True, + }] + + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + + def page_func(page_num): + return [ + self.url_result(player_url, JableIE) + for player_url in orderedSet(re.findall( + r'href="(https://jable.tv/videos/[\w-]+/?)"', + self._download_webpage(url, playlist_id, query={ + 'mode': 'async', + 'from': page_num + 1, + 'function': 'get_block', + 'block_id': 'list_videos_common_videos_list', + }, note=f'Downloading page {page_num + 1}')))] + + return self.playlist_result( + InAdvancePagedList(page_func, int_or_none(self._search_regex( + r'from:(\d+)">[^<]+\s*»', webpage, 'last page number', default=1)), 24), + playlist_id, self._search_regex( + r'<h2 class="h3-md mb-1">([^<]+)', webpage, 'playlist title', default=None)) diff --git a/yt_dlp/extractor/moviepilot.py b/yt_dlp/extractor/moviepilot.py new file mode 100644 index 000000000..4605d3481 --- /dev/null +++ b/yt_dlp/extractor/moviepilot.py @@ -0,0 +1,115 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .dailymotion import DailymotionIE +from .common import InfoExtractor +from ..utils import ( + parse_iso8601, + try_get, +) + +import re + + +class MoviepilotIE(InfoExtractor): + _IE_NAME = 'moviepilot' + _IE_DESC = 'Moviepilot trailer' + _VALID_URL = r'https?://(?:www\.)?moviepilot\.de/movies/(?P<id>[^/]+)' + + _TESTS = [{ + 'url': 'https://www.moviepilot.de/movies/interstellar-2/', + 'info_dict': { + 'id': 'x7xdut5', + 'display_id': 'interstellar-2', + 'ext': 'mp4', + 'title': 'Interstellar', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaXev1VvzitVZMFsR/x720', + 'timestamp': 1400491705, + 'description': 'md5:7dfc5c1758e7322a7346934f1f0c489c', + 'uploader': 'Moviepilot', + 'like_count': int, + 'view_count': int, + 'uploader_id': 'x6nd9k', + 'upload_date': '20140519', + 'duration': 140, + 'age_limit': 0, + 'tags': ['Alle Trailer', 'Movie', 'Third Party'], + }, + }, { + 'url': 'https://www.moviepilot.de/movies/interstellar-2/trailer', + 'only_matching': True, + }, { + 'url': 'https://www.moviepilot.de/movies/interstellar-2/kinoprogramm/berlin', + 'only_matching': True, + }, { + 'url': 'https://www.moviepilot.de/movies/queen-slim/trailer', + 'info_dict': { + 'id': 'x7xj6o7', + 'display_id': 'queen-slim', + 'title': 'Queen & Slim', + 'ext': 'mp4', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SbUM71WtomSjVmI_q/x720', + 'timestamp': 1571838685, + 'description': 'md5:73058bcd030aa12d991e4280d65fbebe', + 'uploader': 'Moviepilot', + 'like_count': int, + 'view_count': int, + 'uploader_id': 'x6nd9k', + 'upload_date': '20191023', + 'duration': 138, + 'age_limit': 0, + 'tags': ['Movie', 'Verleih', 'Neue Trailer'], + }, + }, { + 'url': 'https://www.moviepilot.de/movies/der-geiger-von-florenz/trailer', + 'info_dict': { + 'id': 'der-geiger-von-florenz', + 'title': 'Der Geiger von Florenz', + 'ext': 'mp4', + }, + 'skip': 'No trailer for this movie.', + }, { + 'url': 'https://www.moviepilot.de/movies/muellers-buero/', + 'info_dict': { + 'id': 'x7xcw1i', + 'display_id': 'muellers-buero', + 'title': 'Müllers Büro', + 'ext': 'mp4', + 'description': 'md5:57501251c05cdc61ca314b7633e0312e', + 'timestamp': 1287584475, + 'age_limit': 0, + 'duration': 82, + 'upload_date': '20101020', + 'thumbnail': r're:https://\w+\.dmcdn\.net/v/SaMes1WfAm1d6maq_/x720', + 'uploader': 'Moviepilot', + 'like_count': int, + 'view_count': int, + 'tags': ['Alle Trailer', 'Movie', 'Verleih'], + 'uploader_id': 'x6nd9k', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(f'https://www.moviepilot.de/movies/{video_id}/trailer', video_id) + + duration = try_get( + re.match(r'P(?P<hours>\d+)H(?P<mins>\d+)M(?P<secs>\d+)S', + self._html_search_meta('duration', webpage, fatal=False) or ''), + lambda mobj: sum(float(x) * y for x, y in zip(mobj.groups(), (3600, 60, 1)))) + # _html_search_meta is not used since we don't want name=description to match + description = self._html_search_regex( + '<meta[^>]+itemprop="description"[^>]+content="([^>"]+)"', webpage, 'description', fatal=False) + + return { + '_type': 'url_transparent', + 'ie_key': DailymotionIE.ie_key(), + 'display_id': video_id, + 'title': self._og_search_title(webpage), + 'url': self._html_search_meta('embedURL', webpage), + 'thumbnail': self._html_search_meta('thumbnailURL', webpage), + 'description': description, + 'duration': duration, + 'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage), delimiter=' ') + } diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py index 4d723e886..0cf26d598 100644 --- a/yt_dlp/extractor/nrk.py +++ b/yt_dlp/extractor/nrk.py @@ -13,6 +13,7 @@ from ..utils import ( ExtractorError, int_or_none, parse_duration, + parse_iso8601, str_or_none, try_get, urljoin, @@ -247,6 +248,7 @@ class NRKIE(NRKBaseIE): 'age_limit': age_limit, 'formats': formats, 'subtitles': subtitles, + 'timestamp': parse_iso8601(try_get(manifest, lambda x: x['availability']['onDemand']['from'], str)) } if is_series: @@ -797,7 +799,7 @@ class NRKPlaylistBaseIE(InfoExtractor): for video_id in re.findall(self._ITEM_RE, webpage) ] - playlist_title = self. _extract_title(webpage) + playlist_title = self._extract_title(webpage) playlist_description = self._extract_description(webpage) return self.playlist_result( diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py index c4eb4913f..ae160623b 100644 --- a/yt_dlp/extractor/piapro.py +++ b/yt_dlp/extractor/piapro.py @@ -27,6 +27,18 @@ class PiaproIE(InfoExtractor): 'title': '裏表ラバーズ', 'thumbnail': r're:^https?://.*\.jpg$', } + }, { + 'note': 'There are break lines in description, mandating (?s) flag', + 'url': 'https://piapro.jp/t/9cSd', + 'md5': '952bb6d1e8de95050206408a87790676', + 'info_dict': { + 'id': '9cSd', + 'ext': 'mp3', + 'title': '青に溶けた風船 / 初音ミク', + 'description': 'md5:d395a9bd151447631a5a1460bc7f9132', + 'uploader': 'シアン・キノ', + 'uploader_id': 'cyankino', + } }] _login_status = False @@ -81,7 +93,7 @@ class PiaproIE(InfoExtractor): return { 'id': video_id, 'title': self._html_search_regex(r'<h1\s+class="cd_works-title">(.+?)</h1>', webpage, 'title', fatal=False), - 'description': self._html_search_regex(r'<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False), + 'description': self._html_search_regex(r'(?s)<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False), 'uploader': uploader, 'uploader_id': uploader_id, 'timestamp': unified_timestamp(create_date, False), diff --git a/yt_dlp/extractor/rai.py b/yt_dlp/extractor/rai.py index 6864129c6..7c72d60c6 100644 --- a/yt_dlp/extractor/rai.py +++ b/yt_dlp/extractor/rai.py @@ -340,6 +340,7 @@ class RaiPlayIE(RaiBaseIE): 'episode': media.get('episode_title'), 'episode_number': int_or_none(media.get('episode')), 'subtitles': subtitles, + 'release_year': traverse_obj(media, ('track_info', 'edit_year')), } info.update(relinker_info) diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index c1d6c5477..987b0c43b 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -15,6 +15,7 @@ from ..compat import ( from ..utils import ( ExtractorError, HEADRequest, + UnsupportedError, get_first, int_or_none, join_nonempty, @@ -890,5 +891,8 @@ class TikTokVMIE(InfoExtractor): }] def _real_extract(self, url): - return self.url_result(self._request_webpage( - HEADRequest(url), self._match_id(url), headers={'User-Agent': 'facebookexternalhit/1.1'}).geturl(), TikTokIE) + new_url = self._request_webpage( + HEADRequest(url), self._match_id(url), headers={'User-Agent': 'facebookexternalhit/1.1'}).geturl() + if self.suitable(new_url): # Prevent infinite loop in case redirect fails + raise UnsupportedError(new_url) + return self.url_result(new_url) diff --git a/yt_dlp/extractor/tver.py b/yt_dlp/extractor/tver.py index 9ff3136e2..f23af1f14 100644 --- a/yt_dlp/extractor/tver.py +++ b/yt_dlp/extractor/tver.py @@ -1,77 +1,94 @@ # coding: utf-8 from __future__ import unicode_literals - from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, - int_or_none, - remove_start, smuggle_url, + str_or_none, traverse_obj, ) class TVerIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>corner|episode|feature|lp|tokyo2020/video)/(?P<id>[fc]?\d+)' - # videos are only available for 7 days + _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?:(?P<type>lp|corner|series|episodes?|feature|tokyo2020/video)/)+(?P<id>[a-zA-Z0-9]+)' _TESTS = [{ - 'url': 'https://tver.jp/corner/f0062178', - 'only_matching': True, + 'skip': 'videos are only available for 7 days', + 'url': 'https://tver.jp/episodes/ephss8yveb', + 'info_dict': { + 'title': '#44 料理と値段と店主にびっくり オモてなしすぎウマい店 2時間SP', + 'description': 'md5:66985373a66fed8ad3cd595a3cfebb13', + }, + 'add_ie': ['BrightcoveNew'], }, { - 'url': 'https://tver.jp/feature/f0062413', - 'only_matching': True, - }, { - 'url': 'https://tver.jp/episode/79622438', - 'only_matching': True, - }, { - # subtitle = ' ' - 'url': 'https://tver.jp/corner/f0068870', - 'only_matching': True, - }, { - 'url': 'https://tver.jp/lp/f0009694', - 'only_matching': True, + 'skip': 'videos are only available for 7 days', + 'url': 'https://tver.jp/lp/episodes/ep6f16g26p', + 'info_dict': { + # sorry but this is "correct" + 'title': '4月11日(月)23時06分 ~ 放送予定', + 'description': 'md5:4029cc5f4b1e8090dfc5b7bd2bc5cd0b', + }, + 'add_ie': ['BrightcoveNew'], }, { - 'url': 'https://tver.jp/lp/c0000239', + 'url': 'https://tver.jp/corner/f0103888', 'only_matching': True, }, { - 'url': 'https://tver.jp/tokyo2020/video/6264525510001', + 'url': 'https://tver.jp/lp/f0033031', 'only_matching': True, }] - _TOKEN = None BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' + _PLATFORM_UID = None + _PLATFORM_TOKEN = None def _real_initialize(self): - self._TOKEN = self._download_json( - 'https://tver.jp/api/access_token.php', None)['token'] + create_response = self._download_json( + 'https://platform-api.tver.jp/v2/api/platform_users/browser/create', None, + note='Creating session', data=b'device_type=pc', headers={ + 'Origin': 'https://s.tver.jp', + 'Referer': 'https://s.tver.jp/', + 'Content-Type': 'application/x-www-form-urlencoded', + }) + self._PLATFORM_UID = traverse_obj(create_response, ('result', 'platform_uid')) + self._PLATFORM_TOKEN = traverse_obj(create_response, ('result', 'platform_token')) def _real_extract(self, url): - path, video_id = self._match_valid_url(url).groups() - if path == 'lp': - webpage = self._download_webpage(url, video_id) - redirect_path = self._search_regex(r'to_href="([^"]+)', webpage, 'redirect path') - path, video_id = self._match_valid_url(f'https://tver.jp{redirect_path}').groups() - api_response = self._download_json(f'https://api.tver.jp/v4/{path}/{video_id}', video_id, query={'token': self._TOKEN}) - p_id = traverse_obj(api_response, ('main', 'publisher_id')) - if not p_id: - error_msg, expected = traverse_obj(api_response, ('episode', 0, 'textbar', 0, ('text', 'longer')), get_all=False), True - if not error_msg: - error_msg, expected = 'Failed to extract publisher ID', False - raise ExtractorError(error_msg, expected=expected) - service = remove_start(traverse_obj(api_response, ('main', 'service')), 'ts_') + video_id, video_type = self._match_valid_url(url).group('id', 'type') + if video_type not in {'series', 'episodes'}: + webpage = self._download_webpage(url, video_id, note='Resolving to new URL') + video_id = self._match_id(self._search_regex( + (r'canonical"\s*href="(https?://tver\.jp/[^"]+)"', r'&link=(https?://tver\.jp/[^?&]+)[?&]'), + webpage, 'url regex')) + video_info = self._download_json( + f'https://statics.tver.jp/content/episode/{video_id}.json', video_id, + query={'v': '5'}, headers={ + 'Origin': 'https://tver.jp', + 'Referer': 'https://tver.jp/', + }) + p_id = video_info['video']['accountID'] + r_id = traverse_obj(video_info, ('video', ('videoRefID', 'videoID')), get_all=False) + if not r_id: + raise ExtractorError('Failed to extract reference ID for Brightcove') + if not r_id.isdigit(): + r_id = f'ref:{r_id}' - r_id = traverse_obj(api_response, ('main', 'reference_id')) - if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'): - r_id = 'ref:' + r_id - bc_url = smuggle_url( - self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), - {'geo_countries': ['JP']}) + additional_info = self._download_json( + f'https://platform-api.tver.jp/service/api/v1/callEpisode/{video_id}?require_data=mylist,later[epefy106ur],good[epefy106ur],resume[epefy106ur]', + video_id, fatal=False, + query={ + 'platform_uid': self._PLATFORM_UID, + 'platform_token': self._PLATFORM_TOKEN, + }, headers={ + 'x-tver-platform-type': 'web' + }) return { '_type': 'url_transparent', - 'description': traverse_obj(api_response, ('main', 'note', 0, 'text'), expected_type=compat_str), - 'episode_number': int_or_none(traverse_obj(api_response, ('main', 'ext', 'episode_number'), expected_type=compat_str)), - 'url': bc_url, + 'title': str_or_none(video_info.get('title')), + 'description': str_or_none(video_info.get('description')), + 'url': smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), {'geo_countries': ['JP']}), + 'series': traverse_obj( + additional_info, ('result', ('episode', 'series'), 'content', ('seriesTitle', 'title')), + get_all=False), 'ie_key': 'BrightcoveNew', } diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py index af911de98..7f3fa0735 100644 --- a/yt_dlp/extractor/twitcasting.py +++ b/yt_dlp/extractor/twitcasting.py @@ -183,6 +183,14 @@ class TwitCastingIE(InfoExtractor): infodict = { 'formats': formats } + elif len(m3u8_urls) == 1: + formats = self._extract_m3u8_formats( + m3u8_urls[0], video_id, 'mp4', headers=self._M3U8_HEADERS) + self._sort_formats(formats) + infodict = { + # No problem here since there's only one manifest + 'formats': formats, + } else: infodict = { '_type': 'multi_video', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 017554c88..f284487b8 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -384,6 +384,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): def _real_initialize(self): self._initialize_pref() self._initialize_consent() + self._check_login_required() + + def _check_login_required(self): if (self._LOGIN_REQUIRED and self.get_param('cookiefile') is None and self.get_param('cookiesfrombrowser') is None): @@ -563,6 +566,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor): headers['X-Origin'] = origin return {h: v for h, v in headers.items() if v is not None} + def _download_ytcfg(self, client, video_id): + url = { + 'web': 'https://www.youtube.com', + 'web_music': 'https://music.youtube.com', + 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1' + }.get(client) + if not url: + return {} + webpage = self._download_webpage( + url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config') + return self.extract_ytcfg(video_id, webpage) or {} + @staticmethod def _build_api_continuation_query(continuation, ctp=None): query = { @@ -728,6 +743,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return None def _extract_time_text(self, renderer, *path_list): + """@returns (timestamp, time_text)""" text = self._get_text(renderer, *path_list) or '' dt = self.extract_relative_time(text) timestamp = None @@ -2959,16 +2975,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return orderedSet(requested_clients) - def _extract_player_ytcfg(self, client, video_id): - url = { - 'web_music': 'https://music.youtube.com', - 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1' - }.get(client) - if not url: - return {} - webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip()) - return self.extract_ytcfg(video_id, webpage) or {} - def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg): initial_pr = None if webpage: @@ -3005,8 +3011,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): while clients: client, base_client, variant = _split_innertube_client(clients.pop()) player_ytcfg = master_ytcfg if client == 'web' else {} - if 'configs' not in self._configuration_arg('player_skip'): - player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg + if 'configs' not in self._configuration_arg('player_skip') and client != 'web': + player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage) require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER') @@ -4109,14 +4115,15 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): if fatal: raise ExtractorError('Unable to find selected tab') - @classmethod - def _extract_uploader(cls, data): + def _extract_uploader(self, data): uploader = {} - renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {} + renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {} owner = try_get( renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict) if owner: - uploader['uploader'] = owner.get('text') + owner_text = owner.get('text') + uploader['uploader'] = self._search_regex( + r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text) uploader['uploader_id'] = try_get( owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str) uploader['uploader_url'] = urljoin( @@ -4346,6 +4353,10 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): check_get_keys='contents', fatal=False, ytcfg=ytcfg, note='Downloading API JSON with unavailable videos') + @property + def skip_webpage(self): + return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) + def _extract_webpage(self, url, item_id, fatal=True): retries = self.get_param('extractor_retries', 3) count = -1 @@ -4392,9 +4403,21 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): return webpage, data + def _report_playlist_authcheck(self, ytcfg, fatal=True): + """Use if failed to extract ytcfg (and data) from initial webpage""" + if not ytcfg and self.is_authenticated: + msg = 'Playlists that require authentication may not extract correctly without a successful webpage download' + if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal: + raise ExtractorError( + f'{msg}. If you are not downloading private content, or ' + 'your cookies are only for the first account and channel,' + ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check', + expected=True) + self.report_warning(msg, only_once=True) + def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'): data = None - if 'webpage' not in self._configuration_arg('skip'): + if not self.skip_webpage: webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) # Reject webpage data if redirected to home page without explicitly requesting @@ -4408,14 +4431,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): raise ExtractorError(msg, expected=True) self.report_warning(msg, only_once=True) if not data: - if not ytcfg and self.is_authenticated: - msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.' - if 'authcheck' not in self._configuration_arg('skip') and fatal: - raise ExtractorError( - msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,' - ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check', - expected=True) - self.report_warning(msg, only_once=True) + self._report_playlist_authcheck(ytcfg, fatal=fatal) data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client) return data, ytcfg @@ -4453,14 +4469,20 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'), ('continuationContents', ), ) + display_id = f'query "{query}"' check_get_keys = tuple(set(keys[0] for keys in content_keys)) + ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {} + self._report_playlist_authcheck(ytcfg, fatal=False) continuation_list = [None] + search = None for page_num in itertools.count(1): data.update(continuation_list[0] or {}) + headers = self.generate_api_headers( + ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client) search = self._extract_response( - item_id='query "%s" page %s' % (query, page_num), ep='search', query=data, - default_client=default_client, check_get_keys=check_get_keys) + item_id=f'{display_id} page {page_num}', ep='search', query=data, + default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers) slr_contents = traverse_obj(search, *content_keys) yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list) if not continuation_list[0]: @@ -5136,6 +5158,24 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'note': 'non-standard redirect to regional channel', 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', 'only_matching': True + }, { + 'note': 'collaborative playlist (uploader name in the form "by <uploader> and x other(s)")', + 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', + 'info_dict': { + 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', + 'modified_date': '20220407', + 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', + 'tags': [], + 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q', + 'uploader': 'pukkandan', + 'availability': 'unlisted', + 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q', + 'channel': 'pukkandan', + 'description': 'Test for collaborative playlist', + 'title': 'yt-dlp test - collaborative playlist', + 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', + }, + 'playlist_mincount': 2 }] @classmethod @@ -5486,6 +5526,95 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): ie=YoutubeTabIE.ie_key()) +class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor): + IE_NAME = 'youtube:notif' + IE_DESC = 'YouTube notifications; ":ytnotif" keyword (requires cookies)' + _VALID_URL = r':ytnotif(?:ication)?s?' + _LOGIN_REQUIRED = True + _TESTS = [{ + 'url': ':ytnotif', + 'only_matching': True, + }, { + 'url': ':ytnotifications', + 'only_matching': True, + }] + + def _extract_notification_menu(self, response, continuation_list): + notification_list = traverse_obj( + response, + ('actions', 0, 'openPopupAction', 'popup', 'multiPageMenuRenderer', 'sections', 0, 'multiPageMenuNotificationSectionRenderer', 'items'), + ('actions', 0, 'appendContinuationItemsAction', 'continuationItems'), + expected_type=list) or [] + continuation_list[0] = None + for item in notification_list: + entry = self._extract_notification_renderer(item.get('notificationRenderer')) + if entry: + yield entry + continuation = item.get('continuationItemRenderer') + if continuation: + continuation_list[0] = continuation + + def _extract_notification_renderer(self, notification): + video_id = traverse_obj( + notification, ('navigationEndpoint', 'watchEndpoint', 'videoId'), expected_type=str) + url = f'https://www.youtube.com/watch?v={video_id}' + channel_id = None + if not video_id: + browse_ep = traverse_obj( + notification, ('navigationEndpoint', 'browseEndpoint'), expected_type=dict) + channel_id = traverse_obj(browse_ep, 'browseId', expected_type=str) + post_id = self._search_regex( + r'/post/(.+)', traverse_obj(browse_ep, 'canonicalBaseUrl', expected_type=str), + 'post id', default=None) + if not channel_id or not post_id: + return + # The direct /post url redirects to this in the browser + url = f'https://www.youtube.com/channel/{channel_id}/community?lb={post_id}' + + channel = traverse_obj( + notification, ('contextualMenu', 'menuRenderer', 'items', 1, 'menuServiceItemRenderer', 'text', 'runs', 1, 'text'), + expected_type=str) + title = self._search_regex( + rf'{re.escape(channel)} [^:]+: (.+)', self._get_text(notification, 'shortMessage'), + 'video title', default=None) + if title: + title = title.replace('\xad', '') # remove soft hyphens + upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d') + if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key()) + else None) + return { + '_type': 'url', + 'url': url, + 'ie_key': (YoutubeIE if video_id else YoutubeTabIE).ie_key(), + 'video_id': video_id, + 'title': title, + 'channel_id': channel_id, + 'channel': channel, + 'thumbnails': self._extract_thumbnails(notification, 'videoThumbnail'), + 'upload_date': upload_date, + } + + def _notification_menu_entries(self, ytcfg): + continuation_list = [None] + response = None + for page in itertools.count(1): + ctoken = traverse_obj( + continuation_list, (0, 'continuationEndpoint', 'getNotificationMenuEndpoint', 'ctoken'), expected_type=str) + response = self._extract_response( + item_id=f'page {page}', query={'ctoken': ctoken} if ctoken else {}, ytcfg=ytcfg, + ep='notification/get_notification_menu', check_get_keys='actions', + headers=self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))) + yield from self._extract_notification_menu(response, continuation_list) + if not continuation_list[0]: + break + + def _real_extract(self, url): + display_id = 'notifications' + ytcfg = self._download_ytcfg('web', display_id) if not self.skip_webpage else {} + self._report_playlist_authcheck(ytcfg) + return self.playlist_result(self._notification_menu_entries(ytcfg), display_id, display_id) + + class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): IE_DESC = 'YouTube search' IE_NAME = 'youtube:search' @@ -5615,7 +5744,9 @@ class YoutubeFeedsInfoExtractor(InfoExtractor): Subclasses must define the _FEED_NAME property. """ _LOGIN_REQUIRED = True - _TESTS = [] + + def _real_initialize(self): + YoutubeBaseInfoExtractor._check_login_required(self) @property def IE_NAME(self): diff --git a/yt_dlp/extractor/zattoo.py b/yt_dlp/extractor/zattoo.py index c02b4ca14..8614ca23d 100644 --- a/yt_dlp/extractor/zattoo.py +++ b/yt_dlp/extractor/zattoo.py @@ -51,25 +51,30 @@ class ZattooPlatformBaseIE(InfoExtractor): self._power_guide_hash = data['session']['power_guide_hash'] def _initialize_pre_login(self): - webpage = self._download_webpage( - self._host_url(), None, 'Downloading app token') - app_token = self._html_search_regex( - r'appToken\s*=\s*(["\'])(?P<token>(?:(?!\1).)+?)\1', - webpage, 'app token', group='token') - app_version = self._html_search_regex( - r'<!--\w+-(.+?)-', webpage, 'app version', default='2.8.2') + session_token = self._download_json( + f'{self._host_url()}/token.json', None, 'Downloading session token')['session_token'] # Will setup appropriate cookies self._request_webpage( - '%s/zapi/v2/session/hello' % self._host_url(), None, + '%s/zapi/v3/session/hello' % self._host_url(), None, 'Opening session', data=urlencode_postdata({ - 'client_app_token': app_token, 'uuid': compat_str(uuid4()), 'lang': 'en', - 'app_version': app_version, + 'app_version': '1.8.2', 'format': 'json', + 'client_app_token': session_token, })) + def _extract_video_id_from_recording(self, recid): + playlist = self._download_json( + f'{self._host_url()}/zapi/v2/playlist', recid, 'Downloading playlist') + try: + return next( + str(item['program_id']) for item in playlist['recordings'] + if item.get('program_id') and str(item.get('id')) == recid) + except (StopIteration, KeyError): + raise ExtractorError('Could not extract video id from recording') + def _extract_cid(self, video_id, channel_name): channel_groups = self._download_json( '%s/zapi/v2/cached/channels/%s' % (self._host_url(), @@ -118,7 +123,26 @@ class ZattooPlatformBaseIE(InfoExtractor): return cid, info_dict - def _extract_formats(self, cid, video_id, record_id=None, is_live=False): + def _extract_ondemand_info(self, ondemand_id): + """ + @returns (ondemand_token, ondemand_type, info_dict) + """ + data = self._download_json( + '%s/zapi/vod/movies/%s' % (self._host_url(), ondemand_id), + ondemand_id, 'Downloading ondemand information') + info_dict = { + 'id': ondemand_id, + 'title': data.get('title'), + 'description': data.get('description'), + 'duration': int_or_none(data.get('duration')), + 'release_year': int_or_none(data.get('year')), + 'episode_number': int_or_none(data.get('episode_number')), + 'season_number': int_or_none(data.get('season_number')), + 'categories': try_get(data, lambda x: x['categories'], list), + } + return data['terms_catalog'][0]['terms'][0]['token'], data['type'], info_dict + + def _extract_formats(self, cid, video_id, record_id=None, ondemand_id=None, ondemand_termtoken=None, ondemand_type=None, is_live=False): postdata_common = { 'https_watch_urls': True, } @@ -128,11 +152,18 @@ class ZattooPlatformBaseIE(InfoExtractor): url = '%s/zapi/watch/live/%s' % (self._host_url(), cid) elif record_id: url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id) + elif ondemand_id: + postdata_common.update({ + 'teasable_id': ondemand_id, + 'term_token': ondemand_termtoken, + 'teasable_type': ondemand_type + }) + url = '%s/zapi/watch/vod/video' % self._host_url() else: - url = '%s/zapi/watch/recall/%s/%s' % (self._host_url(), cid, video_id) - + url = '%s/zapi/v3/watch/replay/%s/%s' % (self._host_url(), cid, video_id) formats = [] - for stream_type in ('dash', 'hls', 'hls5', 'hds'): + subtitles = {} + for stream_type in ('dash', 'hls7'): postdata = postdata_common.copy() postdata['stream_type'] = stream_type @@ -156,14 +187,16 @@ class ZattooPlatformBaseIE(InfoExtractor): audio_channel = watch.get('audio_channel') preference = 1 if audio_channel == 'A' else None format_id = join_nonempty(stream_type, watch.get('maxrate'), audio_channel) - if stream_type in ('dash', 'dash_widevine', 'dash_playready'): - this_formats = self._extract_mpd_formats( + if stream_type.startswith('dash'): + this_formats, subs = self._extract_mpd_formats_and_subtitles( watch_url, video_id, mpd_id=format_id, fatal=False) - elif stream_type in ('hls', 'hls5', 'hls5_fairplay'): - this_formats = self._extract_m3u8_formats( + self._merge_subtitles(subs, target=subtitles) + elif stream_type.startswith('hls'): + this_formats, subs = self._extract_m3u8_formats_and_subtitles( watch_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=format_id, fatal=False) + self._merge_subtitles(subs, target=subtitles) elif stream_type == 'hds': this_formats = self._extract_f4m_formats( watch_url, video_id, f4m_id=format_id, fatal=False) @@ -176,109 +209,131 @@ class ZattooPlatformBaseIE(InfoExtractor): this_format['quality'] = preference formats.extend(this_formats) self._sort_formats(formats) - return formats + return formats, subtitles - def _extract_video(self, channel_name, video_id, record_id=None, is_live=False): - if is_live: - cid = self._extract_cid(video_id, channel_name) - info_dict = { - 'id': channel_name, - 'title': channel_name, - 'is_live': True, - } - else: - cid, info_dict = self._extract_cid_and_video_info(video_id) - formats = self._extract_formats( - cid, video_id, record_id=record_id, is_live=is_live) - info_dict['formats'] = formats + def _extract_video(self, video_id, record_id=None): + cid, info_dict = self._extract_cid_and_video_info(video_id) + info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id) return info_dict + def _extract_live(self, channel_name): + cid = self._extract_cid(channel_name, channel_name) + formats, subtitles = self._extract_formats(cid, cid, is_live=True) + return { + 'id': channel_name, + 'title': channel_name, + 'is_live': True, + 'format': formats, + 'subtitles': subtitles + } -class QuicklineBaseIE(ZattooPlatformBaseIE): - _NETRC_MACHINE = 'quickline' - _HOST = 'mobiltv.quickline.com' - - -class QuicklineIE(QuicklineBaseIE): - _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+)/(?P<id>[0-9]+)' % re.escape(QuicklineBaseIE._HOST) - - _TEST = { - 'url': 'https://mobiltv.quickline.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste', - 'only_matching': True, - } - - def _real_extract(self, url): - channel_name, video_id = self._match_valid_url(url).groups() - return self._extract_video(channel_name, video_id) - - -class QuicklineLiveIE(QuicklineBaseIE): - _VALID_URL = r'https?://(?:www\.)?%s/watch/(?P<id>[^/]+)' % re.escape(QuicklineBaseIE._HOST) + def _extract_record(self, record_id): + video_id = self._extract_video_id_from_recording(record_id) + cid, info_dict = self._extract_cid_and_video_info(video_id) + info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id) + return info_dict - _TEST = { - 'url': 'https://mobiltv.quickline.com/watch/srf1', - 'only_matching': True, - } + def _extract_ondemand(self, ondemand_id): + ondemand_termtoken, ondemand_type, info_dict = self._extract_ondemand_info(ondemand_id) + info_dict['formats'], info_dict['subtitles'] = self._extract_formats( + None, ondemand_id, ondemand_id=ondemand_id, + ondemand_termtoken=ondemand_termtoken, ondemand_type=ondemand_type) + return info_dict - @classmethod - def suitable(cls, url): - return False if QuicklineIE.suitable(url) else super(QuicklineLiveIE, cls).suitable(url) - def _real_extract(self, url): - channel_name = video_id = self._match_id(url) - return self._extract_video(channel_name, video_id, is_live=True) +def _make_valid_url(host): + return rf'https?://(?:www\.)?{re.escape(host)}/watch/[^/]+?/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?' class ZattooBaseIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'zattoo' _HOST = 'zattoo.com' + @staticmethod + def _create_valid_url(match, qs, base_re=None): + match_base = fr'|{base_re}/(?P<vid1>{match})' if base_re else '(?P<vid1>)' + return rf'''(?x)https?://(?:www\.)?zattoo\.com/(?: + [^?#]+\?(?:[^#]+&)?{qs}=(?P<vid2>{match}) + {match_base} + )''' -def _make_valid_url(tmpl, host): - return tmpl % re.escape(host) + def _real_extract(self, url): + vid1, vid2 = self._match_valid_url(url).group('vid1', 'vid2') + return getattr(self, f'_extract_{self._TYPE}')(vid1 or vid2) class ZattooIE(ZattooBaseIE): - _VALID_URL_TEMPLATE = r'https?://(?:www\.)?%s/watch/(?P<channel>[^/]+?)/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?' - _VALID_URL = _make_valid_url(_VALID_URL_TEMPLATE, ZattooBaseIE._HOST) - - # Since regular videos are only available for 7 days and recorded videos - # are only available for a specific user, we cannot have detailed tests. + _VALID_URL = ZattooBaseIE._create_valid_url(r'\d+', 'program', '(?:program|watch)/[^/]+') + _TYPE = 'video' _TESTS = [{ - 'url': 'https://zattoo.com/watch/prosieben/130671867-maze-runner-die-auserwaehlten-in-der-brandwueste', + 'url': 'https://zattoo.com/program/zdf/250170418', + 'info_dict': { + 'id': '250170418', + 'ext': 'mp4', + 'title': 'Markus Lanz', + 'description': 'md5:e41cb1257de008ca62a73bb876ffa7fc', + 'thumbnail': 're:http://images.zattic.com/cms/.+/format_480x360.jpg', + 'creator': 'ZDF HD', + 'release_year': 2022, + 'episode': 'Folge 1655', + 'categories': 'count:1', + 'tags': 'count:2' + }, + 'params': {'skip_download': 'm3u8'} + }, { + 'url': 'https://zattoo.com/program/daserste/210177916', 'only_matching': True, }, { - 'url': 'https://zattoo.com/watch/srf_zwei/132905652-eishockey-spengler-cup/102791477/1512211800000/1514433500000/92000', + 'url': 'https://zattoo.com/guide/german?channel=srf1&program=169860555', 'only_matching': True, }] - def _real_extract(self, url): - channel_name, video_id, record_id = self._match_valid_url(url).groups() - return self._extract_video(channel_name, video_id, record_id) - class ZattooLiveIE(ZattooBaseIE): - _VALID_URL = r'https?://(?:www\.)?zattoo\.com/watch/(?P<id>[^/]+)' - - _TEST = { - 'url': 'https://zattoo.com/watch/srf1', + _VALID_URL = ZattooBaseIE._create_valid_url(r'[^/?&#]+', 'channel', 'live') + _TYPE = 'live' + _TESTS = [{ + 'url': 'https://zattoo.com/channels/german?channel=srf_zwei', 'only_matching': True, - } + }, { + 'url': 'https://zattoo.com/live/srf1', + 'only_matching': True, + }] @classmethod def suitable(cls, url): - return False if ZattooIE.suitable(url) else super(ZattooLiveIE, cls).suitable(url) + return False if ZattooIE.suitable(url) else super().suitable(url) - def _real_extract(self, url): - channel_name = video_id = self._match_id(url) - return self._extract_video(channel_name, video_id, is_live=True) + +class ZattooMoviesIE(ZattooBaseIE): + _VALID_URL = ZattooBaseIE._create_valid_url(r'\w+', 'movie_id', 'vod/movies') + _TYPE = 'ondemand' + _TESTS = [{ + 'url': 'https://zattoo.com/vod/movies/7521', + 'only_matching': True, + }, { + 'url': 'https://zattoo.com/ondemand?movie_id=7521&term_token=9f00f43183269484edde', + 'only_matching': True, + }] -class NetPlusIE(ZattooIE): +class ZattooRecordingsIE(ZattooBaseIE): + _VALID_URL = ZattooBaseIE._create_valid_url(r'\d+', 'recording') + _TYPE = 'record' + _TESTS = [{ + 'url': 'https://zattoo.com/recordings?recording=193615508', + 'only_matching': True, + }, { + 'url': 'https://zattoo.com/tc/ptc_recordings_all_recordings?recording=193615420', + 'only_matching': True, + }] + + +class NetPlusIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'netplus' _HOST = 'netplus.tv' _API_HOST = 'www.%s' % _HOST - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://www.netplus.tv/watch/abc/123-abc', @@ -286,10 +341,10 @@ class NetPlusIE(ZattooIE): }] -class MNetTVIE(ZattooIE): +class MNetTVIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'mnettv' _HOST = 'tvplus.m-net.de' - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://tvplus.m-net.de/watch/abc/123-abc', @@ -297,10 +352,10 @@ class MNetTVIE(ZattooIE): }] -class WalyTVIE(ZattooIE): +class WalyTVIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'walytv' _HOST = 'player.waly.tv' - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://player.waly.tv/watch/abc/123-abc', @@ -308,11 +363,11 @@ class WalyTVIE(ZattooIE): }] -class BBVTVIE(ZattooIE): +class BBVTVIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'bbvtv' _HOST = 'bbv-tv.net' _API_HOST = 'www.%s' % _HOST - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://www.bbv-tv.net/watch/abc/123-abc', @@ -320,11 +375,11 @@ class BBVTVIE(ZattooIE): }] -class VTXTVIE(ZattooIE): +class VTXTVIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'vtxtv' _HOST = 'vtxtv.ch' _API_HOST = 'www.%s' % _HOST - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://www.vtxtv.ch/watch/abc/123-abc', @@ -332,22 +387,10 @@ class VTXTVIE(ZattooIE): }] -class MyVisionTVIE(ZattooIE): - _NETRC_MACHINE = 'myvisiontv' - _HOST = 'myvisiontv.ch' - _API_HOST = 'www.%s' % _HOST - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) - - _TESTS = [{ - 'url': 'https://www.myvisiontv.ch/watch/abc/123-abc', - 'only_matching': True, - }] - - -class GlattvisionTVIE(ZattooIE): +class GlattvisionTVIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'glattvisiontv' _HOST = 'iptv.glattvision.ch' - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://iptv.glattvision.ch/watch/abc/123-abc', @@ -355,11 +398,11 @@ class GlattvisionTVIE(ZattooIE): }] -class SAKTVIE(ZattooIE): +class SAKTVIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'saktv' _HOST = 'saktv.ch' _API_HOST = 'www.%s' % _HOST - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://www.saktv.ch/watch/abc/123-abc', @@ -367,10 +410,10 @@ class SAKTVIE(ZattooIE): }] -class EWETVIE(ZattooIE): +class EWETVIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'ewetv' _HOST = 'tvonline.ewe.de' - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://tvonline.ewe.de/watch/abc/123-abc', @@ -378,11 +421,11 @@ class EWETVIE(ZattooIE): }] -class QuantumTVIE(ZattooIE): +class QuantumTVIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'quantumtv' _HOST = 'quantum-tv.com' _API_HOST = 'www.%s' % _HOST - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://www.quantum-tv.com/watch/abc/123-abc', @@ -390,10 +433,10 @@ class QuantumTVIE(ZattooIE): }] -class OsnatelTVIE(ZattooIE): +class OsnatelTVIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'osnateltv' _HOST = 'tvonline.osnatel.de' - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://tvonline.osnatel.de/watch/abc/123-abc', @@ -401,11 +444,11 @@ class OsnatelTVIE(ZattooIE): }] -class EinsUndEinsTVIE(ZattooIE): +class EinsUndEinsTVIE(ZattooPlatformBaseIE): _NETRC_MACHINE = '1und1tv' _HOST = '1und1.tv' _API_HOST = 'www.%s' % _HOST - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://www.1und1.tv/watch/abc/123-abc', @@ -413,10 +456,10 @@ class EinsUndEinsTVIE(ZattooIE): }] -class SaltTVIE(ZattooIE): +class SaltTVIE(ZattooPlatformBaseIE): _NETRC_MACHINE = 'salttv' _HOST = 'tv.salt.ch' - _VALID_URL = _make_valid_url(ZattooIE._VALID_URL_TEMPLATE, _HOST) + _VALID_URL = _make_valid_url(_HOST) _TESTS = [{ 'url': 'https://tv.salt.ch/watch/abc/123-abc', diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py index 3e3f11b15..9e411d83f 100644 --- a/yt_dlp/extractor/zee5.py +++ b/yt_dlp/extractor/zee5.py @@ -86,8 +86,8 @@ class Zee5IE(InfoExtractor): 'url': 'https://www.zee5.com/web-series/details/mithya/0-6-4z587408/maine-dekhi-hai-uski-mrityu/0-1-6z587412', 'only_matching': True }] - _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false' - _DEVICE_ID = 'iIxsxYf40cqO3koIkwzKHZhnJzHN13zb' + _DETAIL_API_URL = 'https://spapi.zee5.com/singlePlayback/getDetails/secure?content_id={}&device_id={}&platform_name=desktop_web&country=IN&check_parental_control=false' + _DEVICE_ID = 'TszZPYPuY9Pq2cJizV0U000000000000' _USER_TOKEN = None _LOGIN_HINT = 'Use "--username <mobile_number>" to login using otp or "--username token" and "--password <user_token>" to login using user token.' _NETRC_MACHINE = 'zee5' |