Diffstat (limited to 'yt_dlp/extractor')
36 files changed, 636 insertions, 395 deletions
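A change repeated across many of the extractors below is dropping the redundant non-capturing group around the URL scheme in _VALID_URL. A minimal standalone sketch (reusing the gronkh.tv pattern changed further down; not part of the patch itself) showing that both spellings accept the same URLs:

import re

OLD = r'(?:https?://)(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)'
NEW = r'https?://(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)'

url = 'https://gronkh.tv/stream/536'
# The (?:...) wrapper around the scheme adds nothing; both patterns capture the same id.
assert re.match(OLD, url).group('id') == re.match(NEW, url).group('id') == '536'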
diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py
index 7c5d35f47..07b1b1861 100644
--- a/yt_dlp/extractor/amazon.py
+++ b/yt_dlp/extractor/amazon.py
@@ -4,7 +4,7 @@ from ..utils import int_or_none
 class AmazonStoreIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)'
+    _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)'
     _TESTS = [{
         'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index 672ed1ffe..85ab478a6 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -472,8 +472,7 @@ class BBCCoUkIE(InfoExtractor):
                         f['language_preference'] = -10
                 formats += version_formats
                 for tag, subformats in (version_subtitles or {}).items():
-                    subtitles.setdefault(tag, [])
-                    subtitles[tag] += subformats
+                    subtitles.setdefault(tag, []).extend(subformats)
             return programme_id, title, description, duration, formats, subtitles
         except ExtractorError as ee:
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 483f93d67..e019ec6a8 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -346,7 +346,8 @@ class BiliBiliIE(InfoExtractor):
     def _extract_anthology_entries(self, bv_id, video_id, webpage):
         title = self._html_search_regex(
             (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
-             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
+             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>',
+             r'<title>(?P<title>.+?)</title>'), webpage, 'title',
             group='title')
         json_data = self._download_json(
             f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py
new file mode 100644
index 000000000..77efdf45a
--- /dev/null
+++ b/yt_dlp/extractor/cableav.py
@@ -0,0 +1,34 @@
+# coding: utf-8
+from .common import InfoExtractor
+
+
+class CableAVIE(InfoExtractor):
+    _VALID_URL = r'https://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
+    _TESTS = [{
+        'url': 'https://cableav.tv/lS4iR9lWjN8/',
+        'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
+        'info_dict': {
+            'id': 'lS4iR9lWjN8',
+            'ext': 'mp4',
+            'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
+            'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._og_search_video_url(webpage, secure=False)
+
+        formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': self._og_search_title(webpage),
+            'description': self._og_search_description(webpage),
+            'thumbnail': self._og_search_thumbnail(webpage),
+            'formats': formats,
+        }
diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py
index 7287677c1..51d30a321 100644
--- a/yt_dlp/extractor/canalalpha.py
+++ b/yt_dlp/extractor/canalalpha.py
@@ -11,7 +11,7 @@ from ..utils import (
 class CanalAlphaIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*'
+    _VALID_URL = r'https?://(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*'
     _TESTS = [{
         'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021',
diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py
index 6bdc4f6bb..e6841fb8b 100644
---
a/yt_dlp/extractor/chingari.py +++ b/yt_dlp/extractor/chingari.py @@ -67,7 +67,7 @@ class ChingariBaseIE(InfoExtractor): class ChingariIE(ChingariBaseIE): - _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)' + _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)' _TESTS = [{ 'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb', 'info_dict': { @@ -102,7 +102,7 @@ class ChingariIE(ChingariBaseIE): class ChingariUserIE(ChingariBaseIE): - _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)' + _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)' _TESTS = [{ 'url': 'https://chingari.io/dada1023', 'playlist_mincount': 3, diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index fc28bca2e..37e69d409 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1079,7 +1079,8 @@ class InfoExtractor(object): def raise_login_required( self, msg='This video is only available for registered users', metadata_available=False, method='any'): - if metadata_available and self.get_param('ignore_no_formats_error'): + if metadata_available and ( + self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')): self.report_warning(msg) if method is not None: msg = '%s. %s' % (msg, self._LOGIN_HINTS[method]) @@ -1088,13 +1089,15 @@ class InfoExtractor(object): def raise_geo_restricted( self, msg='This video is not available from your location due to geo restriction', countries=None, metadata_available=False): - if metadata_available and self.get_param('ignore_no_formats_error'): + if metadata_available and ( + self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')): self.report_warning(msg) else: raise GeoRestrictedError(msg, countries=countries) def raise_no_formats(self, msg, expected=False, video_id=None): - if expected and self.get_param('ignore_no_formats_error'): + if expected and ( + self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')): self.report_warning(msg, video_id) elif isinstance(msg, ExtractorError): raise msg @@ -1535,10 +1538,10 @@ class InfoExtractor(object): default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality', 'res', 'fps', 'hdr:12', 'codec:vp9.2', 'size', 'br', 'asr', - 'proto', 'ext', 'hasaud', 'source', 'format_id') # These must not be aliases + 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr', 'height', 'width', 'proto', 'vext', 'abr', 'aext', - 'fps', 'fs_approx', 'source', 'format_id') + 'fps', 'fs_approx', 'source', 'id') settings = { 'vcodec': {'type': 'ordered', 'regex': True, @@ -1548,7 +1551,7 @@ class InfoExtractor(object): 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range', 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]}, 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol', - 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', '.*dash', 'ws|websocket', '', 'mms|rtsp', 'none', 'f4']}, + 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']}, 'vext': {'type': 'ordered', 'field': 'video_ext', 'order': ('mp4', 'webm', 'flv', '', 'none'), 'order_free': ('webm', 'mp4', 'flv', '', 'none')}, @@ -1583,7 +1586,7 @@ class InfoExtractor(object): 'res': {'type': 'multiple', 'field': ('height', 'width'), 'function': lambda it: (lambda l: min(l) 
if l else 0)(tuple(filter(None, it)))}, - # Most of these exist only for compatibility reasons + # Deprecated 'dimension': {'type': 'alias', 'field': 'res'}, 'resolution': {'type': 'alias', 'field': 'res'}, 'extension': {'type': 'alias', 'field': 'ext'}, @@ -1592,7 +1595,7 @@ class InfoExtractor(object): 'video_bitrate': {'type': 'alias', 'field': 'vbr'}, 'audio_bitrate': {'type': 'alias', 'field': 'abr'}, 'framerate': {'type': 'alias', 'field': 'fps'}, - 'language_preference': {'type': 'alias', 'field': 'lang'}, # not named as 'language' because such a field exists + 'language_preference': {'type': 'alias', 'field': 'lang'}, 'protocol': {'type': 'alias', 'field': 'proto'}, 'source_preference': {'type': 'alias', 'field': 'source'}, 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}, @@ -1612,10 +1615,20 @@ class InfoExtractor(object): 'format_id': {'type': 'alias', 'field': 'id'}, } - _order = [] + def __init__(self, ie, field_preference): + self._order = [] + self.ydl = ie._downloader + self.evaluate_params(self.ydl.params, field_preference) + if ie.get_param('verbose'): + self.print_verbose_info(self.ydl.write_debug) def _get_field_setting(self, field, key): if field not in self.settings: + if key in ('forced', 'priority'): + return False + self.ydl.deprecation_warning( + f'Using arbitrary fields ({field}) for format sorting is deprecated ' + 'and may be removed in a future version') self.settings[field] = {} propObj = self.settings[field] if key not in propObj: @@ -1698,7 +1711,10 @@ class InfoExtractor(object): if field is None: continue if self._get_field_setting(field, 'type') == 'alias': - field = self._get_field_setting(field, 'field') + alias, field = field, self._get_field_setting(field, 'field') + self.ydl.deprecation_warning( + f'Format sorting alias {alias} is deprecated ' + f'and may be removed in a future version. 
Please use {field} instead') reverse = match.group('reverse') is not None closest = match.group('separator') == '~' limit_text = match.group('limit') @@ -1802,10 +1818,7 @@ class InfoExtractor(object): def _sort_formats(self, formats, field_preference=[]): if not formats: return - format_sort = self.FormatSort() # params and to_screen are taken from the downloader - format_sort.evaluate_params(self._downloader.params, field_preference) - if self.get_param('verbose', False): - format_sort.print_verbose_info(self._downloader.write_debug) + format_sort = self.FormatSort(self, field_preference) formats.sort(key=lambda f: format_sort.calculate_preference(f)) def _check_formats(self, formats, video_id): diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py index 868d8d27d..d49f1ca74 100644 --- a/yt_dlp/extractor/cozytv.py +++ b/yt_dlp/extractor/cozytv.py @@ -6,7 +6,7 @@ from ..utils import unified_strdate class CozyTVIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)' + _VALID_URL = r'https?://(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)' _TESTS = [{ 'url': 'https://cozy.tv/beardson/replays/2021-11-19_1', diff --git a/yt_dlp/extractor/discoverynetworks.py b/yt_dlp/extractor/discoverynetworks.py deleted file mode 100644 index 4f8bdf0b9..000000000 --- a/yt_dlp/extractor/discoverynetworks.py +++ /dev/null @@ -1,41 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - - -from .dplay import DPlayIE - - -class DiscoveryNetworksDeIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)' - - _TESTS = [{ - 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', - 'info_dict': { - 'id': '78867', - 'ext': 'mp4', - 'title': 'Die Welt da draußen', - 'description': 'md5:61033c12b73286e409d99a41742ef608', - 'timestamp': 1554069600, - 'upload_date': '20190331', - }, - 'params': { - 'skip_download': True, - }, - }, { - 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', - 'only_matching': True, - }, { - 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', - 'only_matching': True, - }, { - 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/', - 'only_matching': True, - }] - - def _real_extract(self, url): - domain, programme, alternate_id = self._match_valid_url(url).groups() - country = 'GB' if domain == 'dplay.co.uk' else 'DE' - realm = 'questuk' if country == 'GB' else domain.replace('.', '') - return self._get_disco_api_info( - url, '%s/%s' % (programme, alternate_id), - 'sonic-eu1-prod.disco-api.com', realm, country) diff --git a/yt_dlp/extractor/discoveryplusindia.py b/yt_dlp/extractor/discoveryplusindia.py deleted file mode 100644 index 8ec418a97..000000000 --- a/yt_dlp/extractor/discoveryplusindia.py +++ /dev/null @@ -1,97 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import json - -from ..compat import compat_str -from ..utils import try_get -from .common import InfoExtractor -from .dplay import DPlayIE - - -class DiscoveryPlusIndiaIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' 
+ DPlayIE._PATH_REGEX - _TESTS = [{ - 'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE', - 'info_dict': { - 'id': '27104', - 'ext': 'mp4', - 'display_id': 'how-do-they-do-it/fugu-and-more', - 'title': 'Fugu and More', - 'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.', - 'duration': 1319, - 'timestamp': 1582309800, - 'upload_date': '20200221', - 'series': 'How Do They Do It?', - 'season_number': 8, - 'episode_number': 2, - 'creator': 'Discovery Channel', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'Cookies (not necessarily logged in) are needed' - }] - - def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers['x-disco-params'] = 'realm=%s' % realm - headers['x-disco-client'] = 'WEB:UNKNOWN:dplus-india:17.0.0' - - def _download_video_playback_info(self, disco_base, video_id, headers): - return self._download_json( - disco_base + 'playback/v3/videoPlaybackInfo', - video_id, headers=headers, data=json.dumps({ - 'deviceInfo': { - 'adBlocker': False, - }, - 'videoId': video_id, - }).encode('utf-8'))['data']['attributes']['streaming'] - - def _real_extract(self, url): - display_id = self._match_id(url) - return self._get_disco_api_info( - url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in') - - -class DiscoveryPlusIndiaShowIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)' - _TESTS = [{ - 'url': 'https://www.discoveryplus.in/show/how-do-they-do-it', - 'playlist_mincount': 140, - 'info_dict': { - 'id': 'how-do-they-do-it', - }, - }] - - def _entries(self, show_name): - headers = { - 'x-disco-client': 'WEB:UNKNOWN:dplus-india:prod', - 'x-disco-params': 'realm=dplusindia', - 'referer': 'https://www.discoveryplus.in/', - } - show_url = 'https://ap2-prod-direct.discoveryplus.in/cms/routes/show/{}?include=default'.format(show_name) - show_json = self._download_json(show_url, - video_id=show_name, - headers=headers)['included'][4]['attributes']['component'] - show_id = show_json['mandatoryParams'].split('=')[-1] - season_url = 'https://ap2-prod-direct.discoveryplus.in/content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}' - for season in show_json['filters'][0]['options']: - season_id = season['id'] - total_pages, page_num = 1, 0 - while page_num < total_pages: - season_json = self._download_json(season_url.format(season_id, show_id, compat_str(page_num + 1)), - video_id=show_id, headers=headers, - note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else '')) - if page_num == 0: - total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1 - episodes_json = season_json['data'] - for episode in episodes_json: - video_id = episode['attributes']['path'] - yield self.url_result( - 'https://discoveryplus.in/videos/%s' % video_id, - ie=DiscoveryPlusIndiaIE.ie_key(), video_id=video_id) - page_num += 1 - - def _real_extract(self, url): - show_name = self._match_valid_url(url).group('show_name') - return self.playlist_result(self._entries(show_name), playlist_id=show_name) diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py index 525c8e243..f5d6540c0 100644 --- a/yt_dlp/extractor/dplay.py +++ b/yt_dlp/extractor/dplay.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import json +import uuid from .common import InfoExtractor from ..compat import compat_HTTPError @@ -11,12 +12,172 @@ 
from ..utils import ( float_or_none, int_or_none, strip_or_none, + try_get, unified_timestamp, ) -class DPlayIE(InfoExtractor): +class DPlayBaseIE(InfoExtractor): _PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)' + _auth_token_cache = {} + + def _get_auth(self, disco_base, display_id, realm, needs_device_id=True): + key = (disco_base, realm) + st = self._get_cookies(disco_base).get('st') + token = (st and st.value) or self._auth_token_cache.get(key) + + if not token: + query = {'realm': realm} + if needs_device_id: + query['deviceId'] = uuid.uuid4().hex + token = self._download_json( + disco_base + 'token', display_id, 'Downloading token', + query=query)['data']['attributes']['token'] + + # Save cache only if cookies are not being set + if not self._get_cookies(disco_base).get('st'): + self._auth_token_cache[key] = token + + return f'Bearer {token}' + + def _process_errors(self, e, geo_countries): + info = self._parse_json(e.cause.read().decode('utf-8'), None) + error = info['errors'][0] + error_code = error.get('code') + if error_code == 'access.denied.geoblocked': + self.raise_geo_restricted(countries=geo_countries) + elif error_code in ('access.denied.missingpackage', 'invalid.token'): + raise ExtractorError( + 'This video is only available for registered users. You may want to use --cookies.', expected=True) + raise ExtractorError(info['errors'][0]['detail'], expected=True) + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers['Authorization'] = self._get_auth(disco_base, display_id, realm, False) + + def _download_video_playback_info(self, disco_base, video_id, headers): + streaming = self._download_json( + disco_base + 'playback/videoPlaybackInfo/' + video_id, + video_id, headers=headers)['data']['attributes']['streaming'] + streaming_list = [] + for format_id, format_dict in streaming.items(): + streaming_list.append({ + 'type': format_id, + 'url': format_dict.get('url'), + }) + return streaming_list + + def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''): + geo_countries = [country.upper()] + self._initialize_geo_bypass({ + 'countries': geo_countries, + }) + disco_base = 'https://%s/' % disco_host + headers = { + 'Referer': url, + } + self._update_disco_api_headers(headers, disco_base, display_id, realm) + try: + video = self._download_json( + disco_base + 'content/videos/' + display_id, display_id, + headers=headers, query={ + 'fields[channel]': 'name', + 'fields[image]': 'height,src,width', + 'fields[show]': 'name', + 'fields[tag]': 'name', + 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration', + 'include': 'images,primaryChannel,show,tags' + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: + self._process_errors(e, geo_countries) + raise + video_id = video['data']['id'] + info = video['data']['attributes'] + title = info['name'].strip() + formats = [] + subtitles = {} + try: + streaming = self._download_video_playback_info( + disco_base, video_id, headers) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: + self._process_errors(e, geo_countries) + raise + for format_dict in streaming: + if not isinstance(format_dict, dict): + continue + format_url = format_dict.get('url') + if not format_url: + continue + format_id = format_dict.get('type') + ext = determine_ext(format_url) + if format_id == 'dash' or ext == 'mpd': + dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles( + 
format_url, display_id, mpd_id='dash', fatal=False) + formats.extend(dash_fmts) + subtitles = self._merge_subtitles(subtitles, dash_subs) + elif format_id == 'hls' or ext == 'm3u8': + m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles( + format_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', + fatal=False) + formats.extend(m3u8_fmts) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) + else: + formats.append({ + 'url': format_url, + 'format_id': format_id, + }) + self._sort_formats(formats) + + creator = series = None + tags = [] + thumbnails = [] + included = video.get('included') or [] + if isinstance(included, list): + for e in included: + attributes = e.get('attributes') + if not attributes: + continue + e_type = e.get('type') + if e_type == 'channel': + creator = attributes.get('name') + elif e_type == 'image': + src = attributes.get('src') + if src: + thumbnails.append({ + 'url': src, + 'width': int_or_none(attributes.get('width')), + 'height': int_or_none(attributes.get('height')), + }) + if e_type == 'show': + series = attributes.get('name') + elif e_type == 'tag': + name = attributes.get('name') + if name: + tags.append(name) + return { + 'id': video_id, + 'display_id': display_id, + 'title': title, + 'description': strip_or_none(info.get('description')), + 'duration': float_or_none(info.get('videoDuration'), 1000), + 'timestamp': unified_timestamp(info.get('publishStart')), + 'series': series, + 'season_number': int_or_none(info.get('seasonNumber')), + 'episode_number': int_or_none(info.get('episodeNumber')), + 'creator': creator, + 'tags': tags, + 'thumbnails': thumbnails, + 'formats': formats, + 'subtitles': subtitles, + 'http_headers': { + 'referer': domain, + }, + } + + +class DPlayIE(DPlayBaseIE): _VALID_URL = r'''(?x)https?:// (?P<domain> (?:www\.)?(?P<host>d @@ -26,7 +187,7 @@ class DPlayIE(InfoExtractor): ) )| (?P<subdomain_country>es|it)\.dplay\.com - )/[^/]+''' + _PATH_REGEX + )/[^/]+''' + DPlayBaseIE._PATH_REGEX _TESTS = [{ # non geo restricted, via secure api, unsigned download hls URL @@ -150,138 +311,6 @@ class DPlayIE(InfoExtractor): 'only_matching': True, }] - def _process_errors(self, e, geo_countries): - info = self._parse_json(e.cause.read().decode('utf-8'), None) - error = info['errors'][0] - error_code = error.get('code') - if error_code == 'access.denied.geoblocked': - self.raise_geo_restricted(countries=geo_countries) - elif error_code in ('access.denied.missingpackage', 'invalid.token'): - raise ExtractorError( - 'This video is only available for registered users. 
You may want to use --cookies.', expected=True) - raise ExtractorError(info['errors'][0]['detail'], expected=True) - - def _update_disco_api_headers(self, headers, disco_base, display_id, realm): - headers['Authorization'] = 'Bearer ' + self._download_json( - disco_base + 'token', display_id, 'Downloading token', - query={ - 'realm': realm, - })['data']['attributes']['token'] - - def _download_video_playback_info(self, disco_base, video_id, headers): - streaming = self._download_json( - disco_base + 'playback/videoPlaybackInfo/' + video_id, - video_id, headers=headers)['data']['attributes']['streaming'] - streaming_list = [] - for format_id, format_dict in streaming.items(): - streaming_list.append({ - 'type': format_id, - 'url': format_dict.get('url'), - }) - return streaming_list - - def _get_disco_api_info(self, url, display_id, disco_host, realm, country): - geo_countries = [country.upper()] - self._initialize_geo_bypass({ - 'countries': geo_countries, - }) - disco_base = 'https://%s/' % disco_host - headers = { - 'Referer': url, - } - self._update_disco_api_headers(headers, disco_base, display_id, realm) - try: - video = self._download_json( - disco_base + 'content/videos/' + display_id, display_id, - headers=headers, query={ - 'fields[channel]': 'name', - 'fields[image]': 'height,src,width', - 'fields[show]': 'name', - 'fields[tag]': 'name', - 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration', - 'include': 'images,primaryChannel,show,tags' - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: - self._process_errors(e, geo_countries) - raise - video_id = video['data']['id'] - info = video['data']['attributes'] - title = info['name'].strip() - formats = [] - try: - streaming = self._download_video_playback_info( - disco_base, video_id, headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - self._process_errors(e, geo_countries) - raise - for format_dict in streaming: - if not isinstance(format_dict, dict): - continue - format_url = format_dict.get('url') - if not format_url: - continue - format_id = format_dict.get('type') - ext = determine_ext(format_url) - if format_id == 'dash' or ext == 'mpd': - formats.extend(self._extract_mpd_formats( - format_url, display_id, mpd_id='dash', fatal=False)) - elif format_id == 'hls' or ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, display_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id='hls', - fatal=False)) - else: - formats.append({ - 'url': format_url, - 'format_id': format_id, - }) - self._sort_formats(formats) - - creator = series = None - tags = [] - thumbnails = [] - included = video.get('included') or [] - if isinstance(included, list): - for e in included: - attributes = e.get('attributes') - if not attributes: - continue - e_type = e.get('type') - if e_type == 'channel': - creator = attributes.get('name') - elif e_type == 'image': - src = attributes.get('src') - if src: - thumbnails.append({ - 'url': src, - 'width': int_or_none(attributes.get('width')), - 'height': int_or_none(attributes.get('height')), - }) - if e_type == 'show': - series = attributes.get('name') - elif e_type == 'tag': - name = attributes.get('name') - if name: - tags.append(name) - - return { - 'id': video_id, - 'display_id': display_id, - 'title': title, - 'description': strip_or_none(info.get('description')), - 'duration': float_or_none(info.get('videoDuration'), 1000), - 'timestamp': 
unified_timestamp(info.get('publishStart')), - 'series': series, - 'season_number': int_or_none(info.get('seasonNumber')), - 'episode_number': int_or_none(info.get('episodeNumber')), - 'creator': creator, - 'tags': tags, - 'thumbnails': thumbnails, - 'formats': formats, - } - def _real_extract(self, url): mobj = self._match_valid_url(url) display_id = mobj.group('id') @@ -289,11 +318,11 @@ class DPlayIE(InfoExtractor): country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country') host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com' return self._get_disco_api_info( - url, display_id, host, 'dplay' + country, country) + url, display_id, host, 'dplay' + country, country, domain) -class HGTVDeIE(DPlayIE): - _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX +class HGTVDeIE(DPlayBaseIE): + _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/', 'info_dict': { @@ -318,8 +347,8 @@ class HGTVDeIE(DPlayIE): url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de') -class DiscoveryPlusIE(DPlayIE): - _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?:\w{2}/)?video' + DPlayIE._PATH_REGEX +class DiscoveryPlusIE(DPlayBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family', 'info_dict': { @@ -369,7 +398,7 @@ class DiscoveryPlusIE(DPlayIE): class ScienceChannelIE(DiscoveryPlusIE): - _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayIE._PATH_REGEX + _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine', 'info_dict': { @@ -389,7 +418,7 @@ class ScienceChannelIE(DiscoveryPlusIE): class DIYNetworkIE(DiscoveryPlusIE): - _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayIE._PATH_REGEX + _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas', 'info_dict': { @@ -409,7 +438,7 @@ class DIYNetworkIE(DiscoveryPlusIE): class AnimalPlanetIE(DiscoveryPlusIE): - _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayIE._PATH_REGEX + _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX _TESTS = [{ 'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown', 'info_dict': { @@ -426,3 +455,159 @@ class AnimalPlanetIE(DiscoveryPlusIE): _PRODUCT = 'apl' _API_URL = 'us1-prod-direct.animalplanet.com' + + +class DiscoveryPlusIndiaIE(DPlayBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' 
+ DPlayBaseIE._PATH_REGEX + _TESTS = [{ + 'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE', + 'info_dict': { + 'id': '27104', + 'ext': 'mp4', + 'display_id': 'how-do-they-do-it/fugu-and-more', + 'title': 'Fugu and More', + 'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.', + 'duration': 1319, + 'timestamp': 1582309800, + 'upload_date': '20200221', + 'series': 'How Do They Do It?', + 'season_number': 8, + 'episode_number': 2, + 'creator': 'Discovery Channel', + }, + 'params': { + 'skip_download': True, + } + }] + + def _update_disco_api_headers(self, headers, disco_base, display_id, realm): + headers.update({ + 'x-disco-params': 'realm=%s' % realm, + 'x-disco-client': 'WEB:UNKNOWN:dplus-india:17.0.0', + 'Authorization': self._get_auth(disco_base, display_id, realm), + }) + + def _download_video_playback_info(self, disco_base, video_id, headers): + return self._download_json( + disco_base + 'playback/v3/videoPlaybackInfo', + video_id, headers=headers, data=json.dumps({ + 'deviceInfo': { + 'adBlocker': False, + }, + 'videoId': video_id, + }).encode('utf-8'))['data']['attributes']['streaming'] + + def _real_extract(self, url): + display_id = self._match_id(url) + return self._get_disco_api_info( + url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in', 'https://www.discoveryplus.in/') + + +class DiscoveryNetworksDeIE(DPlayBaseIE): + _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)' + + _TESTS = [{ + 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100', + 'info_dict': { + 'id': '78867', + 'ext': 'mp4', + 'title': 'Die Welt da draußen', + 'description': 'md5:61033c12b73286e409d99a41742ef608', + 'timestamp': 1554069600, + 'upload_date': '20190331', + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316', + 'only_matching': True, + }, { + 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B', + 'only_matching': True, + }, { + 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/', + 'only_matching': True, + }] + + def _real_extract(self, url): + domain, programme, alternate_id = self._match_valid_url(url).groups() + country = 'GB' if domain == 'dplay.co.uk' else 'DE' + realm = 'questuk' if country == 'GB' else domain.replace('.', '') + return self._get_disco_api_info( + url, '%s/%s' % (programme, alternate_id), + 'sonic-eu1-prod.disco-api.com', realm, country) + + +class DiscoveryPlusShowBaseIE(DPlayBaseIE): + + def _entries(self, show_name): + headers = { + 'x-disco-client': self._X_CLIENT, + 'x-disco-params': f'realm={self._REALM}', + 'referer': self._DOMAIN, + 'Authentication': self._get_auth(self._BASE_API, None, self._REALM), + } + show_json = self._download_json( + f'{self._BASE_API}cms/routes/{self._SHOW_STR}/{show_name}?include=default', + video_id=show_name, headers=headers)['included'][self._INDEX]['attributes']['component'] + show_id = show_json['mandatoryParams'].split('=')[-1] + season_url = self._BASE_API + 'content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}' + for season in show_json['filters'][0]['options']: + season_id = season['id'] + total_pages, page_num = 1, 0 + while page_num < total_pages: 
+ season_json = self._download_json( + season_url.format(season_id, show_id, str(page_num + 1)), show_name, headers=headers, + note='Downloading season %s JSON metadata%s' % (season_id, ' page %d' % page_num if page_num else '')) + if page_num == 0: + total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1 + episodes_json = season_json['data'] + for episode in episodes_json: + video_id = episode['attributes']['path'] + yield self.url_result( + '%svideos/%s' % (self._DOMAIN, video_id), + ie=self._VIDEO_IE.ie_key(), video_id=video_id) + page_num += 1 + + def _real_extract(self, url): + show_name = self._match_valid_url(url).group('show_name') + return self.playlist_result(self._entries(show_name), playlist_id=show_name) + + +class DiscoveryPlusItalyShowIE(DiscoveryPlusShowBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.it/programmi/(?P<show_name>[^/]+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.discoveryplus.it/programmi/deal-with-it-stai-al-gioco', + 'playlist_mincount': 168, + 'info_dict': { + 'id': 'deal-with-it-stai-al-gioco', + }, + }] + + _BASE_API = 'https://disco-api.discoveryplus.it/' + _DOMAIN = 'https://www.discoveryplus.it/' + _X_CLIENT = 'WEB:UNKNOWN:dplay-client:2.6.0' + _REALM = 'dplayit' + _SHOW_STR = 'programmi' + _INDEX = 1 + _VIDEO_IE = DPlayIE + + +class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE): + _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)' + _TESTS = [{ + 'url': 'https://www.discoveryplus.in/show/how-do-they-do-it', + 'playlist_mincount': 140, + 'info_dict': { + 'id': 'how-do-they-do-it', + }, + }] + + _BASE_API = 'https://ap2-prod-direct.discoveryplus.in/' + _DOMAIN = 'https://www.discoveryplus.in/' + _X_CLIENT = 'WEB:UNKNOWN:dplus-india:prod' + _REALM = 'dplusindia' + _SHOW_STR = 'show' + _INDEX = 4 + _VIDEO_IE = DiscoveryPlusIndiaIE diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py index b4e544d4f..cd19325bc 100644 --- a/yt_dlp/extractor/epicon.py +++ b/yt_dlp/extractor/epicon.py @@ -8,7 +8,7 @@ from ..utils import ExtractorError class EpiconIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.epicon.in/documentaries/air-battle-of-srinagar', 'info_dict': { @@ -84,7 +84,7 @@ class EpiconIE(InfoExtractor): class EpiconSeriesIE(InfoExtractor): - _VALID_URL = r'(?!.*season)(?:https?://)(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)' + _VALID_URL = r'(?!.*season)https?://(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.epicon.in/tv-shows/1-of-something', 'playlist_mincount': 5, diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py index 3980c2349..2759e7436 100644 --- a/yt_dlp/extractor/euscreen.py +++ b/yt_dlp/extractor/euscreen.py @@ -10,7 +10,7 @@ from ..utils import ( class EUScreenIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)' + _VALID_URL = r'https?://(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)' _TESTS = [{ 'url': 'https://euscreen.eu/item.html?id=EUS_0EBCBF356BFC4E12A014023BA41BD98C', diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a4baad2da..0741a728f 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -188,6 +188,7 @@ from .businessinsider 
import BusinessInsiderIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE +from .cableav import CableAVIE from .cam4 import CAM4IE from .camdemy import ( CamdemyIE, @@ -341,10 +342,6 @@ from .democracynow import DemocracynowIE from .dfb import DFBIE from .dhm import DHMIE from .digg import DiggIE -from .discoveryplusindia import ( - DiscoveryPlusIndiaIE, - DiscoveryPlusIndiaShowIE, -) from .dotsub import DotsubIE from .douyutv import ( DouyuShowIE, @@ -356,7 +353,11 @@ from .dplay import ( HGTVDeIE, ScienceChannelIE, DIYNetworkIE, - AnimalPlanetIE + AnimalPlanetIE, + DiscoveryPlusIndiaIE, + DiscoveryNetworksDeIE, + DiscoveryPlusItalyShowIE, + DiscoveryPlusIndiaShowIE, ) from .dreisat import DreiSatIE from .drbonanza import DRBonanzaIE @@ -378,7 +379,6 @@ from .discoverygo import ( DiscoveryGoIE, DiscoveryGoPlaylistIE, ) -from .discoverynetworks import DiscoveryNetworksDeIE from .discoveryvr import DiscoveryVRIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE @@ -1216,7 +1216,11 @@ from .redbulltv import ( RedBullIE, ) from .reddit import RedditIE -from .redgifs import RedGifsIE +from .redgifs import ( + RedGifsIE, + RedGifsSearchIE, + RedGifsUserIE, +) from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py index bde6e8624..9ba0b1ca1 100644 --- a/yt_dlp/extractor/gab.py +++ b/yt_dlp/extractor/gab.py @@ -15,7 +15,7 @@ from ..utils import ( class GabTVIE(InfoExtractor): - _VALID_URL = r'(?:https?://)tv.gab.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)' + _VALID_URL = r'https?://tv\.gab\.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)' _TESTS = [{ 'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488', 'info_dict': { diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py index a7792a5e0..58cd59511 100644 --- a/yt_dlp/extractor/gronkh.py +++ b/yt_dlp/extractor/gronkh.py @@ -6,7 +6,7 @@ from ..utils import unified_strdate class GronkhIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)' _TESTS = [{ 'url': 'https://gronkh.tv/stream/536', diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 0bdf772a1..de2b30cf7 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -296,7 +296,7 @@ class HotStarPlaylistIE(HotStarBaseIE): class HotStarSeriesIE(HotStarBaseIE): IE_NAME = 'hotstar:series' - _VALID_URL = r'(?P<url>(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))' + _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))' _TESTS = [{ 'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646', 'info_dict': { diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 1fcf97a19..2ec24f3e7 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -63,6 +63,10 @@ class InstagramBaseIE(InfoExtractor): if not login.get('authenticated'): if login.get('message'): raise ExtractorError(f'Unable to login: {login["message"]}') + elif login.get('user'): + raise ExtractorError('Unable to login: Sorry, your password was incorrect. Please double-check your password.', expected=True) + elif login.get('user') is False: + raise ExtractorError('Unable to login: The username you entered doesn\'t belong to an account. 
Please check your username and try again.', expected=True) raise ExtractorError('Unable to login') InstagramBaseIE._IS_LOGGED_IN = True @@ -495,7 +499,7 @@ class InstagramUserIE(InstagramPlaylistBaseIE): class InstagramTagIE(InstagramPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)' - IE_DESC = 'Instagram hashtag search' + IE_DESC = 'Instagram hashtag search URLs' IE_NAME = 'instagram:tag' _TESTS = [{ 'url': 'https://instagram.com/explore/tags/lolcats', diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py index 1706b28a0..088db1cb0 100644 --- a/yt_dlp/extractor/koo.py +++ b/yt_dlp/extractor/koo.py @@ -8,7 +8,7 @@ from ..utils import ( class KooIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)' + _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)' _TESTS = [{ # Test for video in the comments 'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde', 'info_dict': { diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py index 2d65787e2..1d6d4b804 100644 --- a/yt_dlp/extractor/mlssoccer.py +++ b/yt_dlp/extractor/mlssoccer.py @@ -6,7 +6,7 @@ from .common import InfoExtractor class MLSSoccerIE(InfoExtractor): _VALID_DOMAINS = r'(?:(?:cfmontreal|intermiamicf|lagalaxy|lafc|houstondynamofc|dcunited|atlutd|mlssoccer|fcdallas|columbuscrew|coloradorapids|fccincinnati|chicagofirefc|austinfc|nashvillesc|whitecapsfc|sportingkc|soundersfc|sjearthquakes|rsl|timbers|philadelphiaunion|orlandocitysc|newyorkredbulls|nycfc)\.com|(?:torontofc)\.ca|(?:revolutionsoccer)\.net)' - _VALID_URL = r'(?:https?://)(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS + _VALID_URL = r'https?://(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS _TESTS = [{ 'url': 'https://www.mlssoccer.com/video/the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986#the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986', @@ -21,7 +21,6 @@ class MLSSoccerIE(InfoExtractor): 'uploader_id': '5530036772001', 'tags': ['club/canada'], 'is_live': False, - 'duration_string': '5:50', 'upload_date': '20211007', 'filesize_approx': 255193528.83200002 }, diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py index dcd26388a..09fadf8d9 100644 --- a/yt_dlp/extractor/musescore.py +++ b/yt_dlp/extractor/musescore.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class MuseScoreIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)' + _VALID_URL = r'https?://(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)' _TESTS = [{ 'url': 'https://musescore.com/user/73797/scores/142975', 'info_dict': { @@ -13,7 +13,7 @@ class MuseScoreIE(InfoExtractor): 'ext': 'mp3', 'title': 'WA Mozart Marche Turque (Turkish March fingered)', 'description': 'md5:7ede08230e4eaabd67a4a98bb54d07be', - 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'PapyPiano', 'creator': 'Wolfgang Amadeus Mozart', } @@ -24,7 +24,7 @@ class MuseScoreIE(InfoExtractor): 'ext': 'mp3', 'title': 'Sweet Child O\' Mine – Guns N\' Roses sweet child', 'description': 'md5:4dca71191c14abc312a0a4192492eace', - 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'roxbelviolin', 'creator': 
'Guns N´Roses Arr. Roxbel Violin', } @@ -35,7 +35,7 @@ class MuseScoreIE(InfoExtractor): 'ext': 'mp3', 'title': 'Für Elise – Beethoven', 'description': 'md5:49515a3556d5ecaf9fa4b2514064ac34', - 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+', + 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+', 'uploader': 'ClassicMan', 'creator': 'Ludwig van Beethoven (1770–1827)', } diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py index 5874556e3..3c2afd838 100644 --- a/yt_dlp/extractor/mxplayer.py +++ b/yt_dlp/extractor/mxplayer.py @@ -180,7 +180,7 @@ class MxplayerIE(InfoExtractor): class MxplayerShowIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])' + _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])' _TESTS = [{ 'url': 'https://www.mxplayer.in/show/watch-chakravartin-ashoka-samrat-series-online-a8f44e3cc0814b5601d17772cedf5417', 'playlist_mincount': 440, diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 4bcea33d5..b46ca293f 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -703,7 +703,7 @@ class NicovideoSearchURLIE(InfoExtractor): class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE): - IE_DESC = 'Nico video searches' + IE_DESC = 'Nico video search' IE_NAME = NicovideoSearchIE_NAME _SEARCH_KEY = 'nicosearch' _TESTS = [] @@ -714,7 +714,7 @@ class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE): class NicovideoSearchDateIE(NicovideoSearchIE): - IE_DESC = 'Nico video searches, newest first' + IE_DESC = 'Nico video search, newest first' IE_NAME = f'{NicovideoSearchIE_NAME}:date' _SEARCH_KEY = 'nicosearchdate' _TESTS = [{ diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py index 79501003d..826faadd2 100644 --- a/yt_dlp/extractor/onefootball.py +++ b/yt_dlp/extractor/onefootball.py @@ -5,7 +5,7 @@ from .common import InfoExtractor class OneFootballIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)' _TESTS = [{ 'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334', diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py index d1d9911f7..07ac15b54 100644 --- a/yt_dlp/extractor/planetmarathi.py +++ b/yt_dlp/extractor/planetmarathi.py @@ -9,7 +9,7 @@ from ..utils import ( class PlanetMarathiIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)' + _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)' _TESTS = [{ 'url': 'https://www.planetmarathi.com/titles/ek-unad-divas', 'playlist_mincount': 2, diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py index 1d832a679..9e9867ba5 100644 --- a/yt_dlp/extractor/projectveritas.py +++ b/yt_dlp/extractor/projectveritas.py @@ -10,7 +10,7 @@ from ..utils import ( class ProjectVeritasIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)' + _VALID_URL = r'https?://(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/', 'info_dict': { diff --git 
a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index 1257d1344..55196b768 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -1,21 +1,94 @@ # coding: utf-8 +import functools from .common import InfoExtractor +from ..compat import compat_parse_qs from ..utils import ( ExtractorError, int_or_none, qualities, try_get, + OnDemandPagedList, ) -class RedGifsIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|thumbs2?)\.)?redgifs\.com/(?:watch/)?(?P<id>[^-/?#\.]+)' +class RedGifsBaseInfoExtractor(InfoExtractor): _FORMATS = { 'gif': 250, 'sd': 480, 'hd': None, } + + def _parse_gif_data(self, gif_data): + video_id = gif_data.get('id') + quality = qualities(tuple(self._FORMATS.keys())) + + orig_height = int_or_none(gif_data.get('height')) + aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width']) + + formats = [] + for format_id, height in self._FORMATS.items(): + video_url = gif_data['urls'].get(format_id) + if not video_url: + continue + height = min(orig_height, height or orig_height) + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'width': height * aspect_ratio if aspect_ratio else None, + 'height': height, + 'quality': quality(format_id), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'webpage_url': f'https://redgifs.com/watch/{video_id}', + 'ie_key': RedGifsIE.ie_key(), + 'extractor': 'RedGifs', + 'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs', + 'timestamp': int_or_none(gif_data.get('createDate')), + 'uploader': gif_data.get('userName'), + 'duration': int_or_none(gif_data.get('duration')), + 'view_count': int_or_none(gif_data.get('views')), + 'like_count': int_or_none(gif_data.get('likes')), + 'categories': gif_data.get('tags') or [], + 'tags': gif_data.get('tags'), + 'age_limit': 18, + 'formats': formats, + } + + def _call_api(self, ep, video_id, *args, **kwargs): + data = self._download_json( + f'https://api.redgifs.com/v2/{ep}', video_id, *args, **kwargs) + if 'error' in data: + raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id) + return data + + def _fetch_page(self, ep, video_id, query, page): + query['page'] = page + 1 + data = self._call_api( + ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}') + + for entry in data['gifs']: + yield self._parse_gif_data(entry) + + def _prepare_api_query(self, query, fields): + api_query = [ + (field_name, query.get(field_name, (default,))[0]) + for field_name, default in fields.items()] + + return {key: val for key, val in api_query if val is not None} + + def _paged_entries(self, ep, item_id, query, fields): + page = int_or_none(query.get('page', (None,))[0]) + page_fetcher = functools.partial( + self._fetch_page, ep, item_id, self._prepare_api_query(query, fields)) + return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE) + + +class RedGifsIE(RedGifsBaseInfoExtractor): + _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)' _TESTS = [{ 'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent', 'info_dict': { @@ -50,45 +123,110 @@ class RedGifsIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url).lower() + video_info = self._call_api( + f'gifs/{video_id}', video_id, note='Downloading video info') + return self._parse_gif_data(video_info['gif']) - video_info = self._download_json( - 'https://api.redgifs.com/v2/gifs/%s' % video_id, - video_id, 'Downloading video info') - if 
'error' in video_info: - raise ExtractorError(f'RedGifs said: {video_info["error"]}', expected=True) - gif = video_info['gif'] - urls = gif['urls'] +class RedGifsSearchIE(RedGifsBaseInfoExtractor): + IE_DESC = 'Redgifs search' + _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)' + _PAGE_SIZE = 80 + _TESTS = [ + { + 'url': 'https://www.redgifs.com/browse?tags=Lesbian', + 'info_dict': { + 'id': 'tags=Lesbian', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by trending' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian', + 'info_dict': { + 'id': 'type=g&order=latest&tags=Lesbian', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by latest' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2', + 'info_dict': { + 'id': 'type=g&order=latest&tags=Lesbian&page=2', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by latest' + }, + 'playlist_count': 80, + } + ] - quality = qualities(tuple(self._FORMATS.keys())) + def _real_extract(self, url): + query_str = self._match_valid_url(url).group('query') + query = compat_parse_qs(query_str) + if not query.get('tags'): + raise ExtractorError('Invalid query tags', expected=True) - orig_height = int_or_none(gif.get('height')) - aspect_ratio = try_get(gif, lambda x: orig_height / x['width']) + tags = query.get('tags')[0] + order = query.get('order', ('trending',))[0] - formats = [] - for format_id, height in self._FORMATS.items(): - video_url = urls.get(format_id) - if not video_url: - continue - height = min(orig_height, height or orig_height) - formats.append({ - 'url': video_url, - 'format_id': format_id, - 'width': height * aspect_ratio if aspect_ratio else None, - 'height': height, - 'quality': quality(format_id), - }) - self._sort_formats(formats) + query['search_text'] = [tags] + entries = self._paged_entries('gifs/search', query_str, query, { + 'search_text': None, + 'order': 'trending', + 'type': None, + }) - return { - 'id': video_id, - 'title': ' '.join(gif.get('tags') or []) or 'RedGifs', - 'timestamp': int_or_none(gif.get('createDate')), - 'uploader': gif.get('userName'), - 'duration': int_or_none(gif.get('duration')), - 'view_count': int_or_none(gif.get('views')), - 'like_count': int_or_none(gif.get('likes')), - 'categories': gif.get('tags') or [], - 'age_limit': 18, - 'formats': formats, + return self.playlist_result( + entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}') + + +class RedGifsUserIE(RedGifsBaseInfoExtractor): + IE_DESC = 'Redgifs user' + _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?' 
+ _PAGE_SIZE = 30 + _TESTS = [ + { + 'url': 'https://www.redgifs.com/users/lamsinka89', + 'info_dict': { + 'id': 'lamsinka89', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by recent' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/users/lamsinka89?page=3', + 'info_dict': { + 'id': 'lamsinka89?page=3', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by recent' + }, + 'playlist_count': 30, + }, + { + 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g', + 'info_dict': { + 'id': 'lamsinka89?order=best&type=g', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by best' + }, + 'playlist_mincount': 100, } + ] + + def _real_extract(self, url): + username, query_str = self._match_valid_url(url).group('username', 'query') + playlist_id = f'{username}?{query_str}' if query_str else username + + query = compat_parse_qs(query_str) + order = query.get('order', ('recent',))[0] + + entries = self._paged_entries(f'users/{username}/search', playlist_id, query, { + 'order': 'recent', + 'type': None, + }) + + return self.playlist_result( + entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}') diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py index 142d5dc3a..00a5b00cd 100644 --- a/yt_dlp/extractor/shemaroome.py +++ b/yt_dlp/extractor/shemaroome.py @@ -16,7 +16,7 @@ from ..utils import ( class ShemarooMeIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)' + _VALID_URL = r'https?://(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)' _TESTS = [{ 'url': 'https://www.shemaroome.com/movies/dil-hai-tumhaara', 'info_dict': { @@ -78,7 +78,7 @@ class ShemarooMeIE(InfoExtractor): iv = [0] * 16 m3u8_url = intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv)) m3u8_url = m3u8_url[:-compat_ord((m3u8_url[-1]))].decode('ascii') - formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']}) + formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']}) self._sort_formats(formats) release_date = self._html_search_regex( @@ -91,6 +91,7 @@ class ShemarooMeIE(InfoExtractor): subtitles.setdefault('EN', []).append({ 'url': self._proto_relative_url(sub_url), }) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) description = self._html_search_regex(r'(?s)>Synopsis(</.+?)</', webpage, 'description', fatal=False) return { diff --git a/yt_dlp/extractor/skynewsau.py b/yt_dlp/extractor/skynewsau.py index b1d77951e..8e079ee31 100644 --- a/yt_dlp/extractor/skynewsau.py +++ b/yt_dlp/extractor/skynewsau.py @@ -9,7 +9,7 @@ from ..utils import ( class SkyNewsAUIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)' _TESTS = [{ 'url': 'https://www.skynews.com.au/world-news/united-states/incredible-vision-shows-lava-overflowing-from-spains-la-palma-volcano/video/0f4c6243d6903502c01251f228b91a71', diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py index 60e84529d..fe6a9554a 100644 --- a/yt_dlp/extractor/threespeak.py +++ b/yt_dlp/extractor/threespeak.py @@ -11,7 +11,7 @@ from ..utils import ( class ThreeSpeakIE(InfoExtractor): - _VALID_URL = 
r'(?:https?://)(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)' + _VALID_URL = r'https?://(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)' _TESTS = [{ 'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy', @@ -75,7 +75,7 @@ class ThreeSpeakIE(InfoExtractor): class ThreeSpeakUserIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)' + _VALID_URL = r'https?://(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)' _TESTS = [{ 'url': 'https://3speak.tv/user/theycallmedan', diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py index a0f0cc31c..9d49840a5 100644 --- a/yt_dlp/extractor/trovo.py +++ b/yt_dlp/extractor/trovo.py @@ -17,6 +17,11 @@ class TrovoBaseIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/' _HEADERS = {'Origin': 'https://trovo.live'} + def _call_api(self, video_id, query=None, data=None): + return self._download_json( + 'https://gql.trovo.live/', video_id, query=query, data=data, + headers={'Accept': 'application/json'}) + def _extract_streamer_info(self, data): streamer_info = data.get('streamerInfo') or {} username = streamer_info.get('userName') @@ -32,9 +37,8 @@ class TrovoIE(TrovoBaseIE): def _real_extract(self, url): username = self._match_id(url) - live_info = self._download_json( - 'https://gql.trovo.live/', username, query={ - 'query': '''{ + live_info = self._call_api(username, query={ + 'query': '''{ getLiveInfo(params: {userName: "%s"}) { isLive programInfo { @@ -53,7 +57,7 @@ class TrovoIE(TrovoBaseIE): } } }''' % username, - })['data']['getLiveInfo'] + })['data']['getLiveInfo'] if live_info.get('isLive') == 0: raise ExtractorError('%s is offline' % username, expected=True) program_info = live_info['programInfo'] @@ -111,15 +115,14 @@ class TrovoVodIE(TrovoBaseIE): def _real_extract(self, url): vid = self._match_id(url) - resp = self._download_json( - 'https://gql.trovo.live/', vid, data=json.dumps([{ - 'query': '''{ + resp = self._call_api(vid, data=json.dumps([{ + 'query': '''{ batchGetVodDetailInfo(params: {vids: ["%s"]}) { VodDetailInfos } }''' % vid, - }, { - 'query': '''{ + }, { + 'query': '''{ getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) { commentList { author { @@ -133,9 +136,7 @@ class TrovoVodIE(TrovoBaseIE): } } }''' % vid, - }]).encode(), headers={ - 'Content-Type': 'application/json', - }) + }]).encode()) vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid] vod_info = vod_detail_info['vodInfo'] title = vod_info['title'] @@ -215,7 +216,7 @@ class TrovoChannelBaseIE(InfoExtractor): def _real_extract(self, url): id = self._match_id(url) - uid = str(self._download_json('https://gql.trovo.live/', id, query={ + uid = str(self._call_api(id, query={ 'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id })['data']['getLiveInfo']['streamerInfo']['uid']) return self.playlist_result(self._entries(uid), playlist_id=uid) @@ -237,7 +238,7 @@ class TrovoChannelVodIE(TrovoChannelBaseIE): _TYPE = 'video' def _get_vod_json(self, page, uid): - return self._download_json('https://gql.trovo.live/', uid, query={ + return self._call_api(uid, query={ 'query': self._QUERY % (page, uid) })['data']['getChannelLtvVideoInfos'] @@ -258,6 +259,6 @@ class TrovoChannelClipIE(TrovoChannelBaseIE): _TYPE = 'clip' def _get_vod_json(self, page, uid): - return self._download_json('https://gql.trovo.live/', uid, query={ + return self._call_api(uid, query={ 'query': self._QUERY % (page, uid) })['data']['getChannelClipVideoInfos'] 
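The trovo.py change above routes every GraphQL request through a single _call_api helper instead of repeating the endpoint and headers at each call site. A standalone sketch of the same idea (assumed names, not yt-dlp code; only the endpoint is taken from the diff):

import json
import urllib.request

GQL_ENDPOINT = 'https://gql.trovo.live/'

def call_gql(query, endpoint=GQL_ENDPOINT):
    # Callers only supply the GraphQL query; transport details live in one place.
    request = urllib.request.Request(
        endpoint,
        data=json.dumps({'query': query}).encode(),
        headers={'Accept': 'application/json', 'Content-Type': 'application/json'})
    with urllib.request.urlopen(request) as response:
        return json.load(response)

# Usage mirroring TrovoIE._real_extract (performs a network request):
# live_info = call_gql('{getLiveInfo(params: {userName: "someuser"}) {isLive}}')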
diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py
index 4a25f0c55..4986635f2 100644
--- a/yt_dlp/extractor/utreon.py
+++ b/yt_dlp/extractor/utreon.py
@@ -13,7 +13,7 @@ from ..utils import (
 class UtreonIE(InfoExtractor):
-    _VALID_URL = r'(?:https?://)(?:www\.)?utreon.com/v/(?P<id>[a-zA-Z0-9_-]+)'
+    _VALID_URL = r'https?://(?:www\.)?utreon.com/v/(?P<id>[a-zA-Z0-9_-]+)'
     _TESTS = [{
         'url': 'https://utreon.com/v/z_I7ikQbuDw',
         'info_dict': {
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index e2b86662b..27d5c969d 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -119,10 +119,9 @@ class VimeoBaseInfoExtractor(InfoExtractor):
             self._set_cookie('vimeo.com', name, value)
     def _vimeo_sort_formats(self, formats):
-        # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
-        # at the same time without actual units specified. This lead to wrong sorting.
-        # But since yt-dlp prefers 'res,fps' anyway, 'field_preference' is not needed
-        self._sort_formats(formats)
+        # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
+        # at the same time without actual units specified.
+        self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source'))
     def _parse_config(self, config, video_id):
         video_data = config['video']
@@ -140,6 +139,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
                 formats.append({
                     'url': video_url,
                     'format_id': 'http-%s' % f.get('quality'),
+                    'source_preference': 10,
                     'width': int_or_none(f.get('width')),
                     'height': int_or_none(f.get('height')),
                     'fps': int_or_none(f.get('fps')),
diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py
index e2944ec63..a9b66b95c 100644
--- a/yt_dlp/extractor/voot.py
+++ b/yt_dlp/extractor/voot.py
@@ -15,7 +15,7 @@ class VootIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     (?:
                         voot:|
-                        (?:https?://)(?:www\.)?voot\.com/?
+                        https?://(?:www\.)?voot\.com/?
                         (?:
                             movies/[^/]+/|
                             (?:shows|kids)/(?:[^/]+/){4}
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index ba135613b..e4854bead 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2514,7 +2514,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 query = parse_qs(fmt_url)
                 throttled = False
-                if query.get('ratebypass') != ['yes'] and query.get('n'):
+                if query.get('n'):
                     try:
                         fmt_url = update_url_query(fmt_url, {
                             'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py
index 536604167..462bc4efe 100644
--- a/yt_dlp/extractor/zee5.py
+++ b/yt_dlp/extractor/zee5.py
@@ -21,7 +21,7 @@ class Zee5IE(InfoExtractor):
     _VALID_URL = r'''(?x)
                      (?:
                      zee5:|
-                     (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
+                     https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                      (?:
                          (?:tvshows|kids|zee5originals)(?:/[^#/?]+){3}
                          |movies/[^#/?]+
@@ -174,7 +174,7 @@ class Zee5SeriesIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                      (?:
                      zee5:series:|
-                     (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
+                     https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
                      (?:tvshows|kids|zee5originals)(?:/[^#/?]+){2}/
                      )
                      (?P<id>[^#/?]+)/?(?:$|[?#])
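The largest structural change in this set is dplay.py gaining DPlayBaseIE._get_auth, which reuses one anonymous bearer token per (API host, realm) and prefers a user-supplied 'st' cookie over the cache. A standalone sketch of that caching logic (fetch_token and cookies are assumed stand-ins for yt-dlp internals):

import uuid

_auth_token_cache = {}

def get_auth(fetch_token, cookies, disco_base, realm, needs_device_id=True):
    # cookies: mapping of cookie name -> value for disco_base
    # fetch_token(url, query): performs the anonymous token request and returns the token string
    key = (disco_base, realm)
    token = cookies.get('st') or _auth_token_cache.get(key)
    if not token:
        query = {'realm': realm}
        if needs_device_id:
            query['deviceId'] = uuid.uuid4().hex
        token = fetch_token(disco_base + 'token', query)
        if not cookies.get('st'):
            # cache only when no session cookie is set, as in the diff above
            _auth_token_cache[key] = token
    return 'Bearer ' + token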