about summary refs log tree commit diff stats
path: root/yt_dlp/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r--  yt_dlp/extractor/amazon.py                2
-rw-r--r--  yt_dlp/extractor/bbc.py                   3
-rw-r--r--  yt_dlp/extractor/bilibili.py              3
-rw-r--r--  yt_dlp/extractor/cableav.py              34
-rw-r--r--  yt_dlp/extractor/canalalpha.py            2
-rw-r--r--  yt_dlp/extractor/chingari.py              4
-rw-r--r--  yt_dlp/extractor/common.py               41
-rw-r--r--  yt_dlp/extractor/cozytv.py                2
-rw-r--r--  yt_dlp/extractor/discoverynetworks.py    41
-rw-r--r--  yt_dlp/extractor/discoveryplusindia.py   97
-rw-r--r--  yt_dlp/extractor/dplay.py               469
-rw-r--r--  yt_dlp/extractor/epicon.py                4
-rw-r--r--  yt_dlp/extractor/euscreen.py              2
-rw-r--r--  yt_dlp/extractor/extractors.py           18
-rw-r--r--  yt_dlp/extractor/gab.py                   2
-rw-r--r--  yt_dlp/extractor/gronkh.py                2
-rw-r--r--  yt_dlp/extractor/hotstar.py               2
-rw-r--r--  yt_dlp/extractor/instagram.py             6
-rw-r--r--  yt_dlp/extractor/koo.py                   2
-rw-r--r--  yt_dlp/extractor/mlssoccer.py             3
-rw-r--r--  yt_dlp/extractor/musescore.py             8
-rw-r--r--  yt_dlp/extractor/mxplayer.py              2
-rw-r--r--  yt_dlp/extractor/niconico.py              4
-rw-r--r--  yt_dlp/extractor/onefootball.py           2
-rw-r--r--  yt_dlp/extractor/planetmarathi.py         2
-rw-r--r--  yt_dlp/extractor/projectveritas.py        2
-rw-r--r--  yt_dlp/extractor/redgifs.py             212
-rw-r--r--  yt_dlp/extractor/shemaroome.py            5
-rw-r--r--  yt_dlp/extractor/skynewsau.py             2
-rw-r--r--  yt_dlp/extractor/threespeak.py            4
-rw-r--r--  yt_dlp/extractor/trovo.py                31
-rw-r--r--  yt_dlp/extractor/utreon.py                2
-rw-r--r--  yt_dlp/extractor/vimeo.py                 8
-rw-r--r--  yt_dlp/extractor/voot.py                  2
-rw-r--r--  yt_dlp/extractor/youtube.py               2
-rw-r--r--  yt_dlp/extractor/zee5.py                  4
36 files changed, 636 insertions, 395 deletions
diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py
index 7c5d35f47..07b1b1861 100644
--- a/yt_dlp/extractor/amazon.py
+++ b/yt_dlp/extractor/amazon.py
@@ -4,7 +4,7 @@ from ..utils import int_or_none
class AmazonStoreIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)'
+ _VALID_URL = r'https?://(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)'
_TESTS = [{
'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/',
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index 672ed1ffe..85ab478a6 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -472,8 +472,7 @@ class BBCCoUkIE(InfoExtractor):
f['language_preference'] = -10
formats += version_formats
for tag, subformats in (version_subtitles or {}).items():
- subtitles.setdefault(tag, [])
- subtitles[tag] += subformats
+ subtitles.setdefault(tag, []).extend(subformats)
return programme_id, title, description, duration, formats, subtitles
except ExtractorError as ee:
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index 483f93d67..e019ec6a8 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -346,7 +346,8 @@ class BiliBiliIE(InfoExtractor):
def _extract_anthology_entries(self, bv_id, video_id, webpage):
title = self._html_search_regex(
(r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
- r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
+ r'(?s)<h1[^>]*>(?P<title>.+?)</h1>',
+ r'<title>(?P<title>.+?)</title>'), webpage, 'title',
group='title')
json_data = self._download_json(
f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
diff --git a/yt_dlp/extractor/cableav.py b/yt_dlp/extractor/cableav.py
new file mode 100644
index 000000000..77efdf45a
--- /dev/null
+++ b/yt_dlp/extractor/cableav.py
@@ -0,0 +1,34 @@
+# coding: utf-8
+from .common import InfoExtractor
+
+
+class CableAVIE(InfoExtractor):
+ _VALID_URL = r'https://cableav\.tv/(?P<id>[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://cableav.tv/lS4iR9lWjN8/',
+ 'md5': '7e3fe5e49d61c4233b7f5b0f69b15e18',
+ 'info_dict': {
+ 'id': 'lS4iR9lWjN8',
+ 'ext': 'mp4',
+ 'title': '國產麻豆AV 叮叮映畫 DDF001 情欲小說家 - CableAV',
+ 'description': '國產AV 480p, 720p 国产麻豆AV 叮叮映画 DDF001 情欲小说家',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._og_search_video_url(webpage, secure=False)
+
+ formats = self._extract_m3u8_formats(video_url, video_id, 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ }
diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py
index 7287677c1..51d30a321 100644
--- a/yt_dlp/extractor/canalalpha.py
+++ b/yt_dlp/extractor/canalalpha.py
@@ -11,7 +11,7 @@ from ..utils import (
class CanalAlphaIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*'
+ _VALID_URL = r'https?://(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P<id>\d+)/?.*'
_TESTS = [{
'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021',
diff --git a/yt_dlp/extractor/chingari.py b/yt_dlp/extractor/chingari.py
index 6bdc4f6bb..e6841fb8b 100644
--- a/yt_dlp/extractor/chingari.py
+++ b/yt_dlp/extractor/chingari.py
@@ -67,7 +67,7 @@ class ChingariBaseIE(InfoExtractor):
class ChingariIE(ChingariBaseIE):
- _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
+ _VALID_URL = r'https?://(?:www\.)?chingari\.io/share/post\?id=(?P<id>[^&/#?]+)'
_TESTS = [{
'url': 'https://chingari.io/share/post?id=612f8f4ce1dc57090e8a7beb',
'info_dict': {
@@ -102,7 +102,7 @@ class ChingariIE(ChingariBaseIE):
class ChingariUserIE(ChingariBaseIE):
- _VALID_URL = r'(?:https?://)(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
+ _VALID_URL = r'https?://(?:www\.)?chingari\.io/(?!share/post)(?P<id>[^/?]+)'
_TESTS = [{
'url': 'https://chingari.io/dada1023',
'playlist_mincount': 3,
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index fc28bca2e..37e69d409 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1079,7 +1079,8 @@ class InfoExtractor(object):
def raise_login_required(
self, msg='This video is only available for registered users',
metadata_available=False, method='any'):
- if metadata_available and self.get_param('ignore_no_formats_error'):
+ if metadata_available and (
+ self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
self.report_warning(msg)
if method is not None:
msg = '%s. %s' % (msg, self._LOGIN_HINTS[method])
@@ -1088,13 +1089,15 @@ class InfoExtractor(object):
def raise_geo_restricted(
self, msg='This video is not available from your location due to geo restriction',
countries=None, metadata_available=False):
- if metadata_available and self.get_param('ignore_no_formats_error'):
+ if metadata_available and (
+ self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
self.report_warning(msg)
else:
raise GeoRestrictedError(msg, countries=countries)
def raise_no_formats(self, msg, expected=False, video_id=None):
- if expected and self.get_param('ignore_no_formats_error'):
+ if expected and (
+ self.get_param('ignore_no_formats_error') or self.get_param('wait_for_video')):
self.report_warning(msg, video_id)
elif isinstance(msg, ExtractorError):
raise msg
@@ -1535,10 +1538,10 @@ class InfoExtractor(object):
default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
'res', 'fps', 'hdr:12', 'codec:vp9.2', 'size', 'br', 'asr',
- 'proto', 'ext', 'hasaud', 'source', 'format_id') # These must not be aliases
+ 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
'height', 'width', 'proto', 'vext', 'abr', 'aext',
- 'fps', 'fs_approx', 'source', 'format_id')
+ 'fps', 'fs_approx', 'source', 'id')
settings = {
'vcodec': {'type': 'ordered', 'regex': True,
@@ -1548,7 +1551,7 @@ class InfoExtractor(object):
'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
- 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.+', '.*dash', 'ws|websocket', '', 'mms|rtsp', 'none', 'f4']},
+ 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
'vext': {'type': 'ordered', 'field': 'video_ext',
'order': ('mp4', 'webm', 'flv', '', 'none'),
'order_free': ('webm', 'mp4', 'flv', '', 'none')},
@@ -1583,7 +1586,7 @@ class InfoExtractor(object):
'res': {'type': 'multiple', 'field': ('height', 'width'),
'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
- # Most of these exist only for compatibility reasons
+ # Deprecated
'dimension': {'type': 'alias', 'field': 'res'},
'resolution': {'type': 'alias', 'field': 'res'},
'extension': {'type': 'alias', 'field': 'ext'},
@@ -1592,7 +1595,7 @@ class InfoExtractor(object):
'video_bitrate': {'type': 'alias', 'field': 'vbr'},
'audio_bitrate': {'type': 'alias', 'field': 'abr'},
'framerate': {'type': 'alias', 'field': 'fps'},
- 'language_preference': {'type': 'alias', 'field': 'lang'}, # not named as 'language' because such a field exists
+ 'language_preference': {'type': 'alias', 'field': 'lang'},
'protocol': {'type': 'alias', 'field': 'proto'},
'source_preference': {'type': 'alias', 'field': 'source'},
'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
@@ -1612,10 +1615,20 @@ class InfoExtractor(object):
'format_id': {'type': 'alias', 'field': 'id'},
}
- _order = []
+ def __init__(self, ie, field_preference):
+ self._order = []
+ self.ydl = ie._downloader
+ self.evaluate_params(self.ydl.params, field_preference)
+ if ie.get_param('verbose'):
+ self.print_verbose_info(self.ydl.write_debug)
def _get_field_setting(self, field, key):
if field not in self.settings:
+ if key in ('forced', 'priority'):
+ return False
+ self.ydl.deprecation_warning(
+ f'Using arbitrary fields ({field}) for format sorting is deprecated '
+ 'and may be removed in a future version')
self.settings[field] = {}
propObj = self.settings[field]
if key not in propObj:
@@ -1698,7 +1711,10 @@ class InfoExtractor(object):
if field is None:
continue
if self._get_field_setting(field, 'type') == 'alias':
- field = self._get_field_setting(field, 'field')
+ alias, field = field, self._get_field_setting(field, 'field')
+ self.ydl.deprecation_warning(
+ f'Format sorting alias {alias} is deprecated '
+ f'and may be removed in a future version. Please use {field} instead')
reverse = match.group('reverse') is not None
closest = match.group('separator') == '~'
limit_text = match.group('limit')
@@ -1802,10 +1818,7 @@ class InfoExtractor(object):
def _sort_formats(self, formats, field_preference=[]):
if not formats:
return
- format_sort = self.FormatSort() # params and to_screen are taken from the downloader
- format_sort.evaluate_params(self._downloader.params, field_preference)
- if self.get_param('verbose', False):
- format_sort.print_verbose_info(self._downloader.write_debug)
+ format_sort = self.FormatSort(self, field_preference)
formats.sort(key=lambda f: format_sort.calculate_preference(f))
def _check_formats(self, formats, video_id):
diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py
index 868d8d27d..d49f1ca74 100644
--- a/yt_dlp/extractor/cozytv.py
+++ b/yt_dlp/extractor/cozytv.py
@@ -6,7 +6,7 @@ from ..utils import unified_strdate
class CozyTVIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)'
+ _VALID_URL = r'https?://(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)'
_TESTS = [{
'url': 'https://cozy.tv/beardson/replays/2021-11-19_1',
diff --git a/yt_dlp/extractor/discoverynetworks.py b/yt_dlp/extractor/discoverynetworks.py
deleted file mode 100644
index 4f8bdf0b9..000000000
--- a/yt_dlp/extractor/discoverynetworks.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
-from .dplay import DPlayIE
-
-
-class DiscoveryNetworksDeIE(DPlayIE):
- _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
-
- _TESTS = [{
- 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
- 'info_dict': {
- 'id': '78867',
- 'ext': 'mp4',
- 'title': 'Die Welt da draußen',
- 'description': 'md5:61033c12b73286e409d99a41742ef608',
- 'timestamp': 1554069600,
- 'upload_date': '20190331',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
- 'only_matching': True,
- }, {
- 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
- 'only_matching': True,
- }, {
- 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- domain, programme, alternate_id = self._match_valid_url(url).groups()
- country = 'GB' if domain == 'dplay.co.uk' else 'DE'
- realm = 'questuk' if country == 'GB' else domain.replace('.', '')
- return self._get_disco_api_info(
- url, '%s/%s' % (programme, alternate_id),
- 'sonic-eu1-prod.disco-api.com', realm, country)
diff --git a/yt_dlp/extractor/discoveryplusindia.py b/yt_dlp/extractor/discoveryplusindia.py
deleted file mode 100644
index 8ec418a97..000000000
--- a/yt_dlp/extractor/discoveryplusindia.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-
-from ..compat import compat_str
-from ..utils import try_get
-from .common import InfoExtractor
-from .dplay import DPlayIE
-
-
-class DiscoveryPlusIndiaIE(DPlayIE):
- _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayIE._PATH_REGEX
- _TESTS = [{
- 'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE',
- 'info_dict': {
- 'id': '27104',
- 'ext': 'mp4',
- 'display_id': 'how-do-they-do-it/fugu-and-more',
- 'title': 'Fugu and More',
- 'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.',
- 'duration': 1319,
- 'timestamp': 1582309800,
- 'upload_date': '20200221',
- 'series': 'How Do They Do It?',
- 'season_number': 8,
- 'episode_number': 2,
- 'creator': 'Discovery Channel',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Cookies (not necessarily logged in) are needed'
- }]
-
- def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
- headers['x-disco-params'] = 'realm=%s' % realm
- headers['x-disco-client'] = 'WEB:UNKNOWN:dplus-india:17.0.0'
-
- def _download_video_playback_info(self, disco_base, video_id, headers):
- return self._download_json(
- disco_base + 'playback/v3/videoPlaybackInfo',
- video_id, headers=headers, data=json.dumps({
- 'deviceInfo': {
- 'adBlocker': False,
- },
- 'videoId': video_id,
- }).encode('utf-8'))['data']['attributes']['streaming']
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- return self._get_disco_api_info(
- url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in')
-
-
-class DiscoveryPlusIndiaShowIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)'
- _TESTS = [{
- 'url': 'https://www.discoveryplus.in/show/how-do-they-do-it',
- 'playlist_mincount': 140,
- 'info_dict': {
- 'id': 'how-do-they-do-it',
- },
- }]
-
- def _entries(self, show_name):
- headers = {
- 'x-disco-client': 'WEB:UNKNOWN:dplus-india:prod',
- 'x-disco-params': 'realm=dplusindia',
- 'referer': 'https://www.discoveryplus.in/',
- }
- show_url = 'https://ap2-prod-direct.discoveryplus.in/cms/routes/show/{}?include=default'.format(show_name)
- show_json = self._download_json(show_url,
- video_id=show_name,
- headers=headers)['included'][4]['attributes']['component']
- show_id = show_json['mandatoryParams'].split('=')[-1]
- season_url = 'https://ap2-prod-direct.discoveryplus.in/content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}'
- for season in show_json['filters'][0]['options']:
- season_id = season['id']
- total_pages, page_num = 1, 0
- while page_num < total_pages:
- season_json = self._download_json(season_url.format(season_id, show_id, compat_str(page_num + 1)),
- video_id=show_id, headers=headers,
- note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
- if page_num == 0:
- total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1
- episodes_json = season_json['data']
- for episode in episodes_json:
- video_id = episode['attributes']['path']
- yield self.url_result(
- 'https://discoveryplus.in/videos/%s' % video_id,
- ie=DiscoveryPlusIndiaIE.ie_key(), video_id=video_id)
- page_num += 1
-
- def _real_extract(self, url):
- show_name = self._match_valid_url(url).group('show_name')
- return self.playlist_result(self._entries(show_name), playlist_id=show_name)
diff --git a/yt_dlp/extractor/dplay.py b/yt_dlp/extractor/dplay.py
index 525c8e243..f5d6540c0 100644
--- a/yt_dlp/extractor/dplay.py
+++ b/yt_dlp/extractor/dplay.py
@@ -2,6 +2,7 @@
from __future__ import unicode_literals
import json
+import uuid
from .common import InfoExtractor
from ..compat import compat_HTTPError
@@ -11,12 +12,172 @@ from ..utils import (
float_or_none,
int_or_none,
strip_or_none,
+ try_get,
unified_timestamp,
)
-class DPlayIE(InfoExtractor):
+class DPlayBaseIE(InfoExtractor):
_PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
+ _auth_token_cache = {}
+
+ def _get_auth(self, disco_base, display_id, realm, needs_device_id=True):
+ key = (disco_base, realm)
+ st = self._get_cookies(disco_base).get('st')
+ token = (st and st.value) or self._auth_token_cache.get(key)
+
+ if not token:
+ query = {'realm': realm}
+ if needs_device_id:
+ query['deviceId'] = uuid.uuid4().hex
+ token = self._download_json(
+ disco_base + 'token', display_id, 'Downloading token',
+ query=query)['data']['attributes']['token']
+
+ # Save cache only if cookies are not being set
+ if not self._get_cookies(disco_base).get('st'):
+ self._auth_token_cache[key] = token
+
+ return f'Bearer {token}'
+
+ def _process_errors(self, e, geo_countries):
+ info = self._parse_json(e.cause.read().decode('utf-8'), None)
+ error = info['errors'][0]
+ error_code = error.get('code')
+ if error_code == 'access.denied.geoblocked':
+ self.raise_geo_restricted(countries=geo_countries)
+ elif error_code in ('access.denied.missingpackage', 'invalid.token'):
+ raise ExtractorError(
+ 'This video is only available for registered users. You may want to use --cookies.', expected=True)
+ raise ExtractorError(info['errors'][0]['detail'], expected=True)
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers['Authorization'] = self._get_auth(disco_base, display_id, realm, False)
+
+ def _download_video_playback_info(self, disco_base, video_id, headers):
+ streaming = self._download_json(
+ disco_base + 'playback/videoPlaybackInfo/' + video_id,
+ video_id, headers=headers)['data']['attributes']['streaming']
+ streaming_list = []
+ for format_id, format_dict in streaming.items():
+ streaming_list.append({
+ 'type': format_id,
+ 'url': format_dict.get('url'),
+ })
+ return streaming_list
+
+ def _get_disco_api_info(self, url, display_id, disco_host, realm, country, domain=''):
+ geo_countries = [country.upper()]
+ self._initialize_geo_bypass({
+ 'countries': geo_countries,
+ })
+ disco_base = 'https://%s/' % disco_host
+ headers = {
+ 'Referer': url,
+ }
+ self._update_disco_api_headers(headers, disco_base, display_id, realm)
+ try:
+ video = self._download_json(
+ disco_base + 'content/videos/' + display_id, display_id,
+ headers=headers, query={
+ 'fields[channel]': 'name',
+ 'fields[image]': 'height,src,width',
+ 'fields[show]': 'name',
+ 'fields[tag]': 'name',
+ 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
+ 'include': 'images,primaryChannel,show,tags'
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ self._process_errors(e, geo_countries)
+ raise
+ video_id = video['data']['id']
+ info = video['data']['attributes']
+ title = info['name'].strip()
+ formats = []
+ subtitles = {}
+ try:
+ streaming = self._download_video_playback_info(
+ disco_base, video_id, headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self._process_errors(e, geo_countries)
+ raise
+ for format_dict in streaming:
+ if not isinstance(format_dict, dict):
+ continue
+ format_url = format_dict.get('url')
+ if not format_url:
+ continue
+ format_id = format_dict.get('type')
+ ext = determine_ext(format_url)
+ if format_id == 'dash' or ext == 'mpd':
+ dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles(
+ format_url, display_id, mpd_id='dash', fatal=False)
+ formats.extend(dash_fmts)
+ subtitles = self._merge_subtitles(subtitles, dash_subs)
+ elif format_id == 'hls' or ext == 'm3u8':
+ m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
+ format_url, display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False)
+ formats.extend(m3u8_fmts)
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ self._sort_formats(formats)
+
+ creator = series = None
+ tags = []
+ thumbnails = []
+ included = video.get('included') or []
+ if isinstance(included, list):
+ for e in included:
+ attributes = e.get('attributes')
+ if not attributes:
+ continue
+ e_type = e.get('type')
+ if e_type == 'channel':
+ creator = attributes.get('name')
+ elif e_type == 'image':
+ src = attributes.get('src')
+ if src:
+ thumbnails.append({
+ 'url': src,
+ 'width': int_or_none(attributes.get('width')),
+ 'height': int_or_none(attributes.get('height')),
+ })
+ if e_type == 'show':
+ series = attributes.get('name')
+ elif e_type == 'tag':
+ name = attributes.get('name')
+ if name:
+ tags.append(name)
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': strip_or_none(info.get('description')),
+ 'duration': float_or_none(info.get('videoDuration'), 1000),
+ 'timestamp': unified_timestamp(info.get('publishStart')),
+ 'series': series,
+ 'season_number': int_or_none(info.get('seasonNumber')),
+ 'episode_number': int_or_none(info.get('episodeNumber')),
+ 'creator': creator,
+ 'tags': tags,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'http_headers': {
+ 'referer': domain,
+ },
+ }
+
+
+class DPlayIE(DPlayBaseIE):
_VALID_URL = r'''(?x)https?://
(?P<domain>
(?:www\.)?(?P<host>d
@@ -26,7 +187,7 @@ class DPlayIE(InfoExtractor):
)
)|
(?P<subdomain_country>es|it)\.dplay\.com
- )/[^/]+''' + _PATH_REGEX
+ )/[^/]+''' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
# non geo restricted, via secure api, unsigned download hls URL
@@ -150,138 +311,6 @@ class DPlayIE(InfoExtractor):
'only_matching': True,
}]
- def _process_errors(self, e, geo_countries):
- info = self._parse_json(e.cause.read().decode('utf-8'), None)
- error = info['errors'][0]
- error_code = error.get('code')
- if error_code == 'access.denied.geoblocked':
- self.raise_geo_restricted(countries=geo_countries)
- elif error_code in ('access.denied.missingpackage', 'invalid.token'):
- raise ExtractorError(
- 'This video is only available for registered users. You may want to use --cookies.', expected=True)
- raise ExtractorError(info['errors'][0]['detail'], expected=True)
-
- def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
- headers['Authorization'] = 'Bearer ' + self._download_json(
- disco_base + 'token', display_id, 'Downloading token',
- query={
- 'realm': realm,
- })['data']['attributes']['token']
-
- def _download_video_playback_info(self, disco_base, video_id, headers):
- streaming = self._download_json(
- disco_base + 'playback/videoPlaybackInfo/' + video_id,
- video_id, headers=headers)['data']['attributes']['streaming']
- streaming_list = []
- for format_id, format_dict in streaming.items():
- streaming_list.append({
- 'type': format_id,
- 'url': format_dict.get('url'),
- })
- return streaming_list
-
- def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
- geo_countries = [country.upper()]
- self._initialize_geo_bypass({
- 'countries': geo_countries,
- })
- disco_base = 'https://%s/' % disco_host
- headers = {
- 'Referer': url,
- }
- self._update_disco_api_headers(headers, disco_base, display_id, realm)
- try:
- video = self._download_json(
- disco_base + 'content/videos/' + display_id, display_id,
- headers=headers, query={
- 'fields[channel]': 'name',
- 'fields[image]': 'height,src,width',
- 'fields[show]': 'name',
- 'fields[tag]': 'name',
- 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
- 'include': 'images,primaryChannel,show,tags'
- })
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
- self._process_errors(e, geo_countries)
- raise
- video_id = video['data']['id']
- info = video['data']['attributes']
- title = info['name'].strip()
- formats = []
- try:
- streaming = self._download_video_playback_info(
- disco_base, video_id, headers)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- self._process_errors(e, geo_countries)
- raise
- for format_dict in streaming:
- if not isinstance(format_dict, dict):
- continue
- format_url = format_dict.get('url')
- if not format_url:
- continue
- format_id = format_dict.get('type')
- ext = determine_ext(format_url)
- if format_id == 'dash' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- format_url, display_id, mpd_id='dash', fatal=False))
- elif format_id == 'hls' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, display_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls',
- fatal=False))
- else:
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- })
- self._sort_formats(formats)
-
- creator = series = None
- tags = []
- thumbnails = []
- included = video.get('included') or []
- if isinstance(included, list):
- for e in included:
- attributes = e.get('attributes')
- if not attributes:
- continue
- e_type = e.get('type')
- if e_type == 'channel':
- creator = attributes.get('name')
- elif e_type == 'image':
- src = attributes.get('src')
- if src:
- thumbnails.append({
- 'url': src,
- 'width': int_or_none(attributes.get('width')),
- 'height': int_or_none(attributes.get('height')),
- })
- if e_type == 'show':
- series = attributes.get('name')
- elif e_type == 'tag':
- name = attributes.get('name')
- if name:
- tags.append(name)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': strip_or_none(info.get('description')),
- 'duration': float_or_none(info.get('videoDuration'), 1000),
- 'timestamp': unified_timestamp(info.get('publishStart')),
- 'series': series,
- 'season_number': int_or_none(info.get('seasonNumber')),
- 'episode_number': int_or_none(info.get('episodeNumber')),
- 'creator': creator,
- 'tags': tags,
- 'thumbnails': thumbnails,
- 'formats': formats,
- }
-
def _real_extract(self, url):
mobj = self._match_valid_url(url)
display_id = mobj.group('id')
@@ -289,11 +318,11 @@ class DPlayIE(InfoExtractor):
country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
return self._get_disco_api_info(
- url, display_id, host, 'dplay' + country, country)
+ url, display_id, host, 'dplay' + country, country, domain)
-class HGTVDeIE(DPlayIE):
- _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
+class HGTVDeIE(DPlayBaseIE):
+ _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
'info_dict': {
@@ -318,8 +347,8 @@ class HGTVDeIE(DPlayIE):
url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')
-class DiscoveryPlusIE(DPlayIE):
- _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?:\w{2}/)?video' + DPlayIE._PATH_REGEX
+class DiscoveryPlusIE(DPlayBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/(?:\w{2}/)?video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
'info_dict': {
@@ -369,7 +398,7 @@ class DiscoveryPlusIE(DPlayIE):
class ScienceChannelIE(DiscoveryPlusIE):
- _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayIE._PATH_REGEX
+ _VALID_URL = r'https?://(?:www\.)?sciencechannel\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.sciencechannel.com/video/strangest-things-science-atve-us/nazi-mystery-machine',
'info_dict': {
@@ -389,7 +418,7 @@ class ScienceChannelIE(DiscoveryPlusIE):
class DIYNetworkIE(DiscoveryPlusIE):
- _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayIE._PATH_REGEX
+ _VALID_URL = r'https?://(?:watch\.)?diynetwork\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://watch.diynetwork.com/video/pool-kings-diy-network/bringing-beach-life-to-texas',
'info_dict': {
@@ -409,7 +438,7 @@ class DIYNetworkIE(DiscoveryPlusIE):
class AnimalPlanetIE(DiscoveryPlusIE):
- _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayIE._PATH_REGEX
+ _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/video' + DPlayBaseIE._PATH_REGEX
_TESTS = [{
'url': 'https://www.animalplanet.com/video/north-woods-law-animal-planet/squirrel-showdown',
'info_dict': {
@@ -426,3 +455,159 @@ class AnimalPlanetIE(DiscoveryPlusIE):
_PRODUCT = 'apl'
_API_URL = 'us1-prod-direct.animalplanet.com'
+
+
+class DiscoveryPlusIndiaIE(DPlayBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayBaseIE._PATH_REGEX
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE',
+ 'info_dict': {
+ 'id': '27104',
+ 'ext': 'mp4',
+ 'display_id': 'how-do-they-do-it/fugu-and-more',
+ 'title': 'Fugu and More',
+ 'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.',
+ 'duration': 1319,
+ 'timestamp': 1582309800,
+ 'upload_date': '20200221',
+ 'series': 'How Do They Do It?',
+ 'season_number': 8,
+ 'episode_number': 2,
+ 'creator': 'Discovery Channel',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
+
+ def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
+ headers.update({
+ 'x-disco-params': 'realm=%s' % realm,
+ 'x-disco-client': 'WEB:UNKNOWN:dplus-india:17.0.0',
+ 'Authorization': self._get_auth(disco_base, display_id, realm),
+ })
+
+ def _download_video_playback_info(self, disco_base, video_id, headers):
+ return self._download_json(
+ disco_base + 'playback/v3/videoPlaybackInfo',
+ video_id, headers=headers, data=json.dumps({
+ 'deviceInfo': {
+ 'adBlocker': False,
+ },
+ 'videoId': video_id,
+ }).encode('utf-8'))['data']['attributes']['streaming']
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ return self._get_disco_api_info(
+ url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in', 'https://www.discoveryplus.in/')
+
+
+class DiscoveryNetworksDeIE(DPlayBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show|sendungen)/(?P<programme>[^/]+)/(?:video/)?(?P<alternate_id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
+ 'info_dict': {
+ 'id': '78867',
+ 'ext': 'mp4',
+ 'title': 'Die Welt da draußen',
+ 'description': 'md5:61033c12b73286e409d99a41742ef608',
+ 'timestamp': 1554069600,
+ 'upload_date': '20190331',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tlc.de/sendungen/breaking-amish/die-welt-da-drauen/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, programme, alternate_id = self._match_valid_url(url).groups()
+ country = 'GB' if domain == 'dplay.co.uk' else 'DE'
+ realm = 'questuk' if country == 'GB' else domain.replace('.', '')
+ return self._get_disco_api_info(
+ url, '%s/%s' % (programme, alternate_id),
+ 'sonic-eu1-prod.disco-api.com', realm, country)
+
+
+class DiscoveryPlusShowBaseIE(DPlayBaseIE):
+
+ def _entries(self, show_name):
+ headers = {
+ 'x-disco-client': self._X_CLIENT,
+ 'x-disco-params': f'realm={self._REALM}',
+ 'referer': self._DOMAIN,
+ 'Authentication': self._get_auth(self._BASE_API, None, self._REALM),
+ }
+ show_json = self._download_json(
+ f'{self._BASE_API}cms/routes/{self._SHOW_STR}/{show_name}?include=default',
+ video_id=show_name, headers=headers)['included'][self._INDEX]['attributes']['component']
+ show_id = show_json['mandatoryParams'].split('=')[-1]
+ season_url = self._BASE_API + 'content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}'
+ for season in show_json['filters'][0]['options']:
+ season_id = season['id']
+ total_pages, page_num = 1, 0
+ while page_num < total_pages:
+ season_json = self._download_json(
+ season_url.format(season_id, show_id, str(page_num + 1)), show_name, headers=headers,
+ note='Downloading season %s JSON metadata%s' % (season_id, ' page %d' % page_num if page_num else ''))
+ if page_num == 0:
+ total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1
+ episodes_json = season_json['data']
+ for episode in episodes_json:
+ video_id = episode['attributes']['path']
+ yield self.url_result(
+ '%svideos/%s' % (self._DOMAIN, video_id),
+ ie=self._VIDEO_IE.ie_key(), video_id=video_id)
+ page_num += 1
+
+ def _real_extract(self, url):
+ show_name = self._match_valid_url(url).group('show_name')
+ return self.playlist_result(self._entries(show_name), playlist_id=show_name)
+
+
+class DiscoveryPlusItalyShowIE(DiscoveryPlusShowBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.it/programmi/(?P<show_name>[^/]+)/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.it/programmi/deal-with-it-stai-al-gioco',
+ 'playlist_mincount': 168,
+ 'info_dict': {
+ 'id': 'deal-with-it-stai-al-gioco',
+ },
+ }]
+
+ _BASE_API = 'https://disco-api.discoveryplus.it/'
+ _DOMAIN = 'https://www.discoveryplus.it/'
+ _X_CLIENT = 'WEB:UNKNOWN:dplay-client:2.6.0'
+ _REALM = 'dplayit'
+ _SHOW_STR = 'programmi'
+ _INDEX = 1
+ _VIDEO_IE = DPlayIE
+
+
+class DiscoveryPlusIndiaShowIE(DiscoveryPlusShowBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)'
+ _TESTS = [{
+ 'url': 'https://www.discoveryplus.in/show/how-do-they-do-it',
+ 'playlist_mincount': 140,
+ 'info_dict': {
+ 'id': 'how-do-they-do-it',
+ },
+ }]
+
+ _BASE_API = 'https://ap2-prod-direct.discoveryplus.in/'
+ _DOMAIN = 'https://www.discoveryplus.in/'
+ _X_CLIENT = 'WEB:UNKNOWN:dplus-india:prod'
+ _REALM = 'dplusindia'
+ _SHOW_STR = 'show'
+ _INDEX = 4
+ _VIDEO_IE = DiscoveryPlusIndiaIE
diff --git a/yt_dlp/extractor/epicon.py b/yt_dlp/extractor/epicon.py
index b4e544d4f..cd19325bc 100644
--- a/yt_dlp/extractor/epicon.py
+++ b/yt_dlp/extractor/epicon.py
@@ -8,7 +8,7 @@ from ..utils import ExtractorError
class EpiconIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?epicon\.in/(?:documentaries|movies|tv-shows/[^/?#]+/[^/?#]+)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.epicon.in/documentaries/air-battle-of-srinagar',
'info_dict': {
@@ -84,7 +84,7 @@ class EpiconIE(InfoExtractor):
class EpiconSeriesIE(InfoExtractor):
- _VALID_URL = r'(?!.*season)(?:https?://)(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)'
+ _VALID_URL = r'(?!.*season)https?://(?:www\.)?epicon\.in/tv-shows/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.epicon.in/tv-shows/1-of-something',
'playlist_mincount': 5,
diff --git a/yt_dlp/extractor/euscreen.py b/yt_dlp/extractor/euscreen.py
index 3980c2349..2759e7436 100644
--- a/yt_dlp/extractor/euscreen.py
+++ b/yt_dlp/extractor/euscreen.py
@@ -10,7 +10,7 @@ from ..utils import (
class EUScreenIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)'
+ _VALID_URL = r'https?://(?:www\.)?euscreen\.eu/item.html\?id=(?P<id>[^&?$/]+)'
_TESTS = [{
'url': 'https://euscreen.eu/item.html?id=EUS_0EBCBF356BFC4E12A014023BA41BD98C',
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index a4baad2da..0741a728f 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -188,6 +188,7 @@ from .businessinsider import BusinessInsiderIE
from .buzzfeed import BuzzFeedIE
from .byutv import BYUtvIE
from .c56 import C56IE
+from .cableav import CableAVIE
from .cam4 import CAM4IE
from .camdemy import (
CamdemyIE,
@@ -341,10 +342,6 @@ from .democracynow import DemocracynowIE
from .dfb import DFBIE
from .dhm import DHMIE
from .digg import DiggIE
-from .discoveryplusindia import (
- DiscoveryPlusIndiaIE,
- DiscoveryPlusIndiaShowIE,
-)
from .dotsub import DotsubIE
from .douyutv import (
DouyuShowIE,
@@ -356,7 +353,11 @@ from .dplay import (
HGTVDeIE,
ScienceChannelIE,
DIYNetworkIE,
- AnimalPlanetIE
+ AnimalPlanetIE,
+ DiscoveryPlusIndiaIE,
+ DiscoveryNetworksDeIE,
+ DiscoveryPlusItalyShowIE,
+ DiscoveryPlusIndiaShowIE,
)
from .dreisat import DreiSatIE
from .drbonanza import DRBonanzaIE
@@ -378,7 +379,6 @@ from .discoverygo import (
DiscoveryGoIE,
DiscoveryGoPlaylistIE,
)
-from .discoverynetworks import DiscoveryNetworksDeIE
from .discoveryvr import DiscoveryVRIE
from .disney import DisneyIE
from .dispeak import DigitallySpeakingIE
@@ -1216,7 +1216,11 @@ from .redbulltv import (
RedBullIE,
)
from .reddit import RedditIE
-from .redgifs import RedGifsIE
+from .redgifs import (
+ RedGifsIE,
+ RedGifsSearchIE,
+ RedGifsUserIE,
+)
from .redtube import RedTubeIE
from .regiotv import RegioTVIE
from .rentv import (
diff --git a/yt_dlp/extractor/gab.py b/yt_dlp/extractor/gab.py
index bde6e8624..9ba0b1ca1 100644
--- a/yt_dlp/extractor/gab.py
+++ b/yt_dlp/extractor/gab.py
@@ -15,7 +15,7 @@ from ..utils import (
class GabTVIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)tv.gab.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)'
+ _VALID_URL = r'https?://tv\.gab\.com/channel/[^/]+/view/(?P<id>[a-z0-9-]+)'
_TESTS = [{
'url': 'https://tv.gab.com/channel/wurzelroot/view/why-was-america-in-afghanistan-61217eacea5665de450d0488',
'info_dict': {
diff --git a/yt_dlp/extractor/gronkh.py b/yt_dlp/extractor/gronkh.py
index a7792a5e0..58cd59511 100644
--- a/yt_dlp/extractor/gronkh.py
+++ b/yt_dlp/extractor/gronkh.py
@@ -6,7 +6,7 @@ from ..utils import unified_strdate
class GronkhIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?gronkh\.tv/stream/(?P<id>\d+)'
_TESTS = [{
'url': 'https://gronkh.tv/stream/536',
diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py
index 0bdf772a1..de2b30cf7 100644
--- a/yt_dlp/extractor/hotstar.py
+++ b/yt_dlp/extractor/hotstar.py
@@ -296,7 +296,7 @@ class HotStarPlaylistIE(HotStarBaseIE):
class HotStarSeriesIE(HotStarBaseIE):
IE_NAME = 'hotstar:series'
- _VALID_URL = r'(?P<url>(?:https?://)(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))'
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com(?:/in)?/tv/[^/]+/(?P<id>\d+))'
_TESTS = [{
'url': 'https://www.hotstar.com/in/tv/radhakrishn/1260000646',
'info_dict': {
diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py
index 1fcf97a19..2ec24f3e7 100644
--- a/yt_dlp/extractor/instagram.py
+++ b/yt_dlp/extractor/instagram.py
@@ -63,6 +63,10 @@ class InstagramBaseIE(InfoExtractor):
if not login.get('authenticated'):
if login.get('message'):
raise ExtractorError(f'Unable to login: {login["message"]}')
+ elif login.get('user'):
+ raise ExtractorError('Unable to login: Sorry, your password was incorrect. Please double-check your password.', expected=True)
+ elif login.get('user') is False:
+ raise ExtractorError('Unable to login: The username you entered doesn\'t belong to an account. Please check your username and try again.', expected=True)
raise ExtractorError('Unable to login')
InstagramBaseIE._IS_LOGGED_IN = True
@@ -495,7 +499,7 @@ class InstagramUserIE(InstagramPlaylistBaseIE):
class InstagramTagIE(InstagramPlaylistBaseIE):
_VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P<id>[^/]+)'
- IE_DESC = 'Instagram hashtag search'
+ IE_DESC = 'Instagram hashtag search URLs'
IE_NAME = 'instagram:tag'
_TESTS = [{
'url': 'https://instagram.com/explore/tags/lolcats',
diff --git a/yt_dlp/extractor/koo.py b/yt_dlp/extractor/koo.py
index 1706b28a0..088db1cb0 100644
--- a/yt_dlp/extractor/koo.py
+++ b/yt_dlp/extractor/koo.py
@@ -8,7 +8,7 @@ from ..utils import (
class KooIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
+ _VALID_URL = r'https?://(?:www\.)?kooapp\.com/koo/[^/]+/(?P<id>[^/&#$?]+)'
_TESTS = [{ # Test for video in the comments
'url': 'https://www.kooapp.com/koo/ytdlpTestAccount/946c4189-bc2d-4524-b95b-43f641e2adde',
'info_dict': {
diff --git a/yt_dlp/extractor/mlssoccer.py b/yt_dlp/extractor/mlssoccer.py
index 2d65787e2..1d6d4b804 100644
--- a/yt_dlp/extractor/mlssoccer.py
+++ b/yt_dlp/extractor/mlssoccer.py
@@ -6,7 +6,7 @@ from .common import InfoExtractor
class MLSSoccerIE(InfoExtractor):
_VALID_DOMAINS = r'(?:(?:cfmontreal|intermiamicf|lagalaxy|lafc|houstondynamofc|dcunited|atlutd|mlssoccer|fcdallas|columbuscrew|coloradorapids|fccincinnati|chicagofirefc|austinfc|nashvillesc|whitecapsfc|sportingkc|soundersfc|sjearthquakes|rsl|timbers|philadelphiaunion|orlandocitysc|newyorkredbulls|nycfc)\.com|(?:torontofc)\.ca|(?:revolutionsoccer)\.net)'
- _VALID_URL = r'(?:https?://)(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS
+ _VALID_URL = r'https?://(?:www\.)?%s/video/#?(?P<id>[^/&$#?]+)' % _VALID_DOMAINS
_TESTS = [{
'url': 'https://www.mlssoccer.com/video/the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986#the-octagon-can-alphonso-davies-lead-canada-to-first-world-cup-since-1986',
@@ -21,7 +21,6 @@ class MLSSoccerIE(InfoExtractor):
'uploader_id': '5530036772001',
'tags': ['club/canada'],
'is_live': False,
- 'duration_string': '5:50',
'upload_date': '20211007',
'filesize_approx': 255193528.83200002
},
diff --git a/yt_dlp/extractor/musescore.py b/yt_dlp/extractor/musescore.py
index dcd26388a..09fadf8d9 100644
--- a/yt_dlp/extractor/musescore.py
+++ b/yt_dlp/extractor/musescore.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
class MuseScoreIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)'
+ _VALID_URL = r'https?://(?:www\.)?musescore\.com/(?:user/\d+|[^/]+)(?:/scores)?/(?P<id>[^#&?]+)'
_TESTS = [{
'url': 'https://musescore.com/user/73797/scores/142975',
'info_dict': {
@@ -13,7 +13,7 @@ class MuseScoreIE(InfoExtractor):
'ext': 'mp3',
'title': 'WA Mozart Marche Turque (Turkish March fingered)',
'description': 'md5:7ede08230e4eaabd67a4a98bb54d07be',
- 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
'uploader': 'PapyPiano',
'creator': 'Wolfgang Amadeus Mozart',
}
@@ -24,7 +24,7 @@ class MuseScoreIE(InfoExtractor):
'ext': 'mp3',
'title': 'Sweet Child O\' Mine – Guns N\' Roses sweet child',
'description': 'md5:4dca71191c14abc312a0a4192492eace',
- 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
'uploader': 'roxbelviolin',
'creator': 'Guns N´Roses Arr. Roxbel Violin',
}
@@ -35,7 +35,7 @@ class MuseScoreIE(InfoExtractor):
'ext': 'mp3',
'title': 'Für Elise – Beethoven',
'description': 'md5:49515a3556d5ecaf9fa4b2514064ac34',
- 'thumbnail': r're:(?:https?://)(?:www\.)?musescore\.com/.*\.png[^$]+',
+ 'thumbnail': r're:https?://(?:www\.)?musescore\.com/.*\.png[^$]+',
'uploader': 'ClassicMan',
'creator': 'Ludwig van Beethoven (1770–1827)',
}
diff --git a/yt_dlp/extractor/mxplayer.py b/yt_dlp/extractor/mxplayer.py
index 5874556e3..3c2afd838 100644
--- a/yt_dlp/extractor/mxplayer.py
+++ b/yt_dlp/extractor/mxplayer.py
@@ -180,7 +180,7 @@ class MxplayerIE(InfoExtractor):
class MxplayerShowIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])'
+ _VALID_URL = r'https?://(?:www\.)?mxplayer\.in/show/(?P<display_id>[-\w]+)-(?P<id>\w+)/?(?:$|[#?])'
_TESTS = [{
'url': 'https://www.mxplayer.in/show/watch-chakravartin-ashoka-samrat-series-online-a8f44e3cc0814b5601d17772cedf5417',
'playlist_mincount': 440,
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 4bcea33d5..b46ca293f 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -703,7 +703,7 @@ class NicovideoSearchURLIE(InfoExtractor):
class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE):
- IE_DESC = 'Nico video searches'
+ IE_DESC = 'Nico video search'
IE_NAME = NicovideoSearchIE_NAME
_SEARCH_KEY = 'nicosearch'
_TESTS = []
@@ -714,7 +714,7 @@ class NicovideoSearchIE(SearchInfoExtractor, NicovideoSearchURLIE):
class NicovideoSearchDateIE(NicovideoSearchIE):
- IE_DESC = 'Nico video searches, newest first'
+ IE_DESC = 'Nico video search, newest first'
IE_NAME = f'{NicovideoSearchIE_NAME}:date'
_SEARCH_KEY = 'nicosearchdate'
_TESTS = [{
diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py
index 79501003d..826faadd2 100644
--- a/yt_dlp/extractor/onefootball.py
+++ b/yt_dlp/extractor/onefootball.py
@@ -5,7 +5,7 @@ from .common import InfoExtractor
class OneFootballIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P<id>\d+)'
_TESTS = [{
'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334',
diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py
index d1d9911f7..07ac15b54 100644
--- a/yt_dlp/extractor/planetmarathi.py
+++ b/yt_dlp/extractor/planetmarathi.py
@@ -9,7 +9,7 @@ from ..utils import (
class PlanetMarathiIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)'
+ _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)'
_TESTS = [{
'url': 'https://www.planetmarathi.com/titles/ek-unad-divas',
'playlist_mincount': 2,
diff --git a/yt_dlp/extractor/projectveritas.py b/yt_dlp/extractor/projectveritas.py
index 1d832a679..9e9867ba5 100644
--- a/yt_dlp/extractor/projectveritas.py
+++ b/yt_dlp/extractor/projectveritas.py
@@ -10,7 +10,7 @@ from ..utils import (
class ProjectVeritasIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
_TESTS = [{
'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/',
'info_dict': {
diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py
index 1257d1344..55196b768 100644
--- a/yt_dlp/extractor/redgifs.py
+++ b/yt_dlp/extractor/redgifs.py
@@ -1,21 +1,94 @@
# coding: utf-8
+import functools
from .common import InfoExtractor
+from ..compat import compat_parse_qs
from ..utils import (
ExtractorError,
int_or_none,
qualities,
try_get,
+ OnDemandPagedList,
)
-class RedGifsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|thumbs2?)\.)?redgifs\.com/(?:watch/)?(?P<id>[^-/?#\.]+)'
+class RedGifsBaseInfoExtractor(InfoExtractor):
_FORMATS = {
'gif': 250,
'sd': 480,
'hd': None,
}
+
+ def _parse_gif_data(self, gif_data):
+ video_id = gif_data.get('id')
+ quality = qualities(tuple(self._FORMATS.keys()))
+
+ orig_height = int_or_none(gif_data.get('height'))
+ aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width'])
+
+ formats = []
+ for format_id, height in self._FORMATS.items():
+ video_url = gif_data['urls'].get(format_id)
+ if not video_url:
+ continue
+ height = min(orig_height, height or orig_height)
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'width': height * aspect_ratio if aspect_ratio else None,
+ 'height': height,
+ 'quality': quality(format_id),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'webpage_url': f'https://redgifs.com/watch/{video_id}',
+ 'ie_key': RedGifsIE.ie_key(),
+ 'extractor': 'RedGifs',
+ 'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs',
+ 'timestamp': int_or_none(gif_data.get('createDate')),
+ 'uploader': gif_data.get('userName'),
+ 'duration': int_or_none(gif_data.get('duration')),
+ 'view_count': int_or_none(gif_data.get('views')),
+ 'like_count': int_or_none(gif_data.get('likes')),
+ 'categories': gif_data.get('tags') or [],
+ 'tags': gif_data.get('tags'),
+ 'age_limit': 18,
+ 'formats': formats,
+ }
+
+ def _call_api(self, ep, video_id, *args, **kwargs):
+ data = self._download_json(
+ f'https://api.redgifs.com/v2/{ep}', video_id, *args, **kwargs)
+ if 'error' in data:
+ raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id)
+ return data
+
+ def _fetch_page(self, ep, video_id, query, page):
+ query['page'] = page + 1
+ data = self._call_api(
+ ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}')
+
+ for entry in data['gifs']:
+ yield self._parse_gif_data(entry)
+
+ def _prepare_api_query(self, query, fields):
+ api_query = [
+ (field_name, query.get(field_name, (default,))[0])
+ for field_name, default in fields.items()]
+
+ return {key: val for key, val in api_query if val is not None}
+
+ def _paged_entries(self, ep, item_id, query, fields):
+ page = int_or_none(query.get('page', (None,))[0])
+ page_fetcher = functools.partial(
+ self._fetch_page, ep, item_id, self._prepare_api_query(query, fields))
+ return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
+
+
+class RedGifsIE(RedGifsBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)'
_TESTS = [{
'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent',
'info_dict': {
@@ -50,45 +123,110 @@ class RedGifsIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url).lower()
+ video_info = self._call_api(
+ f'gifs/{video_id}', video_id, note='Downloading video info')
+ return self._parse_gif_data(video_info['gif'])
- video_info = self._download_json(
- 'https://api.redgifs.com/v2/gifs/%s' % video_id,
- video_id, 'Downloading video info')
- if 'error' in video_info:
- raise ExtractorError(f'RedGifs said: {video_info["error"]}', expected=True)
- gif = video_info['gif']
- urls = gif['urls']
+class RedGifsSearchIE(RedGifsBaseInfoExtractor):
+ IE_DESC = 'Redgifs search'
+ _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)'
+ _PAGE_SIZE = 80
+ _TESTS = [
+ {
+ 'url': 'https://www.redgifs.com/browse?tags=Lesbian',
+ 'info_dict': {
+ 'id': 'tags=Lesbian',
+ 'title': 'Lesbian',
+ 'description': 'RedGifs search for Lesbian, ordered by trending'
+ },
+ 'playlist_mincount': 100,
+ },
+ {
+ 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian',
+ 'info_dict': {
+ 'id': 'type=g&order=latest&tags=Lesbian',
+ 'title': 'Lesbian',
+ 'description': 'RedGifs search for Lesbian, ordered by latest'
+ },
+ 'playlist_mincount': 100,
+ },
+ {
+ 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2',
+ 'info_dict': {
+ 'id': 'type=g&order=latest&tags=Lesbian&page=2',
+ 'title': 'Lesbian',
+ 'description': 'RedGifs search for Lesbian, ordered by latest'
+ },
+ 'playlist_count': 80,
+ }
+ ]
- quality = qualities(tuple(self._FORMATS.keys()))
+ def _real_extract(self, url):
+ query_str = self._match_valid_url(url).group('query')
+ query = compat_parse_qs(query_str)
+ if not query.get('tags'):
+ raise ExtractorError('Invalid query tags', expected=True)
- orig_height = int_or_none(gif.get('height'))
- aspect_ratio = try_get(gif, lambda x: orig_height / x['width'])
+ tags = query.get('tags')[0]
+ order = query.get('order', ('trending',))[0]
- formats = []
- for format_id, height in self._FORMATS.items():
- video_url = urls.get(format_id)
- if not video_url:
- continue
- height = min(orig_height, height or orig_height)
- formats.append({
- 'url': video_url,
- 'format_id': format_id,
- 'width': height * aspect_ratio if aspect_ratio else None,
- 'height': height,
- 'quality': quality(format_id),
- })
- self._sort_formats(formats)
+ query['search_text'] = [tags]
+ entries = self._paged_entries('gifs/search', query_str, query, {
+ 'search_text': None,
+ 'order': 'trending',
+ 'type': None,
+ })
- return {
- 'id': video_id,
- 'title': ' '.join(gif.get('tags') or []) or 'RedGifs',
- 'timestamp': int_or_none(gif.get('createDate')),
- 'uploader': gif.get('userName'),
- 'duration': int_or_none(gif.get('duration')),
- 'view_count': int_or_none(gif.get('views')),
- 'like_count': int_or_none(gif.get('likes')),
- 'categories': gif.get('tags') or [],
- 'age_limit': 18,
- 'formats': formats,
+ return self.playlist_result(
+ entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}')
+
+
+class RedGifsUserIE(RedGifsBaseInfoExtractor):
+ IE_DESC = 'Redgifs user'
+ _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?'
+ _PAGE_SIZE = 30
+ _TESTS = [
+ {
+ 'url': 'https://www.redgifs.com/users/lamsinka89',
+ 'info_dict': {
+ 'id': 'lamsinka89',
+ 'title': 'lamsinka89',
+ 'description': 'RedGifs user lamsinka89, ordered by recent'
+ },
+ 'playlist_mincount': 100,
+ },
+ {
+ 'url': 'https://www.redgifs.com/users/lamsinka89?page=3',
+ 'info_dict': {
+ 'id': 'lamsinka89?page=3',
+ 'title': 'lamsinka89',
+ 'description': 'RedGifs user lamsinka89, ordered by recent'
+ },
+ 'playlist_count': 30,
+ },
+ {
+ 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g',
+ 'info_dict': {
+ 'id': 'lamsinka89?order=best&type=g',
+ 'title': 'lamsinka89',
+ 'description': 'RedGifs user lamsinka89, ordered by best'
+ },
+ 'playlist_mincount': 100,
}
+ ]
+
+ def _real_extract(self, url):
+ username, query_str = self._match_valid_url(url).group('username', 'query')
+ playlist_id = f'{username}?{query_str}' if query_str else username
+
+ query = compat_parse_qs(query_str)
+ order = query.get('order', ('recent',))[0]
+
+ entries = self._paged_entries(f'users/{username}/search', playlist_id, query, {
+ 'order': 'recent',
+ 'type': None,
+ })
+
+ return self.playlist_result(
+ entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}')
diff --git a/yt_dlp/extractor/shemaroome.py b/yt_dlp/extractor/shemaroome.py
index 142d5dc3a..00a5b00cd 100644
--- a/yt_dlp/extractor/shemaroome.py
+++ b/yt_dlp/extractor/shemaroome.py
@@ -16,7 +16,7 @@ from ..utils import (
class ShemarooMeIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?shemaroome\.com/(?:movies|shows)/(?P<id>[^?#]+)'
_TESTS = [{
'url': 'https://www.shemaroome.com/movies/dil-hai-tumhaara',
'info_dict': {
@@ -78,7 +78,7 @@ class ShemarooMeIE(InfoExtractor):
iv = [0] * 16
m3u8_url = intlist_to_bytes(aes_cbc_decrypt(url_data, key, iv))
m3u8_url = m3u8_url[:-compat_ord((m3u8_url[-1]))].decode('ascii')
- formats = self._extract_m3u8_formats(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
+ formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, fatal=False, headers={'stream_key': data_json['stream_key']})
self._sort_formats(formats)
release_date = self._html_search_regex(
@@ -91,6 +91,7 @@ class ShemarooMeIE(InfoExtractor):
subtitles.setdefault('EN', []).append({
'url': self._proto_relative_url(sub_url),
})
+ subtitles = self._merge_subtitles(subtitles, m3u8_subs)
description = self._html_search_regex(r'(?s)>Synopsis(</.+?)</', webpage, 'description', fatal=False)
return {
diff --git a/yt_dlp/extractor/skynewsau.py b/yt_dlp/extractor/skynewsau.py
index b1d77951e..8e079ee31 100644
--- a/yt_dlp/extractor/skynewsau.py
+++ b/yt_dlp/extractor/skynewsau.py
@@ -9,7 +9,7 @@ from ..utils import (
class SkyNewsAUIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)'
+ _VALID_URL = r'https?://(?:www\.)?skynews\.com\.au/[^/]+/[^/]+/[^/]+/video/(?P<id>[a-z0-9]+)'
_TESTS = [{
'url': 'https://www.skynews.com.au/world-news/united-states/incredible-vision-shows-lava-overflowing-from-spains-la-palma-volcano/video/0f4c6243d6903502c01251f228b91a71',
diff --git a/yt_dlp/extractor/threespeak.py b/yt_dlp/extractor/threespeak.py
index 60e84529d..fe6a9554a 100644
--- a/yt_dlp/extractor/threespeak.py
+++ b/yt_dlp/extractor/threespeak.py
@@ -11,7 +11,7 @@ from ..utils import (
class ThreeSpeakIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)'
+ _VALID_URL = r'https?://(?:www\.)?3speak\.tv/watch\?v\=[^/]+/(?P<id>[^/$&#?]+)'
_TESTS = [{
'url': 'https://3speak.tv/watch?v=dannyshine/wjgoxyfy',
@@ -75,7 +75,7 @@ class ThreeSpeakIE(InfoExtractor):
class ThreeSpeakUserIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)'
+ _VALID_URL = r'https?://(?:www\.)?3speak\.tv/user/(?P<id>[^/$&?#]+)'
_TESTS = [{
'url': 'https://3speak.tv/user/theycallmedan',
diff --git a/yt_dlp/extractor/trovo.py b/yt_dlp/extractor/trovo.py
index a0f0cc31c..9d49840a5 100644
--- a/yt_dlp/extractor/trovo.py
+++ b/yt_dlp/extractor/trovo.py
@@ -17,6 +17,11 @@ class TrovoBaseIE(InfoExtractor):
_VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'
_HEADERS = {'Origin': 'https://trovo.live'}
+ def _call_api(self, video_id, query=None, data=None):
+ return self._download_json(
+ 'https://gql.trovo.live/', video_id, query=query, data=data,
+ headers={'Accept': 'application/json'})
+
def _extract_streamer_info(self, data):
streamer_info = data.get('streamerInfo') or {}
username = streamer_info.get('userName')
@@ -32,9 +37,8 @@ class TrovoIE(TrovoBaseIE):
def _real_extract(self, url):
username = self._match_id(url)
- live_info = self._download_json(
- 'https://gql.trovo.live/', username, query={
- 'query': '''{
+ live_info = self._call_api(username, query={
+ 'query': '''{
getLiveInfo(params: {userName: "%s"}) {
isLive
programInfo {
@@ -53,7 +57,7 @@ class TrovoIE(TrovoBaseIE):
}
}
}''' % username,
- })['data']['getLiveInfo']
+ })['data']['getLiveInfo']
if live_info.get('isLive') == 0:
raise ExtractorError('%s is offline' % username, expected=True)
program_info = live_info['programInfo']
@@ -111,15 +115,14 @@ class TrovoVodIE(TrovoBaseIE):
def _real_extract(self, url):
vid = self._match_id(url)
- resp = self._download_json(
- 'https://gql.trovo.live/', vid, data=json.dumps([{
- 'query': '''{
+ resp = self._call_api(vid, data=json.dumps([{
+ 'query': '''{
batchGetVodDetailInfo(params: {vids: ["%s"]}) {
VodDetailInfos
}
}''' % vid,
- }, {
- 'query': '''{
+ }, {
+ 'query': '''{
getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
commentList {
author {
@@ -133,9 +136,7 @@ class TrovoVodIE(TrovoBaseIE):
}
}
}''' % vid,
- }]).encode(), headers={
- 'Content-Type': 'application/json',
- })
+ }]).encode())
vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
vod_info = vod_detail_info['vodInfo']
title = vod_info['title']
@@ -215,7 +216,7 @@ class TrovoChannelBaseIE(InfoExtractor):
def _real_extract(self, url):
id = self._match_id(url)
- uid = str(self._download_json('https://gql.trovo.live/', id, query={
+ uid = str(self._call_api(id, query={
'query': '{getLiveInfo(params:{userName:"%s"}){streamerInfo{uid}}}' % id
})['data']['getLiveInfo']['streamerInfo']['uid'])
return self.playlist_result(self._entries(uid), playlist_id=uid)
@@ -237,7 +238,7 @@ class TrovoChannelVodIE(TrovoChannelBaseIE):
_TYPE = 'video'
def _get_vod_json(self, page, uid):
- return self._download_json('https://gql.trovo.live/', uid, query={
+ return self._call_api(uid, query={
'query': self._QUERY % (page, uid)
})['data']['getChannelLtvVideoInfos']
@@ -258,6 +259,6 @@ class TrovoChannelClipIE(TrovoChannelBaseIE):
_TYPE = 'clip'
def _get_vod_json(self, page, uid):
- return self._download_json('https://gql.trovo.live/', uid, query={
+ return self._call_api(uid, query={
'query': self._QUERY % (page, uid)
})['data']['getChannelClipVideoInfos']
diff --git a/yt_dlp/extractor/utreon.py b/yt_dlp/extractor/utreon.py
index 4a25f0c55..4986635f2 100644
--- a/yt_dlp/extractor/utreon.py
+++ b/yt_dlp/extractor/utreon.py
@@ -13,7 +13,7 @@ from ..utils import (
class UtreonIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)(?:www\.)?utreon.com/v/(?P<id>[a-zA-Z0-9_-]+)'
+ _VALID_URL = r'https?://(?:www\.)?utreon.com/v/(?P<id>[a-zA-Z0-9_-]+)'
_TESTS = [{
'url': 'https://utreon.com/v/z_I7ikQbuDw',
'info_dict': {
diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py
index e2b86662b..27d5c969d 100644
--- a/yt_dlp/extractor/vimeo.py
+++ b/yt_dlp/extractor/vimeo.py
@@ -119,10 +119,9 @@ class VimeoBaseInfoExtractor(InfoExtractor):
self._set_cookie('vimeo.com', name, value)
def _vimeo_sort_formats(self, formats):
- # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
- # at the same time without actual units specified. This lead to wrong sorting.
- # But since yt-dlp prefers 'res,fps' anyway, 'field_preference' is not needed
- self._sort_formats(formats)
+ # Note: Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
+ # at the same time without actual units specified.
+ self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source'))
def _parse_config(self, config, video_id):
video_data = config['video']
@@ -140,6 +139,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
formats.append({
'url': video_url,
'format_id': 'http-%s' % f.get('quality'),
+ 'source_preference': 10,
'width': int_or_none(f.get('width')),
'height': int_or_none(f.get('height')),
'fps': int_or_none(f.get('fps')),
diff --git a/yt_dlp/extractor/voot.py b/yt_dlp/extractor/voot.py
index e2944ec63..a9b66b95c 100644
--- a/yt_dlp/extractor/voot.py
+++ b/yt_dlp/extractor/voot.py
@@ -15,7 +15,7 @@ class VootIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
voot:|
- (?:https?://)(?:www\.)?voot\.com/?
+ https?://(?:www\.)?voot\.com/?
(?:
movies/[^/]+/|
(?:shows|kids)/(?:[^/]+/){4}
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index ba135613b..e4854bead 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2514,7 +2514,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
query = parse_qs(fmt_url)
throttled = False
- if query.get('ratebypass') != ['yes'] and query.get('n'):
+ if query.get('n'):
try:
fmt_url = update_url_query(fmt_url, {
'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
diff --git a/yt_dlp/extractor/zee5.py b/yt_dlp/extractor/zee5.py
index 536604167..462bc4efe 100644
--- a/yt_dlp/extractor/zee5.py
+++ b/yt_dlp/extractor/zee5.py
@@ -21,7 +21,7 @@ class Zee5IE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
zee5:|
- (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
+ https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
(?:
(?:tvshows|kids|zee5originals)(?:/[^#/?]+){3}
|movies/[^#/?]+
@@ -174,7 +174,7 @@ class Zee5SeriesIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?:
zee5:series:|
- (?:https?://)(?:www\.)?zee5\.com/(?:[^#?]+/)?
+ https?://(?:www\.)?zee5\.com/(?:[^#?]+/)?
(?:tvshows|kids|zee5originals)(?:/[^#/?]+){2}/
)
(?P<id>[^#/?]+)/?(?:$|[?#])