aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--yt_dlp/extractor/appleconnect.py13
-rw-r--r--yt_dlp/extractor/bilibili.py2
-rw-r--r--yt_dlp/extractor/curiositystream.py37
-rw-r--r--yt_dlp/extractor/egghead.py16
-rw-r--r--yt_dlp/extractor/extractors.py7
-rw-r--r--yt_dlp/extractor/generic.py31
-rw-r--r--yt_dlp/extractor/liveleak.py191
-rw-r--r--yt_dlp/extractor/nrk.py2
-rw-r--r--yt_dlp/extractor/orf.py3
-rw-r--r--yt_dlp/extractor/pornhub.py50
-rw-r--r--yt_dlp/extractor/umg.py8
-rw-r--r--yt_dlp/extractor/youtube.py23
12 files changed, 98 insertions, 285 deletions
diff --git a/yt_dlp/extractor/appleconnect.py b/yt_dlp/extractor/appleconnect.py
index a84b8b1eb..494f8330c 100644
--- a/yt_dlp/extractor/appleconnect.py
+++ b/yt_dlp/extractor/appleconnect.py
@@ -9,10 +9,10 @@ from ..utils import (
class AppleConnectIE(InfoExtractor):
- _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/idsa\.(?P<id>[\w-]+)'
- _TEST = {
+ _VALID_URL = r'https?://itunes\.apple\.com/\w{0,2}/?post/(?:id)?sa\.(?P<id>[\w-]+)'
+ _TESTS = [{
'url': 'https://itunes.apple.com/us/post/idsa.4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
- 'md5': 'e7c38568a01ea45402570e6029206723',
+ 'md5': 'c1d41f72c8bcaf222e089434619316e4',
'info_dict': {
'id': '4ab17a39-2720-11e5-96c5-a5b38f6c42d3',
'ext': 'm4v',
@@ -22,7 +22,10 @@ class AppleConnectIE(InfoExtractor):
'upload_date': '20150710',
'timestamp': 1436545535,
},
- }
+ }, {
+ 'url': 'https://itunes.apple.com/us/post/sa.0fe0229f-2457-11e5-9f40-1bb645f2d5d9',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
@@ -36,7 +39,7 @@ class AppleConnectIE(InfoExtractor):
video_data = self._parse_json(video_json, video_id)
timestamp = str_to_int(self._html_search_regex(r'data-timestamp="(\d+)"', webpage, 'timestamp'))
- like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count'))
+ like_count = str_to_int(self._html_search_regex(r'(\d+) Loves', webpage, 'like count', default=None))
return {
'id': video_id,
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index baa753976..1fe6a5c18 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -281,7 +281,7 @@ class BiliBiliIE(InfoExtractor):
webpage)
if uploader_mobj:
info.update({
- 'uploader': uploader_mobj.group('name'),
+ 'uploader': uploader_mobj.group('name').strip(),
'uploader_id': uploader_mobj.group('id'),
})
diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py
index c33430a88..034a5c92a 100644
--- a/yt_dlp/extractor/curiositystream.py
+++ b/yt_dlp/extractor/curiositystream.py
@@ -143,9 +143,9 @@ class CuriosityStreamIE(CuriosityStreamBaseIE):
}
-class CuriosityStreamCollectionsIE(CuriosityStreamBaseIE):
- IE_NAME = 'curiositystream:collections'
- _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/collections/(?P<id>\d+)'
+class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
+ IE_NAME = 'curiositystream:collection'
+ _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P<id>\d+)'
_API_BASE_URL = 'https://api.curiositystream.com/v2/collections/'
_TESTS = [{
'url': 'https://curiositystream.com/collections/86',
@@ -155,6 +155,20 @@ class CuriosityStreamCollectionsIE(CuriosityStreamBaseIE):
'description': 'Wondering where to start? Here are a few of our favorite series and films... from our couch to yours.',
},
'playlist_mincount': 7,
+ }, {
+ 'url': 'https://app.curiositystream.com/collection/2',
+ 'info_dict': {
+ 'id': '2',
+ 'title': 'Curious Minds: The Internet',
+ 'description': 'How is the internet shaping our lives in the 21st Century?',
+ },
+ 'playlist_mincount': 16,
+ }, {
+ 'url': 'https://curiositystream.com/series/2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://curiositystream.com/collections/36',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -163,25 +177,10 @@ class CuriosityStreamCollectionsIE(CuriosityStreamBaseIE):
entries = []
for media in collection.get('media', []):
media_id = compat_str(media.get('id'))
- media_type, ie = ('series', CuriosityStreamSeriesIE) if media.get('is_collection') else ('video', CuriosityStreamIE)
+ media_type, ie = ('series', CuriosityStreamCollectionIE) if media.get('is_collection') else ('video', CuriosityStreamIE)
entries.append(self.url_result(
'https://curiositystream.com/%s/%s' % (media_type, media_id),
ie=ie.ie_key(), video_id=media_id))
return self.playlist_result(
entries, collection_id,
collection.get('title'), collection.get('description'))
-
-
-class CuriosityStreamSeriesIE(CuriosityStreamCollectionsIE):
- IE_NAME = 'curiositystream:series'
- _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/series/(?P<id>\d+)'
- _API_BASE_URL = 'https://api.curiositystream.com/v2/series/'
- _TESTS = [{
- 'url': 'https://app.curiositystream.com/series/2',
- 'info_dict': {
- 'id': '2',
- 'title': 'Curious Minds: The Internet',
- 'description': 'How is the internet shaping our lives in the 21st Century?',
- },
- 'playlist_mincount': 16,
- }]
diff --git a/yt_dlp/extractor/egghead.py b/yt_dlp/extractor/egghead.py
index 22123e5d4..f6b50e7c2 100644
--- a/yt_dlp/extractor/egghead.py
+++ b/yt_dlp/extractor/egghead.py
@@ -22,16 +22,19 @@ class EggheadBaseIE(InfoExtractor):
class EggheadCourseIE(EggheadBaseIE):
IE_DESC = 'egghead.io course'
IE_NAME = 'egghead:course'
- _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
- _TEST = {
+ _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:course|playlist)s/(?P<id>[^/?#&]+)'
+ _TESTS = [{
'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
'playlist_count': 29,
'info_dict': {
- 'id': '72',
+ 'id': '432655',
'title': 'Professor Frisby Introduces Composable Functional JavaScript',
'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
},
- }
+ }, {
+ 'url': 'https://app.egghead.io/playlists/professor-frisby-introduces-composable-functional-javascript',
+ 'only_matching': True,
+ }]
def _real_extract(self, url):
playlist_id = self._match_id(url)
@@ -65,7 +68,7 @@ class EggheadCourseIE(EggheadBaseIE):
class EggheadLessonIE(EggheadBaseIE):
IE_DESC = 'egghead.io lesson'
IE_NAME = 'egghead:lesson'
- _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
+ _VALID_URL = r'https://(?:app\.)?egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
'info_dict': {
@@ -88,6 +91,9 @@ class EggheadLessonIE(EggheadBaseIE):
}, {
'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
'only_matching': True,
+ }, {
+ 'url': 'https://app.egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
+ 'only_matching': True,
}]
def _real_extract(self, url):
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index d61771e97..8af15ec7b 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -291,8 +291,7 @@ from .ctvnews import CTVNewsIE
from .cultureunplugged import CultureUnpluggedIE
from .curiositystream import (
CuriosityStreamIE,
- CuriosityStreamCollectionsIE,
- CuriosityStreamSeriesIE,
+ CuriosityStreamCollectionIE,
)
from .cwtv import CWTVIE
from .dailymail import DailyMailIE
@@ -655,10 +654,6 @@ from .linkedin import (
from .linuxacademy import LinuxAcademyIE
from .litv import LiTVIE
from .livejournal import LiveJournalIE
-from .liveleak import (
- LiveLeakIE,
- LiveLeakEmbedIE,
-)
from .livestream import (
LivestreamIE,
LivestreamOriginalIE,
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index cd9efea16..e53a35008 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -84,7 +84,6 @@ from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
from .arkena import ArkenaIE
from .instagram import InstagramIE
-from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
from .kaltura import KalturaIE
@@ -1632,31 +1631,6 @@ class GenericIE(InfoExtractor):
'upload_date': '20160409',
},
},
- # LiveLeak embed
- {
- 'url': 'http://www.wykop.pl/link/3088787/',
- 'md5': '7619da8c820e835bef21a1efa2a0fc71',
- 'info_dict': {
- 'id': '874_1459135191',
- 'ext': 'mp4',
- 'title': 'Man shows poor quality of new apartment building',
- 'description': 'The wall is like a sand pile.',
- 'uploader': 'Lake8737',
- },
- 'add_ie': [LiveLeakIE.ie_key()],
- },
- # Another LiveLeak embed pattern (#13336)
- {
- 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
- 'info_dict': {
- 'id': '2eb_1496309988',
- 'ext': 'mp4',
- 'title': 'Thief robs place where everyone was armed',
- 'description': 'md5:694d73ee79e535953cf2488562288eee',
- 'uploader': 'brazilwtf',
- },
- 'add_ie': [LiveLeakIE.ie_key()],
- },
# Duplicated embedded video URLs
{
'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
@@ -3204,11 +3178,6 @@ class GenericIE(InfoExtractor):
return self.url_result(
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
- # Look for LiveLeak embeds
- liveleak_urls = LiveLeakIE._extract_urls(webpage)
- if liveleak_urls:
- return self.playlist_from_matches(liveleak_urls, video_id, video_title)
-
# Look for 3Q SDN embeds
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
if threeqsdn_url:
diff --git a/yt_dlp/extractor/liveleak.py b/yt_dlp/extractor/liveleak.py
deleted file mode 100644
index 114556ef0..000000000
--- a/yt_dlp/extractor/liveleak.py
+++ /dev/null
@@ -1,191 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class LiveLeakIE(InfoExtractor):
- _VALID_URL = r'https?://(?:\w+\.)?liveleak\.com/view\?.*?\b[it]=(?P<id>[\w_]+)'
- _TESTS = [{
- 'url': 'http://www.liveleak.com/view?i=757_1364311680',
- 'md5': '0813c2430bea7a46bf13acf3406992f4',
- 'info_dict': {
- 'id': '757_1364311680',
- 'ext': 'mp4',
- 'description': 'extremely bad day for this guy..!',
- 'uploader': 'ljfriel2',
- 'title': 'Most unlucky car accident',
- 'thumbnail': r're:^https?://.*\.jpg$'
- }
- }, {
- 'url': 'http://www.liveleak.com/view?i=f93_1390833151',
- 'md5': 'd3f1367d14cc3c15bf24fbfbe04b9abf',
- 'info_dict': {
- 'id': 'f93_1390833151',
- 'ext': 'mp4',
- 'description': 'German Television Channel NDR does an exclusive interview with Edward Snowden.\r\nUploaded on LiveLeak cause German Television thinks the rest of the world isn\'t intereseted in Edward Snowden.',
- 'uploader': 'ARD_Stinkt',
- 'title': 'German Television does first Edward Snowden Interview (ENGLISH)',
- 'thumbnail': r're:^https?://.*\.jpg$'
- }
- }, {
- # Prochan embed
- 'url': 'http://www.liveleak.com/view?i=4f7_1392687779',
- 'md5': '42c6d97d54f1db107958760788c5f48f',
- 'info_dict': {
- 'id': '4f7_1392687779',
- 'ext': 'mp4',
- 'description': "The guy with the cigarette seems amazingly nonchalant about the whole thing... I really hope my friends' reactions would be a bit stronger.\r\n\r\nAction-go to 0:55.",
- 'uploader': 'CapObveus',
- 'title': 'Man is Fatally Struck by Reckless Car While Packing up a Moving Truck',
- 'age_limit': 18,
- },
- 'skip': 'Video is dead',
- }, {
- # Covers https://github.com/ytdl-org/youtube-dl/pull/5983
- # Multiple resolutions
- 'url': 'http://www.liveleak.com/view?i=801_1409392012',
- 'md5': 'c3a449dbaca5c0d1825caecd52a57d7b',
- 'info_dict': {
- 'id': '801_1409392012',
- 'ext': 'mp4',
- 'description': 'Happened on 27.7.2014. \r\nAt 0:53 you can see people still swimming at near beach.',
- 'uploader': 'bony333',
- 'title': 'Crazy Hungarian tourist films close call waterspout in Croatia',
- 'thumbnail': r're:^https?://.*\.jpg$'
- }
- }, {
- # Covers https://github.com/ytdl-org/youtube-dl/pull/10664#issuecomment-247439521
- 'url': 'http://m.liveleak.com/view?i=763_1473349649',
- 'add_ie': ['Youtube'],
- 'info_dict': {
- 'id': '763_1473349649',
- 'ext': 'mp4',
- 'title': 'Reporters and public officials ignore epidemic of black on asian violence in Sacramento | Colin Flaherty',
- 'description': 'Colin being the warrior he is and showing the injustice Asians in Sacramento are being subjected to.',
- 'uploader': 'Ziz',
- 'upload_date': '20160908',
- 'uploader_id': 'UCEbta5E_jqlZmEJsriTEtnw'
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.liveleak.com/view?i=677_1439397581',
- 'info_dict': {
- 'id': '677_1439397581',
- 'title': 'Fuel Depot in China Explosion caught on video',
- },
- 'playlist_count': 3,
- }, {
- 'url': 'https://www.liveleak.com/view?t=HvHi_1523016227',
- 'only_matching': True,
- }, {
- # No original video
- 'url': 'https://www.liveleak.com/view?t=C26ZZ_1558612804',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src="(https?://(?:\w+\.)?liveleak\.com/ll_embed\?[^"]*[ift]=[\w_]+[^"]+)"',
- webpage)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_title = self._og_search_title(webpage).replace('LiveLeak.com -', '').strip()
- video_description = self._og_search_description(webpage)
- video_uploader = self._html_search_regex(
- r'By:.*?(\w+)</a>', webpage, 'uploader', fatal=False)
- age_limit = int_or_none(self._search_regex(
- r'you confirm that you are ([0-9]+) years and over.',
- webpage, 'age limit', default=None))
- video_thumbnail = self._og_search_thumbnail(webpage)
-
- entries = self._parse_html5_media_entries(url, webpage, video_id)
- if not entries:
- # Maybe an embed?
- embed_url = self._search_regex(
- r'<iframe[^>]+src="((?:https?:)?//(?:www\.)?(?:prochan|youtube)\.com/embed[^"]+)"',
- webpage, 'embed URL')
- return {
- '_type': 'url_transparent',
- 'url': embed_url,
- 'id': video_id,
- 'title': video_title,
- 'description': video_description,
- 'uploader': video_uploader,
- 'age_limit': age_limit,
- }
-
- for idx, info_dict in enumerate(entries):
- formats = []
- for a_format in info_dict['formats']:
- if not a_format.get('height'):
- a_format['height'] = int_or_none(self._search_regex(
- r'([0-9]+)p\.mp4', a_format['url'], 'height label',
- default=None))
- formats.append(a_format)
-
- # Removing '.*.mp4' gives the raw video, which is essentially
- # the same video without the LiveLeak logo at the top (see
- # https://github.com/ytdl-org/youtube-dl/pull/4768)
- orig_url = re.sub(r'\.mp4\.[^.]+', '', a_format['url'])
- if a_format['url'] != orig_url:
- format_id = a_format.get('format_id')
- format_id = 'original' + ('-' + format_id if format_id else '')
- if self._is_valid_url(orig_url, video_id, format_id):
- formats.append({
- 'format_id': format_id,
- 'url': orig_url,
- 'quality': 1,
- })
- self._sort_formats(formats)
- info_dict['formats'] = formats
-
- # Don't append entry ID for one-video pages to keep backward compatibility
- if len(entries) > 1:
- info_dict['id'] = '%s_%s' % (video_id, idx + 1)
- else:
- info_dict['id'] = video_id
-
- info_dict.update({
- 'title': video_title,
- 'description': video_description,
- 'uploader': video_uploader,
- 'age_limit': age_limit,
- 'thumbnail': video_thumbnail,
- })
-
- return self.playlist_result(entries, video_id, video_title)
-
-
-class LiveLeakEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?liveleak\.com/ll_embed\?.*?\b(?P<kind>[ift])=(?P<id>[\w_]+)'
-
- # See generic.py for actual test cases
- _TESTS = [{
- 'url': 'https://www.liveleak.com/ll_embed?i=874_1459135191',
- 'only_matching': True,
- }, {
- 'url': 'https://www.liveleak.com/ll_embed?f=ab065df993c1',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- kind, video_id = re.match(self._VALID_URL, url).groups()
-
- if kind == 'f':
- webpage = self._download_webpage(url, video_id)
- liveleak_url = self._search_regex(
- r'(?:logourl\s*:\s*|window\.open\()(?P<q1>[\'"])(?P<url>%s)(?P=q1)' % LiveLeakIE._VALID_URL,
- webpage, 'LiveLeak URL', group='url')
- else:
- liveleak_url = 'http://www.liveleak.com/view?%s=%s' % (kind, video_id)
-
- return self.url_result(liveleak_url, ie=LiveLeakIE.ie_key())
diff --git a/yt_dlp/extractor/nrk.py b/yt_dlp/extractor/nrk.py
index 40dee2162..6d01a25c3 100644
--- a/yt_dlp/extractor/nrk.py
+++ b/yt_dlp/extractor/nrk.py
@@ -58,7 +58,7 @@ class NRKBaseIE(InfoExtractor):
def _call_api(self, path, video_id, item=None, note=None, fatal=True, query=None):
return self._download_json(
- urljoin('http://psapi.nrk.no/', path),
+ urljoin('https://psapi.nrk.no/', path),
video_id, note or 'Downloading %s JSON' % item,
fatal=fatal, query=query,
headers={'Accept-Encoding': 'gzip, deflate, br'})
diff --git a/yt_dlp/extractor/orf.py b/yt_dlp/extractor/orf.py
index ed8a9a841..8d537d7ae 100644
--- a/yt_dlp/extractor/orf.py
+++ b/yt_dlp/extractor/orf.py
@@ -98,6 +98,9 @@ class ORFTVthekIE(InfoExtractor):
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
src, video_id, f4m_id=format_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id=format_id, fatal=False))
else:
formats.append({
'format_id': format_id,
diff --git a/yt_dlp/extractor/pornhub.py b/yt_dlp/extractor/pornhub.py
index cf407a813..c525505d1 100644
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -31,6 +31,7 @@ from ..utils import (
class PornHubBaseIE(InfoExtractor):
_NETRC_MACHINE = 'pornhub'
+ _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubthbh7ap3u\.onion)'
def _download_webpage_handle(self, *args, **kwargs):
def dl(*args, **kwargs):
@@ -123,11 +124,13 @@ class PornHubIE(PornHubBaseIE):
_VALID_URL = r'''(?x)
https?://
(?:
- (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+ (?:[^/]+\.)?
+ %s
+ /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
(?P<id>[\da-z]+)
- '''
+ ''' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
'md5': 'a6391306d050e4547f62b3f485dd9ba9',
@@ -238,6 +241,13 @@ class PornHubIE(PornHubBaseIE):
}, {
'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3',
'only_matching': True,
+ }, {
+ # geo restricted
+ 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://pornhubthbh7ap3u.onion/view_video.php?viewkey=ph5a9813bfa7156',
+ 'only_matching': True,
}]
@staticmethod
@@ -277,6 +287,11 @@ class PornHubIE(PornHubBaseIE):
'PornHub said: %s' % error_msg,
expected=True, video_id=video_id)
+ if any(re.search(p, webpage) for p in (
+ r'class=["\']geoBlocked["\']',
+ r'>\s*This content is unavailable in your country')):
+ self.raise_geo_restricted()
+
# video_title from flashvars contains whitespace instead of non-ASCII (see
# http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying
# on that anymore.
@@ -410,17 +425,14 @@ class PornHubIE(PornHubBaseIE):
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
return
- tbr = None
- mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
- if mobj:
- if not height:
- height = int(mobj.group('height'))
- tbr = int(mobj.group('tbr'))
+ if not height:
+ height = int_or_none(self._search_regex(
+ r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height',
+ default=None))
formats.append({
'url': format_url,
'format_id': '%dp' % height if height else None,
'height': height,
- 'tbr': tbr,
})
for video_url, height in video_urls:
@@ -442,7 +454,10 @@ class PornHubIE(PornHubBaseIE):
add_format(video_url, height)
continue
add_format(video_url)
- self._sort_formats(formats)
+
+ # field_preference is unnecessary here, but kept for code-similarity with youtube-dl
+ self._sort_formats(
+ formats, field_preference=('height', 'width', 'fps', 'format_id'))
video_uploader = self._html_search_regex(
r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
@@ -516,7 +531,7 @@ class PornHubPlaylistBaseIE(PornHubBaseIE):
class PornHubUserIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph',
'playlist_mincount': 118,
@@ -545,6 +560,9 @@ class PornHubUserIE(PornHubPlaylistBaseIE):
# Same as before, multi page
'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau',
'only_matching': True,
+ }, {
+ 'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph',
+ 'only_matching': True,
}]
def _real_extract(self, url):
@@ -620,7 +638,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
- _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
+ _VALID_URL = r'https?://(?:[^/]+\.)?%s/(?P<id>(?:[^/]+/)*[^/?#&]+)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/model/zoe_ph/videos',
'only_matching': True,
@@ -725,6 +743,9 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
}, {
'url': 'https://de.pornhub.com/playlist/4667351',
'only_matching': True,
+ }, {
+ 'url': 'https://pornhubthbh7ap3u.onion/model/zoe_ph/videos',
+ 'only_matching': True,
}]
@classmethod
@@ -735,7 +756,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
- _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?%s/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' % PornHubBaseIE._PORNHUB_HOST_RE
_TESTS = [{
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
'info_dict': {
@@ -745,4 +766,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
}, {
'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
'only_matching': True,
+ }, {
+ 'url': 'http://pornhubthbh7ap3u.onion/pornstar/jenny-blighe/videos/upload',
+ 'only_matching': True,
}]
diff --git a/yt_dlp/extractor/umg.py b/yt_dlp/extractor/umg.py
index 8c84f2009..c1b65d189 100644
--- a/yt_dlp/extractor/umg.py
+++ b/yt_dlp/extractor/umg.py
@@ -28,7 +28,7 @@ class UMGDeIE(InfoExtractor):
def _real_extract(self, url):
video_id = self._match_id(url)
video_data = self._download_json(
- 'https://api.universal-music.de/graphql',
+ 'https://graphql.universal-music.de/',
video_id, query={
'query': '''{
universalMusic(channel:16) {
@@ -56,11 +56,9 @@ class UMGDeIE(InfoExtractor):
formats = []
def add_m3u8_format(format_id):
- m3u8_formats = self._extract_m3u8_formats(
+ formats.extend(self._extract_m3u8_formats(
hls_url_template % format_id, video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal='False')
- if m3u8_formats and m3u8_formats[0].get('height'):
- formats.extend(m3u8_formats)
+ 'm3u8_native', m3u8_id='hls', fatal=False))
for f in video_data.get('formats', []):
f_url = f.get('url')
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index ad2cdb052..c16f16165 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -464,20 +464,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Invidious instances taken from https://github.com/iv-org/documentation/blob/master/Invidious-Instances.md
r'(?:www\.)?invidious\.pussthecat\.org',
r'(?:www\.)?invidious\.zee\.li',
- r'(?:(?:www|au)\.)?ytprivate\.com',
- r'(?:www\.)?invidious\.namazso\.eu',
r'(?:www\.)?invidious\.ethibox\.fr',
- r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
- r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
r'(?:www\.)?invidious\.3o7z6yfxhbw7n3za4rss6l434kmv55cgw2vuziwuigpwegswvwzqipyd\.onion',
- r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
# youtube-dl invidious instances list
r'(?:(?:www|no)\.)?invidiou\.sh',
r'(?:(?:www|fi)\.)?invidious\.snopyta\.org',
r'(?:www\.)?invidious\.kabi\.tk',
r'(?:www\.)?invidious\.mastodon\.host',
r'(?:www\.)?invidious\.zapashcanon\.fr',
- r'(?:www\.)?invidious\.kavin\.rocks',
+ r'(?:www\.)?(?:invidious(?:-us)?|piped)\.kavin\.rocks',
r'(?:www\.)?invidious\.tinfoil-hat\.net',
r'(?:www\.)?invidious\.himiko\.cloud',
r'(?:www\.)?invidious\.reallyancient\.tech',
@@ -504,6 +499,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.toot\.koeln',
r'(?:www\.)?invidious\.fdn\.fr',
r'(?:www\.)?watch\.nettohikari\.com',
+ r'(?:www\.)?invidious\.namazso\.eu',
+ r'(?:www\.)?invidious\.silkky\.cloud',
+ r'(?:www\.)?invidious\.exonip\.de',
+ r'(?:www\.)?invidious\.riverside\.rocks',
+ r'(?:www\.)?invidious\.blamefran\.net',
+ r'(?:www\.)?invidious\.moomoo\.de',
+ r'(?:www\.)?ytb\.trom\.tf',
+ r'(?:www\.)?yt\.cyberhost\.uk',
r'(?:www\.)?kgg2m7yk5aybusll\.onion',
r'(?:www\.)?qklhadlycap4cnod\.onion',
r'(?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion',
@@ -512,6 +515,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
r'(?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion',
r'(?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p',
r'(?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion',
+ r'(?:www\.)?w6ijuptxiku4xpnnaetxvnkc5vqcdu7mgns2u77qefoixi63vbvnpnqd\.onion',
+ r'(?:www\.)?kbjggqkzv65ivcqj6bumvp337z6264huv5kpkwuv6gu5yjiskvan7fad\.onion',
+ r'(?:www\.)?grwp24hodrefzvjjuccrkw3mjq4tzhaaq32amf33dzpmuxe7ilepcmad\.onion',
+ r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion',
)
_VALID_URL = r"""(?x)^
(
@@ -1923,9 +1930,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'c': 'WEB_REMIX',
'cver': '0.1',
'cplayer': 'UNIPLAYER',
- }, fatal=False)),
+ }, fatal=False) or ''),
lambda x: x['player_response'][0],
- compat_str) or '{}', video_id)
+ compat_str) or '{}', video_id, fatal=False)
ytm_streaming_data = ytm_player_response.get('streamingData') or {}
player_response = None