aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor
diff options
context:
space:
mode:
authorJesús <heckyel@hyperbola.info>2021-12-07 12:26:51 -0500
committerJesús <heckyel@hyperbola.info>2021-12-07 12:26:51 -0500
commit495746b9a6d4d32ddfa39ed908092d90a7cd5f3f (patch)
tree4845e40905136556b7513b9f36e3a70e505ee4c9 /yt_dlp/extractor
parent25831c5572c6e1d45bc05a122312516e0d264f8d (diff)
parentddd24c99493483bde822944e8063064f53464ac1 (diff)
downloadhypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.lz
hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.xz
hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.zip
updated from upstream | 07/12/2021 at 12:26
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r--yt_dlp/extractor/ceskatelevize.py15
-rw-r--r--yt_dlp/extractor/common.py18
-rw-r--r--yt_dlp/extractor/niconico.py8
-rw-r--r--yt_dlp/extractor/ntvcojp.py27
-rw-r--r--yt_dlp/extractor/redtube.py35
-rw-r--r--yt_dlp/extractor/sovietscloset.py13
6 files changed, 73 insertions, 43 deletions
diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py
index f766dfbb7..6ca2f38b5 100644
--- a/yt_dlp/extractor/ceskatelevize.py
+++ b/yt_dlp/extractor/ceskatelevize.py
@@ -12,8 +12,7 @@ from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
- unescapeHTML,
- update_url_query,
+ traverse_obj,
urlencode_postdata,
USER_AGENTS,
)
@@ -99,11 +98,13 @@ class CeskaTelevizeIE(InfoExtractor):
playlist_description = playlist_description.replace('\xa0', ' ')
if parsed_url.path.startswith('/porady/'):
- refer_url = update_url_query(unescapeHTML(self._search_regex(
- (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
- webpage, 'iframe player url', group='url')), query={'autoStart': 'true'})
- webpage = self._download_webpage(refer_url, playlist_id)
+ next_data = self._search_nextjs_data(webpage, playlist_id)
+ idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+ if not idec:
+ raise ExtractorError('Failed to find IDEC id')
+ iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
+ webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
+ query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 2180f879c..d8fc5272c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1513,6 +1513,24 @@ class InfoExtractor(object):
webpage, 'next.js data', **kw),
video_id, **kw)
+ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'):
+ ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
+ # not all website do this, but it can be changed
+ # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source
+ rectx = re.escape(context_name)
+ js, arg_keys, arg_vals = self._search_regex(
+ (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx,
+ r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx),
+ webpage, context_name, group=['js', 'arg_keys', 'arg_vals'])
+
+ args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
+
+ for key, val in args.items():
+ if val in ('undefined', 'void 0'):
+ args[key] = 'null'
+
+ return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+
@staticmethod
def _hidden_inputs(html):
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 4fcf1d8ed..ee888e9d3 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -675,16 +675,16 @@ class NicovideoSearchBaseIE(InfoExtractor):
if not results:
break
+ def _search_results(self, query):
+ return self._entries(
+ self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
+
class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
IE_DESC = 'Nico video search'
IE_NAME = 'nicovideo:search'
_SEARCH_KEY = 'nicosearch'
- def _search_results(self, query):
- return self._entries(
- self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
-
class NicovideoSearchURLIE(NicovideoSearchBaseIE):
IE_NAME = f'{NicovideoSearchIE.IE_NAME}_url'
diff --git a/yt_dlp/extractor/ntvcojp.py b/yt_dlp/extractor/ntvcojp.py
index 0c8221b22..c9af91188 100644
--- a/yt_dlp/extractor/ntvcojp.py
+++ b/yt_dlp/extractor/ntvcojp.py
@@ -3,8 +3,9 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- js_to_json,
+ ExtractorError,
smuggle_url,
+ traverse_obj,
)
@@ -19,7 +20,7 @@ class NTVCoJpCUIE(InfoExtractor):
'ext': 'mp4',
'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
'upload_date': '20181213',
- 'description': 'md5:211b52f4fd60f3e0e72b68b0c6ba52a9',
+ 'description': 'md5:1985b51a9abc285df0104d982a325f2a',
'uploader_id': '3855502814001',
'timestamp': 1544669941,
},
@@ -28,22 +29,30 @@ class NTVCoJpCUIE(InfoExtractor):
'skip_download': True,
},
}
+
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- player_config = self._parse_json(self._search_regex(
- r'(?s)PLAYER_CONFIG\s*=\s*({.+?})',
- webpage, 'player config'), display_id, js_to_json)
- video_id = player_config['videoId']
- account_id = player_config.get('account') or '3855502814001'
+ player_config = self._search_nuxt_data(webpage, display_id)
+ video_id = traverse_obj(player_config, ('movie', 'video_id'))
+ if not video_id:
+ raise ExtractorError('Failed to extract video ID for Brightcove')
+ account_id = traverse_obj(player_config, ('player', 'account')) or '3855502814001'
+ title = traverse_obj(player_config, ('movie', 'name'))
+ if not title:
+ og_title = self._og_search_title(webpage, fatal=False) or traverse_obj(player_config, ('player', 'title'))
+ if og_title:
+ title = og_title.split('(', 1)[0].strip()
+ description = (traverse_obj(player_config, ('movie', 'description'))
+ or self._html_search_meta(['description', 'og:description'], webpage))
return {
'_type': 'url_transparent',
'id': video_id,
'display_id': display_id,
- 'title': self._search_regex(r'<h1[^>]+class="title"[^>]*>([^<]+)', webpage, 'title').strip(),
- 'description': self._html_search_meta(['description', 'og:description'], webpage),
+ 'title': title,
+ 'description': description,
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
'ie_key': 'BrightcoveNew',
}
diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py
index 747ce5199..7fee54fee 100644
--- a/yt_dlp/extractor/redtube.py
+++ b/yt_dlp/extractor/redtube.py
@@ -17,17 +17,20 @@ from ..utils import (
class RedTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
_TESTS = [{
- 'url': 'http://www.redtube.com/66418',
- 'md5': 'fc08071233725f26b8f014dba9590005',
+ 'url': 'https://www.redtube.com/38864951',
+ 'md5': '4fba70cbca3aefd25767ab4b523c9878',
'info_dict': {
- 'id': '66418',
+ 'id': '38864951',
'ext': 'mp4',
- 'title': 'Sucked on a toilet',
- 'upload_date': '20110811',
- 'duration': 596,
+ 'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu',
+ 'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu',
+ 'upload_date': '20210111',
+ 'timestamp': 1610343109,
+ 'duration': 646,
'view_count': int,
'age_limit': 18,
- }
+ 'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg',
+ },
}, {
'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
'only_matching': True,
@@ -84,15 +87,25 @@ class RedTubeIE(InfoExtractor):
r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
'media definitions', default='{}'),
video_id, fatal=False)
- if medias and isinstance(medias, list):
- for media in medias:
+ for media in medias if isinstance(medias, list) else []:
+ format_url = url_or_none(media.get('videoUrl'))
+ if not format_url:
+ continue
+ format_id = media.get('format')
+ quality = media.get('quality')
+ if format_id == 'hls' or (format_id == 'mp4' and not quality):
+ more_media = self._download_json(format_url, video_id, fatal=False)
+ else:
+ more_media = [media]
+ for media in more_media if isinstance(more_media, list) else []:
format_url = url_or_none(media.get('videoUrl'))
if not format_url:
continue
- if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8':
+ format_id = media.get('format')
+ if format_id == 'hls' or determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls',
+ entry_protocol='m3u8_native', m3u8_id=format_id or 'hls',
fatal=False))
continue
format_id = media.get('quality')
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 7df23759a..daf1c7450 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- js_to_json,
try_get,
unified_timestamp
)
@@ -14,17 +13,7 @@ class SovietsClosetBaseIE(InfoExtractor):
def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, name):
nuxt_jsonp = self._download_webpage(nuxt_jsonp_url, video_id, note=f'Downloading {name} __NUXT_JSONP__')
- js, arg_keys, arg_vals = self._search_regex(
- r'__NUXT_JSONP__\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)',
- nuxt_jsonp, '__NUXT_JSONP__', group=['js', 'arg_keys', 'arg_vals'])
-
- args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
-
- for key, val in args.items():
- if val in ('undefined', 'void 0'):
- args[key] = 'null'
-
- return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+ return self._search_nuxt_data(nuxt_jsonp, video_id, '__NUXT_JSONP__')
def video_meta(self, video_id, game_name, category_name, episode_number, stream_date):
title = game_name