about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jesús <heckyel@hyperbola.info> 2021-12-07 12:26:51 -0500
committer Jesús <heckyel@hyperbola.info> 2021-12-07 12:26:51 -0500
commit 495746b9a6d4d32ddfa39ed908092d90a7cd5f3f (patch)
tree 4845e40905136556b7513b9f36e3a70e505ee4c9
parent 25831c5572c6e1d45bc05a122312516e0d264f8d (diff)
parent ddd24c99493483bde822944e8063064f53464ac1 (diff)
download hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.lz
hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.xz
hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.zip
updated from upstream | 07/12/2021 at 12:26
-rw-r--r-- yt_dlp/YoutubeDL.py 5
-rw-r--r-- yt_dlp/__init__.py 18
-rw-r--r-- yt_dlp/extractor/ceskatelevize.py 15
-rw-r--r-- yt_dlp/extractor/common.py 18
-rw-r--r-- yt_dlp/extractor/niconico.py 8
-rw-r--r-- yt_dlp/extractor/ntvcojp.py 27
-rw-r--r-- yt_dlp/extractor/redtube.py 35
-rw-r--r-- yt_dlp/extractor/sovietscloset.py 13
-rw-r--r-- yt_dlp/options.py 7
-rw-r--r-- yt_dlp/utils.py 3
10 files changed, 91 insertions, 58 deletions
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 45500ab5a..e953916d5 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1370,11 +1370,11 @@ class YoutubeDL(object):
min_wait, max_wait = self.params.get('wait_for_video')
diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
if diff is None and ie_result.get('live_status') == 'is_upcoming':
- diff = random.randrange(min_wait or 0, max_wait) if max_wait else min_wait
+ diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
self.report_warning('Release time of video is not known')
elif (diff or 0) <= 0:
self.report_warning('Video should already be available according to extracted info')
- diff = min(max(diff, min_wait or 0), max_wait or float('inf'))
+ diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
wait_till = time.time() + diff
@@ -1453,6 +1453,7 @@ class YoutubeDL(object):
info_copy['id'] = ie.get_temp_id(ie_result['url'])
self.add_default_extra_info(info_copy, ie, ie_result['url'])
self.add_extra_info(info_copy, extra_info)
+ info_copy, _ = self.pre_process(info_copy)
self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
if self.params.get('force_write_download_archive', False):
self.record_download_archive(info_copy)
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index bedb5f7ab..baba5411e 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -194,12 +194,11 @@ def _real_main(argv=None):
if opts.concurrent_fragment_downloads <= 0:
parser.error('Concurrent fragments must be positive')
if opts.wait_for_video is not None:
- mobj = re.match(r'(?P<min>\d+)(?:-(?P<max>\d+))?$', opts.wait_for_video)
- if not mobj:
- parser.error('Invalid time range to wait')
- min_wait, max_wait = map(int_or_none, mobj.group('min', 'max'))
- if max_wait is not None and max_wait < min_wait:
+ min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
+ if min_wait is None or (max_wait is None and '-' in opts.wait_for_video):
parser.error('Invalid time range to wait')
+ elif max_wait is not None and max_wait < min_wait:
+ parser.error('Minimum time range to wait must not be longer than the maximum')
opts.wait_for_video = (min_wait, max_wait)
def parse_retries(retries, name=''):
@@ -556,13 +555,12 @@ def _real_main(argv=None):
'_from_cli': True,
})
if opts.embedthumbnail:
- already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
postprocessors.append({
'key': 'EmbedThumbnail',
# already_have_thumbnail = True prevents the file from being deleted after embedding
- 'already_have_thumbnail': already_have_thumbnail
+ 'already_have_thumbnail': opts.writethumbnail
})
- if not already_have_thumbnail:
+ if not opts.writethumbnail:
opts.writethumbnail = True
opts.outtmpl['pl_thumbnail'] = ''
if opts.split_chapters:
@@ -692,8 +690,8 @@ def _real_main(argv=None):
'allow_playlist_files': opts.allow_playlist_files,
'clean_infojson': opts.clean_infojson,
'getcomments': opts.getcomments,
- 'writethumbnail': opts.writethumbnail,
- 'write_all_thumbnails': opts.write_all_thumbnails,
+ 'writethumbnail': opts.writethumbnail is True,
+ 'write_all_thumbnails': opts.writethumbnail == 'all',
'writelink': opts.writelink,
'writeurllink': opts.writeurllink,
'writewebloclink': opts.writewebloclink,
diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py
index f766dfbb7..6ca2f38b5 100644
--- a/yt_dlp/extractor/ceskatelevize.py
+++ b/yt_dlp/extractor/ceskatelevize.py
@@ -12,8 +12,7 @@ from ..utils import (
ExtractorError,
float_or_none,
sanitized_Request,
- unescapeHTML,
- update_url_query,
+ traverse_obj,
urlencode_postdata,
USER_AGENTS,
)
@@ -99,11 +98,13 @@ class CeskaTelevizeIE(InfoExtractor):
playlist_description = playlist_description.replace('\xa0', ' ')
if parsed_url.path.startswith('/porady/'):
- refer_url = update_url_query(unescapeHTML(self._search_regex(
- (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
- webpage, 'iframe player url', group='url')), query={'autoStart': 'true'})
- webpage = self._download_webpage(refer_url, playlist_id)
+ next_data = self._search_nextjs_data(webpage, playlist_id)
+ idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+ if not idec:
+ raise ExtractorError('Failed to find IDEC id')
+ iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
+ webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
+ query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})
NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 2180f879c..d8fc5272c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1513,6 +1513,24 @@ class InfoExtractor(object):
webpage, 'next.js data', **kw),
video_id, **kw)
+ def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'):
+ ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
+ # not all websites do this, but it can be changed
+ # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source
+ rectx = re.escape(context_name)
+ js, arg_keys, arg_vals = self._search_regex(
+ (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx,
+ r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx),
+ webpage, context_name, group=['js', 'arg_keys', 'arg_vals'])
+
+ args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
+
+ for key, val in args.items():
+ if val in ('undefined', 'void 0'):
+ args[key] = 'null'
+
+ return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+
@staticmethod
def _hidden_inputs(html):
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 4fcf1d8ed..ee888e9d3 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -675,16 +675,16 @@ class NicovideoSearchBaseIE(InfoExtractor):
if not results:
break
+ def _search_results(self, query):
+ return self._entries(
+ self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
+
class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
IE_DESC = 'Nico video search'
IE_NAME = 'nicovideo:search'
_SEARCH_KEY = 'nicosearch'
- def _search_results(self, query):
- return self._entries(
- self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
-
class NicovideoSearchURLIE(NicovideoSearchBaseIE):
IE_NAME = f'{NicovideoSearchIE.IE_NAME}_url'
diff --git a/yt_dlp/extractor/ntvcojp.py b/yt_dlp/extractor/ntvcojp.py
index 0c8221b22..c9af91188 100644
--- a/yt_dlp/extractor/ntvcojp.py
+++ b/yt_dlp/extractor/ntvcojp.py
@@ -3,8 +3,9 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- js_to_json,
+ ExtractorError,
smuggle_url,
+ traverse_obj,
)
@@ -19,7 +20,7 @@ class NTVCoJpCUIE(InfoExtractor):
'ext': 'mp4',
'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
'upload_date': '20181213',
- 'description': 'md5:211b52f4fd60f3e0e72b68b0c6ba52a9',
+ 'description': 'md5:1985b51a9abc285df0104d982a325f2a',
'uploader_id': '3855502814001',
'timestamp': 1544669941,
},
@@ -28,22 +29,30 @@ class NTVCoJpCUIE(InfoExtractor):
'skip_download': True,
},
}
+
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
- player_config = self._parse_json(self._search_regex(
- r'(?s)PLAYER_CONFIG\s*=\s*({.+?})',
- webpage, 'player config'), display_id, js_to_json)
- video_id = player_config['videoId']
- account_id = player_config.get('account') or '3855502814001'
+ player_config = self._search_nuxt_data(webpage, display_id)
+ video_id = traverse_obj(player_config, ('movie', 'video_id'))
+ if not video_id:
+ raise ExtractorError('Failed to extract video ID for Brightcove')
+ account_id = traverse_obj(player_config, ('player', 'account')) or '3855502814001'
+ title = traverse_obj(player_config, ('movie', 'name'))
+ if not title:
+ og_title = self._og_search_title(webpage, fatal=False) or traverse_obj(player_config, ('player', 'title'))
+ if og_title:
+ title = og_title.split('(', 1)[0].strip()
+ description = (traverse_obj(player_config, ('movie', 'description'))
+ or self._html_search_meta(['description', 'og:description'], webpage))
return {
'_type': 'url_transparent',
'id': video_id,
'display_id': display_id,
- 'title': self._search_regex(r'<h1[^>]+class="title"[^>]*>([^<]+)', webpage, 'title').strip(),
- 'description': self._html_search_meta(['description', 'og:description'], webpage),
+ 'title': title,
+ 'description': description,
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
'ie_key': 'BrightcoveNew',
}
diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py
index 747ce5199..7fee54fee 100644
--- a/yt_dlp/extractor/redtube.py
+++ b/yt_dlp/extractor/redtube.py
@@ -17,17 +17,20 @@ from ..utils import (
class RedTubeIE(InfoExtractor):
_VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
_TESTS = [{
- 'url': 'http://www.redtube.com/66418',
- 'md5': 'fc08071233725f26b8f014dba9590005',
+ 'url': 'https://www.redtube.com/38864951',
+ 'md5': '4fba70cbca3aefd25767ab4b523c9878',
'info_dict': {
- 'id': '66418',
+ 'id': '38864951',
'ext': 'mp4',
- 'title': 'Sucked on a toilet',
- 'upload_date': '20110811',
- 'duration': 596,
+ 'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu',
+ 'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu',
+ 'upload_date': '20210111',
+ 'timestamp': 1610343109,
+ 'duration': 646,
'view_count': int,
'age_limit': 18,
- }
+ 'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg',
+ },
}, {
'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
'only_matching': True,
@@ -84,15 +87,25 @@ class RedTubeIE(InfoExtractor):
r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
'media definitions', default='{}'),
video_id, fatal=False)
- if medias and isinstance(medias, list):
- for media in medias:
+ for media in medias if isinstance(medias, list) else []:
+ format_url = url_or_none(media.get('videoUrl'))
+ if not format_url:
+ continue
+ format_id = media.get('format')
+ quality = media.get('quality')
+ if format_id == 'hls' or (format_id == 'mp4' and not quality):
+ more_media = self._download_json(format_url, video_id, fatal=False)
+ else:
+ more_media = [media]
+ for media in more_media if isinstance(more_media, list) else []:
format_url = url_or_none(media.get('videoUrl'))
if not format_url:
continue
- if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8':
+ format_id = media.get('format')
+ if format_id == 'hls' or determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls',
+ entry_protocol='m3u8_native', m3u8_id=format_id or 'hls',
fatal=False))
continue
format_id = media.get('quality')
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 7df23759a..daf1c7450 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import (
- js_to_json,
try_get,
unified_timestamp
)
@@ -14,17 +13,7 @@ class SovietsClosetBaseIE(InfoExtractor):
def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, name):
nuxt_jsonp = self._download_webpage(nuxt_jsonp_url, video_id, note=f'Downloading {name} __NUXT_JSONP__')
- js, arg_keys, arg_vals = self._search_regex(
- r'__NUXT_JSONP__\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)',
- nuxt_jsonp, '__NUXT_JSONP__', group=['js', 'arg_keys', 'arg_vals'])
-
- args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
-
- for key, val in args.items():
- if val in ('undefined', 'void 0'):
- args[key] = 'null'
-
- return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+ return self._search_nuxt_data(nuxt_jsonp, video_id, '__NUXT_JSONP__')
def video_meta(self, video_id, game_name, category_name, episode_number, stream_date):
title = game_name
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 0f807e805..120084046 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1183,7 +1183,10 @@ def parseOpts(overrideArguments=None):
thumbnail = optparse.OptionGroup(parser, 'Thumbnail Options')
thumbnail.add_option(
'--write-thumbnail',
- action='store_true', dest='writethumbnail', default=False,
+ action='callback', dest='writethumbnail', default=False,
+ # Should override --no-write-thumbnail, but not --write-all-thumbnails
+ callback=lambda option, _, __, parser: setattr(
+ parser.values, option.dest, getattr(parser.values, option.dest) or True),
help='Write thumbnail image to disk')
thumbnail.add_option(
'--no-write-thumbnail',
@@ -1191,7 +1194,7 @@ def parseOpts(overrideArguments=None):
help='Do not write thumbnail image to disk (default)')
thumbnail.add_option(
'--write-all-thumbnails',
- action='store_true', dest='write_all_thumbnails', default=False,
+ action='store_const', dest='writethumbnail', const='all',
help='Write all thumbnail image formats to disk')
thumbnail.add_option(
'--list-thumbnails',
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 18d531202..9172151f0 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3972,8 +3972,9 @@ def strftime_or_none(timestamp, date_format, default=None):
def parse_duration(s):
if not isinstance(s, compat_basestring):
return None
-
s = s.strip()
+ if not s:
+ return None
days, hours, mins, secs, ms = [None] * 5
m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)