-rw-r--r--   yt_dlp/YoutubeDL.py                |  5
-rw-r--r--   yt_dlp/__init__.py                 | 18
-rw-r--r--   yt_dlp/extractor/ceskatelevize.py  | 15
-rw-r--r--   yt_dlp/extractor/common.py         | 18
-rw-r--r--   yt_dlp/extractor/niconico.py       |  8
-rw-r--r--   yt_dlp/extractor/ntvcojp.py        | 27
-rw-r--r--   yt_dlp/extractor/redtube.py        | 35
-rw-r--r--   yt_dlp/extractor/sovietscloset.py  | 13
-rw-r--r--   yt_dlp/options.py                  |  7
-rw-r--r--   yt_dlp/utils.py                    |  3
10 files changed, 91 insertions, 58 deletions
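Before the full diff, a minimal sketch of how the reworked --wait-for-video value is interpreted after this change. It assumes only yt_dlp.utils.parse_duration; the helper name parse_wait_for_video is made up for illustration and is not part of the patch.

    from yt_dlp.utils import parse_duration


    def parse_wait_for_video(value):
        # Each side of 'MIN[-MAX]' now goes through parse_duration, so plain
        # seconds ('90'), clock-style values ('1:30') and ranges ('1:30-5:00')
        # are all accepted.
        min_wait, max_wait, *_ = map(parse_duration, value.split('-', 1) + [None])
        if min_wait is None or (max_wait is None and '-' in value):
            raise ValueError('Invalid time range to wait')
        if max_wait is not None and max_wait < min_wait:
            raise ValueError('Minimum time range to wait must not be longer than the maximum')
        return min_wait, max_wait


    print(parse_wait_for_video('90'))         # 90 seconds, no upper bound
    print(parse_wait_for_video('1:30-5:00'))  # wait between 90 and 300 seconds

The small parse_duration change at the bottom of the diff makes an empty segment (e.g. '90-') come back as None, so it is rejected by the validation above.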
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 45500ab5a..e953916d5 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1370,11 +1370,11 @@ class YoutubeDL(object):
         min_wait, max_wait = self.params.get('wait_for_video')
         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
         if diff is None and ie_result.get('live_status') == 'is_upcoming':
-            diff = random.randrange(min_wait or 0, max_wait) if max_wait else min_wait
+            diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
             self.report_warning('Release time of video is not known')
         elif (diff or 0) <= 0:
             self.report_warning('Video should already be available according to extracted info')
-        diff = min(max(diff, min_wait or 0), max_wait or float('inf'))
+        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')

         wait_till = time.time() + diff
@@ -1453,6 +1453,7 @@ class YoutubeDL(object):
                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
                 self.add_extra_info(info_copy, extra_info)
+                info_copy, _ = self.pre_process(info_copy)
                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
                 if self.params.get('force_write_download_archive', False):
                     self.record_download_archive(info_copy)
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index bedb5f7ab..baba5411e 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -194,12 +194,11 @@ def _real_main(argv=None):
     if opts.concurrent_fragment_downloads <= 0:
         parser.error('Concurrent fragments must be positive')
     if opts.wait_for_video is not None:
-        mobj = re.match(r'(?P<min>\d+)(?:-(?P<max>\d+))?$', opts.wait_for_video)
-        if not mobj:
-            parser.error('Invalid time range to wait')
-        min_wait, max_wait = map(int_or_none, mobj.group('min', 'max'))
-        if max_wait is not None and max_wait < min_wait:
+        min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
+        if min_wait is None or (max_wait is None and '-' in opts.wait_for_video):
             parser.error('Invalid time range to wait')
+        elif max_wait is not None and max_wait < min_wait:
+            parser.error('Minimum time range to wait must not be longer than the maximum')
         opts.wait_for_video = (min_wait, max_wait)

     def parse_retries(retries, name=''):
@@ -556,13 +555,12 @@ def _real_main(argv=None):
             '_from_cli': True,
         })
     if opts.embedthumbnail:
-        already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
         postprocessors.append({
             'key': 'EmbedThumbnail',
             # already_have_thumbnail = True prevents the file from being deleted after embedding
-            'already_have_thumbnail': already_have_thumbnail
+            'already_have_thumbnail': opts.writethumbnail
         })
-        if not already_have_thumbnail:
+        if not opts.writethumbnail:
             opts.writethumbnail = True
             opts.outtmpl['pl_thumbnail'] = ''
     if opts.split_chapters:
@@ -692,8 +690,8 @@ def _real_main(argv=None):
        'allow_playlist_files': opts.allow_playlist_files,
        'clean_infojson': opts.clean_infojson,
        'getcomments': opts.getcomments,
-       'writethumbnail': opts.writethumbnail,
-       'write_all_thumbnails': opts.write_all_thumbnails,
+       'writethumbnail': opts.writethumbnail is True,
+       'write_all_thumbnails': opts.writethumbnail == 'all',
        'writelink': opts.writelink,
        'writeurllink': opts.writeurllink,
        'writewebloclink': opts.writewebloclink,
diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py
index f766dfbb7..6ca2f38b5 100644
--- a/yt_dlp/extractor/ceskatelevize.py
+++ b/yt_dlp/extractor/ceskatelevize.py
@@ -12,8 +12,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     sanitized_Request,
-    unescapeHTML,
-    update_url_query,
+    traverse_obj,
     urlencode_postdata,
     USER_AGENTS,
 )
@@ -99,11 +98,13 @@ class CeskaTelevizeIE(InfoExtractor):
             playlist_description = playlist_description.replace('\xa0', ' ')

         if parsed_url.path.startswith('/porady/'):
-            refer_url = update_url_query(unescapeHTML(self._search_regex(
-                (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-                 r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
-                webpage, 'iframe player url', group='url')), query={'autoStart': 'true'})
-            webpage = self._download_webpage(refer_url, playlist_id)
+            next_data = self._search_nextjs_data(webpage, playlist_id)
+            idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+            if not idec:
+                raise ExtractorError('Failed to find IDEC id')
+            iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
+            webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
+                                             query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})

         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
         if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 2180f879c..d8fc5272c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1513,6 +1513,24 @@ class InfoExtractor(object):
                 webpage, 'next.js data', **kw),
             video_id, **kw)

+    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'):
+        ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
+        # not all website do this, but it can be changed
+        # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source
+        rectx = re.escape(context_name)
+        js, arg_keys, arg_vals = self._search_regex(
+            (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx,
+             r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx),
+            webpage, context_name, group=['js', 'arg_keys', 'arg_vals'])
+
+        args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
+
+        for key, val in args.items():
+            if val in ('undefined', 'void 0'):
+                args[key] = 'null'
+
+        return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+
     @staticmethod
     def _hidden_inputs(html):
         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 4fcf1d8ed..ee888e9d3 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -675,16 +675,16 @@ class NicovideoSearchBaseIE(InfoExtractor):
             if not results:
                 break

+    def _search_results(self, query):
+        return self._entries(
+            self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
+

 class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
     IE_DESC = 'Nico video search'
     IE_NAME = 'nicovideo:search'
     _SEARCH_KEY = 'nicosearch'

-    def _search_results(self, query):
-        return self._entries(
-            self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
-

 class NicovideoSearchURLIE(NicovideoSearchBaseIE):
     IE_NAME = f'{NicovideoSearchIE.IE_NAME}_url'
diff --git a/yt_dlp/extractor/ntvcojp.py b/yt_dlp/extractor/ntvcojp.py
index 0c8221b22..c9af91188 100644
--- a/yt_dlp/extractor/ntvcojp.py
+++ b/yt_dlp/extractor/ntvcojp.py
@@ -3,8 +3,9 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..utils import (
-    js_to_json,
+    ExtractorError,
     smuggle_url,
+    traverse_obj,
 )


@@ -19,7 +20,7 @@ class NTVCoJpCUIE(InfoExtractor):
            'ext': 'mp4',
            'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
            'upload_date': '20181213',
-           'description': 'md5:211b52f4fd60f3e0e72b68b0c6ba52a9',
+           'description': 'md5:1985b51a9abc285df0104d982a325f2a',
            'uploader_id': '3855502814001',
            'timestamp': 1544669941,
        },
@@ -28,22 +29,30 @@ class NTVCoJpCUIE(InfoExtractor):
            'skip_download': True,
        },
    }
+   BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
-       player_config = self._parse_json(self._search_regex(
-           r'(?s)PLAYER_CONFIG\s*=\s*({.+?})',
-           webpage, 'player config'), display_id, js_to_json)
-       video_id = player_config['videoId']
-       account_id = player_config.get('account') or '3855502814001'
+       player_config = self._search_nuxt_data(webpage, display_id)
+       video_id = traverse_obj(player_config, ('movie', 'video_id'))
+       if not video_id:
+           raise ExtractorError('Failed to extract video ID for Brightcove')
+       account_id = traverse_obj(player_config, ('player', 'account')) or '3855502814001'
+       title = traverse_obj(player_config, ('movie', 'name'))
+       if not title:
+           og_title = self._og_search_title(webpage, fatal=False) or traverse_obj(player_config, ('player', 'title'))
+           if og_title:
+               title = og_title.split('(', 1)[0].strip()
+       description = (traverse_obj(player_config, ('movie', 'description'))
+                      or self._html_search_meta(['description', 'og:description'], webpage))

        return {
            '_type': 'url_transparent',
            'id': video_id,
            'display_id': display_id,
-           'title': self._search_regex(r'<h1[^>]+class="title"[^>]*>([^<]+)', webpage, 'title').strip(),
-           'description': self._html_search_meta(['description', 'og:description'], webpage),
+           'title': title,
+           'description': description,
            'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
            'ie_key': 'BrightcoveNew',
        }
diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py
index 747ce5199..7fee54fee 100644
--- a/yt_dlp/extractor/redtube.py
+++ b/yt_dlp/extractor/redtube.py
@@ -17,17 +17,20 @@ from ..utils import (
 class RedTubeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://www.redtube.com/66418',
-        'md5': 'fc08071233725f26b8f014dba9590005',
+        'url': 'https://www.redtube.com/38864951',
+        'md5': '4fba70cbca3aefd25767ab4b523c9878',
         'info_dict': {
-            'id': '66418',
+            'id': '38864951',
             'ext': 'mp4',
-            'title': 'Sucked on a toilet',
-            'upload_date': '20110811',
-            'duration': 596,
+            'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu',
+            'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu',
+            'upload_date': '20210111',
+            'timestamp': 1610343109,
+            'duration': 646,
             'view_count': int,
             'age_limit': 18,
-        }
+            'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg',
+        },
     }, {
         'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
         'only_matching': True,
@@ -84,15 +87,25 @@ class RedTubeIE(InfoExtractor):
                 r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])',
                 webpage, 'media definitions', default='{}'),
             video_id, fatal=False)
-        if medias and isinstance(medias, list):
-            for media in medias:
+        for media in medias if isinstance(medias, list) else []:
+            format_url = url_or_none(media.get('videoUrl'))
+            if not format_url:
+                continue
+            format_id = media.get('format')
+            quality = media.get('quality')
+            if format_id == 'hls' or (format_id == 'mp4' and not quality):
+                more_media = self._download_json(format_url, video_id, fatal=False)
+            else:
+                more_media = [media]
+            for media in more_media if isinstance(more_media, list) else []:
                 format_url = url_or_none(media.get('videoUrl'))
                 if not format_url:
                     continue
-                if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8':
+                format_id = media.get('format')
+                if format_id == 'hls' or determine_ext(format_url) == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
                         format_url, video_id, 'mp4',
-                        entry_protocol='m3u8_native', m3u8_id='hls',
+                        entry_protocol='m3u8_native', m3u8_id=format_id or 'hls',
                         fatal=False))
                     continue
                 format_id = media.get('quality')
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 7df23759a..daf1c7450 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
 from ..utils import (
-    js_to_json,
     try_get,
     unified_timestamp
 )
@@ -14,17 +13,7 @@ class SovietsClosetBaseIE(InfoExtractor):

     def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, name):
         nuxt_jsonp = self._download_webpage(nuxt_jsonp_url, video_id, note=f'Downloading {name} __NUXT_JSONP__')
-        js, arg_keys, arg_vals = self._search_regex(
-            r'__NUXT_JSONP__\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)',
-            nuxt_jsonp, '__NUXT_JSONP__', group=['js', 'arg_keys', 'arg_vals'])
-
-        args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
-
-        for key, val in args.items():
-            if val in ('undefined', 'void 0'):
-                args[key] = 'null'
-
-        return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+        return self._search_nuxt_data(nuxt_jsonp, video_id, '__NUXT_JSONP__')

     def video_meta(self, video_id, game_name, category_name, episode_number, stream_date):
         title = game_name
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 0f807e805..120084046 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1183,7 +1183,10 @@ def parseOpts(overrideArguments=None):
     thumbnail = optparse.OptionGroup(parser, 'Thumbnail Options')
     thumbnail.add_option(
         '--write-thumbnail',
-        action='store_true', dest='writethumbnail', default=False,
+        action='callback', dest='writethumbnail', default=False,
+        # Should override --no-write-thumbnail, but not --write-all-thumbnail
+        callback=lambda option, _, __, parser: setattr(
+            parser.values, option.dest, getattr(parser.values, option.dest) or True),
         help='Write thumbnail image to disk')
     thumbnail.add_option(
         '--no-write-thumbnail',
@@ -1191,7 +1194,7 @@
         help='Do not write thumbnail image to disk (default)')
     thumbnail.add_option(
         '--write-all-thumbnails',
-        action='store_true', dest='write_all_thumbnails', default=False,
+        action='store_const', dest='writethumbnail', const='all',
         help='Write all thumbnail image formats to disk')
     thumbnail.add_option(
         '--list-thumbnails',
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 18d531202..9172151f0 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3972,8 +3972,9 @@ def strftime_or_none(timestamp, date_format, default=None):
 def parse_duration(s):
     if not isinstance(s, compat_basestring):
         return None
-
     s = s.strip()
+    if not s:
+        return None

     days, hours, mins, secs, ms = [None] * 5
     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
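For context on the new InfoExtractor._search_nuxt_data helper added in common.py above (and now shared by the sovietscloset and ntvcojp extractors), here is a minimal sketch of how another extractor module under yt_dlp/extractor/ might call it. The extractor name, URL pattern and the 'movie'/'name'/'video_url' keys are hypothetical, made up purely for illustration; they are not part of the patch.

    from .common import InfoExtractor
    from ..utils import traverse_obj


    class HypotheticalNuxtIE(InfoExtractor):
        _VALID_URL = r'https?://(?:www\.)?example\.com/watch/(?P<id>[^/?#]+)'

        def _real_extract(self, url):
            video_id = self._match_id(url)
            webpage = self._download_webpage(url, video_id)
            # Unpacks window.__NUXT__=(function(a,b,...){return {...}}(a,b,...))
            # from the page and returns the first element of its 'data' list
            nuxt_data = self._search_nuxt_data(webpage, video_id)
            return {
                'id': video_id,
                # placeholder keys; a real site exposes its own structure
                # inside the Nuxt payload
                'title': traverse_obj(nuxt_data, ('movie', 'name')) or self._og_search_title(webpage),
                'url': traverse_obj(nuxt_data, ('movie', 'video_url')),
            }

Centralising this parsing in common.py is what lets sovietscloset.py drop its local copy of the regex and ntvcojp.py switch from the old PLAYER_CONFIG JSON to the same code path.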