updated from upstream | 07/12/2021 at 12:26

author: Jesús <heckyel@hyperbola.info> 2021-12-07 12:26:51 -0500
committer: Jesús <heckyel@hyperbola.info> 2021-12-07 12:26:51 -0500
commit: 495746b9a6d4d32ddfa39ed908092d90a7cd5f3f (patch)
tree: 4845e40905136556b7513b9f36e3a70e505ee4c9
parent: 25831c5572c6e1d45bc05a122312516e0d264f8d (diff)
parent: ddd24c99493483bde822944e8063064f53464ac1 (diff)
download: hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.lz
hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.xz
hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.zip
10 files changed, 91 insertions, 58 deletions
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 45500ab5a..e953916d5 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1370,11 +1370,11 @@ class YoutubeDL(object):
         min_wait, max_wait = self.params.get('wait_for_video')
         diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time())
         if diff is None and ie_result.get('live_status') == 'is_upcoming':
-            diff = random.randrange(min_wait or 0, max_wait) if max_wait else min_wait
+            diff = random.randrange(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait)
             self.report_warning('Release time of video is not known')
         elif (diff or 0) <= 0:
             self.report_warning('Video should already be available according to extracted info')
-        diff = min(max(diff, min_wait or 0), max_wait or float('inf'))
+        diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf'))
         self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now')
 
         wait_till = time.time() + diff
@@ -1453,6 +1453,7 @@ class YoutubeDL(object):
                     info_copy['id'] = ie.get_temp_id(ie_result['url'])
                 self.add_default_extra_info(info_copy, ie, ie_result['url'])
                 self.add_extra_info(info_copy, extra_info)
+                info_copy, _ = self.pre_process(info_copy)
                 self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
                 if self.params.get('force_write_download_archive', False):
                     self.record_download_archive(info_copy)
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index bedb5f7ab..baba5411e 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -194,12 +194,11 @@ def _real_main(argv=None):
     if opts.concurrent_fragment_downloads <= 0:
         parser.error('Concurrent fragments must be positive')
     if opts.wait_for_video is not None:
-        mobj = re.match(r'(?P<min>\d+)(?:-(?P<max>\d+))?$', opts.wait_for_video)
-        if not mobj:
-            parser.error('Invalid time range to wait')
-        min_wait, max_wait = map(int_or_none, mobj.group('min', 'max'))
-        if max_wait is not None and max_wait < min_wait:
+        min_wait, max_wait, *_ = map(parse_duration, opts.wait_for_video.split('-', 1) + [None])
+        if min_wait is None or (max_wait is None and '-' in opts.wait_for_video):
             parser.error('Invalid time range to wait')
+        elif max_wait is not None and max_wait < min_wait:
+            parser.error('Minimum time range to wait must not be longer than the maximum')
         opts.wait_for_video = (min_wait, max_wait)
 
     def parse_retries(retries, name=''):
@@ -556,13 +555,12 @@ def _real_main(argv=None):
             '_from_cli': True,
         })
     if opts.embedthumbnail:
-        already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
         postprocessors.append({
             'key': 'EmbedThumbnail',
             # already_have_thumbnail = True prevents the file from being deleted after embedding
-            'already_have_thumbnail': already_have_thumbnail
+            'already_have_thumbnail': opts.writethumbnail
         })
-        if not already_have_thumbnail:
+        if not opts.writethumbnail:
             opts.writethumbnail = True
             opts.outtmpl['pl_thumbnail'] = ''
     if opts.split_chapters:
@@ -692,8 +690,8 @@ def _real_main(argv=None):
         'allow_playlist_files': opts.allow_playlist_files,
         'clean_infojson': opts.clean_infojson,
         'getcomments': opts.getcomments,
-        'writethumbnail': opts.writethumbnail,
-        'write_all_thumbnails': opts.write_all_thumbnails,
+        'writethumbnail': opts.writethumbnail is True,
+        'write_all_thumbnails': opts.writethumbnail == 'all',
         'writelink': opts.writelink,
         'writeurllink': opts.writeurllink,
         'writewebloclink': opts.writewebloclink,
diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py
index f766dfbb7..6ca2f38b5 100644
--- a/yt_dlp/extractor/ceskatelevize.py
+++ b/yt_dlp/extractor/ceskatelevize.py
@@ -12,8 +12,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     sanitized_Request,
-    unescapeHTML,
-    update_url_query,
+    traverse_obj,
     urlencode_postdata,
     USER_AGENTS,
 )
@@ -99,11 +98,13 @@ class CeskaTelevizeIE(InfoExtractor):
             playlist_description = playlist_description.replace('\xa0', ' ')
 
         if parsed_url.path.startswith('/porady/'):
-            refer_url = update_url_query(unescapeHTML(self._search_regex(
-                (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-                 r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
-                webpage, 'iframe player url', group='url')), query={'autoStart': 'true'})
-            webpage = self._download_webpage(refer_url, playlist_id)
+            next_data = self._search_nextjs_data(webpage, playlist_id)
+            idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+            if not idec:
+                raise ExtractorError('Failed to find IDEC id')
+            iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
+            webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
+                                             query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})
 
         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
         if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 2180f879c..d8fc5272c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1513,6 +1513,24 @@ class InfoExtractor(object):
                 webpage, 'next.js data', **kw),
             video_id, **kw)
 
+    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'):
+        ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
+        # not all websites do this, but the __NUXT__ context name can be changed
+        # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source
+        rectx = re.escape(context_name)
+        js, arg_keys, arg_vals = self._search_regex(
+            (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx,
+             r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx),
+            webpage, context_name, group=['js', 'arg_keys', 'arg_vals'])
+
+        args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
+
+        for key, val in args.items():
+            if val in ('undefined', 'void 0'):
+                args[key] = 'null'
+
+        return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+
     @staticmethod
     def _hidden_inputs(html):
         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 4fcf1d8ed..ee888e9d3 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -675,16 +675,16 @@ class NicovideoSearchBaseIE(InfoExtractor):
             if not results:
                 break
 
+    def _search_results(self, query):
+        return self._entries(
+            self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
+
 
 class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
     IE_DESC = 'Nico video search'
     IE_NAME = 'nicovideo:search'
     _SEARCH_KEY = 'nicosearch'
 
-    def _search_results(self, query):
-        return self._entries(
-            self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
-
 
 class NicovideoSearchURLIE(NicovideoSearchBaseIE):
     IE_NAME = f'{NicovideoSearchIE.IE_NAME}_url'
diff --git a/yt_dlp/extractor/ntvcojp.py b/yt_dlp/extractor/ntvcojp.py
index 0c8221b22..c9af91188 100644
--- a/yt_dlp/extractor/ntvcojp.py
+++ b/yt_dlp/extractor/ntvcojp.py
@@ -3,8 +3,9 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    js_to_json,
+    ExtractorError,
     smuggle_url,
+    traverse_obj,
 )
 
 
@@ -19,7 +20,7 @@ class NTVCoJpCUIE(InfoExtractor):
             'ext': 'mp4',
             'title': '桜エビと炒り卵がポイント！ 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
             'upload_date': '20181213',
-            'description': 'md5:211b52f4fd60f3e0e72b68b0c6ba52a9',
+            'description': 'md5:1985b51a9abc285df0104d982a325f2a',
             'uploader_id': '3855502814001',
             'timestamp': 1544669941,
         },
@@ -28,22 +29,30 @@ class NTVCoJpCUIE(InfoExtractor):
             'skip_download': True,
         },
     }
+
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        player_config = self._parse_json(self._search_regex(
-            r'(?s)PLAYER_CONFIG\s*=\s*({.+?})',
-            webpage, 'player config'), display_id, js_to_json)
-        video_id = player_config['videoId']
-        account_id = player_config.get('account') or '3855502814001'
+        player_config = self._search_nuxt_data(webpage, display_id)
+        video_id = traverse_obj(player_config, ('movie', 'video_id'))
+        if not video_id:
+            raise ExtractorError('Failed to extract video ID for Brightcove')
+        account_id = traverse_obj(player_config, ('player', 'account')) or '3855502814001'
+        title = traverse_obj(player_config, ('movie', 'name'))
+        if not title:
+            og_title = self._og_search_title(webpage, fatal=False) or traverse_obj(player_config, ('player', 'title'))
+            if og_title:
+                title = og_title.split('(', 1)[0].strip()
+        description = (traverse_obj(player_config, ('movie', 'description'))
+                       or self._html_search_meta(['description', 'og:description'], webpage))
         return {
             '_type': 'url_transparent',
             'id': video_id,
             'display_id': display_id,
-            'title': self._search_regex(r'<h1[^>]+class="title"[^>]*>([^<]+)', webpage, 'title').strip(),
-            'description': self._html_search_meta(['description', 'og:description'], webpage),
+            'title': title,
+            'description': description,
             'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
             'ie_key': 'BrightcoveNew',
         }
diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py
index 747ce5199..7fee54fee 100644
--- a/yt_dlp/extractor/redtube.py
+++ b/yt_dlp/extractor/redtube.py
@@ -17,17 +17,20 @@ from ..utils import (
 class RedTubeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://www.redtube.com/66418',
-        'md5': 'fc08071233725f26b8f014dba9590005',
+        'url': 'https://www.redtube.com/38864951',
+        'md5': '4fba70cbca3aefd25767ab4b523c9878',
         'info_dict': {
-            'id': '66418',
+            'id': '38864951',
             'ext': 'mp4',
-            'title': 'Sucked on a toilet',
-            'upload_date': '20110811',
-            'duration': 596,
+            'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu',
+            'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu',
+            'upload_date': '20210111',
+            'timestamp': 1610343109,
+            'duration': 646,
             'view_count': int,
             'age_limit': 18,
-        }
+            'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg',
+        },
     }, {
         'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
         'only_matching': True,
@@ -84,15 +87,25 @@ class RedTubeIE(InfoExtractor):
                 r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
                 'media definitions', default='{}'),
             video_id, fatal=False)
-        if medias and isinstance(medias, list):
-            for media in medias:
+        for media in medias if isinstance(medias, list) else []:
+            format_url = url_or_none(media.get('videoUrl'))
+            if not format_url:
+                continue
+            format_id = media.get('format')
+            quality = media.get('quality')
+            if format_id == 'hls' or (format_id == 'mp4' and not quality):
+                more_media = self._download_json(format_url, video_id, fatal=False)
+            else:
+                more_media = [media]
+            for media in more_media if isinstance(more_media, list) else []:
                 format_url = url_or_none(media.get('videoUrl'))
                 if not format_url:
                     continue
-                if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8':
+                format_id = media.get('format')
+                if format_id == 'hls' or determine_ext(format_url) == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
                         format_url, video_id, 'mp4',
-                        entry_protocol='m3u8_native', m3u8_id='hls',
+                        entry_protocol='m3u8_native', m3u8_id=format_id or 'hls',
                         fatal=False))
                     continue
                 format_id = media.get('quality')
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 7df23759a..daf1c7450 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    js_to_json,
     try_get,
     unified_timestamp
 )
@@ -14,17 +13,7 @@ class SovietsClosetBaseIE(InfoExtractor):
 
     def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, name):
         nuxt_jsonp = self._download_webpage(nuxt_jsonp_url, video_id, note=f'Downloading {name} __NUXT_JSONP__')
-        js, arg_keys, arg_vals = self._search_regex(
-            r'__NUXT_JSONP__\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)',
-            nuxt_jsonp, '__NUXT_JSONP__', group=['js', 'arg_keys', 'arg_vals'])
-
-        args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
-
-        for key, val in args.items():
-            if val in ('undefined', 'void 0'):
-                args[key] = 'null'
-
-        return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+        return self._search_nuxt_data(nuxt_jsonp, video_id, '__NUXT_JSONP__')
 
     def video_meta(self, video_id, game_name, category_name, episode_number, stream_date):
         title = game_name
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 0f807e805..120084046 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1183,7 +1183,10 @@ def parseOpts(overrideArguments=None):
     thumbnail = optparse.OptionGroup(parser, 'Thumbnail Options')
     thumbnail.add_option(
         '--write-thumbnail',
-        action='store_true', dest='writethumbnail', default=False,
+        action='callback', dest='writethumbnail', default=False,
+        # Should override --no-write-thumbnail, but not --write-all-thumbnails
+        callback=lambda option, _, __, parser: setattr(
+            parser.values, option.dest, getattr(parser.values, option.dest) or True),
         help='Write thumbnail image to disk')
     thumbnail.add_option(
         '--no-write-thumbnail',
@@ -1191,7 +1194,7 @@ def parseOpts(overrideArguments=None):
         help='Do not write thumbnail image to disk (default)')
     thumbnail.add_option(
         '--write-all-thumbnails',
-        action='store_true', dest='write_all_thumbnails', default=False,
+        action='store_const', dest='writethumbnail', const='all',
         help='Write all thumbnail image formats to disk')
     thumbnail.add_option(
         '--list-thumbnails',
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 18d531202..9172151f0 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3972,8 +3972,9 @@ def strftime_or_none(timestamp, date_format, default=None):
 def parse_duration(s):
     if not isinstance(s, compat_basestring):
         return None
-
     s = s.strip()
+    if not s:
+        return None
 
     days, hours, mins, secs, ms = [None] * 5
     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
author	Jesús <heckyel@hyperbola.info>	2021-12-07 12:26:51 -0500
committer	Jesús <heckyel@hyperbola.info>	2021-12-07 12:26:51 -0500
commit	495746b9a6d4d32ddfa39ed908092d90a7cd5f3f (patch)
tree	4845e40905136556b7513b9f36e3a70e505ee4c9
parent	25831c5572c6e1d45bc05a122312516e0d264f8d (diff)
parent	ddd24c99493483bde822944e8063064f53464ac1 (diff)
download	hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.lz hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.xz hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.zip