updated from upstream | 07/12/2021 at 12:26

author: Jesús <heckyel@hyperbola.info> 2021-12-07 12:26:51 -0500
committer: Jesús <heckyel@hyperbola.info> 2021-12-07 12:26:51 -0500
commit: 495746b9a6d4d32ddfa39ed908092d90a7cd5f3f (patch)
tree: 4845e40905136556b7513b9f36e3a70e505ee4c9 /yt_dlp/extractor
parent: 25831c5572c6e1d45bc05a122312516e0d264f8d (diff)
parent: ddd24c99493483bde822944e8063064f53464ac1 (diff)
download: hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.lz
hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.xz
hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.zip
6 files changed, 73 insertions, 43 deletions
diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py
index f766dfbb7..6ca2f38b5 100644
--- a/yt_dlp/extractor/ceskatelevize.py
+++ b/yt_dlp/extractor/ceskatelevize.py
@@ -12,8 +12,7 @@ from ..utils import (
     ExtractorError,
     float_or_none,
     sanitized_Request,
-    unescapeHTML,
-    update_url_query,
+    traverse_obj,
     urlencode_postdata,
     USER_AGENTS,
 )
@@ -99,11 +98,13 @@ class CeskaTelevizeIE(InfoExtractor):
             playlist_description = playlist_description.replace('\xa0', ' ')
 
         if parsed_url.path.startswith('/porady/'):
-            refer_url = update_url_query(unescapeHTML(self._search_regex(
-                (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-                 r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
-                webpage, 'iframe player url', group='url')), query={'autoStart': 'true'})
-            webpage = self._download_webpage(refer_url, playlist_id)
+            next_data = self._search_nextjs_data(webpage, playlist_id)
+            idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False)
+            if not idec:
+                raise ExtractorError('Failed to find IDEC id')
+            iframe_hash = self._download_webpage('https://www.ceskatelevize.cz/v-api/iframe-hash/', playlist_id)
+            webpage = self._download_webpage('https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', playlist_id,
+                                             query={'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', 'IDEC': idec})
 
         NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
         if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 2180f879c..d8fc5272c 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1513,6 +1513,24 @@ class InfoExtractor(object):
                 webpage, 'next.js data', **kw),
             video_id, **kw)
 
+    def _search_nuxt_data(self, webpage, video_id, context_name='__NUXT__'):
+        ''' Parses Nuxt.js metadata. This works as long as the function __NUXT__ invokes is a pure function. '''
+        # Not all websites use the default context name, because it can be changed or hidden; see:
+        # https://stackoverflow.com/questions/67463109/how-to-change-or-hide-nuxt-and-nuxt-keyword-in-page-source
+        rectx = re.escape(context_name)
+        js, arg_keys, arg_vals = self._search_regex(
+            (r'<script>window\.%s=\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.+?)\)\);?</script>' % rectx,
+             r'%s\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)' % rectx),
+            webpage, context_name, group=['js', 'arg_keys', 'arg_vals'])
+
+        args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
+
+        for key, val in args.items():
+            if val in ('undefined', 'void 0'):
+                args[key] = 'null'
+
+        return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+
     @staticmethod
     def _hidden_inputs(html):
         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 4fcf1d8ed..ee888e9d3 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -675,16 +675,16 @@ class NicovideoSearchBaseIE(InfoExtractor):
             if not results:
                 break
 
+    def _search_results(self, query):
+        return self._entries(
+            self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
+
 
 class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
     IE_DESC = 'Nico video search'
     IE_NAME = 'nicovideo:search'
     _SEARCH_KEY = 'nicosearch'
 
-    def _search_results(self, query):
-        return self._entries(
-            self._proto_relative_url(f'//www.nicovideo.jp/search/{query}'), query)
-
 
 class NicovideoSearchURLIE(NicovideoSearchBaseIE):
     IE_NAME = f'{NicovideoSearchIE.IE_NAME}_url'
diff --git a/yt_dlp/extractor/ntvcojp.py b/yt_dlp/extractor/ntvcojp.py
index 0c8221b22..c9af91188 100644
--- a/yt_dlp/extractor/ntvcojp.py
+++ b/yt_dlp/extractor/ntvcojp.py
@@ -3,8 +3,9 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    js_to_json,
+    ExtractorError,
     smuggle_url,
+    traverse_obj,
 )
 
 
@@ -19,7 +20,7 @@ class NTVCoJpCUIE(InfoExtractor):
             'ext': 'mp4',
             'title': '桜エビと炒り卵がポイント！ 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
             'upload_date': '20181213',
-            'description': 'md5:211b52f4fd60f3e0e72b68b0c6ba52a9',
+            'description': 'md5:1985b51a9abc285df0104d982a325f2a',
             'uploader_id': '3855502814001',
             'timestamp': 1544669941,
         },
@@ -28,22 +29,30 @@ class NTVCoJpCUIE(InfoExtractor):
             'skip_download': True,
         },
     }
+
     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
 
     def _real_extract(self, url):
         display_id = self._match_id(url)
         webpage = self._download_webpage(url, display_id)
-        player_config = self._parse_json(self._search_regex(
-            r'(?s)PLAYER_CONFIG\s*=\s*({.+?})',
-            webpage, 'player config'), display_id, js_to_json)
-        video_id = player_config['videoId']
-        account_id = player_config.get('account') or '3855502814001'
+        player_config = self._search_nuxt_data(webpage, display_id)
+        video_id = traverse_obj(player_config, ('movie', 'video_id'))
+        if not video_id:
+            raise ExtractorError('Failed to extract video ID for Brightcove')
+        account_id = traverse_obj(player_config, ('player', 'account')) or '3855502814001'
+        title = traverse_obj(player_config, ('movie', 'name'))
+        if not title:
+            og_title = self._og_search_title(webpage, fatal=False) or traverse_obj(player_config, ('player', 'title'))
+            if og_title:
+                title = og_title.split('(', 1)[0].strip()
+        description = (traverse_obj(player_config, ('movie', 'description'))
+                       or self._html_search_meta(['description', 'og:description'], webpage))
         return {
             '_type': 'url_transparent',
             'id': video_id,
             'display_id': display_id,
-            'title': self._search_regex(r'<h1[^>]+class="title"[^>]*>([^<]+)', webpage, 'title').strip(),
-            'description': self._html_search_meta(['description', 'og:description'], webpage),
+            'title': title,
+            'description': description,
             'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
             'ie_key': 'BrightcoveNew',
         }
diff --git a/yt_dlp/extractor/redtube.py b/yt_dlp/extractor/redtube.py
index 747ce5199..7fee54fee 100644
--- a/yt_dlp/extractor/redtube.py
+++ b/yt_dlp/extractor/redtube.py
@@ -17,17 +17,20 @@ from ..utils import (
 class RedTubeIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?:\w+\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
     _TESTS = [{
-        'url': 'http://www.redtube.com/66418',
-        'md5': 'fc08071233725f26b8f014dba9590005',
+        'url': 'https://www.redtube.com/38864951',
+        'md5': '4fba70cbca3aefd25767ab4b523c9878',
         'info_dict': {
-            'id': '66418',
+            'id': '38864951',
             'ext': 'mp4',
-            'title': 'Sucked on a toilet',
-            'upload_date': '20110811',
-            'duration': 596,
+            'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu',
+            'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu',
+            'upload_date': '20210111',
+            'timestamp': 1610343109,
+            'duration': 646,
             'view_count': int,
             'age_limit': 18,
-        }
+            'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg',
+        },
     }, {
         'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
         'only_matching': True,
@@ -84,15 +87,25 @@ class RedTubeIE(InfoExtractor):
                 r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
                 'media definitions', default='{}'),
             video_id, fatal=False)
-        if medias and isinstance(medias, list):
-            for media in medias:
+        for media in medias if isinstance(medias, list) else []:
+            format_url = url_or_none(media.get('videoUrl'))
+            if not format_url:
+                continue
+            format_id = media.get('format')
+            quality = media.get('quality')
+            if format_id == 'hls' or (format_id == 'mp4' and not quality):
+                more_media = self._download_json(format_url, video_id, fatal=False)
+            else:
+                more_media = [media]
+            for media in more_media if isinstance(more_media, list) else []:
                 format_url = url_or_none(media.get('videoUrl'))
                 if not format_url:
                     continue
-                if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8':
+                format_id = media.get('format')
+                if format_id == 'hls' or determine_ext(format_url) == 'm3u8':
                     formats.extend(self._extract_m3u8_formats(
                         format_url, video_id, 'mp4',
-                        entry_protocol='m3u8_native', m3u8_id='hls',
+                        entry_protocol='m3u8_native', m3u8_id=format_id or 'hls',
                         fatal=False))
                     continue
                 format_id = media.get('quality')
diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py
index 7df23759a..daf1c7450 100644
--- a/yt_dlp/extractor/sovietscloset.py
+++ b/yt_dlp/extractor/sovietscloset.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 from .common import InfoExtractor
 from ..utils import (
-    js_to_json,
     try_get,
     unified_timestamp
 )
@@ -14,17 +13,7 @@ class SovietsClosetBaseIE(InfoExtractor):
 
     def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, name):
         nuxt_jsonp = self._download_webpage(nuxt_jsonp_url, video_id, note=f'Downloading {name} __NUXT_JSONP__')
-        js, arg_keys, arg_vals = self._search_regex(
-            r'__NUXT_JSONP__\(.*?\(function\((?P<arg_keys>.*?)\)\{return\s(?P<js>\{.*?\})\}\((?P<arg_vals>.*?)\)',
-            nuxt_jsonp, '__NUXT_JSONP__', group=['js', 'arg_keys', 'arg_vals'])
-
-        args = dict(zip(arg_keys.split(','), arg_vals.split(',')))
-
-        for key, val in args.items():
-            if val in ('undefined', 'void 0'):
-                args[key] = 'null'
-
-        return self._parse_json(js_to_json(js, args), video_id)['data'][0]
+        return self._search_nuxt_data(nuxt_jsonp, video_id, '__NUXT_JSONP__')
 
     def video_meta(self, video_id, game_name, category_name, episode_number, stream_date):
         title = game_name
author	Jesús <heckyel@hyperbola.info>	2021-12-07 12:26:51 -0500
committer	Jesús <heckyel@hyperbola.info>	2021-12-07 12:26:51 -0500
commit	495746b9a6d4d32ddfa39ed908092d90a7cd5f3f (patch)
tree	4845e40905136556b7513b9f36e3a70e505ee4c9 /yt_dlp/extractor
parent	25831c5572c6e1d45bc05a122312516e0d264f8d (diff)
parent	ddd24c99493483bde822944e8063064f53464ac1 (diff)
download	hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.lz hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.tar.xz hypervideo-pre-495746b9a6d4d32ddfa39ed908092d90a7cd5f3f.zip