diff options
| author | Bricio <216170+Bricio@users.noreply.github.com> | 2022-02-18 13:02:14 -0300 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-02-18 08:02:14 -0800 | 
| commit | 5625e6073fc9bd0ca944eaa18b1038ca8eddabd7 (patch) | |
| tree | df76b982bb941b55ccdbaa911c5112007f919908 | |
| parent | 0ad92dfb180e4b29f4fec91413474588b8c2b005 (diff) | |
| download | hypervideo-pre-5625e6073fc9bd0ca944eaa18b1038ca8eddabd7.tar.lz hypervideo-pre-5625e6073fc9bd0ca944eaa18b1038ca8eddabd7.tar.xz hypervideo-pre-5625e6073fc9bd0ca944eaa18b1038ca8eddabd7.zip | |
[Biqle] Fix extractor (#2731)
Closes #193
Authored by: Bricio
| -rw-r--r-- | yt_dlp/extractor/biqle.py | 93 | 
1 file changed, 51 insertions, 42 deletions
| diff --git a/yt_dlp/extractor/biqle.py b/yt_dlp/extractor/biqle.py index 17ebbb257..2b57bade3 100644 --- a/yt_dlp/extractor/biqle.py +++ b/yt_dlp/extractor/biqle.py @@ -3,27 +3,28 @@ from __future__ import unicode_literals  from .common import InfoExtractor  from .vk import VKIE -from ..compat import ( -    compat_b64decode, -    compat_urllib_parse_unquote, +from ..compat import compat_b64decode +from ..utils import ( +    int_or_none, +    js_to_json, +    traverse_obj, +    unified_timestamp,  ) -from ..utils import int_or_none  class BIQLEIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'      _TESTS = [{ -        # Youtube embed -        'url': 'https://biqle.ru/watch/-115995369_456239081', -        'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06', +        'url': 'https://biqle.ru/watch/-2000421746_85421746', +        'md5': 'ae6ef4f04d19ac84e4658046d02c151c',          'info_dict': { -            'id': '8v4f-avW-VI', +            'id': '-2000421746_85421746',              'ext': 'mp4', -            'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer", -            'description': 'Passe-Partout', -            'uploader_id': 'mrsimpsonstef3', -            'uploader': 'Phanolito', -            'upload_date': '20120822', +            'title': 'Forsaken By Hope Studio Clip', +            'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн', +            'upload_date': '19700101', +            'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb', +            'timestamp': 0,          },      }, {          'url': 'http://biqle.org/watch/-44781847_168547604', @@ -32,53 +33,62 @@ class BIQLEIE(InfoExtractor):              'id': '-44781847_168547604',              'ext': 'mp4',              'title': 'Ребенок в шоке от автоматической мойки', +           
 'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',              'timestamp': 1396633454, -            'uploader': 'Dmitry Kotov',              'upload_date': '20140404', -            'uploader_id': '47850140', +            'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',          },      }]      def _real_extract(self, url):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        embed_url = self._proto_relative_url(self._search_regex( -            r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>', -            webpage, 'embed url')) + +        title = self._html_search_meta('name', webpage, 'Title', fatal=False) +        timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None)) +        description = self._html_search_meta('description', webpage, 'Description', default=None) + +        global_embed_url = self._search_regex( +            r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'', +            webpage, 'global Embed url') +        hash = self._search_regex( +            r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash') + +        embed_url = global_embed_url + hash +          if VKIE.suitable(embed_url):              return self.url_result(embed_url, VKIE.ie_key(), video_id)          embed_page = self._download_webpage( -            embed_url, video_id, headers={'Referer': url}) -        video_ext = self._get_cookies(embed_url).get('video_ext') -        if video_ext: -            video_ext = compat_urllib_parse_unquote(video_ext.value) -        if not video_ext: -            video_ext = compat_b64decode(self._search_regex( -                r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)', -                embed_page, 'video_ext')).decode() -        video_id, sig, _, access_token = 
video_ext.split(':') +            embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url}) + +        glob_params = self._parse_json(self._search_regex( +            r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>', +            embed_page, 'Global Parameters'), video_id, transform_source=js_to_json) +        host_name = compat_b64decode(glob_params['server'][::-1]).decode() +          item = self._download_json( -            'https://api.vk.com/method/video.get', video_id, -            headers={'User-Agent': 'okhttp/3.4.1'}, query={ -                'access_token': access_token, -                'sig': sig, -                'v': 5.44, +            f'https://{host_name}/method/video.get/{video_id}', video_id, +            headers={'Referer': url}, query={ +                'token': glob_params['video']['access_token'],                  'videos': video_id, +                'ckey': glob_params['c_key'], +                'credentials': glob_params['video']['credentials'],              })['response']['items'][0] -        title = item['title']          formats = []          for f_id, f_url in item.get('files', {}).items():              if f_id == 'external':                  return self.url_result(f_url)              ext, height = f_id.split('_') -            formats.append({ -                'format_id': height + 'p', -                'url': f_url, -                'height': int_or_none(height), -                'ext': ext, -            }) +            height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height)) +            if height_extra_key: +                formats.append({ +                    'format_id': f'{height}p', +                    'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}', +                    'height': int_or_none(height), +                    'ext': ext, +                })          self._sort_formats(formats)          thumbnails = [] @@ 
-96,10 +106,9 @@ class BIQLEIE(InfoExtractor):              'title': title,              'formats': formats,              'comment_count': int_or_none(item.get('comments')), -            'description': item.get('description'), +            'description': description,              'duration': int_or_none(item.get('duration')),              'thumbnails': thumbnails, -            'timestamp': int_or_none(item.get('date')), -            'uploader': item.get('owner_id'), +            'timestamp': timestamp,              'view_count': int_or_none(item.get('views')),          } | 
