diff options
Diffstat (limited to 'yt_dlp')
-rw-r--r-- | yt_dlp/YoutubeDL.py | 3 | ||||
-rw-r--r-- | yt_dlp/__init__.py | 18 | ||||
-rw-r--r-- | yt_dlp/downloader/hls.py | 4 | ||||
-rw-r--r-- | yt_dlp/extractor/iqiyi.py | 549 | ||||
-rw-r--r-- | yt_dlp/extractor/ivi.py | 2 | ||||
-rw-r--r-- | yt_dlp/options.py | 4 | ||||
-rw-r--r-- | yt_dlp/swfinterp.py.disabled | 834 | ||||
-rw-r--r-- | yt_dlp/update.py | 288 | ||||
-rw-r--r-- | yt_dlp/utils.py | 357 |
9 files changed, 78 insertions, 1981 deletions
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 9892ed328..4438e40e9 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -157,8 +157,7 @@ from .postprocessor import ( MoveFilesAfterDownloadPP, _PLUGIN_CLASSES as plugin_postprocessors ) -from .update import detect_variant -from .version import __version__, RELEASE_GIT_HEAD +from .version import __version__ if compat_os_name == 'nt': import ctypes diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index b93f47ecc..7469b0f61 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -1,9 +1,7 @@ -#!/usr/bin/env python3 +#!/usr/bin/python # coding: utf-8 -f'You are using an unsupported version of Python. Only Python versions 3.6 and above are supported by yt-dlp' # noqa: F541 - -__license__ = 'Public Domain' +__license__ = 'CC0-1.0' import codecs import io @@ -43,7 +41,6 @@ from .utils import ( std_headers, write_string, ) -from .update import run_update from .downloader import ( FileDownloader, ) @@ -822,19 +819,8 @@ def _real_main(argv=None): if opts.rm_cachedir: ydl.cache.remove() - # Update version - if opts.update_self: - # If updater returns True, exit. Required for windows - if run_update(ydl): - if actual_use: - sys.exit('ERROR: The program must exit for the update to complete') - sys.exit() - # Maybe do nothing if not actual_use: - if opts.update_self or opts.rm_cachedir: - sys.exit() - ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv) parser.error( 'You must provide at least one URL.\n' diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index e932fd6ae..f3f32b514 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -72,9 +72,9 @@ class HlsFD(FragmentFD): can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s: if FFmpegFD.available(): - can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' + can_download, message = False, 'The stream has AES-128 encryption and pycryptodome is not available' else: - message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' + message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodome are available; ' 'Decryption will be performed natively, but will be extremely slow') if not can_download: has_drm = re.search('|'.join([ diff --git a/yt_dlp/extractor/iqiyi.py b/yt_dlp/extractor/iqiyi.py index 74e20a54a..b13b9f4cf 100644 --- a/yt_dlp/extractor/iqiyi.py +++ b/yt_dlp/extractor/iqiyi.py @@ -9,30 +9,14 @@ import time from .common import InfoExtractor from ..compat import ( compat_str, - compat_urllib_parse_urlencode, - compat_urllib_parse_unquote ) -from .openload import PhantomJSwrapper from ..utils import ( clean_html, - decode_packed_codes, - ExtractorError, - float_or_none, - format_field, get_element_by_id, get_element_by_attribute, - int_or_none, - js_to_json, + ExtractorError, ohdave_rsa_encrypt, - parse_age_limit, - parse_duration, - parse_iso8601, - parse_resolution, - qualities, remove_start, - str_or_none, - traverse_obj, - urljoin, ) @@ -40,135 +24,6 @@ def md5_text(text): return hashlib.md5(text.encode('utf-8')).hexdigest() -class IqiyiSDK(object): - def __init__(self, target, ip, timestamp): - self.target = target - self.ip = ip - self.timestamp = timestamp - - @staticmethod - def split_sum(data): - return compat_str(sum(map(lambda p: int(p, 16), list(data)))) - - @staticmethod - def digit_sum(num): - if isinstance(num, int): - num = compat_str(num) - return compat_str(sum(map(int, num))) - - def even_odd(self): - even = self.digit_sum(compat_str(self.timestamp)[::2]) - odd = self.digit_sum(compat_str(self.timestamp)[1::2]) - return even, odd - - def preprocess(self, chunksize): - self.target = md5_text(self.target) - chunks = [] - for i in range(32 // chunksize): - chunks.append(self.target[chunksize * i:chunksize * (i + 1)]) - if 32 % chunksize: - chunks.append(self.target[32 - 32 % chunksize:]) - return chunks, list(map(int, self.ip.split('.'))) - - def mod(self, modulus): - chunks, ip = self.preprocess(32) - self.target = chunks[0] + ''.join(map(lambda p: compat_str(p % modulus), ip)) - - def split(self, chunksize): - modulus_map = { - 4: 256, - 5: 10, - 8: 100, - } - - chunks, ip = self.preprocess(chunksize) - ret = '' - for i in range(len(chunks)): - ip_part = compat_str(ip[i] % modulus_map[chunksize]) if i < 4 else '' - if chunksize == 8: - ret += ip_part + chunks[i] - else: - ret += chunks[i] + ip_part - self.target = ret - - def handle_input16(self): - self.target = md5_text(self.target) - self.target = self.split_sum(self.target[:16]) + self.target + self.split_sum(self.target[16:]) - - def handle_input8(self): - self.target = md5_text(self.target) - ret = '' - for i in range(4): - part = self.target[8 * i:8 * (i + 1)] - ret += self.split_sum(part) + part - self.target = ret - - def handleSum(self): - self.target = md5_text(self.target) - self.target = self.split_sum(self.target) + self.target - - def date(self, scheme): - self.target = md5_text(self.target) - d = time.localtime(self.timestamp) - strings = { - 'y': compat_str(d.tm_year), - 'm': '%02d' % d.tm_mon, - 'd': '%02d' % d.tm_mday, - } - self.target += ''.join(map(lambda c: strings[c], list(scheme))) - - def split_time_even_odd(self): - even, odd = self.even_odd() - self.target = odd + md5_text(self.target) + even - - def split_time_odd_even(self): - even, odd = self.even_odd() - self.target = even + md5_text(self.target) + odd - - def split_ip_time_sum(self): - chunks, ip = self.preprocess(32) - self.target = compat_str(sum(ip)) + chunks[0] + self.digit_sum(self.timestamp) - - def split_time_ip_sum(self): - chunks, ip = self.preprocess(32) - self.target = self.digit_sum(self.timestamp) + chunks[0] + compat_str(sum(ip)) - - -class IqiyiSDKInterpreter(object): - def __init__(self, sdk_code): - self.sdk_code = sdk_code - - def run(self, target, ip, timestamp): - self.sdk_code = decode_packed_codes(self.sdk_code) - - functions = re.findall(r'input=([a-zA-Z0-9]+)\(input', self.sdk_code) - - sdk = IqiyiSDK(target, ip, timestamp) - - other_functions = { - 'handleSum': sdk.handleSum, - 'handleInput8': sdk.handle_input8, - 'handleInput16': sdk.handle_input16, - 'splitTimeEvenOdd': sdk.split_time_even_odd, - 'splitTimeOddEven': sdk.split_time_odd_even, - 'splitIpTimeSum': sdk.split_ip_time_sum, - 'splitTimeIpSum': sdk.split_time_ip_sum, - } - for function in functions: - if re.match(r'mod\d+', function): - sdk.mod(int(function[3:])) - elif re.match(r'date[ymd]{3}', function): - sdk.date(function[4:]) - elif re.match(r'split\d+', function): - sdk.split(int(function[5:])) - elif function in other_functions: - other_functions[function]() - else: - raise ExtractorError('Unknown function %s' % function) - - return sdk.target - - class IqiyiIE(InfoExtractor): IE_NAME = 'iqiyi' IE_DESC = '爱奇艺' @@ -253,51 +108,7 @@ class IqiyiIE(InfoExtractor): return ohdave_rsa_encrypt(data, e, N) def _login(self): - username, password = self._get_login_info() - - # No authentication to be performed - if not username: - return True - - data = self._download_json( - 'http://kylin.iqiyi.com/get_token', None, - note='Get token for logging', errnote='Unable to get token for logging') - sdk = data['sdk'] - timestamp = int(time.time()) - target = '/apis/reglogin/login.action?lang=zh_TW&area_code=null&email=%s&passwd=%s&agenttype=1&from=undefined&keeplogin=0&piccode=&fromurl=&_pos=1' % ( - username, self._rsa_fun(password.encode('utf-8'))) - - interp = IqiyiSDKInterpreter(sdk) - sign = interp.run(target, data['ip'], timestamp) - - validation_params = { - 'target': target, - 'server': 'BEA3AA1908656AABCCFF76582C4C6660', - 'token': data['token'], - 'bird_src': 'f8d91d57af224da7893dd397d52d811a', - 'sign': sign, - 'bird_t': timestamp, - } - validation_result = self._download_json( - 'http://kylin.iqiyi.com/validate?' + compat_urllib_parse_urlencode(validation_params), None, - note='Validate credentials', errnote='Unable to validate credentials') - - MSG_MAP = { - 'P00107': 'please login via the web interface and enter the CAPTCHA code', - 'P00117': 'bad username or password', - } - - code = validation_result['code'] - if code != 'A00000': - msg = MSG_MAP.get(code) - if not msg: - msg = 'error %s' % code - if validation_result.get('msg'): - msg += ': ' + validation_result['msg'] - self.report_warning('unable to log in: ' + msg) - return False - - return True + raise ExtractorError("iQiyi's non-free authentication algorithm has made login impossible", expected=True) def get_raw_data(self, tvid, video_id): tm = int(time.time() * 1000) @@ -406,359 +217,3 @@ class IqiyiIE(InfoExtractor): 'title': title, 'formats': formats, } - - -class IqIE(InfoExtractor): - IE_NAME = 'iq.com' - IE_DESC = 'International version of iQiyi' - _VALID_URL = r'https?://(?:www\.)?iq\.com/play/(?:[\w%-]*-)?(?P<id>\w+)' - _TESTS = [{ - 'url': 'https://www.iq.com/play/one-piece-episode-1000-1ma1i6ferf4', - 'md5': '2d7caf6eeca8a32b407094b33b757d39', - 'info_dict': { - 'ext': 'mp4', - 'id': '1ma1i6ferf4', - 'title': '航海王 第1000集', - 'description': 'Subtitle available on Sunday 4PM(GMT+8).', - 'duration': 1430, - 'timestamp': 1637488203, - 'upload_date': '20211121', - 'episode_number': 1000, - 'episode': 'Episode 1000', - 'series': 'One Piece', - 'age_limit': 13, - 'average_rating': float, - }, - 'params': { - 'format': '500', - }, - 'expected_warnings': ['format is restricted'] - }, { - # VIP-restricted video - 'url': 'https://www.iq.com/play/mermaid-in-the-fog-2021-gbdpx13bs4', - 'only_matching': True - }] - _BID_TAGS = { - '100': '240P', - '200': '360P', - '300': '480P', - '500': '720P', - '600': '1080P', - '610': '1080P50', - '700': '2K', - '800': '4K', - } - _LID_TAGS = { - '1': 'zh_CN', - '2': 'zh_TW', - '3': 'en', - '18': 'th', - '21': 'my', - '23': 'vi', - '24': 'id', - '26': 'es', - '28': 'ar', - } - - _DASH_JS = ''' - console.log(page.evaluate(function() { - var tvid = "%(tvid)s"; var vid = "%(vid)s"; var src = "%(src)s"; - var uid = "%(uid)s"; var dfp = "%(dfp)s"; var mode = "%(mode)s"; var lang = "%(lang)s"; - var bid_list = %(bid_list)s; var ut_list = %(ut_list)s; var tm = new Date().getTime(); - var cmd5x_func = %(cmd5x_func)s; var cmd5x_exporter = {}; cmd5x_func({}, cmd5x_exporter, {}); var cmd5x = cmd5x_exporter.cmd5x; - var authKey = cmd5x(cmd5x('') + tm + '' + tvid); - var k_uid = Array.apply(null, Array(32)).map(function() {return Math.floor(Math.random() * 15).toString(16)}).join(''); - var dash_paths = {}; - bid_list.forEach(function(bid) { - var query = { - 'tvid': tvid, - 'bid': bid, - 'ds': 1, - 'vid': vid, - 'src': src, - 'vt': 0, - 'rs': 1, - 'uid': uid, - 'ori': 'pcw', - 'ps': 1, - 'k_uid': k_uid, - 'pt': 0, - 'd': 0, - 's': '', - 'lid': '', - 'slid': 0, - 'cf': '', - 'ct': '', - 'authKey': authKey, - 'k_tag': 1, - 'ost': 0, - 'ppt': 0, - 'dfp': dfp, - 'prio': JSON.stringify({ - 'ff': 'f4v', - 'code': 2 - }), - 'k_err_retries': 0, - 'up': '', - 'su': 2, - 'applang': lang, - 'sver': 2, - 'X-USER-MODE': mode, - 'qd_v': 2, - 'tm': tm, - 'qdy': 'a', - 'qds': 0, - 'k_ft1': 141287244169348, - 'k_ft4': 34359746564, - 'k_ft5': 1, - 'bop': JSON.stringify({ - 'version': '10.0', - 'dfp': dfp - }), - }; - var enc_params = []; - for (var prop in query) { - enc_params.push(encodeURIComponent(prop) + '=' + encodeURIComponent(query[prop])); - } - ut_list.forEach(function(ut) { - enc_params.push('ut=' + ut); - }) - var dash_path = '/dash?' + enc_params.join('&'); dash_path += '&vf=' + cmd5x(dash_path); - dash_paths[bid] = dash_path; - }); - return JSON.stringify(dash_paths); - })); - saveAndExit(); - ''' - - def _extract_vms_player_js(self, webpage, video_id): - player_js_cache = self._downloader.cache.load('iq', 'player_js') - if player_js_cache: - return player_js_cache - webpack_js_url = self._proto_relative_url(self._search_regex( - r'<script src="((?:https?)?//stc.iqiyipic.com/_next/static/chunks/webpack-\w+\.js)"', webpage, 'webpack URL')) - webpack_js = self._download_webpage(webpack_js_url, video_id, note='Downloading webpack JS', errnote='Unable to download webpack JS') - webpack_map1, webpack_map2 = [self._parse_json(js_map, video_id, transform_source=js_to_json) for js_map in self._search_regex( - r'\(({[^}]*})\[\w+\][^\)]*\)\s*\+\s*["\']\.["\']\s*\+\s*({[^}]*})\[\w+\]\+["\']\.js', webpack_js, 'JS locations', group=(1, 2))] - for module_index in reversed(list(webpack_map2.keys())): - module_js = self._download_webpage( - f'https://stc.iqiyipic.com/_next/static/chunks/{webpack_map1.get(module_index, module_index)}.{webpack_map2[module_index]}.js', - video_id, note=f'Downloading #{module_index} module JS', errnote='Unable to download module JS', fatal=False) or '' - if 'vms request' in module_js: - self._downloader.cache.store('iq', 'player_js', module_js) - return module_js - raise ExtractorError('Unable to extract player JS') - - def _extract_cmd5x_function(self, webpage, video_id): - return self._search_regex(r',\s*(function\s*\([^\)]*\)\s*{\s*var _qda.+_qdc\(\)\s*})\s*,', - self._extract_vms_player_js(webpage, video_id), 'signature function') - - def _update_bid_tags(self, webpage, video_id): - extracted_bid_tags = self._parse_json( - self._search_regex( - r'arguments\[1\][^,]*,\s*function\s*\([^\)]*\)\s*{\s*"use strict";?\s*var \w=({.+}})\s*,\s*\w\s*=\s*{\s*getNewVd', - self._extract_vms_player_js(webpage, video_id), 'video tags', default=''), - video_id, transform_source=js_to_json, fatal=False) - if not extracted_bid_tags: - return - self._BID_TAGS = { - bid: traverse_obj(extracted_bid_tags, (bid, 'value'), expected_type=str, default=self._BID_TAGS.get(bid)) - for bid in extracted_bid_tags.keys() - } - - def _get_cookie(self, name, default=None): - cookie = self._get_cookies('https://iq.com/').get(name) - return cookie.value if cookie else default - - def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - self._update_bid_tags(webpage, video_id) - - next_props = self._search_nextjs_data(webpage, video_id)['props'] - page_data = next_props['initialState']['play'] - video_info = page_data['curVideoInfo'] - - uid = traverse_obj( - self._parse_json( - self._get_cookie('I00002', '{}'), video_id, transform_source=compat_urllib_parse_unquote, fatal=False), - ('data', 'uid'), default=0) - - if uid: - vip_data = self._download_json( - 'https://pcw-api.iq.com/api/vtype', video_id, note='Downloading VIP data', errnote='Unable to download VIP data', query={ - 'batch': 1, - 'platformId': 3, - 'modeCode': self._get_cookie('mod', 'intl'), - 'langCode': self._get_cookie('lang', 'en_us'), - 'deviceId': self._get_cookie('QC005', '') - }, fatal=False) - ut_list = traverse_obj(vip_data, ('data', 'all_vip', ..., 'vipType'), expected_type=str_or_none, default=[]) - else: - ut_list = ['0'] - - # bid 0 as an initial format checker - dash_paths = self._parse_json(PhantomJSwrapper(self).get( - url, html='<!DOCTYPE html>', video_id=video_id, note2='Executing signature code', jscode=self._DASH_JS % { - 'tvid': video_info['tvId'], - 'vid': video_info['vid'], - 'src': traverse_obj(next_props, ('initialProps', 'pageProps', 'ptid'), - expected_type=str, default='04022001010011000000'), - 'uid': uid, - 'dfp': self._get_cookie('dfp', ''), - 'mode': self._get_cookie('mod', 'intl'), - 'lang': self._get_cookie('lang', 'en_us'), - 'bid_list': '[' + ','.join(['0', *self._BID_TAGS.keys()]) + ']', - 'ut_list': '[' + ','.join(ut_list) + ']', - 'cmd5x_func': self._extract_cmd5x_function(webpage, video_id), - })[1].strip(), video_id) - - formats, subtitles = [], {} - initial_format_data = self._download_json( - urljoin('https://cache-video.iq.com', dash_paths['0']), video_id, - note='Downloading initial video format info', errnote='Unable to download initial video format info')['data'] - - preview_time = traverse_obj( - initial_format_data, ('boss_ts', (None, 'data'), ('previewTime', 'rtime')), expected_type=float_or_none, get_all=False) - if traverse_obj(initial_format_data, ('boss_ts', 'data', 'prv'), expected_type=int_or_none): - self.report_warning('This preview video is limited%s' % format_field(preview_time, template='to %s seconds')) - - # TODO: Extract audio-only formats - for bid in set(traverse_obj(initial_format_data, ('program', 'video', ..., 'bid'), expected_type=str_or_none, default=[])): - dash_path = dash_paths.get(bid) - if not dash_path: - self.report_warning(f'Unknown format id: {bid}. It is currently not being extracted') - continue - format_data = traverse_obj(self._download_json( - urljoin('https://cache-video.iq.com', dash_path), video_id, - note=f'Downloading format data for {self._BID_TAGS[bid]}', errnote='Unable to download format data', - fatal=False), 'data', expected_type=dict) - - video_format = next((video_format for video_format in traverse_obj( - format_data, ('program', 'video', ...), expected_type=dict, default=[]) if str(video_format['bid']) == bid), {}) - extracted_formats = [] - if video_format.get('m3u8Url'): - extracted_formats.extend(self._extract_m3u8_formats( - urljoin(format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'), video_format['m3u8Url']), - 'mp4', m3u8_id=bid, fatal=False)) - if video_format.get('mpdUrl'): - # TODO: Properly extract mpd hostname - extracted_formats.extend(self._extract_mpd_formats( - urljoin(format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/'), video_format['mpdUrl']), - mpd_id=bid, fatal=False)) - if video_format.get('m3u8'): - ff = video_format.get('ff', 'ts') - if ff == 'ts': - m3u8_formats, _ = self._parse_m3u8_formats_and_subtitles( - video_format['m3u8'], ext='mp4', m3u8_id=bid, fatal=False) - extracted_formats.extend(m3u8_formats) - elif ff == 'm4s': - mpd_data = traverse_obj( - self._parse_json(video_format['m3u8'], video_id, fatal=False), ('payload', ..., 'data'), expected_type=str) - if not mpd_data: - continue - mpd_formats, _ = self._parse_mpd_formats_and_subtitles( - mpd_data, bid, format_data.get('dm3u8', 'https://cache-m.iq.com/dc/dt/')) - extracted_formats.extend(mpd_formats) - else: - self.report_warning(f'{ff} formats are currently not supported') - - if not extracted_formats: - if video_format.get('s'): - self.report_warning(f'{self._BID_TAGS[bid]} format is restricted') - else: - self.report_warning(f'Unable to extract {self._BID_TAGS[bid]} format') - for f in extracted_formats: - f.update({ - 'quality': qualities(list(self._BID_TAGS.keys()))(bid), - 'format_note': self._BID_TAGS[bid], - **parse_resolution(video_format.get('scrsz')) - }) - formats.extend(extracted_formats) - - self._sort_formats(formats) - - for sub_format in traverse_obj(initial_format_data, ('program', 'stl', ...), expected_type=dict, default=[]): - lang = self._LID_TAGS.get(str_or_none(sub_format.get('lid')), sub_format.get('_name')) - subtitles.setdefault(lang, []).extend([{ - 'ext': format_ext, - 'url': urljoin(initial_format_data.get('dstl', 'http://meta.video.iqiyi.com'), sub_format[format_key]) - } for format_key, format_ext in [('srt', 'srt'), ('webvtt', 'vtt')] if sub_format.get(format_key)]) - - extra_metadata = page_data.get('albumInfo') if video_info.get('albumId') and page_data.get('albumInfo') else video_info - return { - 'id': video_id, - 'title': video_info['name'], - 'formats': formats, - 'subtitles': subtitles, - 'description': video_info.get('mergeDesc'), - 'duration': parse_duration(video_info.get('len')), - 'age_limit': parse_age_limit(video_info.get('rating')), - 'average_rating': traverse_obj(page_data, ('playScoreInfo', 'score'), expected_type=float_or_none), - 'timestamp': parse_iso8601(video_info.get('isoUploadDate')), - 'categories': traverse_obj(extra_metadata, ('videoTagMap', ..., ..., 'name'), expected_type=str), - 'cast': traverse_obj(extra_metadata, ('actorArr', ..., 'name'), expected_type=str), - 'episode_number': int_or_none(video_info.get('order')) or None, - 'series': video_info.get('albumName'), - } - - -class IqAlbumIE(InfoExtractor): - IE_NAME = 'iq.com:album' - _VALID_URL = r'https?://(?:www\.)?iq\.com/album/(?:[\w%-]*-)?(?P<id>\w+)' - _TESTS = [{ - 'url': 'https://www.iq.com/album/one-piece-1999-1bk9icvr331', - 'info_dict': { - 'id': '1bk9icvr331', - 'title': 'One Piece', - 'description': 'Subtitle available on Sunday 4PM(GMT+8).' - }, - 'playlist_mincount': 238 - }, { - # Movie/single video - 'url': 'https://www.iq.com/album/九龙城寨-2021-22yjnij099k', - 'info_dict': { - 'ext': 'mp4', - 'id': '22yjnij099k', - 'title': '九龙城寨', - 'description': 'md5:8a09f50b8ba0db4dc69bc7c844228044', - 'duration': 5000, - 'timestamp': 1641911371, - 'upload_date': '20220111', - 'series': '九龙城寨', - 'cast': ['Shi Yan Neng', 'Yu Lang', 'Peter lv', 'Sun Zi Jun', 'Yang Xiao Bo'], - 'age_limit': 13, - 'average_rating': float, - }, - 'expected_warnings': ['format is restricted'] - }] - - def _entries(self, album_id_num, page_ranges, album_id=None, mode_code='intl', lang_code='en_us'): - for page_range in page_ranges: - page = self._download_json( - f'https://pcw-api.iq.com/api/episodeListSource/{album_id_num}', album_id, - note=f'Downloading video list episodes {page_range.get("msg", "")}', - errnote='Unable to download video list', query={ - 'platformId': 3, - 'modeCode': mode_code, - 'langCode': lang_code, - 'endOrder': page_range['to'], - 'startOrder': page_range['from'] - }) - for video in page['data']['epg']: - yield self.url_result('https://www.iq.com/play/%s' % (video.get('playLocSuffix') or video['qipuIdStr']), - IqIE.ie_key(), video.get('qipuIdStr'), video.get('name')) - - def _real_extract(self, url): - album_id = self._match_id(url) - webpage = self._download_webpage(url, album_id) - next_data = self._search_nextjs_data(webpage, album_id) - album_data = next_data['props']['initialState']['album']['videoAlbumInfo'] - - if album_data.get('videoType') == 'singleVideo': - return self.url_result('https://www.iq.com/play/%s' % album_id, IqIE.ie_key()) - return self.playlist_result( - self._entries(album_data['albumId'], album_data['totalPageRange'], album_id, - traverse_obj(next_data, ('props', 'initialProps', 'pageProps', 'modeCode')), - traverse_obj(next_data, ('props', 'initialProps', 'pageProps', 'langCode'))), - album_id, album_data.get('name'), album_data.get('desc')) diff --git a/yt_dlp/extractor/ivi.py b/yt_dlp/extractor/ivi.py index 5f8a046e0..098ab6665 100644 --- a/yt_dlp/extractor/ivi.py +++ b/yt_dlp/extractor/ivi.py @@ -141,7 +141,7 @@ class IviIE(InfoExtractor): elif site == 353: continue elif not pycryptodome_found: - raise ExtractorError('pycryptodomex not found. Please install', expected=True) + raise ExtractorError('pycryptodome not found. Please install', expected=True) elif message: extractor_msg += ': ' + message raise ExtractorError(extractor_msg % video_id, expected=True) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 2ba7d2601..eb21a25ac 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -240,10 +240,6 @@ def create_parser(): action='version', help='Print program version and exit') general.add_option( - '-U', '--update', - action='store_true', dest='update_self', - help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)') - general.add_option( '-i', '--ignore-errors', action='store_true', dest='ignoreerrors', help='Ignore download and postprocessing errors. The download will be considered successful even if the postprocessing fails') diff --git a/yt_dlp/swfinterp.py.disabled b/yt_dlp/swfinterp.py.disabled deleted file mode 100644 index 0c7158575..000000000 --- a/yt_dlp/swfinterp.py.disabled +++ /dev/null @@ -1,834 +0,0 @@ -from __future__ import unicode_literals - -import collections -import io -import zlib - -from .compat import ( - compat_str, - compat_struct_unpack, -) -from .utils import ( - ExtractorError, -) - - -def _extract_tags(file_contents): - if file_contents[1:3] != b'WS': - raise ExtractorError( - 'Not an SWF file; header is %r' % file_contents[:3]) - if file_contents[:1] == b'C': - content = zlib.decompress(file_contents[8:]) - else: - raise NotImplementedError( - 'Unsupported compression format %r' % - file_contents[:1]) - - # Determine number of bits in framesize rectangle - framesize_nbits = compat_struct_unpack('!B', content[:1])[0] >> 3 - framesize_len = (5 + 4 * framesize_nbits + 7) // 8 - - pos = framesize_len + 2 + 2 - while pos < len(content): - header16 = compat_struct_unpack('<H', content[pos:pos + 2])[0] - pos += 2 - tag_code = header16 >> 6 - tag_len = header16 & 0x3f - if tag_len == 0x3f: - tag_len = compat_struct_unpack('<I', content[pos:pos + 4])[0] - pos += 4 - assert pos + tag_len <= len(content), \ - ('Tag %d ends at %d+%d - that\'s longer than the file (%d)' - % (tag_code, pos, tag_len, len(content))) - yield (tag_code, content[pos:pos + tag_len]) - pos += tag_len - - -class _AVMClass_Object(object): - def __init__(self, avm_class): - self.avm_class = avm_class - - def __repr__(self): - return '%s#%x' % (self.avm_class.name, id(self)) - - -class _ScopeDict(dict): - def __init__(self, avm_class): - super(_ScopeDict, self).__init__() - self.avm_class = avm_class - - def __repr__(self): - return '%s__Scope(%s)' % ( - self.avm_class.name, - super(_ScopeDict, self).__repr__()) - - -class _AVMClass(object): - def __init__(self, name_idx, name, static_properties=None): - self.name_idx = name_idx - self.name = name - self.method_names = {} - self.method_idxs = {} - self.methods = {} - self.method_pyfunctions = {} - self.static_properties = static_properties if static_properties else {} - - self.variables = _ScopeDict(self) - self.constants = {} - - def make_object(self): - return _AVMClass_Object(self) - - def __repr__(self): - return '_AVMClass(%s)' % (self.name) - - def register_methods(self, methods): - self.method_names.update(methods.items()) - self.method_idxs.update(dict( - (idx, name) - for name, idx in methods.items())) - - -class _Multiname(object): - def __init__(self, kind): - self.kind = kind - - def __repr__(self): - return '[MULTINAME kind: 0x%x]' % self.kind - - -def _read_int(reader): - res = 0 - shift = 0 - for _ in range(5): - buf = reader.read(1) - assert len(buf) == 1 - b = compat_struct_unpack('<B', buf)[0] - res = res | ((b & 0x7f) << shift) - if b & 0x80 == 0: - break - shift += 7 - return res - - -def _u30(reader): - res = _read_int(reader) - assert res & 0xf0000000 == 0 - return res - - -_u32 = _read_int - - -def _s32(reader): - v = _read_int(reader) - if v & 0x80000000 != 0: - v = - ((v ^ 0xffffffff) + 1) - return v - - -def _s24(reader): - bs = reader.read(3) - assert len(bs) == 3 - last_byte = b'\xff' if (ord(bs[2:3]) >= 0x80) else b'\x00' - return compat_struct_unpack('<i', bs + last_byte)[0] - - -def _read_string(reader): - slen = _u30(reader) - resb = reader.read(slen) - assert len(resb) == slen - return resb.decode('utf-8') - - -def _read_bytes(count, reader): - assert count >= 0 - resb = reader.read(count) - assert len(resb) == count - return resb - - -def _read_byte(reader): - resb = _read_bytes(1, reader=reader) - res = compat_struct_unpack('<B', resb)[0] - return res - - -StringClass = _AVMClass('(no name idx)', 'String') -ByteArrayClass = _AVMClass('(no name idx)', 'ByteArray') -TimerClass = _AVMClass('(no name idx)', 'Timer') -TimerEventClass = _AVMClass('(no name idx)', 'TimerEvent', {'TIMER': 'timer'}) -_builtin_classes = { - StringClass.name: StringClass, - ByteArrayClass.name: ByteArrayClass, - TimerClass.name: TimerClass, - TimerEventClass.name: TimerEventClass, -} - - -class _Undefined(object): - def __bool__(self): - return False - __nonzero__ = __bool__ - - def __hash__(self): - return 0 - - def __str__(self): - return 'undefined' - __repr__ = __str__ - - -undefined = _Undefined() - - -class SWFInterpreter(object): - def __init__(self, file_contents): - self._patched_functions = { - (TimerClass, 'addEventListener'): lambda params: undefined, - } - code_tag = next(tag - for tag_code, tag in _extract_tags(file_contents) - if tag_code == 82) - p = code_tag.index(b'\0', 4) + 1 - code_reader = io.BytesIO(code_tag[p:]) - - # Parse ABC (AVM2 ByteCode) - - # Define a couple convenience methods - u30 = lambda *args: _u30(*args, reader=code_reader) - s32 = lambda *args: _s32(*args, reader=code_reader) - u32 = lambda *args: _u32(*args, reader=code_reader) - read_bytes = lambda *args: _read_bytes(*args, reader=code_reader) - read_byte = lambda *args: _read_byte(*args, reader=code_reader) - - # minor_version + major_version - read_bytes(2 + 2) - - # Constant pool - int_count = u30() - self.constant_ints = [0] - for _c in range(1, int_count): - self.constant_ints.append(s32()) - self.constant_uints = [0] - uint_count = u30() - for _c in range(1, uint_count): - self.constant_uints.append(u32()) - double_count = u30() - read_bytes(max(0, (double_count - 1)) * 8) - string_count = u30() - self.constant_strings = [''] - for _c in range(1, string_count): - s = _read_string(code_reader) - self.constant_strings.append(s) - namespace_count = u30() - for _c in range(1, namespace_count): - read_bytes(1) # kind - u30() # name - ns_set_count = u30() - for _c in range(1, ns_set_count): - count = u30() - for _c2 in range(count): - u30() - multiname_count = u30() - MULTINAME_SIZES = { - 0x07: 2, # QName - 0x0d: 2, # QNameA - 0x0f: 1, # RTQName - 0x10: 1, # RTQNameA - 0x11: 0, # RTQNameL - 0x12: 0, # RTQNameLA - 0x09: 2, # Multiname - 0x0e: 2, # MultinameA - 0x1b: 1, # MultinameL - 0x1c: 1, # MultinameLA - } - self.multinames = [''] - for _c in range(1, multiname_count): - kind = u30() - assert kind in MULTINAME_SIZES, 'Invalid multiname kind %r' % kind - if kind == 0x07: - u30() # namespace_idx - name_idx = u30() - self.multinames.append(self.constant_strings[name_idx]) - elif kind == 0x09: - name_idx = u30() - u30() - self.multinames.append(self.constant_strings[name_idx]) - else: - self.multinames.append(_Multiname(kind)) - for _c2 in range(MULTINAME_SIZES[kind]): - u30() - - # Methods - method_count = u30() - MethodInfo = collections.namedtuple( - 'MethodInfo', - ['NEED_ARGUMENTS', 'NEED_REST']) - method_infos = [] - for method_id in range(method_count): - param_count = u30() - u30() # return type - for _ in range(param_count): - u30() # param type - u30() # name index (always 0 for youtube) - flags = read_byte() - if flags & 0x08 != 0: - # Options present - option_count = u30() - for c in range(option_count): - u30() # val - read_bytes(1) # kind - if flags & 0x80 != 0: - # Param names present - for _ in range(param_count): - u30() # param name - mi = MethodInfo(flags & 0x01 != 0, flags & 0x04 != 0) - method_infos.append(mi) - - # Metadata - metadata_count = u30() - for _c in range(metadata_count): - u30() # name - item_count = u30() - for _c2 in range(item_count): - u30() # key - u30() # value - - def parse_traits_info(): - trait_name_idx = u30() - kind_full = read_byte() - kind = kind_full & 0x0f - attrs = kind_full >> 4 - methods = {} - constants = None - if kind == 0x00: # Slot - u30() # Slot id - u30() # type_name_idx - vindex = u30() - if vindex != 0: - read_byte() # vkind - elif kind == 0x06: # Const - u30() # Slot id - u30() # type_name_idx - vindex = u30() - vkind = 'any' - if vindex != 0: - vkind = read_byte() - if vkind == 0x03: # Constant_Int - value = self.constant_ints[vindex] - elif vkind == 0x04: # Constant_UInt - value = self.constant_uints[vindex] - else: - return {}, None # Ignore silently for now - constants = {self.multinames[trait_name_idx]: value} - elif kind in (0x01, 0x02, 0x03): # Method / Getter / Setter - u30() # disp_id - method_idx = u30() - methods[self.multinames[trait_name_idx]] = method_idx - elif kind == 0x04: # Class - u30() # slot_id - u30() # classi - elif kind == 0x05: # Function - u30() # slot_id - function_idx = u30() - methods[function_idx] = self.multinames[trait_name_idx] - else: - raise ExtractorError('Unsupported trait kind %d' % kind) - - if attrs & 0x4 != 0: # Metadata present - metadata_count = u30() - for _c3 in range(metadata_count): - u30() # metadata index - - return methods, constants - - # Classes - class_count = u30() - classes = [] - for class_id in range(class_count): - name_idx = u30() - - cname = self.multinames[name_idx] - avm_class = _AVMClass(name_idx, cname) - classes.append(avm_class) - - u30() # super_name idx - flags = read_byte() - if flags & 0x08 != 0: # Protected namespace is present - u30() # protected_ns_idx - intrf_count = u30() - for _c2 in range(intrf_count): - u30() - u30() # iinit - trait_count = u30() - for _c2 in range(trait_count): - trait_methods, trait_constants = parse_traits_info() - avm_class.register_methods(trait_methods) - if trait_constants: - avm_class.constants.update(trait_constants) - - assert len(classes) == class_count - self._classes_by_name = dict((c.name, c) for c in classes) - - for avm_class in classes: - avm_class.cinit_idx = u30() - trait_count = u30() - for _c2 in range(trait_count): - trait_methods, trait_constants = parse_traits_info() - avm_class.register_methods(trait_methods) - if trait_constants: - avm_class.constants.update(trait_constants) - - # Scripts - script_count = u30() - for _c in range(script_count): - u30() # init - trait_count = u30() - for _c2 in range(trait_count): - parse_traits_info() - - # Method bodies - method_body_count = u30() - Method = collections.namedtuple('Method', ['code', 'local_count']) - self._all_methods = [] - for _c in range(method_body_count): - method_idx = u30() - u30() # max_stack - local_count = u30() - u30() # init_scope_depth - u30() # max_scope_depth - code_length = u30() - code = read_bytes(code_length) - m = Method(code, local_count) - self._all_methods.append(m) - for avm_class in classes: - if method_idx in avm_class.method_idxs: - avm_class.methods[avm_class.method_idxs[method_idx]] = m - exception_count = u30() - for _c2 in range(exception_count): - u30() # from - u30() # to - u30() # target - u30() # exc_type - u30() # var_name - trait_count = u30() - for _c2 in range(trait_count): - parse_traits_info() - - assert p + code_reader.tell() == len(code_tag) - - def patch_function(self, avm_class, func_name, f): - self._patched_functions[(avm_class, func_name)] = f - - def extract_class(self, class_name, call_cinit=True): - try: - res = self._classes_by_name[class_name] - except KeyError: - raise ExtractorError('Class %r not found' % class_name) - - if call_cinit and hasattr(res, 'cinit_idx'): - res.register_methods({'$cinit': res.cinit_idx}) - res.methods['$cinit'] = self._all_methods[res.cinit_idx] - cinit = self.extract_function(res, '$cinit') - cinit([]) - - return res - - def extract_function(self, avm_class, func_name): - p = self._patched_functions.get((avm_class, func_name)) - if p: - return p - if func_name in avm_class.method_pyfunctions: - return avm_class.method_pyfunctions[func_name] - if func_name in self._classes_by_name: - return self._classes_by_name[func_name].make_object() - if func_name not in avm_class.methods: - raise ExtractorError('Cannot find function %s.%s' % ( - avm_class.name, func_name)) - m = avm_class.methods[func_name] - - def resfunc(args): - # Helper functions - coder = io.BytesIO(m.code) - s24 = lambda: _s24(coder) - u30 = lambda: _u30(coder) - - registers = [avm_class.variables] + list(args) + [None] * m.local_count - stack = [] - scopes = collections.deque([ - self._classes_by_name, avm_class.constants, avm_class.variables]) - while True: - opcode = _read_byte(coder) - if opcode == 9: # label - pass # Spec says: "Do nothing." - elif opcode == 16: # jump - offset = s24() - coder.seek(coder.tell() + offset) - elif opcode == 17: # iftrue - offset = s24() - value = stack.pop() - if value: - coder.seek(coder.tell() + offset) - elif opcode == 18: # iffalse - offset = s24() - value = stack.pop() - if not value: - coder.seek(coder.tell() + offset) - elif opcode == 19: # ifeq - offset = s24() - value2 = stack.pop() - value1 = stack.pop() - if value2 == value1: - coder.seek(coder.tell() + offset) - elif opcode == 20: # ifne - offset = s24() - value2 = stack.pop() - value1 = stack.pop() - if value2 != value1: - coder.seek(coder.tell() + offset) - elif opcode == 21: # iflt - offset = s24() - value2 = stack.pop() - value1 = stack.pop() - if value1 < value2: - coder.seek(coder.tell() + offset) - elif opcode == 32: # pushnull - stack.append(None) - elif opcode == 33: # pushundefined - stack.append(undefined) - elif opcode == 36: # pushbyte - v = _read_byte(coder) - stack.append(v) - elif opcode == 37: # pushshort - v = u30() - stack.append(v) - elif opcode == 38: # pushtrue - stack.append(True) - elif opcode == 39: # pushfalse - stack.append(False) - elif opcode == 40: # pushnan - stack.append(float('NaN')) - elif opcode == 42: # dup - value = stack[-1] - stack.append(value) - elif opcode == 44: # pushstring - idx = u30() - stack.append(self.constant_strings[idx]) - elif opcode == 48: # pushscope - new_scope = stack.pop() - scopes.append(new_scope) - elif opcode == 66: # construct - arg_count = u30() - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - res = obj.avm_class.make_object() - stack.append(res) - elif opcode == 70: # callproperty - index = u30() - mname = self.multinames[index] - arg_count = u30() - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - - if obj == StringClass: - if mname == 'String': - assert len(args) == 1 - assert isinstance(args[0], ( - int, compat_str, _Undefined)) - if args[0] == undefined: - res = 'undefined' - else: - res = compat_str(args[0]) - stack.append(res) - continue - else: - raise NotImplementedError( - 'Function String.%s is not yet implemented' - % mname) - elif isinstance(obj, _AVMClass_Object): - func = self.extract_function(obj.avm_class, mname) - res = func(args) - stack.append(res) - continue - elif isinstance(obj, _AVMClass): - func = self.extract_function(obj, mname) - res = func(args) - stack.append(res) - continue - elif isinstance(obj, _ScopeDict): - if mname in obj.avm_class.method_names: - func = self.extract_function(obj.avm_class, mname) - res = func(args) - else: - res = obj[mname] - stack.append(res) - continue - elif isinstance(obj, compat_str): - if mname == 'split': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - if args[0] == '': - res = list(obj) - else: - res = obj.split(args[0]) - stack.append(res) - continue - elif mname == 'charCodeAt': - assert len(args) <= 1 - idx = 0 if len(args) == 0 else args[0] - assert isinstance(idx, int) - res = ord(obj[idx]) - stack.append(res) - continue - elif isinstance(obj, list): - if mname == 'slice': - assert len(args) == 1 - assert isinstance(args[0], int) - res = obj[args[0]:] - stack.append(res) - continue - elif mname == 'join': - assert len(args) == 1 - assert isinstance(args[0], compat_str) - res = args[0].join(obj) - stack.append(res) - continue - raise NotImplementedError( - 'Unsupported property %r on %r' - % (mname, obj)) - elif opcode == 71: # returnvoid - res = undefined - return res - elif opcode == 72: # returnvalue - res = stack.pop() - return res - elif opcode == 73: # constructsuper - # Not yet implemented, just hope it works without it - arg_count = u30() - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - elif opcode == 74: # constructproperty - index = u30() - arg_count = u30() - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - - mname = self.multinames[index] - assert isinstance(obj, _AVMClass) - - # We do not actually call the constructor for now; - # we just pretend it does nothing - stack.append(obj.make_object()) - elif opcode == 79: # callpropvoid - index = u30() - mname = self.multinames[index] - arg_count = u30() - args = list(reversed( - [stack.pop() for _ in range(arg_count)])) - obj = stack.pop() - if isinstance(obj, _AVMClass_Object): - func = self.extract_function(obj.avm_class, mname) - res = func(args) - assert res is undefined - continue - if isinstance(obj, _ScopeDict): - assert mname in obj.avm_class.method_names - func = self.extract_function(obj.avm_class, mname) - res = func(args) - assert res is undefined - continue - if mname == 'reverse': - assert isinstance(obj, list) - obj.reverse() - else: - raise NotImplementedError( - 'Unsupported (void) property %r on %r' - % (mname, obj)) - elif opcode == 86: # newarray - arg_count = u30() - arr = [] - for i in range(arg_count): - arr.append(stack.pop()) - arr = arr[::-1] - stack.append(arr) - elif opcode == 93: # findpropstrict - index = u30() - mname = self.multinames[index] - for s in reversed(scopes): - if mname in s: - res = s - break - else: - res = scopes[0] - if mname not in res and mname in _builtin_classes: - stack.append(_builtin_classes[mname]) - else: - stack.append(res[mname]) - elif opcode == 94: # findproperty - index = u30() - mname = self.multinames[index] - for s in reversed(scopes): - if mname in s: - res = s - break - else: - res = avm_class.variables - stack.append(res) - elif opcode == 96: # getlex - index = u30() - mname = self.multinames[index] - for s in reversed(scopes): - if mname in s: - scope = s - break - else: - scope = avm_class.variables - - if mname in scope: - res = scope[mname] - elif mname in _builtin_classes: - res = _builtin_classes[mname] - else: - # Assume uninitialized - # TODO warn here - res = undefined - stack.append(res) - elif opcode == 97: # setproperty - index = u30() - value = stack.pop() - idx = self.multinames[index] - if isinstance(idx, _Multiname): - idx = stack.pop() - obj = stack.pop() - obj[idx] = value - elif opcode == 98: # getlocal - index = u30() - stack.append(registers[index]) - elif opcode == 99: # setlocal - index = u30() - value = stack.pop() - registers[index] = value - elif opcode == 102: # getproperty - index = u30() - pname = self.multinames[index] - if pname == 'length': - obj = stack.pop() - assert isinstance(obj, (compat_str, list)) - stack.append(len(obj)) - elif isinstance(pname, compat_str): # Member access - obj = stack.pop() - if isinstance(obj, _AVMClass): - res = obj.static_properties[pname] - stack.append(res) - continue - - assert isinstance(obj, (dict, _ScopeDict)),\ - 'Accessing member %r on %r' % (pname, obj) - res = obj.get(pname, undefined) - stack.append(res) - else: # Assume attribute access - idx = stack.pop() - assert isinstance(idx, int) - obj = stack.pop() - assert isinstance(obj, list) - stack.append(obj[idx]) - elif opcode == 104: # initproperty - index = u30() - value = stack.pop() - idx = self.multinames[index] - if isinstance(idx, _Multiname): - idx = stack.pop() - obj = stack.pop() - obj[idx] = value - elif opcode == 115: # convert_ - value = stack.pop() - intvalue = int(value) - stack.append(intvalue) - elif opcode == 128: # coerce - u30() - elif opcode == 130: # coerce_a - value = stack.pop() - # um, yes, it's any value - stack.append(value) - elif opcode == 133: # coerce_s - assert isinstance(stack[-1], (type(None), compat_str)) - elif opcode == 147: # decrement - value = stack.pop() - assert isinstance(value, int) - stack.append(value - 1) - elif opcode == 149: # typeof - value = stack.pop() - return { - _Undefined: 'undefined', - compat_str: 'String', - int: 'Number', - float: 'Number', - }[type(value)] - elif opcode == 160: # add - value2 = stack.pop() - value1 = stack.pop() - res = value1 + value2 - stack.append(res) - elif opcode == 161: # subtract - value2 = stack.pop() - value1 = stack.pop() - res = value1 - value2 - stack.append(res) - elif opcode == 162: # multiply - value2 = stack.pop() - value1 = stack.pop() - res = value1 * value2 - stack.append(res) - elif opcode == 164: # modulo - value2 = stack.pop() - value1 = stack.pop() - res = value1 % value2 - stack.append(res) - elif opcode == 168: # bitand - value2 = stack.pop() - value1 = stack.pop() - assert isinstance(value1, int) - assert isinstance(value2, int) - res = value1 & value2 - stack.append(res) - elif opcode == 171: # equals - value2 = stack.pop() - value1 = stack.pop() - result = value1 == value2 - stack.append(result) - elif opcode == 175: # greaterequals - value2 = stack.pop() - value1 = stack.pop() - result = value1 >= value2 - stack.append(result) - elif opcode == 192: # increment_i - value = stack.pop() - assert isinstance(value, int) - stack.append(value + 1) - elif opcode == 208: # getlocal_0 - stack.append(registers[0]) - elif opcode == 209: # getlocal_1 - stack.append(registers[1]) - elif opcode == 210: # getlocal_2 - stack.append(registers[2]) - elif opcode == 211: # getlocal_3 - stack.append(registers[3]) - elif opcode == 212: # setlocal_0 - registers[0] = stack.pop() - elif opcode == 213: # setlocal_1 - registers[1] = stack.pop() - elif opcode == 214: # setlocal_2 - registers[2] = stack.pop() - elif opcode == 215: # setlocal_3 - registers[3] = stack.pop() - else: - raise NotImplementedError( - 'Unsupported opcode %d' % opcode) - - avm_class.method_pyfunctions[func_name] = resfunc - return resfunc diff --git a/yt_dlp/update.py b/yt_dlp/update.py deleted file mode 100644 index a208e163c..000000000 --- a/yt_dlp/update.py +++ /dev/null @@ -1,288 +0,0 @@ -from __future__ import unicode_literals - -import hashlib -import json -import os -import platform -import subprocess -import sys -import traceback -from zipimport import zipimporter - -from .compat import compat_realpath -from .utils import encode_compat_str, Popen, write_string - -from .version import __version__ - - -''' # Not signed -def rsa_verify(message, signature, key): - from hashlib import sha256 - assert isinstance(message, bytes) - byte_size = (len(bin(key[0])) - 2 + 8 - 1) // 8 - signature = ('%x' % pow(int(signature, 16), key[1], key[0])).encode() - signature = (byte_size * 2 - len(signature)) * b'0' + signature - asn1 = b'3031300d060960864801650304020105000420' - asn1 += sha256(message).hexdigest().encode() - if byte_size < len(asn1) // 2 + 11: - return False - expected = b'0001' + (byte_size - len(asn1) // 2 - 3) * b'ff' + b'00' + asn1 - return expected == signature -''' - - -def detect_variant(): - if hasattr(sys, 'frozen'): - prefix = 'mac' if sys.platform == 'darwin' else 'win' - if getattr(sys, '_MEIPASS', None): - if sys._MEIPASS == os.path.dirname(sys.executable): - return f'{prefix}_dir' - return f'{prefix}_exe' - return 'py2exe' - elif isinstance(globals().get('__loader__'), zipimporter): - return 'zip' - elif os.path.basename(sys.argv[0]) == '__main__.py': - return 'source' - return 'unknown' - - -_NON_UPDATEABLE_REASONS = { - 'win_exe': None, - 'zip': None, - 'mac_exe': None, - 'py2exe': None, - 'win_dir': 'Auto-update is not supported for unpackaged windows executable; Re-download the latest release', - 'mac_dir': 'Auto-update is not supported for unpackaged MacOS executable; Re-download the latest release', - 'source': 'You cannot update when running from source code; Use git to pull the latest changes', - 'unknown': 'It looks like you installed yt-dlp with a package manager, pip or setup.py; Use that to update', -} - - -def is_non_updateable(): - return _NON_UPDATEABLE_REASONS.get(detect_variant(), _NON_UPDATEABLE_REASONS['unknown']) - - -def run_update(ydl): - """ - Update the program file with the latest version from the repository - Returns whether the program should terminate - """ - - JSON_URL = 'https://api.github.com/repos/yt-dlp/yt-dlp/releases/latest' - - def report_error(msg, expected=False): - ydl.report_error(msg, tb='' if expected else None) - - def report_unable(action, expected=False): - report_error(f'Unable to {action}', expected) - - def report_permission_error(file): - report_unable(f'write to {file}; Try running as administrator', True) - - def report_network_error(action, delim=';'): - report_unable(f'{action}{delim} Visit https://github.com/yt-dlp/yt-dlp/releases/latest', True) - - def calc_sha256sum(path): - h = hashlib.sha256() - b = bytearray(128 * 1024) - mv = memoryview(b) - with open(os.path.realpath(path), 'rb', buffering=0) as f: - for n in iter(lambda: f.readinto(mv), 0): - h.update(mv[:n]) - return h.hexdigest() - - # Download and check versions info - try: - version_info = ydl._opener.open(JSON_URL).read().decode('utf-8') - version_info = json.loads(version_info) - except Exception: - return report_network_error('obtain version info', delim='; Please try again later or') - - def version_tuple(version_str): - return tuple(map(int, version_str.split('.'))) - - version_id = version_info['tag_name'] - ydl.to_screen(f'Latest version: {version_id}, Current version: {__version__}') - if version_tuple(__version__) >= version_tuple(version_id): - ydl.to_screen(f'yt-dlp is up to date ({__version__})') - return - - err = is_non_updateable() - if err: - return report_error(err, True) - - # sys.executable is set to the full pathname of the exe-file for py2exe - # though symlinks are not followed so that we need to do this manually - # with help of realpath - filename = compat_realpath(sys.executable if hasattr(sys, 'frozen') else sys.argv[0]) - ydl.to_screen(f'Current Build Hash {calc_sha256sum(filename)}') - ydl.to_screen(f'Updating to version {version_id} ...') - - version_labels = { - 'zip_3': '', - 'win_exe_64': '.exe', - 'py2exe_64': '_min.exe', - 'win_exe_32': '_x86.exe', - 'mac_exe_64': '_macos', - } - - def get_bin_info(bin_or_exe, version): - label = version_labels['%s_%s' % (bin_or_exe, version)] - return next((i for i in version_info['assets'] if i['name'] == 'yt-dlp%s' % label), {}) - - def get_sha256sum(bin_or_exe, version): - filename = 'yt-dlp%s' % version_labels['%s_%s' % (bin_or_exe, version)] - urlh = next( - (i for i in version_info['assets'] if i['name'] in ('SHA2-256SUMS')), - {}).get('browser_download_url') - if not urlh: - return None - hash_data = ydl._opener.open(urlh).read().decode('utf-8') - return dict(ln.split()[::-1] for ln in hash_data.splitlines()).get(filename) - - if not os.access(filename, os.W_OK): - return report_permission_error(filename) - - # PyInstaller - variant = detect_variant() - if variant in ('win_exe', 'py2exe'): - directory = os.path.dirname(filename) - if not os.access(directory, os.W_OK): - return report_permission_error(directory) - try: - if os.path.exists(filename + '.old'): - os.remove(filename + '.old') - except (IOError, OSError): - return report_unable('remove the old version') - - try: - arch = platform.architecture()[0][:2] - url = get_bin_info(variant, arch).get('browser_download_url') - if not url: - return report_network_error('fetch updates') - urlh = ydl._opener.open(url) - newcontent = urlh.read() - urlh.close() - except (IOError, OSError): - return report_network_error('download latest version') - - try: - with open(filename + '.new', 'wb') as outf: - outf.write(newcontent) - except (IOError, OSError): - return report_permission_error(f'{filename}.new') - - expected_sum = get_sha256sum(variant, arch) - if not expected_sum: - ydl.report_warning('no hash information found for the release') - elif calc_sha256sum(filename + '.new') != expected_sum: - report_network_error('verify the new executable') - try: - os.remove(filename + '.new') - except OSError: - return report_unable('remove corrupt download') - - try: - os.rename(filename, filename + '.old') - except (IOError, OSError): - return report_unable('move current version') - try: - os.rename(filename + '.new', filename) - except (IOError, OSError): - report_unable('overwrite current version') - os.rename(filename + '.old', filename) - return - try: - # Continues to run in the background - Popen( - 'ping 127.0.0.1 -n 5 -w 1000 & del /F "%s.old"' % filename, - shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - ydl.to_screen('Updated yt-dlp to version %s' % version_id) - return True # Exit app - except OSError: - report_unable('delete the old version') - - elif variant in ('zip', 'mac_exe'): - pack_type = '3' if variant == 'zip' else '64' - try: - url = get_bin_info(variant, pack_type).get('browser_download_url') - if not url: - return report_network_error('fetch updates') - urlh = ydl._opener.open(url) - newcontent = urlh.read() - urlh.close() - except (IOError, OSError): - return report_network_error('download the latest version') - - expected_sum = get_sha256sum(variant, pack_type) - if not expected_sum: - ydl.report_warning('no hash information found for the release') - elif hashlib.sha256(newcontent).hexdigest() != expected_sum: - return report_network_error('verify the new package') - - try: - with open(filename, 'wb') as outf: - outf.write(newcontent) - except (IOError, OSError): - return report_unable('overwrite current version') - - ydl.to_screen('Updated yt-dlp to version %s; Restart yt-dlp to use the new version' % version_id) - return - - assert False, f'Unhandled variant: {variant}' - - -''' # UNUSED -def get_notes(versions, fromVersion): - notes = [] - for v, vdata in sorted(versions.items()): - if v > fromVersion: - notes.extend(vdata.get('notes', [])) - return notes - - -def print_notes(to_screen, versions, fromVersion=__version__): - notes = get_notes(versions, fromVersion) - if notes: - to_screen('PLEASE NOTE:') - for note in notes: - to_screen(note) -''' - - -# Deprecated -def update_self(to_screen, verbose, opener): - - printfn = to_screen - - write_string( - 'DeprecationWarning: "yt_dlp.update.update_self" is deprecated and may be removed in a future version. ' - 'Use "yt_dlp.update.run_update(ydl)" instead\n') - - class FakeYDL(): - _opener = opener - to_screen = printfn - - @staticmethod - def report_warning(msg, *args, **kwargs): - return printfn('WARNING: %s' % msg, *args, **kwargs) - - @staticmethod - def report_error(msg, tb=None): - printfn('ERROR: %s' % msg) - if not verbose: - return - if tb is None: - # Copied from YoutubeDl.trouble - if sys.exc_info()[0]: - tb = '' - if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: - tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) - tb += encode_compat_str(traceback.format_exc()) - else: - tb_data = traceback.format_list(traceback.extract_stack()) - tb = ''.join(tb_data) - if tb: - printfn(tb) - - return run_update(FakeYDL()) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index bb8d65cad..51931f164 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -58,7 +58,6 @@ from .compat import ( compat_kwargs, compat_os_name, compat_parse_qs, - compat_shlex_split, compat_shlex_quote, compat_str, compat_struct_pack, @@ -145,7 +144,6 @@ std_headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'en-us,en;q=0.5', - 'Sec-Fetch-Mode': 'navigate', } @@ -417,33 +415,17 @@ def get_element_by_id(id, html): return get_element_by_attribute('id', id, html) -def get_element_html_by_id(id, html): - """Return the html of the tag with the specified ID in the passed HTML document""" - return get_element_html_by_attribute('id', id, html) - - def get_element_by_class(class_name, html): """Return the content of the first tag with the specified class in the passed HTML document""" retval = get_elements_by_class(class_name, html) return retval[0] if retval else None -def get_element_html_by_class(class_name, html): - """Return the html of the first tag with the specified class in the passed HTML document""" - retval = get_elements_html_by_class(class_name, html) - return retval[0] if retval else None - - def get_element_by_attribute(attribute, value, html, escape_value=True): retval = get_elements_by_attribute(attribute, value, html, escape_value) return retval[0] if retval else None -def get_element_html_by_attribute(attribute, value, html, escape_value=True): - retval = get_elements_html_by_attribute(attribute, value, html, escape_value) - return retval[0] if retval else None - - def get_elements_by_class(class_name, html): """Return the content of all tags with the specified class in the passed HTML document as a list""" return get_elements_by_attribute( @@ -451,123 +433,29 @@ def get_elements_by_class(class_name, html): html, escape_value=False) -def get_elements_html_by_class(class_name, html): - """Return the html of all tags with the specified class in the passed HTML document as a list""" - return get_elements_html_by_attribute( - 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), - html, escape_value=False) - - -def get_elements_by_attribute(*args, **kwargs): +def get_elements_by_attribute(attribute, value, html, escape_value=True): """Return the content of the tag with the specified attribute in the passed HTML document""" - return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)] - - -def get_elements_html_by_attribute(*args, **kwargs): - """Return the html of the tag with the specified attribute in the passed HTML document""" - return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)] - - -def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True): - """ - Return the text (content) and the html (whole) of the tag with the specified - attribute in the passed HTML document - """ - - value_quote_optional = '' if re.match(r'''[\s"'`=<>]''', value) else '?' value = re.escape(value) if escape_value else value - partial_element_re = r'''(?x) - <(?P<tag>[a-zA-Z0-9:._-]+) - (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)? - \s%(attribute)s\s*=\s*(?P<_q>['"]%(vqo)s)(?-x:%(value)s)(?P=_q) - ''' % {'attribute': re.escape(attribute), 'value': value, 'vqo': value_quote_optional} - - for m in re.finditer(partial_element_re, html): - content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():]) - - yield ( - unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)), - whole - ) - - -class HTMLBreakOnClosingTagParser(compat_HTMLParser): - """ - HTML parser which raises HTMLBreakOnClosingTagException upon reaching the - closing tag for the first opening tag it has encountered, and can be used - as a context manager - """ - - class HTMLBreakOnClosingTagException(Exception): - pass - - def __init__(self): - self.tagstack = collections.deque() - compat_HTMLParser.__init__(self) - - def __enter__(self): - return self - - def __exit__(self, *_): - self.close() - - def close(self): - # handle_endtag does not return upon raising HTMLBreakOnClosingTagException, - # so data remains buffered; we no longer have any interest in it, thus - # override this method to discard it - pass - - def handle_starttag(self, tag, _): - self.tagstack.append(tag) + retlist = [] + for m in re.finditer(r'''(?xs) + <([a-zA-Z0-9:._-]+) + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? + \s+%s=['"]?%s['"]? + (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*? + \s*> + (?P<content>.*?) + </\1> + ''' % (re.escape(attribute), value), html): + res = m.group('content') - def handle_endtag(self, tag): - if not self.tagstack: - raise compat_HTMLParseError('no tags in the stack') - while self.tagstack: - inner_tag = self.tagstack.pop() - if inner_tag == tag: - break - else: - raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found') - if not self.tagstack: - raise self.HTMLBreakOnClosingTagException() + if res.startswith('"') or res.startswith("'"): + res = res[1:-1] + retlist.append(unescapeHTML(res)) -def get_element_text_and_html_by_tag(tag, html): - """ - For the first element with the specified tag in the passed HTML document - return its' content (text) and the whole element (html) - """ - def find_or_raise(haystack, needle, exc): - try: - return haystack.index(needle) - except ValueError: - raise exc - closing_tag = f'</{tag}>' - whole_start = find_or_raise( - html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found')) - content_start = find_or_raise( - html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag')) - content_start += whole_start + 1 - with HTMLBreakOnClosingTagParser() as parser: - parser.feed(html[whole_start:content_start]) - if not parser.tagstack or parser.tagstack[0] != tag: - raise compat_HTMLParseError(f'parser did not match opening {tag} tag') - offset = content_start - while offset < len(html): - next_closing_tag_start = find_or_raise( - html[offset:], closing_tag, - compat_HTMLParseError(f'closing {tag} tag not found')) - next_closing_tag_end = next_closing_tag_start + len(closing_tag) - try: - parser.feed(html[offset:offset + next_closing_tag_end]) - offset += next_closing_tag_end - except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException: - return html[content_start:offset + next_closing_tag_start], \ - html[whole_start:offset + next_closing_tag_end] - raise compat_HTMLParseError('unexpected end of html') + return retlist class HTMLAttributeParser(compat_HTMLParser): @@ -639,9 +527,10 @@ def clean_html(html): if html is None: # Convenience for sanitizing descriptions etc. return html - html = re.sub(r'\s+', ' ', html) - html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html) - html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html) + # Newline vs <br /> + html = html.replace('\n', ' ') + html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html) + html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html) # Strip html tags html = re.sub('<.*?>', '', html) # Replace html entities @@ -665,7 +554,7 @@ def sanitize_open(filename, open_mode): import msvcrt msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) - stream = locked_file(filename, open_mode, block=False).open() + stream = open(encodeFilename(filename), open_mode) return (stream, filename) except (IOError, OSError) as err: if err.errno in (errno.EACCES,): @@ -677,7 +566,7 @@ def sanitize_open(filename, open_mode): raise else: # An exception here should be caught in the caller - stream = locked_file(filename, open_mode, block=False).open() + stream = open(encodeFilename(alt_filename), open_mode) return (stream, alt_filename) @@ -996,8 +885,6 @@ def make_HTTPS_handler(params, **kwargs): opts_check_certificate = not params.get('nocheckcertificate') context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) context.check_hostname = opts_check_certificate - if params.get('legacyserverconnect'): - context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE if opts_check_certificate: try: @@ -1018,9 +905,13 @@ def make_HTTPS_handler(params, **kwargs): def bug_reports_message(before=';'): - msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp , ' - 'filling out the "Broken site" issue template properly. ' - 'Confirm you are on the latest version using -U') + if ytdl_is_updateable(): + update_cmd = 'type doas pacman -Sy hypervideo to update' + else: + update_cmd = 'see https://git.conocimientoslibres.ga/software/hypervideo.git/about/#how-do-i-update-hypervideo' + msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .' + msg += ' Make sure you are using the latest version; %s.' % update_cmd + msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.' before = before.rstrip() if not before or before.endswith(('.', '!', '?')): @@ -1843,7 +1734,7 @@ def datetime_from_str(date_str, precision='auto', format='%Y%m%d'): if precision == 'auto': auto_precision = True precision = 'microsecond' - today = datetime_round(datetime.datetime.utcnow(), precision) + today = datetime_round(datetime.datetime.now(), precision) if date_str in ('now', 'today'): return today if date_str == 'yesterday': @@ -2119,7 +2010,7 @@ if sys.platform == 'win32': whole_low = 0xffffffff whole_high = 0x7fffffff - def _lock_file(f, exclusive, block): # todo: block unused on win32 + def _lock_file(f, exclusive): overlapped = OVERLAPPED() overlapped.Offset = 0 overlapped.OffsetHigh = 0 @@ -2142,19 +2033,15 @@ else: try: import fcntl - def _lock_file(f, exclusive, block): - fcntl.flock(f, - fcntl.LOCK_SH if not exclusive - else fcntl.LOCK_EX if block - else fcntl.LOCK_EX | fcntl.LOCK_NB) + def _lock_file(f, exclusive): + fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH) def _unlock_file(f): fcntl.flock(f, fcntl.LOCK_UN) - except ImportError: UNSUPPORTED_MSG = 'file locking is not supported on this platform' - def _lock_file(f, exclusive, block): + def _lock_file(f, exclusive): raise IOError(UNSUPPORTED_MSG) def _unlock_file(f): @@ -2162,16 +2049,15 @@ else: class locked_file(object): - def __init__(self, filename, mode, block=True, encoding=None): - assert mode in ['r', 'rb', 'a', 'ab', 'w', 'wb'] + def __init__(self, filename, mode, encoding=None): + assert mode in ['r', 'a', 'w'] self.f = io.open(filename, mode, encoding=encoding) self.mode = mode - self.block = block def __enter__(self): - exclusive = 'r' not in self.mode + exclusive = self.mode != 'r' try: - _lock_file(self.f, exclusive, self.block) + _lock_file(self.f, exclusive) except IOError: self.f.close() raise @@ -2192,15 +2078,6 @@ class locked_file(object): def read(self, *args): return self.f.read(*args) - def flush(self): - self.f.flush() - - def open(self): - return self.__enter__() - - def close(self, *args): - self.__exit__(self, *args, value=False, traceback=False) - def get_filesystem_encoding(): encoding = sys.getfilesystemencoding() @@ -2243,11 +2120,9 @@ def format_decimal_suffix(num, fmt='%d%s', *, factor=1000): if num is None: return None exponent = 0 if num == 0 else int(math.log(num, factor)) - suffix = ['', *'kMGTPEZY'][exponent] - if factor == 1024: - suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i') + suffix = ['', *'KMGTPEZY'][exponent] converted = num / (factor ** exponent) - return fmt % (converted, suffix) + return fmt % (converted, f'{suffix}i' if suffix and factor == 1024 else suffix) def format_bytes(bytes): @@ -2507,8 +2382,13 @@ class PUTRequest(compat_urllib_request.Request): def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): - if get_attr and v is not None: - v = getattr(v, get_attr, None) + if get_attr: + if v is not None: + v = getattr(v, get_attr, None) + if v == '': + v = None + if v is None: + return default try: return int(v) * invscale // scale except (ValueError, TypeError, OverflowError): @@ -2572,14 +2452,9 @@ def parse_duration(s): return None days, hours, mins, secs, ms = [None] * 5 - m = re.match(r'''(?x) - (?P<before_secs> - (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)? - (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+)) - (?P<ms>[.:][0-9]+)?Z?$ - ''', s) + m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s) if m: - days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms') + days, hours, mins, secs, ms = m.groups() else: m = re.match( r'''(?ix)(?:P? @@ -2624,7 +2499,7 @@ def parse_duration(s): if days: duration += float(days) * 24 * 60 * 60 if ms: - duration += float(ms.replace(':', '.')) + duration += float(ms) return duration @@ -2858,7 +2733,8 @@ class InAdvancePagedList(PagedList): def _getslice(self, start, end): start_page = start // self._pagesize - end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1) + end_page = ( + self._pagecount if end is None else (end // self._pagesize + 1)) skip_elems = start - start_page * self._pagesize only_more = None if end is None else end - start for pagenum in range(start_page, end_page): @@ -3162,9 +3038,6 @@ def qualities(quality_ids): return q -POSTPROCESS_WHEN = {'pre_process', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'} - - DEFAULT_OUTTMPL = { 'default': '%(title)s [%(id)s].%(ext)s', 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s', @@ -3177,7 +3050,6 @@ OUTTMPL_TYPES = { 'annotation': 'annotations.xml', 'infojson': 'info.json', 'link': None, - 'pl_video': None, 'pl_thumbnail': None, 'pl_description': 'description', 'pl_infojson': 'info.json', @@ -3326,7 +3198,7 @@ def parse_codecs(codecs_str): return {} split_codecs = list(filter(None, map( str.strip, codecs_str.strip().strip(',').split(',')))) - vcodec, acodec, tcodec, hdr = None, None, None, None + vcodec, acodec, hdr = None, None, None for full_codec in split_codecs: parts = full_codec.split('.') codec = parts[0].replace('0', '') @@ -3343,17 +3215,13 @@ def parse_codecs(codecs_str): elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): if not acodec: acodec = full_codec - elif codec in ('stpp', 'wvtt',): - if not tcodec: - tcodec = full_codec else: write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr) - if vcodec or acodec or tcodec: + if vcodec or acodec: return { 'vcodec': vcodec or 'none', 'acodec': acodec or 'none', 'dynamic_range': hdr, - **({'tcodec': tcodec} if tcodec is not None else {}), } elif len(split_codecs) == 2: return { @@ -3443,11 +3311,12 @@ def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False): return [max(width(str(v)) for v in col) for col in zip(*table)] def filter_using_list(row, filterArray): - return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take] + return [col for (take, col) in zip(filterArray, row) if take] - max_lens = get_max_lens(data) if hide_empty else [] - header_row = filter_using_list(header_row, max_lens) - data = [filter_using_list(row, max_lens) for row in data] + if hide_empty: + max_lens = get_max_lens(data) + header_row = filter_using_list(header_row, max_lens) + data = [filter_using_list(row, max_lens) for row in data] table = [header_row] + data max_lens = get_max_lens(table) @@ -4986,10 +4855,13 @@ def to_high_limit_path(path): def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None): - val = traverse_obj(obj, *variadic(field)) - if val in ignore: - return default - return template % (func(val) if func else val) + if field is None: + val = obj if obj is not None else default + else: + val = obj.get(field, default) + if func and val not in ignore: + val = func(val) + return template % val if val not in ignore else default def clean_podcast_url(url): @@ -5065,12 +4937,11 @@ def traverse_obj( ''' Traverse nested list/dict/tuple @param path_list A list of paths which are checked one by one. Each path is a list of keys where each key is a string, - a function, a tuple of strings/None or "...". + a function, a tuple of strings or "...". When a fuction is given, it takes the key as argument and returns whether the key matches or not. When a tuple is given, all the keys given in the tuple are traversed, and "..." traverses all the keys in the object - "None" returns the object without traversal @param default Default value to return @param expected_type Only accept final value of this type (Can also be any callable) @param get_all Return all the values obtained from a path or only the first one @@ -5089,8 +4960,8 @@ def traverse_obj( nonlocal depth path = tuple(variadic(path)) for i, key in enumerate(path): - if None in (key, obj): - return obj + if obj is None: + return None if isinstance(key, (list, tuple)): obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key] key = ... @@ -5158,6 +5029,7 @@ def traverse_obj( return default +# Deprecated def traverse_dict(dictn, keys, casesense=True): write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated ' 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead') @@ -5222,92 +5094,3 @@ def join_nonempty(*values, delim='-', from_dict=None): if from_dict is not None: values = map(from_dict.get, values) return delim.join(map(str, filter(None, values))) - - -class Config: - own_args = None - filename = None - __initialized = False - - def __init__(self, parser, label=None): - self._parser, self.label = parser, label - self._loaded_paths, self.configs = set(), [] - - def init(self, args=None, filename=None): - assert not self.__initialized - directory = '' - if filename: - location = os.path.realpath(filename) - directory = os.path.dirname(location) - if location in self._loaded_paths: - return False - self._loaded_paths.add(location) - - self.__initialized = True - self.own_args, self.filename = args, filename - for location in self._parser.parse_args(args)[0].config_locations or []: - location = os.path.join(directory, expand_path(location)) - if os.path.isdir(location): - location = os.path.join(location, 'yt-dlp.conf') - if not os.path.exists(location): - self._parser.error(f'config location {location} does not exist') - self.append_config(self.read_file(location), location) - return True - - def __str__(self): - label = join_nonempty( - self.label, 'config', f'"{self.filename}"' if self.filename else '', - delim=' ') - return join_nonempty( - self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}', - *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs), - delim='\n') - - @staticmethod - def read_file(filename, default=[]): - try: - optionf = open(filename) - except IOError: - return default # silently skip if file is not present - try: - # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56 - contents = optionf.read() - if sys.version_info < (3,): - contents = contents.decode(preferredencoding()) - res = compat_shlex_split(contents, comments=True) - finally: - optionf.close() - return res - - @staticmethod - def hide_login_info(opts): - PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username']) - eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') - - def _scrub_eq(o): - m = eqre.match(o) - if m: - return m.group('key') + '=PRIVATE' - else: - return o - - opts = list(map(_scrub_eq, opts)) - for idx, opt in enumerate(opts): - if opt in PRIVATE_OPTS and idx + 1 < len(opts): - opts[idx + 1] = 'PRIVATE' - return opts - - def append_config(self, *args, label=None): - config = type(self)(self._parser, label) - config._loaded_paths = self._loaded_paths - if config.init(*args): - self.configs.append(config) - - @property - def all_args(self): - for config in reversed(self.configs): - yield from config.all_args - yield from self.own_args or [] - - def parse_args(self): - return self._parser.parse_args(list(self.all_args)) |