From cd9ea4104b8b5075ea4bfe92c76130e267686805 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 02:54:39 +0000 Subject: [instagram] Add more formats when logged in (#1487) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index ccfcddd5b..8c935c251 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -222,8 +222,8 @@ class InstagramIE(InfoExtractor): dict) if media: video_url = media.get('video_url') - height = int_or_none(media.get('dimensions', {}).get('height')) - width = int_or_none(media.get('dimensions', {}).get('width')) + height = try_get(media, lambda x: x['dimensions']['height']) + width = try_get(media, lambda x: x['dimensions']['width']) description = try_get( media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], compat_str) or media.get('caption') @@ -231,8 +231,8 @@ class InstagramIE(InfoExtractor): thumbnail = media.get('display_src') or media.get('display_url') duration = float_or_none(media.get('video_duration')) timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) - uploader = media.get('owner', {}).get('full_name') - uploader_id = media.get('owner', {}).get('username') + uploader = try_get(media, lambda x: x['owner']['full_name']) + uploader_id = try_get(media, lambda x: x['owner']['username']) def get_count(keys, kind): for key in variadic(keys): @@ -294,6 +294,10 @@ class InstagramIE(InfoExtractor): 'width': width, 'height': height, }] + dash = try_get(media, lambda x: x['dash_info']['video_dash_manifest']) + if dash: + formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash')) + self._sort_formats(formats) if not uploader_id: uploader_id = self._search_regex( -- cgit v1.2.3 From 404f611f1c4aa516fbc4301aa7b8f734ee4bc67b Mon Sep 17 
00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 2021 09:53:58 +0530 Subject: [youtube] Fix throttling by decrypting n-sig (#1437) --- .gitignore | 1 + test/test_jsinterp.py | 50 +++++ test/test_youtube_signature.py | 70 ++++-- yt_dlp/extractor/youtube.py | 91 ++++++-- yt_dlp/jsinterp.py | 488 ++++++++++++++++++++++++++++++++--------- 5 files changed, 550 insertions(+), 150 deletions(-) diff --git a/.gitignore b/.gitignore index bf06c81f0..790989b3c 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,7 @@ cookies *.webp *.annotations.xml *.description +.cache/ # Allow config/media files in testdata !test/** diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 8b2b60403..380e52c33 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -112,6 +112,56 @@ class TestJSInterpreter(unittest.TestCase): ''') self.assertEqual(jsi.call_function('z'), 5) + def test_for_loop(self): + jsi = JSInterpreter(''' + function x() { a=0; for (i=0; i-10; i++) {a++} a } + ''') + self.assertEqual(jsi.call_function('x'), 10) + + def test_switch(self): + jsi = JSInterpreter(''' + function x(f) { switch(f){ + case 1:f+=1; + case 2:f+=2; + case 3:f+=3;break; + case 4:f+=4; + default:f=0; + } return f } + ''') + self.assertEqual(jsi.call_function('x', 1), 7) + self.assertEqual(jsi.call_function('x', 3), 6) + self.assertEqual(jsi.call_function('x', 5), 0) + + def test_try(self): + jsi = JSInterpreter(''' + function x() { try{return 10} catch(e){return 5} } + ''') + self.assertEqual(jsi.call_function('x'), 10) + + def test_for_loop_continue(self): + jsi = JSInterpreter(''' + function x() { a=0; for (i=0; i-10; i++) { continue; a++ } a } + ''') + self.assertEqual(jsi.call_function('x'), 0) + + def test_for_loop_break(self): + jsi = JSInterpreter(''' + function x() { a=0; for (i=0; i-10; i++) { break; a++ } a } + ''') + self.assertEqual(jsi.call_function('x'), 0) + + def test_literal_list(self): + jsi = JSInterpreter(''' + function x() { [1, 2, "asdf", [5, 6, 7]][3] } + ''') 
+ self.assertEqual(jsi.call_function('x'), [5, 6, 7]) + + def test_comma(self): + jsi = JSInterpreter(''' + function x() { a=5; a -= 1, a+=3; return a } + ''') + self.assertEqual(jsi.call_function('x'), 7) + if __name__ == '__main__': unittest.main() diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index dcf6ab60d..f40a06952 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -14,9 +14,10 @@ import string from test.helper import FakeYDL, is_download_test from yt_dlp.extractor import YoutubeIE +from yt_dlp.jsinterp import JSInterpreter from yt_dlp.compat import compat_str, compat_urlretrieve -_TESTS = [ +_SIG_TESTS = [ ( 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', 86, @@ -64,6 +65,13 @@ _TESTS = [ ) ] +_NSIG_TESTS = [ + ( + 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', + 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', + ), # TODO: Add more tests +] + @is_download_test class TestPlayerInfo(unittest.TestCase): @@ -97,35 +105,49 @@ class TestSignature(unittest.TestCase): os.mkdir(self.TESTDATA_DIR) -def make_tfunc(url, sig_input, expected_sig): - m = re.match(r'.*-([a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$', url) - assert m, '%r should follow URL format' % url - test_id = m.group(1) +def t_factory(name, sig_func, url_pattern): + def make_tfunc(url, sig_input, expected_sig): + m = url_pattern.match(url) + assert m, '%r should follow URL format' % url + test_id = m.group('id') + + def test_func(self): + basename = f'player-{name}-{test_id}.js' + fn = os.path.join(self.TESTDATA_DIR, basename) + + if not os.path.exists(fn): + compat_urlretrieve(url, fn) + with io.open(fn, encoding='utf-8') as testf: + jscode = testf.read() + self.assertEqual(sig_func(jscode, sig_input), expected_sig) + + test_func.__name__ = f'test_{name}_js_{test_id}' + setattr(TestSignature, test_func.__name__, test_func) + return make_tfunc + - def test_func(self): - basename = 'player-%s.js' % 
test_id - fn = os.path.join(self.TESTDATA_DIR, basename) +def signature(jscode, sig_input): + func = YoutubeIE(FakeYDL())._parse_sig_js(jscode) + src_sig = ( + compat_str(string.printable[:sig_input]) + if isinstance(sig_input, int) else sig_input) + return func(src_sig) - if not os.path.exists(fn): - compat_urlretrieve(url, fn) - ydl = FakeYDL() - ie = YoutubeIE(ydl) - with io.open(fn, encoding='utf-8') as testf: - jscode = testf.read() - func = ie._parse_sig_js(jscode) - src_sig = ( - compat_str(string.printable[:sig_input]) - if isinstance(sig_input, int) else sig_input) - got_sig = func(src_sig) - self.assertEqual(got_sig, expected_sig) +def n_sig(jscode, sig_input): + funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode) + return JSInterpreter(jscode).call_function(funcname, sig_input) - test_func.__name__ = str('test_signature_js_' + test_id) - setattr(TestSignature, test_func.__name__, test_func) +make_sig_test = t_factory( + 'signature', signature, re.compile(r'.*-(?P[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.[a-z]+$')) +for test_spec in _SIG_TESTS: + make_sig_test(*test_spec) -for test_spec in _TESTS: - make_tfunc(*test_spec) +make_nsig_test = t_factory( + 'nsig', n_sig, re.compile(r'.+/player/(?P[a-zA-Z0-9_-]+)/.+.js$')) +for test_spec in _NSIG_TESTS: + make_nsig_test(*test_spec) if __name__ == '__main__': diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 658b45fe1..56cd2ed8d 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -1720,7 +1720,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError('Cannot identify player %r' % player_url) return id_m.group('id') - def _load_player(self, video_id, player_url, fatal=True) -> bool: + def _load_player(self, video_id, player_url, fatal=True): player_id = self._extract_player_info(player_url) if player_id not in self._code_cache: code = self._download_webpage( @@ -1729,7 +1729,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 
errnote='Download of %s failed' % player_url) if code: self._code_cache[player_id] = code - return player_id in self._code_cache + return self._code_cache.get(player_id) def _extract_signature_function(self, video_id, player_url, example_sig): player_id = self._extract_player_info(player_url) @@ -1743,8 +1743,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if cache_spec is not None: return lambda s: ''.join(s[i] for i in cache_spec) - if self._load_player(video_id, player_url): - code = self._code_cache[player_id] + code = self._load_player(video_id, player_url) + if code: res = self._parse_sig_js(code) test_string = ''.join(map(compat_chr, range(len(example_sig)))) @@ -1755,6 +1755,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return res def _print_sig_code(self, func, example_sig): + if not self.get_param('youtube_print_sig_code'): + return + def gen_sig_code(idxs): def _genslice(start, end, step): starts = '' if start == 0 else str(start) @@ -1831,13 +1834,58 @@ class YoutubeIE(YoutubeBaseInfoExtractor): ) self._player_cache[player_id] = func func = self._player_cache[player_id] - if self.get_param('youtube_print_sig_code'): - self._print_sig_code(func, s) + self._print_sig_code(func, s) return func(s) except Exception as e: - tb = traceback.format_exc() - raise ExtractorError( - 'Signature extraction failed: ' + tb, cause=e) + raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e) + + def _decrypt_nsig(self, s, video_id, player_url): + """Turn the encrypted n field into a working signature""" + if player_url is None: + raise ExtractorError('Cannot decrypt nsig without player_url') + if player_url.startswith('//'): + player_url = 'https:' + player_url + elif not re.match(r'https?://', player_url): + player_url = compat_urlparse.urljoin( + 'https://www.youtube.com', player_url) + + sig_id = ('nsig_value', s) + if sig_id in self._player_cache: + return self._player_cache[sig_id] + + try: + player_id = ('nsig', player_url) + if 
player_id not in self._player_cache: + self._player_cache[player_id] = self._extract_n_function(video_id, player_url) + func = self._player_cache[player_id] + self._player_cache[sig_id] = func(s) + self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}') + return self._player_cache[sig_id] + except Exception as e: + raise ExtractorError(traceback.format_exc(), cause=e) + + def _extract_n_function_name(self, jscode): + return self._search_regex( + (r'\.get\("n"\)\)&&\(b=(?P[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',), + jscode, 'Initial JS player n function name', group='nfunc') + + def _extract_n_function(self, video_id, player_url): + player_id = self._extract_player_info(player_url) + func_code = self._downloader.cache.load('youtube-nsig', player_id) + + if func_code: + jsi = JSInterpreter(func_code) + else: + jscode = self._load_player(video_id, player_url) + funcname = self._extract_n_function_name(jscode) + jsi = JSInterpreter(jscode) + func_code = jsi.extract_function_code(funcname) + self._downloader.cache.store('youtube-nsig', player_id, func_code) + + if self.get_param('youtube_print_sig_code'): + self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n') + + return lambda s: jsi.extract_function_from_code(*func_code)([s]) def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False): """ @@ -1856,9 +1904,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): raise ExtractorError(error_msg) self.report_warning(error_msg) return - if self._load_player(video_id, player_url, fatal=fatal): - player_id = self._extract_player_info(player_url) - code = self._code_cache[player_id] + code = self._load_player(video_id, player_url, fatal=fatal) + if code: sts = int_or_none(self._search_regex( r'(?:signatureTimestamp|sts)\s*:\s*(?P[0-9]{5})', code, 'JS player signature timestamp', group='sts', fatal=fatal)) @@ -2440,6 +2487,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sp = try_get(sc, lambda x: x['sp'][0]) or 'signature' 
fmt_url += '&' + sp + '=' + signature + query = parse_qs(fmt_url) + throttled = False + if query.get('ratebypass') != ['yes'] and query.get('n'): + try: + fmt_url = update_url_query(fmt_url, { + 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)}) + except ExtractorError as e: + self.report_warning(f'nsig extraction failed: You may experience throttling for some formats\n{e}', only_once=True) + throttled = True + if itag: itags.append(itag) stream_ids.append(stream_id) @@ -2453,7 +2510,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'format_note': ', '.join(filter(None, ( '%s%s' % (audio_track.get('displayName') or '', ' (default)' if audio_track.get('audioIsDefault') else ''), - fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))), + fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), + throttled and 'THROTTLED'))), + 'source_preference': -10 if not throttled else -1, 'fps': int_or_none(fmt.get('fps')), 'height': height, 'quality': q(quality), @@ -2645,12 +2704,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if reason: self.raise_no_formats(reason, expected=True) - for f in formats: - if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled - f['source_preference'] = -10 - # TODO: this method is not reliable - f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)' - # Source is given priority since formats that throttle are given lower source_preference # When throttling issue is fully fixed, remove this self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang')) diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 7bda59610..5c79a8110 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -1,5 +1,4 @@ -from __future__ import unicode_literals - +from collections.abc import MutableMapping import json import operator import re @@ -22,11 +21,54 @@ _OPERATORS = [ ('*', operator.mul), ] _ASSIGN_OPERATORS = [(op + '=', opfunc) for op, 
opfunc in _OPERATORS] -_ASSIGN_OPERATORS.append(('=', lambda cur, right: right)) +_ASSIGN_OPERATORS.append(('=', (lambda cur, right: right))) _NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*' +class JS_Break(ExtractorError): + def __init__(self): + ExtractorError.__init__(self, 'Invalid break') + + +class JS_Continue(ExtractorError): + def __init__(self): + ExtractorError.__init__(self, 'Invalid continue') + + +class LocalNameSpace(MutableMapping): + def __init__(self, *stack): + self.stack = tuple(stack) + + def __getitem__(self, key): + for scope in self.stack: + if key in scope: + return scope[key] + raise KeyError(key) + + def __setitem__(self, key, value): + for scope in self.stack: + if key in scope: + scope[key] = value + break + else: + self.stack[0][key] = value + return value + + def __delitem__(self, key): + raise NotImplementedError('Deleting is not supported') + + def __iter__(self): + for scope in self.stack: + yield from scope + + def __len__(self, key): + return len(iter(self)) + + def __repr__(self): + return f'LocalNameSpace{self.stack}' + + class JSInterpreter(object): def __init__(self, code, objects=None): if objects is None: @@ -34,11 +76,58 @@ class JSInterpreter(object): self.code = code self._functions = {} self._objects = objects + self.__named_object_counter = 0 + + def _named_object(self, namespace, obj): + self.__named_object_counter += 1 + name = f'__yt_dlp_jsinterp_obj{self.__named_object_counter}' + namespace[name] = obj + return name + + @staticmethod + def _seperate(expr, delim=',', max_split=None): + if not expr: + return + parens = {'(': 0, '{': 0, '[': 0, ']': 0, '}': 0, ')': 0} + start, splits, pos, max_pos = 0, 0, 0, len(delim) - 1 + for idx, char in enumerate(expr): + if char in parens: + parens[char] += 1 + is_in_parens = (parens['['] - parens[']'] + or parens['('] - parens[')'] + or parens['{'] - parens['}']) + if char == delim[pos] and not is_in_parens: + if pos == max_pos: + pos = 0 + yield expr[start: idx - max_pos] + start = idx + 
1 + splits += 1 + if max_split and splits >= max_split: + break + else: + pos += 1 + else: + pos = 0 + yield expr[start:] + + @staticmethod + def _seperate_at_paren(expr, delim): + seperated = list(JSInterpreter._seperate(expr, delim, 1)) + if len(seperated) < 2: + raise ExtractorError(f'No terminating paren {delim} in {expr}') + return seperated[0][1:].strip(), seperated[1].strip() def interpret_statement(self, stmt, local_vars, allow_recursion=100): if allow_recursion < 0: raise ExtractorError('Recursion limit reached') + sub_statements = list(self._seperate(stmt, ';')) + stmt = (sub_statements or ['']).pop() + for sub_stmt in sub_statements: + ret, should_abort = self.interpret_statement(sub_stmt, local_vars, allow_recursion - 1) + if should_abort: + return ret + should_abort = False stmt = stmt.lstrip() stmt_m = re.match(r'var\s', stmt) @@ -61,25 +150,118 @@ class JSInterpreter(object): if expr == '': # Empty expression return None + if expr.startswith('{'): + inner, outer = self._seperate_at_paren(expr, '}') + inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion - 1) + if not outer or should_abort: + return inner + else: + expr = json.dumps(inner) + outer + if expr.startswith('('): - parens_count = 0 - for m in re.finditer(r'[()]', expr): - if m.group(0) == '(': - parens_count += 1 + inner, outer = self._seperate_at_paren(expr, ')') + inner = self.interpret_expression(inner, local_vars, allow_recursion) + if not outer: + return inner + else: + expr = json.dumps(inner) + outer + + if expr.startswith('['): + inner, outer = self._seperate_at_paren(expr, ']') + name = self._named_object(local_vars, [ + self.interpret_expression(item, local_vars, allow_recursion) + for item in self._seperate(inner)]) + expr = name + outer + + m = re.match(r'try\s*', expr) + if m: + if expr[m.end()] == '{': + try_expr, expr = self._seperate_at_paren(expr[m.end():], '}') + else: + try_expr, expr = expr[m.end() - 1:], '' + ret, should_abort = 
self.interpret_statement(try_expr, local_vars, allow_recursion - 1) + if should_abort: + return ret + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + m = re.match(r'catch\s*\(', expr) + if m: + # We ignore the catch block + _, expr = self._seperate_at_paren(expr, '}') + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + m = re.match(r'for\s*\(', expr) + if m: + constructor, remaining = self._seperate_at_paren(expr[m.end() - 1:], ')') + if remaining.startswith('{'): + body, expr = self._seperate_at_paren(remaining, '}') + else: + m = re.match(r'switch\s*\(', remaining) # FIXME + if m: + switch_val, remaining = self._seperate_at_paren(remaining[m.end() - 1:], ')') + body, expr = self._seperate_at_paren(remaining, '}') + body = 'switch(%s){%s}' % (switch_val, body) else: - parens_count -= 1 - if parens_count == 0: - sub_expr = expr[1:m.start()] - sub_result = self.interpret_expression( - sub_expr, local_vars, allow_recursion) - remaining_expr = expr[m.end():].strip() - if not remaining_expr: - return sub_result - else: - expr = json.dumps(sub_result) + remaining_expr + body, expr = remaining, '' + start, cndn, increment = self._seperate(constructor, ';') + if self.interpret_statement(start, local_vars, allow_recursion - 1)[1]: + raise ExtractorError( + f'Premature return in the initialization of a for loop in {constructor!r}') + while True: + if not self.interpret_expression(cndn, local_vars, allow_recursion): + break + try: + ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion - 1) + if should_abort: + return ret + except JS_Break: + break + except JS_Continue: + pass + if self.interpret_statement(increment, local_vars, allow_recursion - 1)[1]: + raise ExtractorError( + f'Premature return in the initialization of a for loop in {constructor!r}') + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + m = re.match(r'switch\s*\(', expr) + if m: + switch_val, 
remaining = self._seperate_at_paren(expr[m.end() - 1:], ')') + switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion) + body, expr = self._seperate_at_paren(remaining, '}') + body, default = body.split('default:') if 'default:' in body else (body, None) + items = body.split('case ')[1:] + if default: + items.append(f'default:{default}') + matched = False + for item in items: + case, stmt = [i.strip() for i in self._seperate(item, ':', 1)] + matched = matched or case == 'default' or switch_val == self.interpret_expression(case, local_vars, allow_recursion) + if matched: + try: + ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1) + if should_abort: + return ret + except JS_Break: break - else: - raise ExtractorError('Premature end of parens in %r' % expr) + return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] + + # Comma seperated statements + sub_expressions = list(self._seperate(expr)) + expr = sub_expressions.pop().strip() if sub_expressions else '' + for sub_expr in sub_expressions: + self.interpret_expression(sub_expr, local_vars, allow_recursion) + + for m in re.finditer(rf'''(?x) + (?P\+\+|--)(?P{_NAME_RE})| + (?P{_NAME_RE})(?P\+\+|--)''', expr): + var = m.group('var1') or m.group('var2') + start, end = m.span() + sign = m.group('pre_sign') or m.group('post_sign') + ret = local_vars[var] + local_vars[var] += 1 if sign[0] == '+' else -1 + if m.group('pre_sign'): + ret = local_vars[var] + expr = expr[:start] + json.dumps(ret) + expr[end:] for op, opfunc in _ASSIGN_OPERATORS: m = re.match(r'''(?x) @@ -88,14 +270,13 @@ class JSInterpreter(object): (?P.*)$''' % (_NAME_RE, re.escape(op)), expr) if not m: continue - right_val = self.interpret_expression( - m.group('expr'), local_vars, allow_recursion - 1) + right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion) if m.groupdict().get('index'): lvar = local_vars[m.group('out')] - idx = self.interpret_expression( - 
m.group('index'), local_vars, allow_recursion) - assert isinstance(idx, int) + idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion) + if not isinstance(idx, int): + raise ExtractorError(f'List indices must be integers: {idx}') cur = lvar[idx] val = opfunc(cur, right_val) lvar[idx] = val @@ -109,8 +290,13 @@ class JSInterpreter(object): if expr.isdigit(): return int(expr) + if expr == 'break': + raise JS_Break() + elif expr == 'continue': + raise JS_Continue() + var_m = re.match( - r'(?!if|return|true|false)(?P%s)$' % _NAME_RE, + r'(?!if|return|true|false|null)(?P%s)$' % _NAME_RE, expr) if var_m: return local_vars[var_m.group('name')] @@ -124,91 +310,154 @@ class JSInterpreter(object): r'(?P%s)\[(?P.+)\]$' % _NAME_RE, expr) if m: val = local_vars[m.group('in')] - idx = self.interpret_expression( - m.group('idx'), local_vars, allow_recursion - 1) + idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion) return val[idx] + for op, opfunc in _OPERATORS: + seperated = list(self._seperate(expr, op)) + if len(seperated) < 2: + continue + right_val = seperated.pop() + left_val = op.join(seperated) + left_val, should_abort = self.interpret_statement( + left_val, local_vars, allow_recursion - 1) + if should_abort: + raise ExtractorError(f'Premature left-side return of {op} in {expr!r}') + right_val, should_abort = self.interpret_statement( + right_val, local_vars, allow_recursion - 1) + if should_abort: + raise ExtractorError(f'Premature right-side return of {op} in {expr!r}') + return opfunc(left_val or 0, right_val) + m = re.match( - r'(?P%s)(?:\.(?P[^(]+)|\[(?P[^]]+)\])\s*(?:\(+(?P[^()]*)\))?$' % _NAME_RE, + r'(?P%s)(?:\.(?P[^(]+)|\[(?P[^]]+)\])\s*' % _NAME_RE, expr) if m: variable = m.group('var') member = remove_quotes(m.group('member') or m.group('member2')) - arg_str = m.group('args') - - if variable in local_vars: - obj = local_vars[variable] - else: - if variable not in self._objects: - self._objects[variable] = 
self.extract_object(variable) - obj = self._objects[variable] - - if arg_str is None: - # Member access - if member == 'length': - return len(obj) - return obj[member] - - assert expr.endswith(')') - # Function call - if arg_str == '': - argvals = tuple() + arg_str = expr[m.end():] + if arg_str.startswith('('): + arg_str, remaining = self._seperate_at_paren(arg_str, ')') else: - argvals = tuple([ + arg_str, remaining = None, arg_str + + def assertion(cndn, msg): + """ assert, but without risk of getting optimized out """ + if not cndn: + raise ExtractorError(f'{member} {msg}: {expr}') + + def eval_method(): + nonlocal member + if variable == 'String': + obj = str + elif variable in local_vars: + obj = local_vars[variable] + else: + if variable not in self._objects: + self._objects[variable] = self.extract_object(variable) + obj = self._objects[variable] + + if arg_str is None: + # Member access + if member == 'length': + return len(obj) + return obj[member] + + # Function call + argvals = [ self.interpret_expression(v, local_vars, allow_recursion) - for v in arg_str.split(',')]) - - if member == 'split': - assert argvals == ('',) - return list(obj) - if member == 'join': - assert len(argvals) == 1 - return argvals[0].join(obj) - if member == 'reverse': - assert len(argvals) == 0 - obj.reverse() - return obj - if member == 'slice': - assert len(argvals) == 1 - return obj[argvals[0]:] - if member == 'splice': - assert isinstance(obj, list) - index, howMany = argvals - res = [] - for i in range(index, min(index + howMany, len(obj))): - res.append(obj.pop(index)) - return res - - return obj[member](argvals) - - for op, opfunc in _OPERATORS: - m = re.match(r'(?P.+?)%s(?P.+)' % re.escape(op), expr) - if not m: - continue - x, abort = self.interpret_statement( - m.group('x'), local_vars, allow_recursion - 1) - if abort: - raise ExtractorError( - 'Premature left-side return of %s in %r' % (op, expr)) - y, abort = self.interpret_statement( - m.group('y'), local_vars, 
allow_recursion - 1) - if abort: - raise ExtractorError( - 'Premature right-side return of %s in %r' % (op, expr)) - return opfunc(x, y) + for v in self._seperate(arg_str)] + + if obj == str: + if member == 'fromCharCode': + assertion(argvals, 'takes one or more arguments') + return ''.join(map(chr, argvals)) + raise ExtractorError(f'Unsupported string method {member}') + + if member == 'split': + assertion(argvals, 'takes one or more arguments') + assertion(argvals == [''], 'with arguments is not implemented') + return list(obj) + elif member == 'join': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(len(argvals) == 1, 'takes exactly one argument') + return argvals[0].join(obj) + elif member == 'reverse': + assertion(not argvals, 'does not take any arguments') + obj.reverse() + return obj + elif member == 'slice': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(len(argvals) == 1, 'takes exactly one argument') + return obj[argvals[0]:] + elif member == 'splice': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(argvals, 'takes one or more arguments') + index, howMany = (argvals + [len(obj)])[:2] + if index < 0: + index += len(obj) + add_items = argvals[2:] + res = [] + for i in range(index, min(index + howMany, len(obj))): + res.append(obj.pop(index)) + for i, item in enumerate(add_items): + obj.insert(index + i, item) + return res + elif member == 'unshift': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(argvals, 'takes one or more arguments') + for item in reversed(argvals): + obj.insert(0, item) + return obj + elif member == 'pop': + assertion(isinstance(obj, list), 'must be applied on a list') + assertion(not argvals, 'does not take any arguments') + if not obj: + return + return obj.pop() + elif member == 'push': + assertion(argvals, 'takes one or more arguments') + obj.extend(argvals) + return obj + elif member == 'forEach': + assertion(argvals, 
'takes one or more arguments') + assertion(len(argvals) <= 2, 'takes at-most 2 arguments') + f, this = (argvals + [''])[:2] + return [f((item, idx, obj), this=this) for idx, item in enumerate(obj)] + elif member == 'indexOf': + assertion(argvals, 'takes one or more arguments') + assertion(len(argvals) <= 2, 'takes at-most 2 arguments') + idx, start = (argvals + [0])[:2] + try: + return obj.index(idx, start) + except ValueError: + return -1 + + if isinstance(obj, list): + member = int(member) + return obj[member](argvals) + + if remaining: + return self.interpret_expression( + self._named_object(local_vars, eval_method()) + remaining, + local_vars, allow_recursion) + else: + return eval_method() - m = re.match( - r'^(?P%s)\((?P[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) + m = re.match(r'^(?P%s)\((?P[a-zA-Z0-9_$,]*)\)$' % _NAME_RE, expr) if m: fname = m.group('func') argvals = tuple([ int(v) if v.isdigit() else local_vars[v] - for v in m.group('args').split(',')]) if len(m.group('args')) > 0 else tuple() - if fname not in self._functions: + for v in self._seperate(m.group('args'))]) + if fname in local_vars: + return local_vars[fname](argvals) + elif fname not in self._functions: self._functions[fname] = self.extract_function(fname) return self._functions[fname](argvals) - raise ExtractorError('Unsupported JS expression %r' % expr) + if expr: + raise ExtractorError('Unsupported JS expression %r' % expr) def extract_object(self, objname): _FUNC_NAME_RE = r'''(?:[a-zA-Z$0-9]+|"[a-zA-Z$0-9]+"|'[a-zA-Z$0-9]+')''' @@ -233,30 +482,55 @@ class JSInterpreter(object): return obj - def extract_function(self, funcname): + def extract_function_code(self, funcname): + """ @returns argnames, code """ func_m = re.search( r'''(?x) (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s* \((?P[^)]*)\)\s* - \{(?P[^}]+)\}''' % ( + (?P\{(?:(?!};)[^"]|"([^"]|\\")*")+\})''' % ( re.escape(funcname), re.escape(funcname), re.escape(funcname)), self.code) + code, _ = 
self._seperate_at_paren(func_m.group('code'), '}') # refine the match if func_m is None: raise ExtractorError('Could not find JS function %r' % funcname) - argnames = func_m.group('args').split(',') + return func_m.group('args').split(','), code - return self.build_function(argnames, func_m.group('code')) + def extract_function(self, funcname): + return self.extract_function_from_code(*self.extract_function_code(funcname)) + + def extract_function_from_code(self, argnames, code, *global_stack): + local_vars = {} + while True: + mobj = re.search(r'function\((?P[^)]*)\)\s*{', code) + if mobj is None: + break + start, body_start = mobj.span() + body, remaining = self._seperate_at_paren(code[body_start - 1:], '}') + name = self._named_object( + local_vars, + self.extract_function_from_code( + [str.strip(x) for x in mobj.group('args').split(',')], + body, local_vars, *global_stack)) + code = code[:start] + name + remaining + return self.build_function(argnames, code, local_vars, *global_stack) def call_function(self, funcname, *args): - f = self.extract_function(funcname) - return f(args) - - def build_function(self, argnames, code): - def resf(args): - local_vars = dict(zip(argnames, args)) - for stmt in code.split(';'): - res, abort = self.interpret_statement(stmt, local_vars) - if abort: + return self.extract_function(funcname)(args) + + def build_function(self, argnames, code, *global_stack): + global_stack = list(global_stack) or [{}] + local_vars = global_stack.pop(0) + + def resf(args, **kwargs): + local_vars.update({ + **dict(zip(argnames, args)), + **kwargs + }) + var_stack = LocalNameSpace(local_vars, *global_stack) + for stmt in self._seperate(code.replace('\n', ''), ';'): + ret, should_abort = self.interpret_statement(stmt, var_stack) + if should_abort: break - return res + return ret return resf -- cgit v1.2.3 From 92592bd30588ae3797d7085a58c6189b774e3ae5 Mon Sep 17 00:00:00 2001 From: Marcel Date: Sun, 31 Oct 2021 05:49:03 +0100 Subject: [ceskatelevize] 
Fix extractor (#1489) Authored by: flashdagger --- yt_dlp/extractor/ceskatelevize.py | 122 ++++++++++++++++---------------------- yt_dlp/extractor/extractors.py | 5 +- 2 files changed, 51 insertions(+), 76 deletions(-) diff --git a/yt_dlp/extractor/ceskatelevize.py b/yt_dlp/extractor/ceskatelevize.py index 5e04d38a2..f766dfbb7 100644 --- a/yt_dlp/extractor/ceskatelevize.py +++ b/yt_dlp/extractor/ceskatelevize.py @@ -20,22 +20,8 @@ from ..utils import ( class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady)/(?:[^/?#&]+/)*(?P[^/#?]+)' _TESTS = [{ - 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', - 'info_dict': { - 'id': '61924494877246241', - 'ext': 'mp4', - 'title': 'Hyde Park Civilizace: Život v Grónsku', - 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 3350, - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 'info_dict': { 'id': '61924494877028507', @@ -66,12 +52,58 @@ class CeskaTelevizeIE(InfoExtractor): }, { 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', 'only_matching': True, + }, { + # video with 18+ caution trailer + 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', + 'info_dict': { + 'id': '215562210900007-bogotart', + 'title': 'Queer: Bogotart', + 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti. 
Připravil Peter Serge Butko', + }, + 'playlist': [{ + 'info_dict': { + 'id': '61924494877311053', + 'ext': 'mp4', + 'title': 'Queer: Bogotart (Varování 18+)', + 'duration': 11.9, + }, + }, { + 'info_dict': { + 'id': '61924494877068022', + 'ext': 'mp4', + 'title': 'Queer: Bogotart (Queer)', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 1558.3, + }, + }], + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + # iframe embed + 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', + 'only_matching': True, }] def _real_extract(self, url): playlist_id = self._match_id(url) - + parsed_url = compat_urllib_parse_urlparse(url) webpage = self._download_webpage(url, playlist_id) + site_name = self._og_search_property('site_name', webpage, fatal=False, default=None) + playlist_title = self._og_search_title(webpage, default=None) + if site_name and playlist_title: + playlist_title = playlist_title.replace(f' — {site_name}', '', 1) + playlist_description = self._og_search_description(webpage, default=None) + if playlist_description: + playlist_description = playlist_description.replace('\xa0', ' ') + + if parsed_url.path.startswith('/porady/'): + refer_url = update_url_query(unescapeHTML(self._search_regex( + (r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', + r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), + webpage, 'iframe player url', group='url')), query={'autoStart': 'true'}) + webpage = self._download_webpage(refer_url, playlist_id) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' if '%s

' % NOT_AVAILABLE_STRING in webpage: @@ -100,7 +132,7 @@ class CeskaTelevizeIE(InfoExtractor): data = { 'playlist[0][type]': type_, 'playlist[0][id]': episode_id, - 'requestUrl': compat_urllib_parse_urlparse(url).path, + 'requestUrl': parsed_url.path, 'requestSource': 'iVysilani', } @@ -108,7 +140,7 @@ class CeskaTelevizeIE(InfoExtractor): for user_agent in (None, USER_AGENTS['Safari']): req = sanitized_Request( - 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', + 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/', data=urlencode_postdata(data)) req.add_header('Content-type', 'application/x-www-form-urlencoded') @@ -130,9 +162,6 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) req.add_header('Referer', url) - playlist_title = self._og_search_title(webpage, default=None) - playlist_description = self._og_search_description(webpage, default=None) - playlist = self._download_json(req, playlist_id, fatal=False) if not playlist: continue @@ -237,54 +266,3 @@ class CeskaTelevizeIE(InfoExtractor): yield line return '\r\n'.join(_fix_subtitle(subtitles)) - - -class CeskaTelevizePoradyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P[^/#?]+)' - _TESTS = [{ - # video with 18+ caution trailer - 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', - 'info_dict': { - 'id': '215562210900007-bogotart', - 'title': 'Queer: Bogotart', - 'description': 'Alternativní průvodce současným queer světem', - }, - 'playlist': [{ - 'info_dict': { - 'id': '61924494876844842', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Varování 18+)', - 'duration': 10.2, - }, - }, { - 'info_dict': { - 'id': '61924494877068022', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Queer)', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 1558.3, - }, - }], - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # iframe embed 
- 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - data_url = update_url_query(unescapeHTML(self._search_regex( - (r']*\bdata-url=(["\'])(?P(?:(?!\1).)+)\1', - r']+\bsrc=(["\'])(?P(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), - webpage, 'iframe player url', group='url')), query={ - 'autoStart': 'true', - }) - - return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 9d963ee46..78952d268 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -235,10 +235,7 @@ from .ccc import ( from .ccma import CCMAIE from .cctv import CCTVIE from .cda import CDAIE -from .ceskatelevize import ( - CeskaTelevizeIE, - CeskaTelevizePoradyIE, -) +from .ceskatelevize import CeskaTelevizeIE from .cgtn import CGTNIE from .channel9 import Channel9IE from .charlierose import CharlieRoseIE -- cgit v1.2.3 From 8dcf65c92ec899a34cf57a02809520698f1d7b66 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 05:08:04 +0000 Subject: [Instagram] Add login to playlist (#1488) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 108 ++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 52 deletions(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 8c935c251..6ed20d9c6 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -1,3 +1,4 @@ +# coding: utf-8 from __future__ import unicode_literals import itertools @@ -25,9 +26,55 @@ from ..utils import ( ) -class InstagramIE(InfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P[^/?#&]+))' +class InstagramBaseIE(InfoExtractor): _NETRC_MACHINE = 
'instagram' + _IS_LOGGED_IN = False + + def _login(self): + username, password = self._get_login_info() + if username is None or self._IS_LOGGED_IN: + return + + login_webpage = self._download_webpage( + 'https://www.instagram.com/accounts/login/', None, + note='Downloading login webpage', errnote='Failed to download login webpage') + + shared_data = self._parse_json( + self._search_regex( + r'window\._sharedData\s*=\s*({.+?});', + login_webpage, 'shared data', default='{}'), + None) + + login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={ + 'Accept': '*/*', + 'X-IG-App-ID': '936619743392459', + 'X-ASBD-ID': '198387', + 'X-IG-WWW-Claim': '0', + 'X-Requested-With': 'XMLHttpRequest', + 'X-CSRFToken': shared_data['config']['csrf_token'], + 'X-Instagram-AJAX': shared_data['rollout_hash'], + 'Referer': 'https://www.instagram.com/', + }, data=urlencode_postdata({ + 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}', + 'username': username, + 'queryParams': '{}', + 'optIntoOneTap': 'false', + 'stopDeletionNonce': '', + 'trustedDeviceRecords': '{}', + })) + + if not login.get('authenticated'): + if login.get('message'): + raise ExtractorError(f'Unable to login: {login["message"]}') + raise ExtractorError('Unable to login') + InstagramBaseIE._IS_LOGGED_IN = True + + def _real_initialize(self): + self._login() + + +class InstagramIE(InstagramBaseIE): + _VALID_URL = r'(?Phttps?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P[^/?#&]+))' _TESTS = [{ 'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc', 'md5': '0d2da106a9d2631273e192b372806516', @@ -143,47 +190,6 @@ class InstagramIE(InfoExtractor): if mobj: return mobj.group('link') - def _login(self): - username, password = self._get_login_info() - if username is None: - return - - login_webpage = self._download_webpage( - 'https://www.instagram.com/accounts/login/', None, - note='Downloading login webpage', errnote='Failed to download login 
webpage') - - shared_data = self._parse_json( - self._search_regex( - r'window\._sharedData\s*=\s*({.+?});', - login_webpage, 'shared data', default='{}'), - None) - - login = self._download_json('https://www.instagram.com/accounts/login/ajax/', None, note='Logging in', headers={ - 'Accept': '*/*', - 'X-IG-App-ID': '936619743392459', - 'X-ASBD-ID': '198387', - 'X-IG-WWW-Claim': '0', - 'X-Requested-With': 'XMLHttpRequest', - 'X-CSRFToken': shared_data['config']['csrf_token'], - 'X-Instagram-AJAX': shared_data['rollout_hash'], - 'Referer': 'https://www.instagram.com/', - }, data=urlencode_postdata({ - 'enc_password': f'#PWD_INSTAGRAM_BROWSER:0:{int(time.time())}:{password}', - 'username': username, - 'queryParams': '{}', - 'optIntoOneTap': 'false', - 'stopDeletionNonce': '', - 'trustedDeviceRecords': '{}', - })) - - if not login.get('authenticated'): - if login.get('message'): - raise ExtractorError(f'Unable to login: {login["message"]}') - raise ExtractorError('Unable to login') - - def _real_initialize(self): - self._login() - def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') @@ -333,9 +339,7 @@ class InstagramIE(InfoExtractor): } -class InstagramPlaylistIE(InfoExtractor): - # A superclass for handling any kind of query based on GraphQL which - # results in a playlist. 
+class InstagramPlaylistBaseIE(InstagramBaseIE): _gis_tmpl = None # used to cache GIS request type @@ -462,11 +466,11 @@ class InstagramPlaylistIE(InfoExtractor): self._extract_graphql(data, url), user_or_tag, user_or_tag) -class InstagramUserIE(InstagramPlaylistIE): +class InstagramUserIE(InstagramPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?instagram\.com/(?P[^/]{2,})/?(?:$|[?#])' IE_DESC = 'Instagram user profile' IE_NAME = 'instagram:user' - _TEST = { + _TESTS = [{ 'url': 'https://instagram.com/porsche', 'info_dict': { 'id': 'porsche', @@ -478,7 +482,7 @@ class InstagramUserIE(InstagramPlaylistIE): 'skip_download': True, 'playlistend': 5, } - } + }] _QUERY_HASH = '42323d64886122307be10013ad2dcc44', @@ -496,11 +500,11 @@ class InstagramUserIE(InstagramPlaylistIE): } -class InstagramTagIE(InstagramPlaylistIE): +class InstagramTagIE(InstagramPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?instagram\.com/explore/tags/(?P[^/]+)' IE_DESC = 'Instagram hashtag search' IE_NAME = 'instagram:tag' - _TEST = { + _TESTS = [{ 'url': 'https://instagram.com/explore/tags/lolcats', 'info_dict': { 'id': 'lolcats', @@ -512,7 +516,7 @@ class InstagramTagIE(InstagramPlaylistIE): 'skip_download': True, 'playlistend': 50, } - } + }] _QUERY_HASH = 'f92f56d47dc7a55b606908374b43a314', -- cgit v1.2.3 From 2f9e021299a451b576ce67c43135393157531991 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sun, 31 Oct 2021 10:39:26 +0530 Subject: [PlanetMarathi] Add extractor (#1484) Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/planetmarathi.py | 76 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 yt_dlp/extractor/planetmarathi.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 78952d268..5fc18f7a0 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1072,6 +1072,7 @@ from .pinterest import ( 
PinterestCollectionIE, ) from .pladform import PladformIE +from .planetmarathi import PlanetMarathiIE from .platzi import ( PlatziIE, PlatziCourseIE, diff --git a/yt_dlp/extractor/planetmarathi.py b/yt_dlp/extractor/planetmarathi.py new file mode 100644 index 000000000..d1d9911f7 --- /dev/null +++ b/yt_dlp/extractor/planetmarathi.py @@ -0,0 +1,76 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + try_get, + unified_strdate, +) + + +class PlanetMarathiIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?planetmarathi\.com/titles/(?P[^/#&?$]+)' + _TESTS = [{ + 'url': 'https://www.planetmarathi.com/titles/ek-unad-divas', + 'playlist_mincount': 2, + 'info_dict': { + 'id': 'ek-unad-divas', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'ASSETS-MOVIE-ASSET-01_ek-unad-divas', + 'ext': 'mp4', + 'title': 'ek unad divas', + 'alt_title': 'चित्रपट', + 'description': 'md5:41c7ed6b041c2fea9820a3f3125bd881', + 'season_number': None, + 'episode_number': 1, + 'duration': 5539, + 'upload_date': '20210829', + }, + }] # Trailer skipped + }, { + 'url': 'https://www.planetmarathi.com/titles/baap-beep-baap-season-1', + 'playlist_mincount': 10, + 'info_dict': { + 'id': 'baap-beep-baap-season-1', + }, + 'playlist': [{ + 'info_dict': { + 'id': 'ASSETS-CHARACTER-PROFILE-SEASON-01-ASSET-01_baap-beep-baap-season-1', + 'ext': 'mp4', + 'title': 'Manohar Kanhere', + 'alt_title': 'मनोहर कान्हेरे', + 'description': 'md5:285ed45d5c0ab5522cac9a043354ebc6', + 'season_number': 1, + 'episode_number': 1, + 'duration': 29, + 'upload_date': '20210829', + }, + }] # Trailers, Episodes, other Character profiles skipped + }] + + def _real_extract(self, url): + id = self._match_id(url) + entries = [] + json_data = self._download_json(f'https://www.planetmarathi.com/api/v1/titles/{id}/assets', id)['assets'] + for asset in json_data: + asset_title = asset['mediaAssetName']['en'] + if asset_title == 'Movie': + asset_title = 
id.replace('-', ' ') + asset_id = f'{asset["sk"]}_{id}'.replace('#', '-') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id) + self._sort_formats(formats) + entries.append({ + 'id': asset_id, + 'title': asset_title, + 'alt_title': try_get(asset, lambda x: x['mediaAssetName']['mr']), + 'description': try_get(asset, lambda x: x['mediaAssetDescription']['en']), + 'season_number': asset.get('mediaAssetSeason'), + 'episode_number': asset.get('mediaAssetIndexForAssetType'), + 'duration': asset.get('mediaAssetDurationInSeconds'), + 'upload_date': unified_strdate(asset.get('created')), + 'formats': formats, + 'subtitles': subtitles, + }) + return self.playlist_result(entries, playlist_id=id) -- cgit v1.2.3 From b2f25dc242616bd9eae6d5dbbe7ff56280e7d396 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 05:10:42 +0000 Subject: [Olympics] Fix extractor (#1483) Authored by: u-spec-png --- yt_dlp/extractor/olympics.py | 73 ++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/yt_dlp/extractor/olympics.py b/yt_dlp/extractor/olympics.py index 0bc9206ed..bca1f1928 100644 --- a/yt_dlp/extractor/olympics.py +++ b/yt_dlp/extractor/olympics.py @@ -2,22 +2,27 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..utils import unified_strdate +from ..utils import ( + int_or_none, + try_get +) class OlympicsReplayIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?olympics\.com/tokyo-2020/(?:[a-z]{2}/)?replay/(?P[^/#&?]+)' + _VALID_URL = r'https?://(?:www\.)?olympics\.com(?:/tokyo-2020)?/[a-z]{2}/(?:replay|video)/(?P[^/#&?]+)' _TESTS = [{ - 'url': 'https://olympics.com/tokyo-2020/en/replay/300622eb-abc0-43ea-b03b-c5f2d429ec7b/jumping-team-qualifier', + 'url': 'https://olympics.com/fr/video/men-s-109kg-group-a-weightlifting-tokyo-2020-replays', 'info_dict': { - 'id': 
'300622eb-abc0-43ea-b03b-c5f2d429ec7b', + 'id': 'f6a0753c-8e6f-4b7d-a435-027054a4f8e9', 'ext': 'mp4', - 'title': 'Jumping Team Qualifier', - 'release_date': '20210806', - 'upload_date': '20210713', + 'title': '+109kg (H) Groupe A - Haltérophilie | Replay de Tokyo 2020', + 'upload_date': '20210801', + 'timestamp': 1627783200, + 'description': 'md5:c66af4a5bc7429dbcc43d15845ff03b3', }, 'params': { - 'format': 'bv', + 'format': 'bestvideo', + 'skip_download': True, }, }, { 'url': 'https://olympics.com/tokyo-2020/en/replay/bd242924-4b22-49a5-a846-f1d4c809250d/mens-bronze-medal-match-hun-esp', @@ -26,31 +31,41 @@ class OlympicsReplayIE(InfoExtractor): def _real_extract(self, url): id = self._match_id(url) - # The parameters are hardcoded in the webpage, it's not necessary to download the webpage just for these parameters. - # If in downloading webpage serves other functions aswell, then extract these parameters from it. - token_url = 'https://appovptok.ovpobs.tv/api/identity/app/token?api_key=OTk5NDcxOjpvY3N3LWFwaXVzZXI%3D&api_secret=ODY4ODM2MjE3ODMwYmVjNTAxMWZlMDJiMTYxZmY0MjFiMjMwMjllMjJmNDA1YWRiYzA5ODcxYTZjZTljZDkxOTo6NTM2NWIzNjRlMTM1ZmI2YWNjNmYzMGMzOGM3NzZhZTY%3D' - token = self._download_webpage(token_url, id) - headers = {'x-obs-app-token': token} - data_json = self._download_json(f'https://appocswtok.ovpobs.tv/api/schedule-sessions/{id}?include=stream', - id, headers=headers) - meta_data = data_json['data']['attributes'] - for t_dict in data_json['included']: - if t_dict.get('type') == 'Stream': - stream_data = t_dict['attributes'] + + webpage = self._download_webpage(url, id) + title = self._html_search_meta(('title', 'og:title', 'twitter:title'), webpage) + uuid = self._html_search_meta('episode_uid', webpage) + m3u8_url = self._html_search_meta('video_url', webpage) + json_ld = self._search_json_ld(webpage, uuid) + thumbnails_list = json_ld.get('image') + if not thumbnails_list: + thumbnails_list = self._html_search_regex( + 
r'["\']image["\']:\s*["\']([^"\']+)["\']', webpage, 'images', default='') + thumbnails_list = thumbnails_list.replace('[', '').replace(']', '').split(',') + thumbnails_list = [thumbnail.strip() for thumbnail in thumbnails_list] + thumbnails = [] + for thumbnail in thumbnails_list: + width_a, height_a, width = self._search_regex( + r'/images/image/private/t_(?P\d+)-(?P\d+)_(?P\d+)/primary/[\W\w\d]+', + thumbnail, 'thumb', group=(1, 2, 3), default=(None, None, None)) + width_a, height_a, width = int_or_none(width_a), int_or_none(height_a), int_or_none(width) + thumbnails.append({ + 'url': thumbnail, + 'width': width, + 'height': int_or_none(try_get(width, lambda x: x * height_a / width_a)) + }) m3u8_url = self._download_json( - 'https://meteringtok.ovpobs.tv/api/playback-sessions', id, headers=headers, query={ - 'alias': stream_data['alias'], - 'stream': stream_data['stream'], - 'type': 'vod' - })['data']['attributes']['url'] - formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) + f'https://olympics.com/tokenGenerator?url={m3u8_url}', uuid, note='Downloading m3u8 url') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, uuid, m3u8_id='hls') self._sort_formats(formats) return { - 'id': id, - 'title': meta_data['title'], - 'release_date': unified_strdate(meta_data.get('start') or meta_data.get('broadcastPublished')), - 'upload_date': unified_strdate(meta_data.get('publishedAt')), + 'id': uuid, + 'title': title, + 'timestamp': json_ld.get('timestamp'), + 'description': json_ld.get('description'), + 'thumbnails': thumbnails, + 'duration': json_ld.get('duration'), 'formats': formats, 'subtitles': subtitles, } -- cgit v1.2.3 From 5b6cb5620797e745a113cfb8118ea7def1484784 Mon Sep 17 00:00:00 2001 From: kaz-us <32769754+kaz-us@users.noreply.github.com> Date: Sun, 31 Oct 2021 09:13:49 +0400 Subject: [vk] Add subtitles (#1480) Authored by: kaz-us --- yt_dlp/extractor/vk.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index d8a9b9ab4..a8a980de6 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -471,6 +471,13 @@ class VKIE(VKBaseIE): }) self._sort_formats(formats) + subtitles = {} + for sub in data.get('subs') or {}: + subtitles.setdefault(sub.get('lang', 'en'), []).append({ + 'ext': sub.get('title', '.srt').split('.')[-1], + 'url': url_or_none(sub.get('url')), + }) + return { 'id': video_id, 'formats': formats, @@ -484,6 +491,7 @@ class VKIE(VKBaseIE): 'like_count': int_or_none(mv_data.get('likes')), 'comment_count': int_or_none(mv_data.get('commcount')), 'is_live': is_live, + 'subtitles': subtitles, } -- cgit v1.2.3 From da4832007574a60b397dff11f26cc20cace685de Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 2021 13:08:03 +0530 Subject: [linkedin] Don't login multiple times --- yt_dlp/extractor/linkedin.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index 3ce906e2f..c2d347efd 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -19,6 +19,7 @@ from ..utils import ( class LinkedInLearningBaseIE(InfoExtractor): _NETRC_MACHINE = 'linkedin' _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning' + _logged_in = False def _call_api(self, course_slug, fields, video_slug=None, resolution=None): query = { @@ -34,6 +35,8 @@ class LinkedInLearningBaseIE(InfoExtractor): }) sub = ' %dp' % resolution api_url = 'https://www.linkedin.com/learning-api/detailedCourses' + if not self._get_cookies(api_url).get('JSESSIONID'): + self.raise_login_required() return self._download_json( api_url, video_slug, 'Downloading%s JSON metadata' % sub, headers={ 'Csrf-Token': self._get_cookies(api_url)['JSESSIONID'].value, @@ -50,6 +53,8 @@ class LinkedInLearningBaseIE(InfoExtractor): return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug) def _real_initialize(self): + if self._logged_in: + return email, 
password = self._get_login_info() if email is None: return @@ -72,6 +77,7 @@ class LinkedInLearningBaseIE(InfoExtractor): login_submit_page, 'error', default=None) if error: raise ExtractorError(error, expected=True) + LinkedInLearningBaseIE._logged_in = True class LinkedInLearningIE(LinkedInLearningBaseIE): -- cgit v1.2.3 From a0bb6ce58db5b3124962037ca12e78cbd348f56c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 2021 13:26:44 +0530 Subject: [youtube] refactor itag processing --- yt_dlp/extractor/youtube.py | 56 +++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 56cd2ed8d..64475edec 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2434,7 +2434,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return prs, player_url def _extract_formats(self, streaming_data, video_id, player_url, is_live): - itags, stream_ids = [], [] + itags, stream_ids = {}, [] itag_qualities, res_qualities = {}, {} q = qualities([ # Normally tiny is the smallest video-only formats. 
But @@ -2498,7 +2498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): throttled = True if itag: - itags.append(itag) + itags[itag] = 'https' stream_ids.append(stream_id) tbr = float_or_none( @@ -2548,46 +2548,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor): and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)) get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True) - def guess_quality(f): - for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)): - if val in qdict: - return q(qdict[val]) - return -1 + def process_manifest_format(f, proto, itag): + if itag in itags: + if itags[itag] == proto or f'{itag}-{proto}' in itags: + return False + itag = f'{itag}-{proto}' + if itag: + f['format_id'] = itag + itags[itag] = proto + + f['quality'] = next(( + q(qdict[val]) + for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)) + if val in qdict), -1) + return True for sd in streaming_data: hls_manifest_url = get_hls and sd.get('hlsManifestUrl') if hls_manifest_url: for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False): - itag = self._search_regex( - r'/itag/(\d+)', f['url'], 'itag', default=None) - if itag in itags: - itag += '-hls' - if itag in itags: - continue - if itag: - f['format_id'] = itag - itags.append(itag) - f['quality'] = guess_quality(f) - yield f + if process_manifest_format(f, 'hls', self._search_regex( + r'/itag/(\d+)', f['url'], 'itag', default=None)): + yield f dash_manifest_url = get_dash and sd.get('dashManifestUrl') if dash_manifest_url: for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False): - itag = f['format_id'] - if itag in itags: - itag += '-dash' - if itag in itags: - continue - if itag: - f['format_id'] = itag - itags.append(itag) - f['quality'] = guess_quality(f) - filesize = int_or_none(self._search_regex( - r'/clen/(\d+)', f.get('fragment_base_url') 
- or f['url'], 'file size', default=None)) - if filesize: - f['filesize'] = filesize - yield f + if process_manifest_format(f, 'dash', f['format_id']): + f['filesize'] = int_or_none(self._search_regex( + r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None)) + yield f def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) -- cgit v1.2.3 From 0930b11fdaff2141ad951a8ed6d90417bfde7059 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 2021 14:45:59 +0530 Subject: [docs,cleanup] Improve docs and minor cleanup Closes #1387, #1404, #1408, #1485, #1415, #1450, #1492 --- .github/workflows/build.yml | 6 +-- CONTRIBUTING.md | 2 +- README.md | 116 ++++++++++++++++++++++++++---------------- yt_dlp/YoutubeDL.py | 4 +- yt_dlp/__init__.py | 1 + yt_dlp/cookies.py | 4 +- yt_dlp/extractor/common.py | 4 +- yt_dlp/extractor/telemundo.py | 2 +- yt_dlp/extractor/tiktok.py | 8 +-- yt_dlp/options.py | 12 ++--- 10 files changed, 93 insertions(+), 66 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3329c141f..0fff6cae3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -115,12 +115,12 @@ jobs: release_name: yt-dlp ${{ steps.bump_version.outputs.ytdlp_version }} commitish: ${{ steps.push_update.outputs.head_sha }} body: | - ### Changelog: - ${{ env.changelog }} + #### [A description of the various files]((https://github.com/yt-dlp/yt-dlp#release-files)) are in the README --- - ### See [this](https://github.com/yt-dlp/yt-dlp#release-files) for a description of the release files + ### Changelog: + ${{ env.changelog }} draft: false prerelease: false - name: Upload yt-dlp Unix binary diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index fb539ec0d..249000490 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -148,7 +148,7 @@ If you want to create a build of yt-dlp yourself, you can follow the instruction Before you start writing code for implementing a new feature, open an 
issue explaining your feature request and atleast one use case. This allows the maintainers to decide whether such a feature is desired for the project in the first place, and will provide an avenue to discuss some implementation details. If you open a pull request for a new feature without discussing with us first, do not be surprised when we ask for large changes to the code, or even reject it outright. -The same applies for overarching changes to the architecture, documentation or code style +The same applies for changes to the documentation, code style, or overarching changes to the architecture ## Adding support for a new site diff --git a/README.md b/README.md index e2fbbf2ae..31bfca6a8 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,6 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Opening an Issue](CONTRIBUTING.md#opening-an-issue) * [Developer Instructions](CONTRIBUTING.md#developer-instructions) * [MORE](#more) - # NEW FEATURES @@ -123,7 +122,7 @@ If you are coming from [youtube-dl](https://github.com/ytdl-org/youtube-dl), the ### Differences in default behavior -Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc. +Some of yt-dlp's default options are different from that of youtube-dl and youtube-dlc: * The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`), no longer work. See [removed options](#Removed) for details * `avconv` is not supported as as an alternative to `ffmpeg` @@ -143,7 +142,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this * Thumbnail embedding in `mp4` is done with mutagen if possible. 
Use `--compat-options embed-thumbnail-atomicparsley` to force the use of AtomicParsley instead * Some private fields such as filenames are removed by default from the infojson. Use `--no-clean-infojson` or `--compat-options no-clean-infojson` to revert this -* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the seperate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this. +* When `--embed-subs` and `--write-subs` are used together, the subtitles are written to disk and also embedded in the media file. You can use just `--embed-subs` to embed the subs and automatically delete the seperate file. See [#630 (comment)](https://github.com/yt-dlp/yt-dlp/issues/630#issuecomment-893659460) for more info. `--compat-options no-keep-subs` can be used to revert this For ease of use, a few more compat options are available: * `--compat-options all`: Use all compat options @@ -152,17 +151,14 @@ For ease of use, a few more compat options are available: # INSTALLATION -yt-dlp is not platform specific. 
So it should work on your Unix box, on Windows or on macOS You can install yt-dlp using one of the following methods: -* Download [the binary](#release-files) from the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) -* With Homebrew, `brew install yt-dlp/taps/yt-dlp` -* Use [PyPI package](https://pypi.org/project/yt-dlp): `python3 -m pip install --upgrade yt-dlp` -* Install master branch: `python3 -m pip3 install -U https://github.com/yt-dlp/yt-dlp/archive/master.zip` -Note that on some systems, you may need to use `py` or `python` instead of `python3` +#### Using the release binary + +You can simply download the [correct binary file](#release-files) for your OS: **[[Windows](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)] [[UNIX-like](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)]** -UNIX users (Linux, macOS, BSD) can also install the [latest release](https://github.com/yt-dlp/yt-dlp/releases/latest) one of the following ways: +In UNIX-like OSes (MacOS, Linux, BSD), you can also install the same in one of the following ways: ``` sudo curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp @@ -179,16 +175,41 @@ sudo aria2c https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o sudo chmod a+rx /usr/local/bin/yt-dlp ``` -macOS or Linux users that are using Homebrew (formerly known as Linuxbrew for Linux users) can also install it by: +PS: The manpages, shell completion files etc. are available in [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) + +#### With [PIP](https://pypi.org/project/pip) + +You can install the [PyPI package](https://pypi.org/project/yt-dlp) with: +``` +python3 -m pip install -U yt-dlp +``` + +On some systems (like Termux), it is not possible to install pycryptodomex. 
In that case, install without dependancies: +``` +python3 -m pip install --no-deps -U yt-dlp +``` + +You can also install the master branch with: +``` +python3 -m pip3 install -U https://github.com/yt-dlp/yt-dlp/archive/master.zip +``` + +Note that on some systems, you may need to use `py` or `python` instead of `python3` + +#### With [Homebrew](https://brew.sh) + +macOS or Linux users that are using Homebrew can also install it by: ``` brew install yt-dlp/taps/yt-dlp ``` ### UPDATE -You can use `yt-dlp -U` to update if you are using the provided release. -If you are using `pip`, simply re-run the same command that was used to install the program. -If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` +You can use `yt-dlp -U` to update if you are [using the provided release](#using-the-release-binary) + +If you [installed with pip](#with-pip), simply re-run the same command that was used to install the program + +If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp` ### RELEASE FILES @@ -196,18 +217,18 @@ If you have installed using Homebrew, run `brew upgrade yt-dlp/taps/yt-dlp` File|Description :---|:--- -[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform independant binary. Needs Python (Recommended for **UNIX-like systems**) -[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (Recommended for **Windows**) +[yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independant binary. 
Needs Python (recommended for **UNIX-like systems**) +[yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**) #### Alternatives File|Description :---|:--- [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|MacOS (10.15+) standalone executable -[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32bit) binary +[yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Vista SP2+) standalone x86 (32-bit) binary [yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`.
Does not contain `pycryptodomex`, needs VC++14 -[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged windows executable (No auto-update) -[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (No auto-update) +[yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) +[yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update) #### Misc @@ -227,20 +248,20 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https: While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html) -* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) -* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) -* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) -* [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licenced under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE) -* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. 
Licenced under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) -* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](http://rtmpdump.mplayerhq.hu) -* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) -* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licenced under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) -* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licenced under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) +* [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) +* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome) - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) +* [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licensed under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) +* [**keyring**](https://github.com/jaraco/keyring) - For decrypting cookies of chromium-based browsers on Linux. Licensed under [MIT](https://github.com/jaraco/keyring/blob/main/LICENSE) +* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For embedding thumbnail in mp4/m4a if mutagen is not present. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING) +* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. 
Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu) +* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) +* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) +* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) * Any external downloader that you want to use with `--downloader` To use or redistribute the dependencies, you must agree to their respective licensing terms. -The windows releases are already built with the python interpreter, mutagen, pycryptodomex and websockets included. +The Windows and MacOS standalone release binaries are already built with the python interpreter, mutagen, pycryptodomex and websockets included. **Note**: There are some regressions in newer ffmpeg versions that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependancy, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds/wiki/Latest#latest-autobuilds) with patches for these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specifc issues solved by these builds @@ -276,7 +297,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t sure that you have sufficient permissions (run with sudo if needed) -i, --ignore-errors Ignore download and postprocessing errors. 
- The download will be considered successfull + The download will be considered successful even if the postprocessing fails --no-abort-on-error Continue with next video on download errors; e.g. to skip unavailable videos in @@ -366,7 +387,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t SIZE (e.g. 50k or 44.6m) --max-filesize SIZE Do not download any videos larger than SIZE (e.g. 50k or 44.6m) - --date DATE Download only videos uploaded in this date. + --date DATE Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format "(now|today)[+-][0-9](day|week|month|year)(s)?" --datebefore DATE Download only videos uploaded on or before @@ -510,9 +531,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t filenames --no-restrict-filenames Allow Unicode characters, "&" and spaces in filenames (default) - --windows-filenames Force filenames to be windows compatible - --no-windows-filenames Make filenames windows compatible only if - using windows (default) + --windows-filenames Force filenames to be Windows-compatible + --no-windows-filenames Make filenames Windows-compatible only if + using Windows (default) --trim-filenames LENGTH Limit the filename length (excluding extension) to the specified number of characters @@ -608,9 +629,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t anything to disk --no-simulate Download the video even if printing/listing options are used - --ignore-no-formats-error Ignore "No video formats" error. Usefull - for extracting metadata even if the videos - are not actually available for download + --ignore-no-formats-error Ignore "No video formats" error. Useful for + extracting metadata even if the videos are + not actually available for download (experimental) --no-ignore-no-formats-error Throw error when no downloadable video formats are found (default) @@ -644,7 +665,7 @@ Then simply run `make`. 
You can also run `make yt-dlp` instead to compile only t "postprocess:", or "postprocess-title:". The video's fields are accessible under the "info" key and the progress attributes are - accessible under "progress" key. Eg: + accessible under "progress" key. E.g.: --console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s" -v, --verbose Print various debugging information @@ -657,7 +678,7 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t ## Workarounds: --encoding ENCODING Force the specified encoding (experimental) - --no-check-certificate Suppress HTTPS certificate validation + --no-check-certificates Suppress HTTPS certificate validation --prefer-insecure Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube) @@ -706,10 +727,12 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t containers irrespective of quality --no-prefer-free-formats Don't give any special preference to free containers (default) - --check-formats Check that the formats selected are + --check-formats Check that the selected formats are actually downloadable - --no-check-formats Do not check that the formats selected are + --check-all-formats Check all formats for whether they are actually downloadable + --no-check-formats Do not check that the formats are actually + downloadable -F, --list-formats List available formats of each video. Simulate unless --no-simulate is used --merge-output-format FORMAT If a merge is required (e.g. @@ -1018,7 +1041,7 @@ The `-o` option is used to indicate a template for the output file names while ` The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding file extension like this is _not_ recommended and could break some post-processing). 
-It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. +It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields @@ -1159,7 +1182,7 @@ Each aforementioned sequence when referenced in an output template will be repla Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). 
-**Tip**: Look at the `-j` output to identify which fields are available for the purticular URL +**Tip**: Look at the `-j` output to identify which fields are available for the particular URL For numeric sequences you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`. @@ -1303,7 +1326,7 @@ The available fields are: - `vext`: Video Extension (`mp4` > `webm` > `flv` > other > unknown). If `--prefer-free-formats` is used, `webm` is prefered. - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other > unknown). If `--prefer-free-formats` is used, the order changes to `opus` > `ogg` > `webm` > `m4a` > `mp3` > `aac`. - `ext`: Equivalent to `vext,aext` - - `filesize`: Exact filesize, if know in advance. This will be unavailable for mu38 and DASH formats. + - `filesize`: Exact filesize, if known in advance - `fs_approx`: Approximate filesize calculated from the manifests - `size`: Exact filesize if available, otherwise approximate filesize - `height`: Height of video @@ -1506,6 +1529,9 @@ $ yt-dlp --parse-metadata '%(series)s S%(season_number)02dE%(episode_number)02d: # Set "comment" field in video metadata using description instead of webpage_url $ yt-dlp --parse-metadata 'description:(?s)(?P<meta_comment>.+)' --add-metadata +# Remove "formats" field from the infojson by setting it to an empty string +$ yt-dlp --parse-metadata ':(?P<formats>)' -j + # Replace all spaces and "_" in title and uploader with a `-` $ yt-dlp --replace-in-metadata 'title,uploader' '[ _]' '-' @@ -1513,7 +1539,7 @@ $ yt-dlp --replace-in-metadata 'title,uploader' '[ _]' '-' # EXTRACTOR ARGUMENTS -Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) seperated string of `ARG=VAL1,VAL2`.
Eg: `--extractor-args "youtube:player_client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"` +Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. Eg: `--extractor-args "youtube:player_client=android_agegate,web;include_live_dash" --extractor-args "funimation:version=uncut"` The following extractors use this feature: diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2c2b17b20..4a9f4775b 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -214,8 +214,8 @@ class YoutubeDL(object): ignore_no_formats_error: Ignore "No video formats" error. Usefull for extracting metadata even if the video is not actually available for download (experimental) - format_sort: How to sort the video formats. see "Sorting Formats" - for more details. + format_sort: A list of fields by which to sort the video formats. + See "Sorting Formats" for more details. format_sort_force: Force the given format_sort. see "Sorting Formats" for more details. 
allow_multiple_video_streams: Allow multiple video streams to be merged diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 5c3d33df0..84628bf45 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -225,6 +225,7 @@ def _real_main(argv=None): if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart: raise ValueError('Playlist end must be greater than playlist start') if opts.extractaudio: + opts.audioformat = opts.audioformat.lower() if opts.audioformat not in ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS): parser.error('invalid audio format specified') if opts.audioquality: diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index c9ae9b6db..ec68a809d 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -117,7 +117,7 @@ def _extract_firefox_cookies(profile, logger): raise FileNotFoundError('could not find firefox cookies database in {}'.format(search_root)) logger.debug('Extracting cookies from: "{}"'.format(cookie_database_path)) - with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir: + with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) @@ -236,7 +236,7 @@ def _extract_chrome_cookies(browser_name, profile, logger): decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger) - with tempfile.TemporaryDirectory(prefix='youtube_dl') as tmpdir: + with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index aa98c0cc9..2bbe23699 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -441,11 +441,11 @@ class InfoExtractor(object): _WORKING = True _LOGIN_HINTS = { - 'any': 'Use --cookies, --username and --password or --netrc to provide account credentials', + 'any': 'Use --cookies, --username and --password, or --netrc to provide 
account credentials', 'cookies': ( 'Use --cookies-from-browser or --cookies for the authentication. ' 'See https://github.com/ytdl-org/youtube-dl#how-do-i-pass-cookies-to-youtube-dl for how to manually pass cookies'), - 'password': 'Use --username and --password or --netrc to provide account credentials', + 'password': 'Use --username and --password, or --netrc to provide account credentials', } def __init__(self, downloader=None): diff --git a/yt_dlp/extractor/telemundo.py b/yt_dlp/extractor/telemundo.py index 18552a0ef..e326bbdd5 100644 --- a/yt_dlp/extractor/telemundo.py +++ b/yt_dlp/extractor/telemundo.py @@ -1,4 +1,4 @@ -# coding=utf-8 +# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index 1db6327e2..859951637 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -38,8 +38,8 @@ class TikTokBaseIE(InfoExtractor): 'build_number': self._APP_VERSION, 'manifest_version_code': self._MANIFEST_APP_VERSION, 'update_version_code': self._MANIFEST_APP_VERSION, - 'openudid': ''.join(random.choice('0123456789abcdef') for i in range(16)), - 'uuid': ''.join([random.choice(string.digits) for num in range(16)]), + 'openudid': ''.join(random.choice('0123456789abcdef') for _ in range(16)), + 'uuid': ''.join([random.choice(string.digits) for _ in range(16)]), '_rticket': int(time.time() * 1000), 'ts': int(time.time()), 'device_brand': 'Google', @@ -66,7 +66,7 @@ class TikTokBaseIE(InfoExtractor): 'as': 'a1qwert123', 'cp': 'cbfhckdckkde1', } - self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for i in range(160))) + self._set_cookie(self._API_HOSTNAME, 'odin_tt', ''.join(random.choice('0123456789abcdef') for _ in range(160))) return self._download_json( 'https://%s/aweme/v1/%s/' % (self._API_HOSTNAME, ep), video_id=video_id, fatal=fatal, note=note, errnote=errnote, headers={ @@ -416,7 +416,7 @@ class 
TikTokUserIE(TikTokBaseIE): 'max_cursor': 0, 'min_cursor': 0, 'retry_type': 'no_retry', - 'device_id': ''.join(random.choice(string.digits) for i in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. + 'device_id': ''.join(random.choice(string.digits) for _ in range(19)), # Some endpoints don't like randomized device_id, so it isn't directly set in _call_api. } max_retries = self.get_param('extractor_retries', 3) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 5499ab13e..a3a6c74b3 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -209,7 +209,7 @@ def parseOpts(overrideArguments=None): general.add_option( '-i', '--ignore-errors', action='store_true', dest='ignoreerrors', - help='Ignore download and postprocessing errors. The download will be considered successfull even if the postprocessing fails') + help='Ignore download and postprocessing errors. The download will be considered successful even if the postprocessing fails') general.add_option( '--no-abort-on-error', action='store_const', dest='ignoreerrors', const='only_download', @@ -383,7 +383,7 @@ def parseOpts(overrideArguments=None): '--date', metavar='DATE', dest='date', default=None, help=( - 'Download only videos uploaded in this date. ' + 'Download only videos uploaded on this date. ' 'The date can be "YYYYMMDD" or in the format ' '"(now|today)[+-][0-9](day|week|month|year)(s)?"')) selection.add_option( @@ -840,7 +840,7 @@ def parseOpts(overrideArguments=None): '--ignore-no-formats-error', action='store_true', dest='ignore_no_formats_error', default=False, help=( - 'Ignore "No video formats" error. Usefull for extracting metadata ' + 'Ignore "No video formats" error. 
Useful for extracting metadata ' 'even if the videos are not actually available for download (experimental)')) verbosity.add_option( '--no-ignore-no-formats-error', @@ -935,7 +935,7 @@ def parseOpts(overrideArguments=None): 'Template for progress outputs, optionally prefixed with one of "download:" (default), ' '"download-title:" (the console title), "postprocess:", or "postprocess-title:". ' 'The video\'s fields are accessible under the "info" key and ' - 'the progress attributes are accessible under "progress" key. Eg: ' + 'the progress attributes are accessible under "progress" key. E.g.: ' # TODO: Document the fields inside "progress" '--console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"')) verbosity.add_option( @@ -1028,11 +1028,11 @@ def parseOpts(overrideArguments=None): filesystem.add_option( '--windows-filenames', action='store_true', dest='windowsfilenames', default=False, - help='Force filenames to be windows compatible') + help='Force filenames to be Windows-compatible') filesystem.add_option( '--no-windows-filenames', action='store_false', dest='windowsfilenames', - help='Make filenames windows compatible only if using windows (default)') + help='Make filenames Windows-compatible only if using Windows (default)') filesystem.add_option( '--trim-filenames', '--trim-file-names', metavar='LENGTH', dest='trim_file_name', default=0, type=int, -- cgit v1.2.3 From f0ffaa1621fc40ba033aa3c98a14aa4c93533915 Mon Sep 17 00:00:00 2001 From: kaz-us <32769754+kaz-us@users.noreply.github.com> Date: Sun, 31 Oct 2021 18:16:12 +0400 Subject: [vk] Fix login (#1495) Closes #1459 Authored by: kaz-us --- yt_dlp/extractor/vk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/vk.py b/yt_dlp/extractor/vk.py index a8a980de6..9a5c9ee6b 100644 --- a/yt_dlp/extractor/vk.py +++ b/yt_dlp/extractor/vk.py @@ -51,7 +51,7 @@ class VKBaseIE(InfoExtractor): self._apply_first_set_cookie_header(url_handle, 'remixlhk') login_page 
= self._download_webpage( - 'https://login.vk.com/?act=login', None, + 'https://vk.com/login', None, note='Logging in', data=urlencode_postdata(login_form)) -- cgit v1.2.3 From c588b602d34f005dc018ae004281226741414192 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sun, 31 Oct 2021 14:20:09 +0000 Subject: [Instagram] Fix incorrect resolution (#1494) Authored by: u-spec-png --- yt_dlp/extractor/instagram.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 6ed20d9c6..4eca9eb92 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -228,8 +228,8 @@ class InstagramIE(InstagramBaseIE): dict) if media: video_url = media.get('video_url') - height = try_get(media, lambda x: x['dimensions']['height']) - width = try_get(media, lambda x: x['dimensions']['width']) + height = int_or_none(self._html_search_meta(('og:video:height', 'video:height'), webpage)) or try_get(media, lambda x: x['dimensions']['height']) + width = int_or_none(self._html_search_meta(('og:video:width', 'video:width'), webpage)) or try_get(media, lambda x: x['dimensions']['width']) description = try_get( media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], compat_str) or media.get('caption') -- cgit v1.2.3 From a1fc7ca0743c8df06416e68ee74b64e07dfe7135 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:25:48 +0530 Subject: [jsinterp] Handle default in switch better --- test/test_jsinterp.py | 15 +++++++++++++++ test/test_youtube_signature.py | 6 +++++- yt_dlp/jsinterp.py | 22 +++++++++++++--------- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py index 380e52c33..e230b045f 100644 --- a/test/test_jsinterp.py +++ b/test/test_jsinterp.py @@ -132,6 +132,21 @@ class TestJSInterpreter(unittest.TestCase): self.assertEqual(jsi.call_function('x', 3), 6) 
self.assertEqual(jsi.call_function('x', 5), 0) + def test_switch_default(self): + jsi = JSInterpreter(''' + function x(f) { switch(f){ + case 2: f+=2; + default: f-=1; + case 5: + case 6: f+=6; + case 0: break; + case 1: f+=1; + } return f } + ''') + self.assertEqual(jsi.call_function('x', 1), 2) + self.assertEqual(jsi.call_function('x', 5), 11) + self.assertEqual(jsi.call_function('x', 9), 14) + def test_try(self): jsi = JSInterpreter(''' function x() { try{return 10} catch(e){return 5} } diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index f40a06952..60d8eabf5 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -69,7 +69,11 @@ _NSIG_TESTS = [ ( 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', - ), # TODO: Add more tests + ), + ( + 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js', + 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN', + ), ] diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index 5c79a8110..bb2a0ae0b 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -228,21 +228,25 @@ class JSInterpreter(object): switch_val, remaining = self._seperate_at_paren(expr[m.end() - 1:], ')') switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion) body, expr = self._seperate_at_paren(remaining, '}') - body, default = body.split('default:') if 'default:' in body else (body, None) - items = body.split('case ')[1:] - if default: - items.append(f'default:{default}') - matched = False - for item in items: - case, stmt = [i.strip() for i in self._seperate(item, ':', 1)] - matched = matched or case == 'default' or switch_val == self.interpret_expression(case, local_vars, allow_recursion) - if matched: + items = body.replace('default:', 'case default:').split('case ')[1:] + for default in (False, True): + matched = False + for item in items: + case, stmt = [i.strip() for i in self._seperate(item, ':', 1)] + 
if default: + matched = matched or case == 'default' + elif not matched: + matched = case != 'default' and switch_val == self.interpret_expression(case, local_vars, allow_recursion) + if not matched: + continue try: ret, should_abort = self.interpret_statement(stmt, local_vars, allow_recursion - 1) if should_abort: return ret except JS_Break: break + if matched: + break return self.interpret_statement(expr, local_vars, allow_recursion - 1)[0] # Comma seperated statements -- cgit v1.2.3 From 9bd979ca40f4f7b1f3918386b8347e03820766b4 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:26:34 +0530 Subject: [utils] Parse `vp09` as vp9 --- yt_dlp/utils.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 080bf260a..2953909fc 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4656,19 +4656,18 @@ def parse_codecs(codecs_str): str.strip, codecs_str.strip().strip(',').split(',')))) vcodec, acodec, hdr = None, None, None for full_codec in split_codecs: - codec = full_codec.split('.')[0] - if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora', 'dvh1', 'dvhe'): + parts = full_codec.split('.') + codec = parts[0].replace('0', '') + if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', + 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'): if not vcodec: - vcodec = full_codec + vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec if codec in ('dvh1', 'dvhe'): hdr = 'DV' - elif codec == 'vp9' and vcodec.startswith('vp9.2'): + elif codec == 'av1' and len(parts) > 3 and parts[3] == '10': + hdr = 'HDR10' + elif full_codec.replace('0', '').startswith('vp9.2'): hdr = 'HDR10' - elif codec == 'av01': - parts = full_codec.split('.') - if len(parts) > 3 and parts[3] == '10': - hdr = 'HDR10' - vcodec = '.'.join(parts[:4]) elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 
'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): if not acodec: acodec = full_codec -- cgit v1.2.3 From d89257f398fed8a44fae7d12d849114f9f4ca2be Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:27:34 +0530 Subject: [youtube] Remove unnecessary no-playlist warning --- yt_dlp/extractor/youtube.py | 83 ++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 42 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 64475edec..428414383 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2623,49 +2623,48 @@ class YoutubeIE(YoutubeBaseInfoExtractor): or search_meta(['og:title', 'twitter:title', 'title'])) video_description = get_first(video_details, 'shortDescription') - if not smuggled_data.get('force_singlefeed', False): - if not self.get_param('noplaylist'): - multifeed_metadata_list = get_first( - player_responses, - ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'), - expected_type=str) - if multifeed_metadata_list: - entries = [] - feed_ids = [] - for feed in multifeed_metadata_list.split(','): - # Unquote should take place before split on comma (,) since textual - # fields may contain comma as well (see - # https://github.com/ytdl-org/youtube-dl/issues/8536) - feed_data = compat_parse_qs( - compat_urllib_parse_unquote_plus(feed)) - - def feed_entry(name): - return try_get( - feed_data, lambda x: x[name][0], compat_str) - - feed_id = feed_entry('id') - if not feed_id: - continue - feed_title = feed_entry('title') - title = video_title - if feed_title: - title += ' (%s)' % feed_title - entries.append({ - '_type': 'url_transparent', - 'ie_key': 'Youtube', - 'url': smuggle_url( - '%swatch?v=%s' % (base_url, feed_data['id'][0]), - {'force_singlefeed': True}), - 'title': title, - }) - feed_ids.append(feed_id) - self.to_screen( - 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' - % (', '.join(feed_ids), video_id)) 
- return self.playlist_result( - entries, video_id, video_title, video_description) - else: + multifeed_metadata_list = get_first( + player_responses, + ('multicamera', 'playerLegacyMulticameraRenderer', 'metadataList'), + expected_type=str) + if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'): + if self.get_param('noplaylist'): self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + else: + entries = [] + feed_ids = [] + for feed in multifeed_metadata_list.split(','): + # Unquote should take place before split on comma (,) since textual + # fields may contain comma as well (see + # https://github.com/ytdl-org/youtube-dl/issues/8536) + feed_data = compat_parse_qs( + compat_urllib_parse_unquote_plus(feed)) + + def feed_entry(name): + return try_get( + feed_data, lambda x: x[name][0], compat_str) + + feed_id = feed_entry('id') + if not feed_id: + continue + feed_title = feed_entry('title') + title = video_title + if feed_title: + title += ' (%s)' % feed_title + entries.append({ + '_type': 'url_transparent', + 'ie_key': 'Youtube', + 'url': smuggle_url( + '%swatch?v=%s' % (base_url, feed_data['id'][0]), + {'force_singlefeed': True}), + 'title': title, + }) + feed_ids.append(feed_id) + self.to_screen( + 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' + % (', '.join(feed_ids), video_id)) + return self.playlist_result( + entries, video_id, video_title, video_description) live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails')) is_live = get_first(video_details, 'isLive') -- cgit v1.2.3 From bd93fd5d45e104561bad919d4775feba869d0145 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 3 Nov 2021 16:28:45 +0530 Subject: [fragment] Fix progress display in fragmented downloads Closes #1517 --- yt_dlp/downloader/common.py | 2 ++ yt_dlp/downloader/fragment.py | 14 +++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/yt_dlp/downloader/common.py 
b/yt_dlp/downloader/common.py index 6cfbb6657..bcf8ac955 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -319,6 +319,8 @@ class FileDownloader(object): msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' else: msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' + if s.get('fragment_index'): + msg_template += ' (frag %(fragment_index)s)' s['_default_template'] = msg_template % s self._report_progress_status(s) diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index a9d1471f8..d08fd52a1 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -31,6 +31,10 @@ class HttpQuietDownloader(HttpFD): def to_screen(self, *args, **kargs): pass + def report_retry(self, err, count, retries): + super().to_screen( + f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...') + class FragmentFD(FileDownloader): """ @@ -167,7 +171,7 @@ class FragmentFD(FileDownloader): self.ydl, { 'continuedl': True, - 'quiet': True, + 'quiet': self.params.get('quiet'), 'noprogress': True, 'ratelimit': self.params.get('ratelimit'), 'retries': self.params.get('retries', 0), @@ -237,6 +241,7 @@ class FragmentFD(FileDownloader): start = time.time() ctx.update({ 'started': start, + 'fragment_started': start, # Amount of fragment's bytes downloaded by the time of the previous # frag progress hook invocation 'prev_frag_downloaded_bytes': 0, @@ -267,6 +272,9 @@ class FragmentFD(FileDownloader): ctx['fragment_index'] = state['fragment_index'] state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] + ctx['speed'] = state['speed'] = self.calc_speed( + ctx['fragment_started'], time_now, frag_total_bytes) + ctx['fragment_started'] = time.time() ctx['prev_frag_downloaded_bytes'] = 0 else: frag_downloaded_bytes = s['downloaded_bytes'] @@ -275,8 +283,8 @@ class 
FragmentFD(FileDownloader): state['eta'] = self.calc_eta( start, time_now, estimated_size - resume_len, state['downloaded_bytes'] - resume_len) - state['speed'] = s.get('speed') or ctx.get('speed') - ctx['speed'] = state['speed'] + ctx['speed'] = state['speed'] = self.calc_speed( + ctx['fragment_started'], time_now, frag_downloaded_bytes) ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes self._hook_progress(state, info_dict) -- cgit v1.2.3 From 31c49255bf647373734c2c7f917e0d24ab81ac95 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 00:05:53 +0530 Subject: [ExtractAudio] Rescale --audio-quality correctly Authored by: CrypticSignal, pukkandan --- yt_dlp/__init__.py | 4 +++- yt_dlp/options.py | 2 +- yt_dlp/postprocessor/ffmpeg.py | 37 +++++++++++++++++++++++-------------- yt_dlp/utils.py | 2 +- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 84628bf45..0070d50a8 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -29,6 +29,8 @@ from .utils import ( error_to_compat_str, ExistingVideoReached, expand_path, + float_or_none, + int_or_none, match_filter_func, MaxDownloadsReached, parse_duration, @@ -230,7 +232,7 @@ def _real_main(argv=None): parser.error('invalid audio format specified') if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') - if not opts.audioquality.isdigit(): + if int_or_none(float_or_none(opts.audioquality)) is None: # int_or_none prevents inf, nan parser.error('invalid audio quality specified') if opts.recodevideo is not None: opts.recodevideo = opts.recodevideo.replace(' ', '') diff --git a/yt_dlp/options.py b/yt_dlp/options.py index a3a6c74b3..bd9fdd37b 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1215,7 +1215,7 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--audio-quality', metavar='QUALITY', dest='audioquality', default='5', - help='Specify ffmpeg audio quality, insert a value between 0 (better) and 
9 (worse) for VBR or a specific bitrate like 128K (default %default)') + help='Specify ffmpeg audio quality, insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)') postproc.add_option( '--remux-video', metavar='FORMAT', dest='remuxvideo', default=None, diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index b7fcc569b..96f7be6ff 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -371,9 +371,29 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) self._preferredcodec = preferredcodec or 'best' - self._preferredquality = preferredquality + self._preferredquality = float_or_none(preferredquality) self._nopostoverwrites = nopostoverwrites + def _quality_args(self, codec): + if self._preferredquality is None: + return [] + elif self._preferredquality > 10: + return ['-b:a', f'{self._preferredquality}k'] + + limits = { + 'libmp3lame': (10, 0), + 'aac': (0.1, 11), + 'vorbis': (0, 10), + 'opus': None, # doesn't support -q:a + 'wav': None, + 'flac': None, + }[codec] + if not limits: + return [] + + q = limits[1] + (limits[0] - limits[1]) * (self._preferredquality / 10) + return ['-q:a', f'{q}'] + def run_ffmpeg(self, path, out_path, codec, more_opts): if codec is None: acodec_opts = [] @@ -417,23 +437,12 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): # MP3 otherwise. 
acodec = 'libmp3lame' extension = 'mp3' - more_opts = [] - if self._preferredquality is not None: - if int(self._preferredquality) < 10: - more_opts += ['-q:a', self._preferredquality] - else: - more_opts += ['-b:a', self._preferredquality + 'k'] + more_opts = self._quality_args(acodec) else: # We convert the audio (lossy if codec is lossy) acodec = ACODECS[self._preferredcodec] extension = self._preferredcodec - more_opts = [] - if self._preferredquality is not None: - # The opus codec doesn't support the -aq option - if int(self._preferredquality) < 10 and extension != 'opus': - more_opts += ['-q:a', self._preferredquality] - else: - more_opts += ['-b:a', self._preferredquality + 'k'] + more_opts = self._quality_args(acodec) if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] if self._preferredcodec == 'm4a': diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 2953909fc..62f83c9ce 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -3871,7 +3871,7 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): return default try: return int(v) * invscale // scale - except (ValueError, TypeError): + except (ValueError, TypeError, OverflowError): return default -- cgit v1.2.3 From 9af98e17bd2b761d304e88a359b0f7a40e6c0a67 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 00:23:48 +0530 Subject: [ffmpeg] Framework for feature detection Related: #1502, #1237, https://github.com/ytdl-org/youtube-dl/pull/29581 --- yt_dlp/__init__.py | 3 +- yt_dlp/postprocessor/ffmpeg.py | 79 +++++++++++++++++++++++------------------- yt_dlp/utils.py | 15 +++++--- 3 files changed, 55 insertions(+), 42 deletions(-) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 0070d50a8..3020b6e95 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -232,7 +232,8 @@ def _real_main(argv=None): parser.error('invalid audio format specified') if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') - if 
int_or_none(float_or_none(opts.audioquality)) is None: # int_or_none prevents inf, nan + audioquality = int_or_none(float_or_none(opts.audioquality)) # int_or_none prevents inf, nan + if audioquality is None or audioquality < 0: parser.error('invalid audio quality specified') if opts.recodevideo is not None: opts.recodevideo = opts.recodevideo.replace(' ', '') diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 96f7be6ff..c2415c59a 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -16,7 +16,8 @@ from ..utils import ( encodeArgument, encodeFilename, float_or_none, - get_exe_version, + _get_exe_version_output, + detect_exe_version, is_outdated_version, ISO639Utils, orderedSet, @@ -80,10 +81,10 @@ class FFmpegPostProcessor(PostProcessor): def _determine_executables(self): programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] - prefer_ffmpeg = True - def get_ffmpeg_version(path): - ver = get_exe_version(path, args=['-version']) + def get_ffmpeg_version(path, prog): + out = _get_exe_version_output(path, ['-bsfs']) + ver = detect_exe_version(out) if out else False if ver: regexs = [ r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1] @@ -94,42 +95,46 @@ class FFmpegPostProcessor(PostProcessor): mobj = re.match(regex, ver) if mobj: ver = mobj.group(1) - return ver + self._versions[prog] = ver + if prog != 'ffmpeg' or not out: + return + + # TODO: Feature detection self.basename = None self.probe_basename = None - self._paths = None self._versions = None - if self._downloader: - prefer_ffmpeg = self.get_param('prefer_ffmpeg', True) - location = self.get_param('ffmpeg_location') - if location is not None: - if not os.path.exists(location): - self.report_warning( - 'ffmpeg-location %s does not exist! ' - 'Continuing without ffmpeg.' 
% (location)) - self._versions = {} - return - elif os.path.isdir(location): - dirname, basename = location, None - else: - basename = os.path.splitext(os.path.basename(location))[0] - basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg') - dirname = os.path.dirname(os.path.abspath(location)) - if basename in ('ffmpeg', 'ffprobe'): - prefer_ffmpeg = True - - self._paths = dict( - (p, os.path.join(dirname, p)) for p in programs) - if basename: - self._paths[basename] = location - self._versions = dict( - (p, get_ffmpeg_version(self._paths[p])) for p in programs) - if self._versions is None: - self._versions = dict( - (p, get_ffmpeg_version(p)) for p in programs) - self._paths = dict((p, p) for p in programs) + self._features = {} + + prefer_ffmpeg = self.get_param('prefer_ffmpeg', True) + location = self.get_param('ffmpeg_location') + if location is None: + self._paths = {p: p for p in programs} + else: + if not os.path.exists(location): + self.report_warning( + 'ffmpeg-location %s does not exist! ' + 'Continuing without ffmpeg.' % (location)) + self._versions = {} + return + elif os.path.isdir(location): + dirname, basename = location, None + else: + basename = os.path.splitext(os.path.basename(location))[0] + basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg') + dirname = os.path.dirname(os.path.abspath(location)) + if basename in ('ffmpeg', 'ffprobe'): + prefer_ffmpeg = True + + self._paths = dict( + (p, os.path.join(dirname, p)) for p in programs) + if basename: + self._paths[basename] = location + + self._versions = {} + for p in programs: + get_ffmpeg_version(self._paths[p], p) if prefer_ffmpeg is False: prefs = ('avconv', 'ffmpeg') @@ -382,7 +387,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): limits = { 'libmp3lame': (10, 0), - 'aac': (0.1, 11), + # FFmpeg's AAC encoder does not have an upper limit for the value of -q:a. 
+ # Experimentally, with values over 4, bitrate changes were minimal or non-existent + 'aac': (0.1, 4), 'vorbis': (0, 10), 'opus': None, # doesn't support -q:a 'wav': None, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 62f83c9ce..55e452a15 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4007,10 +4007,7 @@ def check_executable(exe, args=[]): return exe -def get_exe_version(exe, args=['--version'], - version_re=None, unrecognized='present'): - """ Returns the version of the specified executable, - or False if the executable is not present """ +def _get_exe_version_output(exe, args): try: # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers # SIGTTOU if yt-dlp is run in the background. @@ -4022,7 +4019,7 @@ def get_exe_version(exe, args=['--version'], return False if isinstance(out, bytes): # Python 2.x out = out.decode('ascii', 'ignore') - return detect_exe_version(out, version_re, unrecognized) + return out def detect_exe_version(output, version_re=None, unrecognized='present'): @@ -4036,6 +4033,14 @@ def detect_exe_version(output, version_re=None, unrecognized='present'): return unrecognized +def get_exe_version(exe, args=['--version'], + version_re=None, unrecognized='present'): + """ Returns the version of the specified executable, + or False if the executable is not present """ + out = _get_exe_version_output(exe, args) + return detect_exe_version(out, version_re, unrecognized) if out else False + + class LazyList(collections.abc.Sequence): ''' Lazy immutable list from an iterable Note that slices of a LazyList are lists and not LazyList''' -- cgit v1.2.3 From 673c0057e81410b3da2b0c07ebf7abca13286eab Mon Sep 17 00:00:00 2001 From: CrypticSignal Date: Thu, 4 Nov 2021 02:23:40 +0530 Subject: [ExtractAudio] Use `libfdk_aac` if available Closes #1502 Authored by: CrypticSignal --- yt_dlp/postprocessor/ffmpeg.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py 
b/yt_dlp/postprocessor/ffmpeg.py index c2415c59a..3f82eabf5 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -99,7 +99,7 @@ class FFmpegPostProcessor(PostProcessor): if prog != 'ffmpeg' or not out: return - # TODO: Feature detection + self._features['fdk'] = '--enable-libfdk-aac' in out self.basename = None self.probe_basename = None @@ -391,6 +391,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): # Experimentally, with values over 4, bitrate changes were minimal or non-existent 'aac': (0.1, 4), 'vorbis': (0, 10), + 'libfdk_aac': (1, 5), 'opus': None, # doesn't support -q:a 'wav': None, 'flac': None, @@ -399,6 +400,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): return [] q = limits[1] + (limits[0] - limits[1]) * (self._preferredquality / 10) + if codec == 'libfdk_aac': + return ['-vbr', f'{int(q)}'] return ['-q:a', f'{q}'] def run_ffmpeg(self, path, out_path, codec, more_opts): @@ -448,6 +451,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): else: # We convert the audio (lossy if codec is lossy) acodec = ACODECS[self._preferredcodec] + if acodec == 'aac' and self._features.get('fdk'): + acodec = 'libfdk_aac' extension = self._preferredcodec more_opts = self._quality_args(acodec) if self._preferredcodec == 'aac': -- cgit v1.2.3 From 832e9000c71c5bbd97c93d21051044cf61a3b87f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 02:24:12 +0530 Subject: [ffmpeg] Accurately detect presence of setts Closes #1237 --- yt_dlp/postprocessor/ffmpeg.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 3f82eabf5..139b97fb4 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -99,7 +99,10 @@ class FFmpegPostProcessor(PostProcessor): if prog != 'ffmpeg' or not out: return - self._features['fdk'] = '--enable-libfdk-aac' in out + self._features = { + 'fdk': '--enable-libfdk-aac' in out, + 'setts': 
'setts' in out.splitlines(), + } self.basename = None self.probe_basename = None @@ -827,11 +830,10 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): - required_version = '4.4' - if is_outdated_version(self._versions[self.basename], required_version): + if not self._features.get('setts'): self.report_warning( 'A re-encode is needed to fix timestamps in older versions of ffmpeg. ' - f'Please install ffmpeg {required_version} or later to fixup without re-encoding') + 'Please install ffmpeg 4.4 or later to fixup without re-encoding') opts = ['-vf', 'setpts=PTS-STARTPTS'] else: opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS'] -- cgit v1.2.3 From 8913ef74d76d8e93e4aeaf9d2827ca950c17f8ce Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 03:10:49 +0530 Subject: [ffmpeg] Detect libavformat version for `aac_adtstoasc` and print available features in verbose head Based on https://github.com/ytdl-org/youtube-dl/pull/29581 --- yt_dlp/YoutubeDL.py | 6 +++++- yt_dlp/downloader/external.py | 3 +-- yt_dlp/postprocessor/ffmpeg.py | 10 +++++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4a9f4775b..a866178b0 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3350,7 +3350,11 @@ class YoutubeDL(object): platform.architecture()[0], platform_name())) - exe_versions = FFmpegPostProcessor.get_versions(self) + exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) + ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} + if ffmpeg_features: + exe_versions['ffmpeg'] += f' (%s)' % ','.join(ffmpeg_features) + exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index ce3370fb7..1efbb2fab 100644 --- a/yt_dlp/downloader/external.py +++ 
b/yt_dlp/downloader/external.py @@ -21,7 +21,6 @@ from ..utils import ( encodeArgument, handle_youtubedl_headers, check_executable, - is_outdated_version, Popen, sanitize_open, ) @@ -459,7 +458,7 @@ class FFmpegFD(ExternalFD): args += ['-f', 'mpegts'] else: args += ['-f', 'mp4'] - if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): + if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 139b97fb4..46e87baeb 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -75,9 +75,14 @@ class FFmpegPostProcessor(PostProcessor): self.basename, self.basename, required_version) self.report_warning(warning) + @staticmethod + def get_versions_and_features(downloader=None): + pp = FFmpegPostProcessor(downloader) + return pp._versions, pp._features + @staticmethod def get_versions(downloader=None): - return FFmpegPostProcessor(downloader)._versions + return FFmpegPostProcessor.get_version_and_features(downloader)[0] def _determine_executables(self): programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] @@ -99,9 +104,12 @@ class FFmpegPostProcessor(PostProcessor): if prog != 'ffmpeg' or not out: return + mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. 
]+)', out) + lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None self._features = { 'fdk': '--enable-libfdk-aac' in out, 'setts': 'setts' in out.splitlines(), + 'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False), } self.basename = None -- cgit v1.2.3 From a4211baff55f72bd1ca0649407c3d134bfcd2646 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 4 Nov 2021 03:40:35 +0530 Subject: [cleanup] Minor cleanup --- .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml | 2 +- .../ISSUE_TEMPLATE_tmpl/2_site_support_request.yml | 2 +- .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml | 4 ++-- README.md | 24 +++++++++++----------- yt_dlp/YoutubeDL.py | 4 ++-- yt_dlp/downloader/common.py | 4 +++- yt_dlp/extractor/picarto.py | 2 +- yt_dlp/extractor/youtube.py | 2 +- yt_dlp/options.py | 2 +- 9 files changed, 24 insertions(+), 22 deletions(-) diff --git a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml index fdca0e53a..e23bc4195 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml @@ -43,7 +43,7 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**. Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. It should look similar to this: placeholder: | diff --git a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml index f7a48edc7..f35384821 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml @@ -54,7 +54,7 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output using one of the example URLs provided above. 
+ Provide the complete verbose output **using one of the example URLs provided above**. Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. It should look similar to this: placeholder: | diff --git a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml index e4d669bb7..8219ebfd4 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml @@ -37,8 +37,8 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. - Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. + Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**. + Add the `-Uv` flag to **your** command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. It should look similar to this: placeholder: | [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] diff --git a/README.md b/README.md index 31bfca6a8..ccd221bb4 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search works * Mixes supports downloading multiple pages of content * Most (but not all) age-gated content can be downloaded without cookies - * Partial workaround for throttling issue + * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) * Redirect channel's home URL automatically to `/video` to preserve the old behaviour * `255kbps` audio is extracted (if available) from youtube music when premium cookies are given * Youtube music Albums, channels etc can be downloaded ([except self-uploaded music](https://github.com/yt-dlp/yt-dlp/issues/723)) @@ -154,7 +154,7 @@ 
For ease of use, a few more compat options are available: You can install yt-dlp using one of the following methods: -#### Using the release binary +### Using the release binary You can simply download the [correct binary file](#release-files) for your OS: **[[Windows](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)] [[UNIX-like](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)]** @@ -177,7 +177,7 @@ sudo chmod a+rx /usr/local/bin/yt-dlp PS: The manpages, shell completion files etc. are available in [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) -#### With [PIP](https://pypi.org/project/pip) +### With [PIP](https://pypi.org/project/pip) You can install the [PyPI package](https://pypi.org/project/yt-dlp) with: ``` @@ -196,7 +196,7 @@ python3 -m pip3 install -U https://github.com/yt-dlp/yt-dlp/archive/master.zip Note that on some systems, you may need to use `py` or `python` instead of `python3` -#### With [Homebrew](https://brew.sh) +### With [Homebrew](https://brew.sh) macOS or Linux users that are using Homebrew can also install it by: @@ -204,14 +204,14 @@ macOS or Linux users that are using Homebrew can also install it by: brew install yt-dlp/taps/yt-dlp ``` -### UPDATE +## UPDATE You can use `yt-dlp -U` to update if you are [using the provided release](#using-the-release-binary) If you [installed with pip](#with-pip), simply re-run the same command that was used to install the program If you [installed using Homebrew](#with-homebrew), run `brew upgrade yt-dlp/taps/yt-dlp` -### RELEASE FILES +## RELEASE FILES #### Recommended @@ -238,7 +238,7 @@ File|Description [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums -### DEPENDENCIES +## DEPENDENCIES Python versions 3.6+ (CPython and PyPy) are supported. 
Other versions and implementations may or may not work correctly. ', '', html) -- cgit v1.2.3 From 89fcdff5d8e62c6153763650f12ec4eb4453bdff Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sat, 23 Oct 2021 03:25:09 +0200 Subject: [polskieradio] Add extractors (#1386) Authored by: selfisekai --- yt_dlp/extractor/extractors.py | 4 + yt_dlp/extractor/polskieradio.py | 303 +++++++++++++++++++++++++++++++++------ 2 files changed, 267 insertions(+), 40 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 641481d01..741b9f021 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1108,6 +1108,10 @@ from .pokemon import ( from .polskieradio import ( PolskieRadioIE, PolskieRadioCategoryIE, + PolskieRadioPlayerIE, + PolskieRadioPodcastIE, + PolskieRadioPodcastListIE, + PolskieRadioRadioKierowcowIE, ) from .popcorntimes import PopcorntimesIE from .popcorntv import PopcornTVIE diff --git a/yt_dlp/extractor/polskieradio.py b/yt_dlp/extractor/polskieradio.py index 53fe0340a..b2b3eb29c 100644 --- a/yt_dlp/extractor/polskieradio.py +++ b/yt_dlp/extractor/polskieradio.py @@ -2,6 +2,8 @@ from __future__ import unicode_literals import itertools +import json +import math import re from .common import InfoExtractor @@ -12,15 +14,45 @@ from ..compat import ( ) from ..utils import ( extract_attributes, + ExtractorError, + InAdvancePagedList, int_or_none, + js_to_json, + parse_iso8601, strip_or_none, unified_timestamp, unescapeHTML, + url_or_none, ) -class PolskieRadioIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P[0-9]+)' +class PolskieRadioBaseExtractor(InfoExtractor): + def _extract_webpage_player_entries(self, webpage, playlist_id, base_data): + media_urls = set() + + for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', webpage): + media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False) + if not media.get('file') or 
not media.get('desc'): + continue + media_url = self._proto_relative_url(media['file']) + if media_url in media_urls: + continue + media_urls.add(media_url) + entry = base_data.copy() + entry.update({ + 'id': compat_str(media['id']), + 'url': media_url, + 'duration': int_or_none(media.get('length')), + 'vcodec': 'none' if media.get('provider') == 'audio' else None, + }) + entry_title = compat_urllib_parse_unquote(media['desc']) + if entry_title: + entry['title'] = entry_title + yield entry + + +class PolskieRadioIE(PolskieRadioBaseExtractor): + _VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)' _TESTS = [{ # Old-style single broadcast. 'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie', 'info_dict': { @@ -59,22 +91,14 @@ class PolskieRadioIE(InfoExtractor): 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$' }, }], - }, { # Old-style multiple broadcast playlist. - 'url': 'https://www.polskieradio.pl/8/4346/Artykul/2487823,Marek-Kondrat-czyta-Mistrza-i-Malgorzate', - 'info_dict': { - 'id': '2487823', - 'title': 'Marek Kondrat czyta "Mistrza i Małgorzatę"', - 'description': 'md5:8422a95cc83834f2aaeff9d82e9c8f39', - }, - 'playlist_mincount': 50, - }, { # New-style multiple broadcast playlist. - 'url': 'https://www.polskieradio.pl/8/4346/Artykul/2541317,Czytamy-Kalendarz-i-klepsydre-Tadeusza-Konwickiego', + }, { + # PR4 audition - other frontend + 'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301', 'info_dict': { - 'id': '2541317', - 'title': 'Czytamy "Kalendarz i klepsydrę" Tadeusza Konwickiego', - 'description': 'md5:0baeaa46d877f1351fb2eeed3e871f9f', + 'id': '2610977', + 'ext': 'mp3', + 'title': 'Pogłos 29 października godz. 
23:01', }, - 'playlist_mincount': 15, }, { 'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis', 'only_matching': True, @@ -85,6 +109,9 @@ class PolskieRadioIE(InfoExtractor): # with mp4 video 'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej', 'only_matching': True, + }, { + 'url': 'https://polskieradio24.pl/130/4503/Artykul/2621876,Narusza-nasza-suwerennosc-Publicysci-o-uzaleznieniu-funduszy-UE-od-praworzadnosci', + 'only_matching': True, }] def _real_extract(self, url): @@ -94,39 +121,37 @@ class PolskieRadioIE(InfoExtractor): content = self._search_regex( r'(?s)]+class="\s*this-article\s*"[^>]*>(.+?)]+class="tags"[^>]*>', - webpage, 'content') + webpage, 'content', default=None) timestamp = unified_timestamp(self._html_search_regex( r'(?s)]+id="datetime2"[^>]*>(.+?)', - webpage, 'timestamp', fatal=False)) + webpage, 'timestamp', default=None)) - thumbnail_url = self._og_search_thumbnail(webpage) + thumbnail_url = self._og_search_thumbnail(webpage, default=None) - entries = [] + title = self._og_search_title(webpage).strip() - media_urls = set() + description = strip_or_none(self._og_search_description(webpage, default=None)) + description = description.replace('\xa0', ' ') if description is not None else None - for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', content): - media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False) - if not media.get('file') or not media.get('desc'): - continue - media_url = self._proto_relative_url(media['file'], 'http:') - if media_url in media_urls: - continue - media_urls.add(media_url) - entries.append({ - 'id': compat_str(media['id']), - 'url': media_url, - 'title': compat_urllib_parse_unquote(media['desc']), - 'duration': int_or_none(media.get('length')), - 'vcodec': 'none' if media.get('provider') == 'audio' else None, + if not content: + return { + 'id': playlist_id, + 
'url': self._proto_relative_url( + self._search_regex( + r"source:\s*'(//static\.prsa\.pl/[^']+)'", + webpage, 'audition record url')), + 'title': title, + 'description': description, 'timestamp': timestamp, - 'thumbnail': thumbnail_url - }) + 'thumbnail': thumbnail_url, + } - title = self._og_search_title(webpage).strip() - description = strip_or_none(self._og_search_description(webpage)) - description = description.replace('\xa0', ' ') if description is not None else None + entries = self._extract_webpage_player_entries(content, playlist_id, { + 'title': title, + 'timestamp': timestamp, + 'thumbnail': thumbnail_url, + }) return self.playlist_result(entries, playlist_id, title, description) @@ -207,3 +232,201 @@ class PolskieRadioCategoryIE(InfoExtractor): return self.playlist_result( self._entries(url, webpage, category_id), category_id, title) + + +class PolskieRadioPlayerIE(InfoExtractor): + IE_NAME = 'polskieradio:player' + _VALID_URL = r'https?://player\.polskieradio\.pl/anteny/(?P<id>[^/]+)' + + _BASE_URL = 'https://player.polskieradio.pl' + _PLAYER_URL = 'https://player.polskieradio.pl/main.bundle.js' + _STATIONS_API_URL = 'https://apipr.polskieradio.pl/api/stacje' + + _TESTS = [{ + 'url': 'https://player.polskieradio.pl/anteny/trojka', + 'info_dict': { + 'id': '3', + 'ext': 'm4a', + 'title': 'Trójka', + }, + 'params': { + 'format': 'bestaudio', + 'skip_download': 'endless stream', + }, + }] + + def _get_channel_list(self, channel_url='no_channel'): + player_code = self._download_webpage( + self._PLAYER_URL, channel_url, + note='Downloading js player') + channel_list = js_to_json(self._search_regex( + r';var r="anteny",a=(\[.+?\])},', player_code, 'channel list')) + return self._parse_json(channel_list, channel_url) + + def _real_extract(self, url): + channel_url = self._match_id(url) + channel_list = self._get_channel_list(channel_url) + + channel = next((c for c in channel_list if c.get('url') == channel_url), None) + + if not channel: + raise 
ExtractorError('Channel not found') + + station_list = self._download_json(self._STATIONS_API_URL, channel_url, + note='Downloading stream url list', + headers={ + 'Accept': 'application/json', + 'Referer': url, + 'Origin': self._BASE_URL, + }) + station = next((s for s in station_list + if s.get('Name') == (channel.get('streamName') or channel.get('name'))), None) + if not station: + raise ExtractorError('Station not found even though we extracted channel') + + formats = [] + for stream_url in station['Streams']: + stream_url = self._proto_relative_url(stream_url) + if stream_url.endswith('/playlist.m3u8'): + formats.extend(self._extract_m3u8_formats(stream_url, channel_url, live=True)) + elif stream_url.endswith('/manifest.f4m'): + formats.extend(self._extract_mpd_formats(stream_url, channel_url)) + elif stream_url.endswith('/Manifest'): + formats.extend(self._extract_ism_formats(stream_url, channel_url)) + else: + formats.append({ + 'url': stream_url, + }) + + self._sort_formats(formats) + + return { + 'id': compat_str(channel['id']), + 'formats': formats, + 'title': channel.get('name') or channel.get('streamName'), + 'display_id': channel_url, + 'thumbnail': f'{self._BASE_URL}/images/{channel_url}-color-logo.png', + 'is_live': True, + } + + +class PolskieRadioPodcastBaseExtractor(InfoExtractor): + _API_BASE = 'https://apipodcasts.polskieradio.pl/api' + + def _parse_episode(self, data): + return { + 'id': data['guid'], + 'formats': [{ + 'url': data['url'], + 'filesize': int_or_none(data.get('fileSize')), + }], + 'title': data['title'], + 'description': data.get('description'), + 'duration': int_or_none(data.get('length')), + 'timestamp': parse_iso8601(data.get('publishDate')), + 'thumbnail': url_or_none(data.get('image')), + 'series': data.get('podcastTitle'), + 'episode': data['title'], + } + + +class PolskieRadioPodcastListIE(PolskieRadioPodcastBaseExtractor): + IE_NAME = 'polskieradio:podcast:list' + _VALID_URL = 
r'https?://podcasty\.polskieradio\.pl/podcast/(?P\d+)' + _TESTS = [{ + 'url': 'https://podcasty.polskieradio.pl/podcast/8/', + 'info_dict': { + 'id': '8', + 'title': 'Śniadanie w Trójce', + 'description': 'md5:57abcc27bc4c6a6b25baa3061975b9ef', + 'uploader': 'Beata Michniewicz', + }, + 'playlist_mincount': 714, + }] + _PAGE_SIZE = 10 + + def _call_api(self, podcast_id, page): + return self._download_json( + f'{self._API_BASE}/Podcasts/{podcast_id}/?pageSize={self._PAGE_SIZE}&page={page}', + podcast_id, f'Downloading page {page}') + + def _real_extract(self, url): + podcast_id = self._match_id(url) + data = self._call_api(podcast_id, 1) + + def get_page(page_num): + page_data = self._call_api(podcast_id, page_num + 1) if page_num else data + yield from (self._parse_episode(ep) for ep in page_data['items']) + + return { + '_type': 'playlist', + 'entries': InAdvancePagedList( + get_page, math.ceil(data['itemCount'] / self._PAGE_SIZE), self._PAGE_SIZE), + 'id': str(data['id']), + 'title': data['title'], + 'description': data.get('description'), + 'uploader': data.get('announcer'), + } + + +class PolskieRadioPodcastIE(PolskieRadioPodcastBaseExtractor): + IE_NAME = 'polskieradio:podcast' + _VALID_URL = r'https?://podcasty\.polskieradio\.pl/track/(?P[a-f\d]{8}(?:-[a-f\d]{4}){4}[a-f\d]{8})' + _TESTS = [{ + 'url': 'https://podcasty.polskieradio.pl/track/6eafe403-cb8f-4756-b896-4455c3713c32', + 'info_dict': { + 'id': '6eafe403-cb8f-4756-b896-4455c3713c32', + 'ext': 'mp3', + 'title': 'Theresa May rezygnuje. 
Co dalej z brexitem?', + 'description': 'md5:e41c409a29d022b70ef0faa61dbded60', + }, + }] + + def _real_extract(self, url): + podcast_id = self._match_id(url) + data = self._download_json( + f'{self._API_BASE}/audio', + podcast_id, 'Downloading podcast metadata', + data=json.dumps({ + 'guids': [podcast_id], + }).encode('utf-8'), + headers={ + 'Content-Type': 'application/json', + }) + return self._parse_episode(data[0]) + + +class PolskieRadioRadioKierowcowIE(PolskieRadioBaseExtractor): + _VALID_URL = r'https?://(?:www\.)?radiokierowcow\.pl/artykul/(?P[0-9]+)' + IE_NAME = 'polskieradio:kierowcow' + + _TESTS = [{ + 'url': 'https://radiokierowcow.pl/artykul/2694529', + 'info_dict': { + 'id': '2694529', + 'title': 'Zielona fala reliktem przeszłości?', + 'description': 'md5:343950a8717c9818fdfd4bd2b8ca9ff2', + }, + 'playlist_count': 3, + }] + + def _real_extract(self, url): + media_id = self._match_id(url) + webpage = self._download_webpage(url, media_id) + nextjs_build = self._search_nextjs_data(webpage, media_id)['buildId'] + article = self._download_json( + f'https://radiokierowcow.pl/_next/data/{nextjs_build}/artykul/{media_id}.json?articleId={media_id}', + media_id) + data = article['pageProps']['data'] + title = data['title'] + entries = self._extract_webpage_player_entries(data['content'], media_id, { + 'title': title, + }) + + return { + '_type': 'playlist', + 'id': media_id, + 'entries': entries, + 'title': title, + 'description': data.get('lead'), + } -- cgit v1.2.3 From ed76230b3f61d3440da5b71170e243cd2bfe693b Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sat, 23 Oct 2021 01:46:56 +0200 Subject: [polsatgo] Add extractor (#1386) Authored by: selfisekai, sdomi Co-authored-by: Dominika Liberda --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/polsatgo.py | 90 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 yt_dlp/extractor/polsatgo.py diff --git a/yt_dlp/extractor/extractors.py 
b/yt_dlp/extractor/extractors.py index 741b9f021..bd0da2c38 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1105,6 +1105,7 @@ from .pokemon import ( PokemonIE, PokemonWatchIE, ) +from .polsatgo import PolsatGoIE from .polskieradio import ( PolskieRadioIE, PolskieRadioCategoryIE, diff --git a/yt_dlp/extractor/polsatgo.py b/yt_dlp/extractor/polsatgo.py new file mode 100644 index 000000000..1e3f46c07 --- /dev/null +++ b/yt_dlp/extractor/polsatgo.py @@ -0,0 +1,90 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from uuid import uuid4 +import json + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + url_or_none, + ExtractorError, +) + + +class PolsatGoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?polsat(?:box)?go\.pl/.+/(?P[0-9a-fA-F]+)(?:[/#?]|$)' + _TESTS = [{ + 'url': 'https://polsatgo.pl/wideo/seriale/swiat-wedlug-kiepskich/5024045/sezon-1/5028300/swiat-wedlug-kiepskich-odcinek-88/4121', + 'info_dict': { + 'id': '4121', + 'ext': 'mp4', + 'title': 'Świat według Kiepskich - Odcinek 88', + 'age_limit': 12, + }, + }] + + def _extract_formats(self, sources, video_id): + for source in sources or []: + if not source.get('id'): + continue + url = url_or_none(self._call_api( + 'drm', video_id, 'getPseudoLicense', + {'mediaId': video_id, 'sourceId': source['id']}).get('url')) + if not url: + continue + yield { + 'url': url, + 'height': int_or_none(try_get(source, lambda x: x['quality'][:-1])) + } + + def _real_extract(self, url): + video_id = self._match_id(url) + media = self._call_api('navigation', video_id, 'prePlayData', {'mediaId': video_id})['mediaItem'] + + formats = list(self._extract_formats( + try_get(media, lambda x: x['playback']['mediaSources']), video_id)) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': media['displayInfo']['title'], + 'formats': formats, + 'age_limit': int_or_none(media['displayInfo']['ageGroup']) + } + + def _call_api(self, 
endpoint, media_id, method, params): + rand_uuid = str(uuid4()) + res = self._download_json( + f'https://b2c-mobile.redefine.pl/rpc/{endpoint}/', media_id, + note=f'Downloading {method} JSON metadata', + data=json.dumps({ + 'method': method, + 'id': '2137', + 'jsonrpc': '2.0', + 'params': { + **params, + 'userAgentData': { + 'deviceType': 'mobile', + 'application': 'native', + 'os': 'android', + 'build': 10003, + 'widevine': False, + 'portal': 'pg', + 'player': 'cpplayer', + }, + 'deviceId': { + 'type': 'other', + 'value': rand_uuid, + }, + 'clientId': rand_uuid, + 'cpid': 1, + }, + }).encode('utf-8'), + headers={'Content-type': 'application/json'}) + if not res.get('result'): + if res['error']['code'] == 13404: + raise ExtractorError('This video is either unavailable in your region or is DRM protected', expected=True) + raise ExtractorError(f'Solorz said: {res["error"]["message"]} - {res["error"]["data"]["userMessage"]}') + return res['result'] -- cgit v1.2.3 From 3f771f75d7277e54411a6e2ae36e74d7ddb993dd Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sun, 31 Oct 2021 10:58:57 +0530 Subject: [radiokapital] Add extractors (#1401) Authored by: selfisekai --- yt_dlp/extractor/extractors.py | 4 ++ yt_dlp/extractor/radiokapital.py | 99 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 yt_dlp/extractor/radiokapital.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index bd0da2c38..4a06ec578 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1159,6 +1159,10 @@ from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE +from .radiokapital import ( + RadioKapitalIE, + RadioKapitalShowIE, +) from .radlive import ( RadLiveIE, RadLiveChannelIE, diff --git a/yt_dlp/extractor/radiokapital.py b/yt_dlp/extractor/radiokapital.py new file mode 100644 index 000000000..2e93e034f --- 
/dev/null +++ b/yt_dlp/extractor/radiokapital.py @@ -0,0 +1,99 @@ +# coding: utf-8 + +from .common import InfoExtractor +from ..utils import ( + clean_html, + traverse_obj, + unescapeHTML, +) + +import itertools +from urllib.parse import urlencode + + +class RadioKapitalBaseIE(InfoExtractor): + def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}): + return self._download_json( + f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urlencode(qs)}', + video_id, note=note) + + def _parse_episode(self, data): + release = '%s%s%s' % (data['published'][6:11], data['published'][3:6], data['published'][:3]) + return { + '_type': 'url_transparent', + 'url': data['mixcloud_url'], + 'ie_key': 'Mixcloud', + 'title': unescapeHTML(data['title']), + 'description': clean_html(data.get('content')), + 'tags': traverse_obj(data, ('tags', ..., 'name')), + 'release_date': release, + 'series': traverse_obj(data, ('show', 'title')), + } + + +class RadioKapitalIE(RadioKapitalBaseIE): + IE_NAME = 'radiokapital' + _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P[a-z\d-]+)' + + _TESTS = [{ + 'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial', + 'info_dict': { + 'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20', + 'ext': 'm4a', + 'title': '#5: It’s okay to\xa0be\xa0immaterial', + 'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4', + 'uploader': 'Radio Kapitał', + 'uploader_id': 'radiokapital', + 'timestamp': 1621640164, + 'upload_date': '20210521', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + + episode = self._call_api('episodes/%s' % video_id, video_id) + return self._parse_episode(episode) + + +class RadioKapitalShowIE(RadioKapitalBaseIE): + IE_NAME = 'radiokapital:show' + _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P[a-z\d-]+)/?(?:$|[?#])' + + _TESTS = [{ + 'url': 'https://radiokapital.pl/shows/wesz', + 
'info_dict': { + 'id': '100', + 'title': 'WĘSZ', + 'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c', + }, + 'playlist_mincount': 17, + }] + + def _get_episode_list(self, series_id, page_no): + return self._call_api( + 'episodes', series_id, + f'Downloading episode list page #{page_no}', qs={ + 'show': series_id, + 'page': page_no, + }) + + def _entries(self, series_id): + for page_no in itertools.count(1): + episode_list = self._get_episode_list(series_id, page_no) + yield from (self._parse_episode(ep) for ep in episode_list['items']) + if episode_list['next'] is None: + break + + def _real_extract(self, url): + series_id = self._match_id(url) + + show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata') + entries = self._entries(series_id) + return { + '_type': 'playlist', + 'entries': entries, + 'id': str(show['id']), + 'title': show.get('title'), + 'description': clean_html(show.get('content')), + } -- cgit v1.2.3 From c0599d4fe493730236c7e62ed63575ea0d3f3fa2 Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sun, 31 Oct 2021 10:59:17 +0530 Subject: [wppilot] Add extractors (#1401) Authored by: selfisekai --- yt_dlp/extractor/extractors.py | 4 + yt_dlp/extractor/wppilot.py | 177 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 yt_dlp/extractor/wppilot.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4a06ec578..d47c06647 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1788,6 +1788,10 @@ from .wistia import ( WistiaPlaylistIE, ) from .worldstarhiphop import WorldStarHipHopIE +from .wppilot import ( + WPPilotIE, + WPPilotChannelsIE, +) from .wsj import ( WSJIE, WSJArticleIE, diff --git a/yt_dlp/extractor/wppilot.py b/yt_dlp/extractor/wppilot.py new file mode 100644 index 000000000..3003a0f10 --- /dev/null +++ b/yt_dlp/extractor/wppilot.py @@ -0,0 +1,177 @@ +# coding: utf-8 + +from .common import InfoExtractor +from 
..utils import ( + try_get, + ExtractorError, +) + +import json +import random +import re + + +class WPPilotBaseIE(InfoExtractor): + _VIDEO_URL = 'https://pilot.wp.pl/api/v1/channel/%s' + _VIDEO_GUEST_URL = 'https://pilot.wp.pl/api/v1/guest/channel/%s' + + _HEADERS_WEB = { + 'Content-Type': 'application/json; charset=UTF-8', + 'Referer': 'https://pilot.wp.pl/tv/', + } + + def _get_channel_list(self, cache=True): + if cache is True: + cache_res = self._downloader.cache.load('wppilot', 'channel-list') + if cache_res: + return cache_res, True + webpage = self._download_webpage('https://pilot.wp.pl/tv/', None, 'Downloading webpage') + page_data_base_url = self._search_regex( + r'', + ], webpage, 'video id', default=page_id) return { '_type': 'url_transparent', 'url': 'tvp:' + video_id, 'description': self._og_search_description( - webpage, default=None) or self._html_search_meta( - 'description', webpage, default=None), + webpage, default=None) or (self._html_search_meta( + 'description', webpage, default=None) + if '//s.tvp.pl/files/portal/v' in webpage else None), 'thumbnail': self._og_search_thumbnail(webpage, default=None), 'ie_key': 'TVPEmbed', } @@ -252,18 +417,20 @@ class TVPWebsiteIE(InfoExtractor): _TESTS = [{ # series - 'url': 'https://vod.tvp.pl/website/lzy-cennet,38678312/video', + 'url': 'https://vod.tvp.pl/website/wspaniale-stulecie,17069012/video', 'info_dict': { - 'id': '38678312', + 'id': '17069012', }, - 'playlist_count': 115, + 'playlist_count': 312, }, { # film - 'url': 'https://vod.tvp.pl/website/gloria,35139666', + 'url': 'https://vod.tvp.pl/website/krzysztof-krawczyk-cale-moje-zycie,51374466', 'info_dict': { - 'id': '36637049', + 'id': '51374509', 'ext': 'mp4', - 'title': 'Gloria, Gloria', + 'title': 'Krzysztof Krawczyk – całe moje życie, Krzysztof Krawczyk – całe moje życie', + 'description': 'md5:2e80823f00f5fc263555482f76f8fa42', + 'age_limit': 12, }, 'params': { 'skip_download': True, -- cgit v1.2.3 From 
ebfab36fca0901f99076158f9eb4f7fc9d87589b Mon Sep 17 00:00:00 2001 From: Lauren Liberda Date: Sun, 31 Oct 2021 11:03:04 +0530 Subject: [tvp] Add TVPStreamIE (#1401) Authored by: selfisekai --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/tvp.py | 46 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index d47c06647..4f9de71e2 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1571,6 +1571,7 @@ from .tvnow import ( from .tvp import ( TVPEmbedIE, TVPIE, + TVPStreamIE, TVPWebsiteIE, ) from .tvplay import ( diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py index 22cfbd25e..48e2c6e76 100644 --- a/yt_dlp/extractor/tvp.py +++ b/yt_dlp/extractor/tvp.py @@ -251,6 +251,52 @@ class TVPIE(InfoExtractor): } +class TVPStreamIE(InfoExtractor): + IE_NAME = 'tvp:stream' + _VALID_URL = r'(?:tvpstream:|https?://tvpstream\.vod\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P\d*)' + _TESTS = [{ + # untestable as "video" id changes many times across a day + 'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455', + 'only_matching': True, + }, { + 'url': 'tvpstream:39821455', + 'only_matching': True, + }, { + # the default stream when you provide no channel_id, most probably TVP Info + 'url': 'tvpstream:', + 'only_matching': True, + }, { + 'url': 'https://tvpstream.vod.tvp.pl/', + 'only_matching': True, + }] + + _PLAYER_BOX_RE = r']*id\s*=\s*["\']?tvp_player_box["\']?[^>]+data-%s-id\s*=\s*["\']?(\d+)' + _BUTTON_RE = r']*data-channel-id=["\']?%s["\']?[^>]*\sdata-title=(?:"([^"]*)"|\'([^\']*)\')[^>]*\sdata-stationname=(?:"([^"]*)"|\'([^\']*)\')' + + def _real_extract(self, url): + channel_id = self._match_id(url) + channel_url = self._proto_relative_url('//tvpstream.vod.tvp.pl/?channel_id=%s' % channel_id or 'default') + webpage = self._download_webpage(channel_url, channel_id, 'Downloading channel webpage') + if not channel_id: + 
channel_id = self._search_regex(self._PLAYER_BOX_RE % 'channel', + webpage, 'default channel id') + video_id = self._search_regex(self._PLAYER_BOX_RE % 'video', + webpage, 'video id') + audition_title, station_name = self._search_regex( + self._BUTTON_RE % (re.escape(channel_id)), webpage, + 'audition title and station name', + group=(1, 2)) + return { + '_type': 'url_transparent', + 'id': channel_id, + 'url': 'tvp:%s' % video_id, + 'title': audition_title, + 'alt_title': station_name, + 'is_live': True, + 'ie_key': 'TVPEmbed', + } + + class TVPEmbedIE(InfoExtractor): IE_NAME = 'tvp:embed' IE_DESC = 'Telewizja Polska' -- cgit v1.2.3 From 86c1a8aae4db4a5b720cbd7c9465de350d64edef Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 6 Nov 2021 09:30:38 +0530 Subject: Release 2021.11.10 --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 2 +- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 4 +- CONTRIBUTORS | 10 +++ Changelog.md | 85 +++++++++++++++++++++++ README.md | 18 +++-- supportedsites.md | 24 ++++++- 7 files changed, 133 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 862e7235f..67145d8b2 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -43,7 +43,7 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. + Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**. Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. 
It should look similar to this: placeholder: | diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index aa00b8ad7..30cebec91 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -54,7 +54,7 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output using one of the example URLs provided above. + Provide the complete verbose output **using one of the example URLs provided above**. Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. It should look similar to this: placeholder: | diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 9003bb19a..445945df4 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -37,8 +37,8 @@ body: attributes: label: Verbose log description: | - Provide the complete verbose output of yt-dlp that clearly demonstrates the problem. - Add the `-Uv` flag to your command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. + Provide the complete verbose output of yt-dlp **that clearly demonstrates the problem**. + Add the `-Uv` flag to **your** command line you run yt-dlp with (`yt-dlp -Uv `), copy the WHOLE output and insert it below. 
It should look similar to this: placeholder: | [debug] Command-line config: ['-Uv', 'http://www.youtube.com/watch?v=BaW_jenozKc'] diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 2bf96affe..f035ce10d 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -129,3 +129,13 @@ Bojidarist nixklai smplayer-dev Zirro +CrypticSignal +flashdagger +fractalf +frafra +kaz-us +ozburo +rhendric +sdomi +selfisekai +stanoarn diff --git a/Changelog.md b/Changelog.md index d74237dd4..6124d6bd0 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,91 @@ --> +### 2021.11.10 + +* [youtube] **Fix throttling by decrypting n-sig** +* Merging extractors from [haruhi-dl](https://git.sakamoto.pl/laudom/haruhi-dl) by [selfisekai](https://github.com/selfisekai) + * [extractor] Add `_search_nextjs_data` + * [tvp] Fix extractors + * [tvp] Add TVPStreamIE + * [wppilot] Add extractors + * [polskieradio] Add extractors + * [radiokapital] Add extractors + * [polsatgo] Add extractor by [selfisekai](https://github.com/selfisekai), [sdomi](https://github.com/sdomi) +* Separate `--check-all-formats` from `--check-formats` +* Approximate filesize from bitrate +* Don't create console in `windows_enable_vt_mode` +* Fix bug in `--load-infojson` of playlists +* [minicurses] Add colors to `-F` and standardize color-printing code +* [outtmpl] Add type `link` for internet shortcut files +* [outtmpl] Add alternate forms for `q` and `j` +* [outtmpl] Do not traverse `None` +* [fragment] Fix progress display in fragmented downloads +* [downloader/ffmpeg] Fix vtt download with ffmpeg +* [ffmpeg] Detect presence of setts and libavformat version +* [ExtractAudio] Rescale --audio-quality correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan) +* [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal) +* [FormatSort] `eac3` is better than `ac3` +* [FormatSort] Fix some fields' defaults +* [generic] Detect more json_ld +* [generic] 
parse jwplayer with only the json URL +* [extractor] Add keyword automatically to SearchIE descriptions +* [extractor] Fix some errors being converted to `ExtractorError` +* [utils] Add `join_nonempty` +* [utils] Add `jwt_decode_hs256` by [Ashish0804](https://github.com/Ashish0804) +* [utils] Create `DownloadCancelled` exception +* [utils] Parse `vp09` as vp9 +* [utils] Sanitize URL when determining protocol +* [test/download] Fallback test to `bv` +* [docs] Minor documentation improvements +* [cleanup] Improvements to error and debug messages +* [cleanup] Minor fixes and cleanup +* [3speak] Add extractors by [Ashish0804](https://github.com/Ashish0804) +* [AmazonStore] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [Gab] Add extractor by [u-spec-png](https://github.com/u-spec-png) +* [mediaset] Add playlist support by [nixxo](https://github.com/nixxo) +* [MLSScoccer] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [N1] Add support for nova.rs by [u-spec-png](https://github.com/u-spec-png) +* [PlanetMarathi] Add extractor by [Ashish0804](https://github.com/Ashish0804) +* [RaiplayRadio] Add extractors by [frafra](https://github.com/frafra) +* [roosterteeth] Add series extractor +* [sky] Add `SkyNewsStoryIE` by [ajj8](https://github.com/ajj8) +* [youtube] Fix sorting for some videos +* [youtube] Populate `thumbnail` with the best "known" thumbnail +* [youtube] Refactor itag processing +* [youtube] Remove unnecessary no-playlist warning +* [youtube:tab] Add Invidious list for playlists/channels by [rhendric](https://github.com/rhendric) +* [Bilibili:comments] Fix infinite loop by [u-spec-png](https://github.com/u-spec-png) +* [ceskatelevize] Fix extractor by [flashdagger](https://github.com/flashdagger) +* [Coub] Fix media format identification by [wlritchi](https://github.com/wlritchi) +* [crunchyroll] Add extractor-args `language` and `hardsub` +* [DiscoveryPlus] Allow language codes in URL +* [imdb] Fix thumbnail by 
[ozburo](https://github.com/ozburo) +* [instagram] Add IOS URL support by [u-spec-png](https://github.com/u-spec-png) +* [instagram] Improve login code by [u-spec-png](https://github.com/u-spec-png) +* [Instagram] Improve metadata extraction by [u-spec-png](https://github.com/u-spec-png) +* [iPrima] Fix extractor by [stanoarn](https://github.com/stanoarn) +* [itv] Add support for ITV News by [ajj8](https://github.com/ajj8) +* [la7] Fix extractor by [nixxo](https://github.com/nixxo) +* [linkedin] Don't login multiple times +* [mtv] Fix some videos by [Sipherdrakon](https://github.com/Sipherdrakon) +* [Newgrounds] Fix description by [u-spec-png](https://github.com/u-spec-png) +* [Nrk] Minor fixes by [fractalf](https://github.com/fractalf) +* [Olympics] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [piksel] Fix sorting +* [twitter] Do not sort by codec +* [viewlift] Add cookie-based login and series support by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan) +* [vimeo] Detect source extension and misc cleanup by [flashdagger](https://github.com/flashdagger) +* [vimeo] Fix ondemand videos and direct URLs with hash +* [vk] Fix login and add subtitles by [kaz-us](https://github.com/kaz-us) +* [VLive] Add upload_date and thumbnail by [Ashish0804](https://github.com/Ashish0804) +* [VRT] Fix login by [pgaig](https://github.com/pgaig) +* [Vupload] Fix extractor by [u-spec-png](https://github.com/u-spec-png) +* [wakanim] Add support for MPD manifests by [nyuszika7h](https://github.com/nyuszika7h) +* [wakanim] Detect geo-restriction by [nyuszika7h](https://github.com/nyuszika7h) +* [ZenYandex] Fix extractor by [u-spec-png](https://github.com/u-spec-png) + + ### 2021.10.22 * [build] Improvements diff --git a/README.md b/README.md index 713c2c4a0..24975ad6f 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * All Feeds (`:ytfav`, 
`:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`) and private playlists supports downloading multiple pages of content * Search (`ytsearch:`, `ytsearchdate:`), search URLs and in-channel search works * Mixes supports downloading multiple pages of content - * Most (but not all) age-gated content can be downloaded without cookies + * Some (but not all) age-gated content can be downloaded without cookies * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) * Redirect channel's home URL automatically to `/video` to preserve the old behaviour * `255kbps` audio is extracted (if available) from youtube music when premium cookies are given @@ -92,9 +92,13 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats -* **New extractors**: AnimeLab, Philo MSO, Spectrum MSO, SlingTV MSO, Cablevision MSO, RCN MSO, Rcs, Gedi, bitwave.tv, mildom, audius, zee5, mtv.it, wimtv, pluto.tv, niconico users, discoveryplus.in, mediathek, NFHSNetwork, nebula, ukcolumn, whowatch, MxplayerShow, parlview (au), YoutubeWebArchive, fancode, Saitosan, ShemarooMe, telemundo, VootSeries, SonyLIVSeries, HotstarSeries, VidioPremier, VidioLive, RCTIPlus, TBS Live, douyin, pornflip, ParamountPlusSeries, ScienceChannel, Utreon, OpenRec, BandcampMusic, blackboardcollaborate, eroprofile albums, mirrativ, BannedVideo, bilibili categories, Epicon, filmmodu, GabTV, HungamaAlbum, ManotoTV, Niconico search, Patreon User, peloton, ProjectVeritas, radiko, StarTV, tiktok user, Tokentube, voicy, TV2HuSeries, biliintl, 17live, NewgroundsUser, peertube channel/playlist, ZenYandex, CAM4, CGTN, damtomo, gotostage, Koo, Mediaite, Mediaklikk, MuseScore, nzherald, Olympics replay, radlive, SovietsCloset, Streamanity, Theta, Chingari, ciscowebex, Gettr, GoPro, N1, Theta, Veo, Vupload, NovaPlay, SkyNewsAU, EUScreen, Gronkh, microsoftstream, on24, trovo 
channels +* **New extractors**: 17live, 3speak, amazonstore, animelab, audius, bandcampmusic, bannedvideo, biliintl, bitwave.tv, blackboardcollaborate, cam4, cgtn, chingari, ciscowebex, damtomo, discoveryplus.in, douyin, epicon, euscreen, fancode, filmmodu, gab, gedi, gettr, gopro, gotostage, gronkh, koo, manototv, mediaite, mediaklikk, mediasetshow, mediathek, microsoftstream, mildom, mirrativ, mlsscoccer, mtv.it, musescore, mxplayershow, n1, nebula, nfhsnetwork, novaplay, nzherald, olympics replay, on24, openrec, parlview-AU, peloton, planetmarathi, pluto.tv, polsatgo, polskieradio, pornflip, projectveritas, radiko, radiokapital, radlive, raiplayradio, rcs, rctiplus, saitosan, sciencechannel, shemaroome, skynews-AU, skynews-story, sovietscloset, startv, streamanity, telemundo, theta, theta, tokentube, tv2huseries, ukcolumn, utreon, veo, vidiolive, vidiopremier, voicy, vupload, whowatch, wim.tv, wppilot, youtube webarchive, zee5, zen.yandex -* **Fixed/improved extractors**: archive.org, roosterteeth.com, skyit, instagram, itv, SouthparkDe, spreaker, Vlive, akamai, ina, rumble, tennistv, amcnetworks, la7 podcasts, linuxacadamy, nitter, twitcasting, viu, crackle, curiositystream, mediasite, rmcdecouverte, sonyliv, tubi, tenplay, patreon, videa, yahoo, BravoTV, crunchyroll, RTP, viki, Hotstar, vidio, vimeo, mediaset, Mxplayer, nbcolympics, ParamountPlus, Newgrounds, SAML Verizon login, Hungama, afreecatv, aljazeera, ATV, bitchute, camtube, CDA, eroprofile, facebook, HearThisAtIE, iwara, kakao, Motherless, Nova, peertube, pornhub, reddit, tiktok, TV2, TV2Hu, tv5mondeplus, VH1, Viafree, XHamster, 9Now, AnimalPlanet, Arte, CBC, Chingari, comedycentral, DIYNetwork, niconico, dw, funimation, globo, HiDive, NDR, Nuvid, Oreilly, pbs, plutotv, reddit, redtube, soundcloud, SpankBang, VrtNU, bbc, Bilibili, LinkedInLearning, parliamentlive, PolskieRadio, Streamable, vidme, francetv, 7plus, tagesschau +* **New playlist extractors**: bilibili categories, eroprofile albums, 
hotstar series, hungama albums, newgrounds user, niconico search/users, paramountplus series, patreon user, peertube playlist/channels, roosterteeth series, sonyliv series, tiktok user, trovo channels, voot series + +* **Fixed/improved extractors**: 7plus, 9now, afreecatv, akamai, aljazeera, amcnetworks, animalplanet, archive.org, arte, atv, bbc, bilibili, bitchute, bravotv, camtube, cbc, cda, ceskatelevize, chingari, comedycentral, coub, crackle, crunchyroll, curiositystream, diynetwork, dw, eroprofile, facebook, francetv, funimation, globo, hearthisatie, hidive, hotstar, hungama, imdb, ina, instagram, iprima, itv, iwara, kakao, la7, linkedinlearning, linuxacadamy, mediaset, mediasite, motherless, mxplayer, nbcolympics, ndr, newgrounds, niconico, nitter, nova, nrk, nuvid, oreilly, paramountplus, parliamentlive, patreon, pbs, peertube, plutotv, polskieradio, pornhub, reddit, reddit, redtube, rmcdecouverte, roosterteeth, rtp, rumble, saml verizon login, skyit, sonyliv, soundcloud, southparkde, spankbang, spreaker, streamable, tagesschau, tbs, tennistv, tenplay, tiktok, tubi, tv2, tv2hu, tv5mondeplus, tvp, twitcasting, vh1, viafree, videa, vidio, vidme, viewlift, viki, vimeo, viu, vk, vlive, vrt, wakanim, xhamster, yahoo + +* **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN * **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details @@ -108,7 +112,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **Improvements**: Regex and other operators in `--match-filter`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection) etc -* **Plugin extractors**: Extractors can be loaded from an external file. 
See [plugins](#plugins) for details +* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details * **Self-updater**: The releases can be updated using `yt-dlp -U` @@ -184,12 +188,12 @@ You can install the [PyPI package](https://pypi.org/project/yt-dlp) with: python3 -m pip install -U yt-dlp ``` -You can also install without any dependencies using: +You can install without any of the optional dependencies using: ``` python3 -m pip install --no-deps -U yt-dlp ``` -You can also install the master branch with: +If you want to be on the cutting edge, you can also install the master branch with: ``` python3 -m pip3 install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip ``` @@ -790,7 +794,7 @@ You can also fork the project on github and push it to a release branch in your formats are: best (default) or one of best|aac|flac|mp3|m4a|opus|vorbis|wav --audio-quality QUALITY Specify ffmpeg audio quality, insert a - value between 0 (better) and 9 (worse) for + value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default 5) --remux-video FORMAT Remux the video into another container if diff --git a/supportedsites.md b/supportedsites.md index 01c3f43a9..50fa7f9f1 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -48,6 +48,7 @@ - **Alura** - **AluraCourse** - **Amara** + - **AmazonStore** - **AMCNetworks** - **AmericasTestKitchen** - **AmericasTestKitchenSeason** @@ -184,7 +185,6 @@ - **CCTV**: 央视网 - **CDA** - **CeskaTelevize** - - **CeskaTelevizePorady** - **CGTN** - **channel9**: Channel 9 - **CharlieRose** @@ -366,6 +366,7 @@ - **Funk** - **Fusion** - **Fux** + - **Gab** - **GabTV** - **Gaia** - **GameInformer** @@ -449,9 +450,11 @@ - **Instagram** - **instagram:tag**: Instagram hashtag search - **instagram:user**: Instagram user profile + - **InstagramIOS**: IOS instagram:// URL - **Internazionale** - **InternetVideoArchive** - **IPrima** + - **IPrimaCNN** - 
**iqiyi**: 爱奇艺 - **Ir90Tv** - **ITTF** @@ -560,6 +563,7 @@ - **MediaKlikk** - **Medialaan** - **Mediaset** + - **MediasetShow** - **Mediasite** - **MediasiteCatalog** - **MediasiteNamedCatalog** @@ -592,6 +596,7 @@ - **mixcloud:user** - **MLB** - **MLBVideo** + - **MLSSoccer** - **Mnet** - **MNetTV** - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net @@ -801,6 +806,7 @@ - **Pinterest** - **PinterestCollection** - **Pladform** + - **PlanetMarathi** - **Platzi** - **PlatziCourse** - **play.fm** @@ -817,7 +823,12 @@ - **podomatic** - **Pokemon** - **PokemonWatch** + - **PolsatGo** - **PolskieRadio** + - **polskieradio:kierowcow** + - **polskieradio:player** + - **polskieradio:podcast** + - **polskieradio:podcast:list** - **PolskieRadioCategory** - **Popcorntimes** - **PopcornTV** @@ -860,6 +871,8 @@ - **radiocanada:audiovideo** - **radiofrance** - **RadioJavan** + - **radiokapital** + - **radiokapital:show** - **radlive** - **radlive:channel** - **radlive:season** @@ -867,6 +880,8 @@ - **RaiPlay** - **RaiPlayLive** - **RaiPlayPlaylist** + - **RaiPlayRadio** + - **RaiPlayRadioPlaylist** - **RayWenderlich** - **RayWenderlichCourse** - **RBMARadio** @@ -894,6 +909,7 @@ - **RMCDecouverte** - **RockstarGames** - **RoosterTeeth** + - **RoosterTeethSeries** - **RottenTomatoes** - **Roxwel** - **Rozhlas** @@ -961,6 +977,7 @@ - **Sina** - **sky.it** - **sky:news** + - **sky:news:story** - **sky:sports** - **sky:sports:news** - **skyacademy.it** @@ -1079,6 +1096,8 @@ - **ThisAmericanLife** - **ThisAV** - **ThisOldHouse** + - **ThreeSpeak** + - **ThreeSpeakUser** - **TikTok** - **tiktok:user** - **tinypic**: tinypic.com videos @@ -1142,6 +1161,7 @@ - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska - **tvp:series** + - **tvp:stream** - **TVPlayer** - **TVPlayHome** - **Tweakers** @@ -1296,6 +1316,8 @@ - **WistiaPlaylist** - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **WorldStarHipHop** + - **wppilot** + - 
**wppilot:channels** - **WSJ**: Wall Street Journal - **WSJArticle** - **WWE** -- cgit v1.2.3 From 2e9a445bc34e79182f900909d727ba87f8487522 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 01:14:33 +0000 Subject: [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- yt_dlp/version.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 67145d8b2..8200bdeb4 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.22 (exe) + [debug] yt-dlp version 2021.11.10 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.22) + yt-dlp is up to date (2021.11.10) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 30cebec91..8736184a3 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.22 (exe) + [debug] yt-dlp version 2021.11.10 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.22) + yt-dlp is up to date (2021.11.10) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 59578b712..a8576e21c 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 445945df4..56b233ce7 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.10.22 (exe) + [debug] yt-dlp version 2021.11.10 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.10.22) + yt-dlp is up to date (2021.11.10) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 134416f4e..0937f09ce 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.10.22**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates required: true diff --git a/yt_dlp/version.py b/yt_dlp/version.py index e7203be6b..197e7389c 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.10.22' +__version__ = '2021.11.10' -- cgit v1.2.3 From 7144b697fc20d6615690e5ec63e6c134ddb7aa5e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 06:58:42 +0530 Subject: Release 2021.11.10.1 :ci skip all --- .github/workflows/build.yml | 11 ++++++----- Changelog.md | 4 ++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0fff6cae3..f75b11700 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -146,6 +146,7 @@ jobs: build_macos: runs-on: macos-11 needs: build_unix + if: False outputs: sha256_macos: ${{ steps.sha256_macos.outputs.sha256_macos }} sha512_macos: ${{ steps.sha512_macos.outputs.sha512_macos }} @@ -344,7 +345,7 @@ jobs: finish: runs-on: ubuntu-latest - needs: [build_unix, build_windows, build_windows32, build_macos] + needs: [build_unix, build_windows, build_windows32] steps: - name: Make SHA2-256SUMS file @@ -364,8 +365,8 @@ jobs: echo "${{ env.SHA256_PY2EXE }} yt-dlp_min.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN32 }} yt-dlp_x86.exe" >> SHA2-256SUMS echo "${{ env.SHA256_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-256SUMS - echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS - echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS + # echo "${{ env.SHA256_MACOS }} yt-dlp_macos" >> SHA2-256SUMS + # echo "${{ env.SHA256_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-256SUMS - name: Upload 256SUMS file id: upload-sums uses: actions/upload-release-asset@v1 @@ -393,8 +394,8 @@ jobs: echo "${{ env.SHA512_WIN_ZIP }} yt-dlp_win.zip" >> SHA2-512SUMS echo "${{ env.SHA512_PY2EXE }} yt-dlp_min.exe" >> SHA2-512SUMS echo "${{ env.SHA512_WIN32 }} yt-dlp_x86.exe" >> SHA2-512SUMS - echo "${{ 
env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS - echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS + # echo "${{ env.SHA512_MACOS }} yt-dlp_macos" >> SHA2-512SUMS + # echo "${{ env.SHA512_MACOS_ZIP }} yt-dlp_macos.zip" >> SHA2-512SUMS - name: Upload 512SUMS file id: upload-512sums uses: actions/upload-release-asset@v1 diff --git a/Changelog.md b/Changelog.md index 6124d6bd0..5ac2aa615 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,6 +14,10 @@ --> +### 2021.11.10.1 + +* Temporarily disable MacOS Build + ### 2021.11.10 * [youtube] **Fix throttling by decrypting n-sig** -- cgit v1.2.3 From 9ebf3c6ab97c29b2d5872122e532bc98b93ad8b3 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 01:47:10 +0000 Subject: [version] update :ci skip all --- .github/ISSUE_TEMPLATE/1_broken_site.yml | 6 +++--- .github/ISSUE_TEMPLATE/2_site_support_request.yml | 6 +++--- .github/ISSUE_TEMPLATE/3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/4_bug_report.yml | 6 +++--- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- yt_dlp/version.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.yml b/.github/ISSUE_TEMPLATE/1_broken_site.yml index 8200bdeb4..27e07fb18 100644 --- a/.github/ISSUE_TEMPLATE/1_broken_site.yml +++ b/.github/ISSUE_TEMPLATE/1_broken_site.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a broken site required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -51,12 +51,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.11.10 (exe) + [debug] yt-dlp version 2021.11.10.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.11.10) + yt-dlp is up to date (2021.11.10.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.yml b/.github/ISSUE_TEMPLATE/2_site_support_request.yml index 8736184a3..b27418544 100644 --- a/.github/ISSUE_TEMPLATE/2_site_support_request.yml +++ b/.github/ISSUE_TEMPLATE/2_site_support_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a new site support request required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -62,12 +62,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.11.10 (exe) + [debug] yt-dlp version 2021.11.10.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.11.10) + yt-dlp is up to date (2021.11.10.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index a8576e21c..9df0902f4 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a site feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.yml b/.github/ISSUE_TEMPLATE/4_bug_report.yml index 56b233ce7..14cc17ac9 100644 --- a/.github/ISSUE_TEMPLATE/4_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/4_bug_report.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a bug unrelated to a specific site required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. 
([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've checked that all provided URLs are alive and playable in a browser required: true @@ -45,12 +45,12 @@ body: [debug] Portable config file: yt-dlp.conf [debug] Portable config: ['-i'] [debug] Encodings: locale cp1252, fs utf-8, stdout utf-8, stderr utf-8, pref cp1252 - [debug] yt-dlp version 2021.11.10 (exe) + [debug] yt-dlp version 2021.11.10.1 (exe) [debug] Python version 3.8.8 (CPython 64bit) - Windows-10-10.0.19041-SP0 [debug] exe versions: ffmpeg 3.0.1, ffprobe 3.0.1 [debug] Optional libraries: Cryptodome, keyring, mutagen, sqlite, websockets [debug] Proxy map: {} - yt-dlp is up to date (2021.11.10) + yt-dlp is up to date (2021.11.10.1) render: shell validations: diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 0937f09ce..ae0c277b3 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -11,7 +11,7 @@ body: options: - label: I'm reporting a feature request required: true - - label: I've verified that I'm running yt-dlp version **2021.11.10**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) + - label: I've verified that I'm running yt-dlp version **2021.11.10.1**. ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) required: true - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues including closed ones. 
DO NOT post duplicates required: true diff --git a/yt_dlp/version.py b/yt_dlp/version.py index 197e7389c..5290afa2d 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2021.11.10' +__version__ = '2021.11.10.1' -- cgit v1.2.3 From b47d236d724f7a129c7ed0792fb847eb12e6f8a5 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Wed, 10 Nov 2021 15:28:38 +0000 Subject: [Tokentube] Fix description (#1578) Authored by: u-spec-png --- yt_dlp/extractor/tokentube.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/tokentube.py b/yt_dlp/extractor/tokentube.py index d6362117f..579623fed 100644 --- a/yt_dlp/extractor/tokentube.py +++ b/yt_dlp/extractor/tokentube.py @@ -6,7 +6,10 @@ import re from .common import InfoExtractor from ..utils import ( + clean_html, + get_element_by_class, parse_count, + remove_end, unified_strdate, js_to_json, OnDemandPagedList, @@ -35,7 +38,7 @@ class TokentubeIE(InfoExtractor): 'id': '3950239124', 'ext': 'mp4', 'title': 'Linux Ubuntu Studio perus käyttö', - 'description': 'md5:854ff1dc732ff708976de2880ea32050', + 'description': 'md5:46077d0daaba1974f2dc381257f9d64c', 'uploader': 'jyrilehtonen', 'upload_date': '20210825', }, @@ -45,7 +48,7 @@ class TokentubeIE(InfoExtractor): 'id': '3582463289', 'ext': 'mp4', 'title': 'Police for Freedom - toiminta aloitetaan Suomessa ❤️??', - 'description': 'md5:cd92e620d7f5fa162e8410d0fc9a08be', + 'description': 'md5:37ebf1cb44264e0bf23ed98b337ee63e', 'uploader': 'Voitontie', 'upload_date': '20210428', } @@ -90,7 +93,10 @@ class TokentubeIE(InfoExtractor): r']+>(.+?)', webpage, 'uploader', fatal=False) - description = self._html_search_meta('description', webpage) + description = (clean_html(get_element_by_class('p-d-txt', webpage)) + or self._html_search_meta(('og:description', 'description', 'twitter:description'), webpage)) + + description = 
remove_end(description, 'Category') self._sort_formats(formats) -- cgit v1.2.3 From 013ae2e5038178420966fa7e029908b37ecda821 Mon Sep 17 00:00:00 2001 From: makeworld <25111343+makeworld-the-better-one@users.noreply.github.com> Date: Wed, 10 Nov 2021 14:37:05 -0500 Subject: [CBC Gem] Fix for shows that don't have all seasons (#1621) Closes #1594 Authored by: makeworld-the-better-one --- yt_dlp/extractor/cbc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/cbc.py b/yt_dlp/extractor/cbc.py index 4fcf2a9c1..413053499 100644 --- a/yt_dlp/extractor/cbc.py +++ b/yt_dlp/extractor/cbc.py @@ -390,7 +390,8 @@ class CBCGemPlaylistIE(InfoExtractor): show = match.group('show') show_info = self._download_json(self._API_BASE + show, season_id) season = int(match.group('season')) - season_info = try_get(show_info, lambda x: x['seasons'][season - 1]) + + season_info = next((s for s in show_info['seasons'] if s.get('season') == season), None) if season_info is None: raise ExtractorError(f'Couldn\'t find season {season} of {show}') -- cgit v1.2.3 From 44bcb8d1225c2fcfb9b1814282b74f0563ee26d1 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 18:33:37 +0530 Subject: Fix bug in parsing `--add-header` Closes #1614 --- yt_dlp/options.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 89a1a8637..89401910e 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -151,25 +151,25 @@ def parseOpts(overrideArguments=None): def _dict_from_options_callback( option, opt_str, value, parser, - allowed_keys=r'[\w-]+', delimiter=':', default_key=None, process=None, multiple_keys=True): + allowed_keys=r'[\w-]+', delimiter=':', default_key=None, process=None, multiple_keys=True, + process_key=str.lower): out_dict = getattr(parser.values, option.dest) if multiple_keys: allowed_keys = r'(%s)(,(%s))*' % (allowed_keys, allowed_keys) mobj = re.match(r'(?i)(?P%s)%s(?P.*)$' % 
(allowed_keys, delimiter), value) if mobj is not None: - keys = [k.strip() for k in mobj.group('keys').lower().split(',')] - val = mobj.group('val') + keys, val = mobj.group('keys').split(','), mobj.group('val') elif default_key is not None: keys, val = [default_key], value else: raise optparse.OptionValueError( 'wrong %s formatting; it should be %s, not "%s"' % (opt_str, option.metavar, value)) try: + keys = map(process_key, keys) if process_key else keys val = process(val) if process else val except Exception as err: - raise optparse.OptionValueError( - 'wrong %s formatting; %s' % (opt_str, err)) + raise optparse.OptionValueError(f'wrong {opt_str} formatting; {err}') for key in keys: out_dict[key] = val @@ -792,7 +792,7 @@ def parseOpts(overrideArguments=None): '--add-header', metavar='FIELD:VALUE', dest='headers', default={}, type='str', action='callback', callback=_dict_from_options_callback, - callback_kwargs={'multiple_keys': False}, + callback_kwargs={'multiple_keys': False, 'process_key': None}, help='Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times', ) workarounds.add_option( -- cgit v1.2.3 From 093a17107ea5e375ba606ed1c31d1c259f93e0df Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 10 Nov 2021 21:41:41 +0530 Subject: Allow using a custom format selector through API Closes #1619, #1464 --- README.md | 51 +++++++++++++++++++++++++++++++++++++++++++-------- yt_dlp/YoutubeDL.py | 13 ++++++++++--- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 24975ad6f..7a4ec55bb 100644 --- a/README.md +++ b/README.md @@ -1600,14 +1600,14 @@ From a Python program, you can embed yt-dlp in a more powerful fashion, like thi ```python from yt_dlp import YoutubeDL -ydl_opts = {} +ydl_opts = {'format': 'bestaudio'} with YoutubeDL(ydl_opts) as ydl: ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) ``` Most likely, you'll want to use various options. 
For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L154-L452). -Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), converts the video to an mp3 file, implements a custom postprocessor and prints the final info_dict as json: +Here's a more complete example demonstrating various functionality: ```python import json @@ -1633,23 +1633,56 @@ class MyLogger: print(msg) +# ℹ️ See the docstring of yt_dlp.postprocessor.common.PostProcessor class MyCustomPP(yt_dlp.postprocessor.PostProcessor): + # ℹ️ See docstring of yt_dlp.postprocessor.common.PostProcessor.run def run(self, info): self.to_screen('Doing stuff') return [], info +# ℹ️ See "progress_hooks" in the docstring of yt_dlp.YoutubeDL def my_hook(d): if d['status'] == 'finished': print('Done downloading, now converting ...') +def format_selector(ctx): + """ Select the best video and the best audio that won't result in an mkv. + This is just an example and does not handle all cases """ + + # formats are already sorted worst to best + formats = ctx.get('formats')[::-1] + + # acodec='none' means there is no audio + best_video = next(f for f in formats + if f['vcodec'] != 'none' and f['acodec'] == 'none') + + # find compatible audio extension + audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']] + # vcodec='none' means there is no video + best_audio = next(f for f in formats if ( + f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext)) + + yield { + # These are the minimum required fields for a merged format + 'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}', + 'ext': best_video['ext'], + 'requested_formats': [best_video, best_audio], + # Must be + seperated list of protocols + 'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}' + } + + +# ℹ️ See docstring of yt_dlp.YoutubeDL for a description of the options ydl_opts = { - 'format': 'bestaudio/best', 
+ 'format': format_selector, 'postprocessors': [{ - 'key': 'FFmpegExtractAudio', - 'preferredcodec': 'mp3', - 'preferredquality': '192', + # Embed metadata in video using ffmpeg. + # ℹ️ See yt_dlp.postprocessor.FFmpegMetadataPP for the arguments it accepts + 'key': 'FFmpegMetadata', + 'add_chapters': True, + 'add_metadata': True, }], 'logger': MyLogger(), 'progress_hooks': [my_hook], @@ -1659,14 +1692,16 @@ ydl_opts = { # Add custom headers yt_dlp.utils.std_headers.update({'Referer': 'https://www.google.com'}) +# ℹ️ See the public functions in yt_dlp.YoutubeDL for for other available functions. +# Eg: "ydl.download", "ydl.download_with_info_file" with yt_dlp.YoutubeDL(ydl_opts) as ydl: ydl.add_post_processor(MyCustomPP()) info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc') + + # ℹ️ ydl.sanitize_info makes the info json-serializable print(json.dumps(ydl.sanitize_info(info))) ``` -See the public functions in [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py) for other available functions. Eg: `ydl.download`, `ydl.download_with_info_file` - **Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2439fc82b..5d6b1d5b2 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -211,6 +211,9 @@ class YoutubeDL(object): simulate: Do not download the video files. If unset (or None), simulate only if listsubtitles, listformats or list_thumbnails is used format: Video format code. see "FORMAT SELECTION" for more details. + You can also pass a function. The function takes 'ctx' as + argument and returns the formats to download. 
+ See "build_format_selector" for an implementation allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded. ignore_no_formats_error: Ignore "No video formats" error. Usefull for extracting metadata even if the video is not actually @@ -613,6 +616,7 @@ class YoutubeDL(object): # Creating format selector here allows us to catch syntax errors before the extraction self.format_selector = ( None if self.params.get('format') is None + else self.params['format'] if callable(self.params['format']) else self.build_format_selector(self.params['format'])) self._setup_opener() @@ -1927,9 +1931,9 @@ class YoutubeDL(object): 'format_id': '+'.join(filtered('format_id')), 'ext': output_ext, 'protocol': '+'.join(map(determine_protocol, formats_info)), - 'language': '+'.join(orderedSet(filtered('language'))), - 'format_note': '+'.join(orderedSet(filtered('format_note'))), - 'filesize_approx': sum(filtered('filesize', 'filesize_approx')), + 'language': '+'.join(orderedSet(filtered('language'))) or None, + 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None, + 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None, 'tbr': sum(filtered('tbr', 'vbr', 'abr')), } @@ -2357,6 +2361,9 @@ class YoutubeDL(object): info_dict, _ = self.pre_process(info_dict) + # The pre-processors may have modified the formats + formats = info_dict.get('formats', [info_dict]) + if self.params.get('list_thumbnails'): self.list_thumbnails(info_dict) if self.params.get('listformats'): -- cgit v1.2.3 From e08a85d86595705126d1304eafd3829e6f3811d0 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 11 Nov 2021 08:00:43 +0530 Subject: Fix writing playlist infojson with `--no-clean-infojson` --- yt_dlp/YoutubeDL.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5d6b1d5b2..4699e58b1 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1583,10 +1583,11 @@ class 
YoutubeDL(object): if entry is not None] n_entries = len(entries) - if not playlistitems and (playliststart or playlistend): + if not playlistitems and (playliststart != 1 or playlistend): playlistitems = list(range(playliststart, playliststart + n_entries)) ie_result['requested_entries'] = playlistitems + _infojson_written = False if not self.params.get('simulate') and self.params.get('allow_playlist_files', True): ie_copy = { 'playlist': playlist, @@ -1599,8 +1600,9 @@ class YoutubeDL(object): } ie_copy.update(dict(ie_result)) - if self._write_info_json('playlist', ie_result, - self.prepare_filename(ie_copy, 'pl_infojson')) is None: + _infojson_written = self._write_info_json( + 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson')) + if _infojson_written is None: return if self._write_description('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_description')) is None: @@ -1656,6 +1658,12 @@ class YoutubeDL(object): # TODO: skip failed (empty) entries? playlist_results.append(entry_result) ie_result['entries'] = playlist_results + + # Write the updated info to json + if _infojson_written and self._write_info_json( + 'updated playlist', ie_result, + self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None: + return self.to_screen('[download] Finished downloading playlist: %s' % playlist) return ie_result @@ -3472,8 +3480,10 @@ class YoutubeDL(object): encoding = preferredencoding() return encoding - def _write_info_json(self, label, ie_result, infofn): + def _write_info_json(self, label, ie_result, infofn, overwrite=None): ''' Write infojson and returns True = written, False = skip, None = error ''' + if overwrite is None: + overwrite = self.params.get('overwrites', True) if not self.params.get('writeinfojson'): return False elif not infofn: @@ -3481,7 +3491,7 @@ class YoutubeDL(object): return False elif not self._ensure_dir_exists(infofn): return None - elif not self.params.get('overwrites', True) and os.path.exists(infofn): 
+ elif not overwrite and os.path.exists(infofn): self.to_screen(f'[info] {label.title()} metadata is already present') else: self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') -- cgit v1.2.3 From bf5f605e7674c96d752aabb102cf627f5d7258ae Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 11 Nov 2021 08:44:54 +0530 Subject: bugfix for e08a85d86595705126d1304eafd3829e6f3811d0 --- yt_dlp/YoutubeDL.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4699e58b1..1b3873254 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1506,9 +1506,9 @@ class YoutubeDL(object): raise EntryNotInPlaylist('There are no entries') incomplete_entries = bool(ie_result.get('requested_entries')) if incomplete_entries: - def fill_missing_entries(entries, indexes): - ret = [None] * max(*indexes) - for i, entry in zip(indexes, entries): + def fill_missing_entries(entries, indices): + ret = [None] * max(indices) + for i, entry in zip(indices, entries): ret[i - 1] = entry return ret ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries']) @@ -2991,7 +2991,8 @@ class YoutubeDL(object): try: self.__download_wrapper(self.process_ie_result)(info, download=True) except (DownloadError, EntryNotInPlaylist, ThrottledDownload) as e: - self.to_stderr('\r') + if not isinstance(e, EntryNotInPlaylist): + self.to_stderr('\r') webpage_url = info.get('webpage_url') if webpage_url is not None: self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}') -- cgit v1.2.3 From c1dc0ee56e0d29cefe6948621d253385fff3e20f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 12 Nov 2021 03:12:53 +0530 Subject: [NovaEmbed] Fix extractor Closes #1570 --- yt_dlp/extractor/nova.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/nova.py b/yt_dlp/extractor/nova.py index 3acb88121..0007b6b12 
100644 --- a/yt_dlp/extractor/nova.py +++ b/yt_dlp/extractor/nova.py @@ -10,6 +10,7 @@ from ..utils import ( int_or_none, js_to_json, qualities, + traverse_obj, unified_strdate, url_or_none, ) @@ -17,30 +18,44 @@ from ..utils import ( class NovaEmbedIE(InfoExtractor): _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P[^/?#&]+)' - _TEST = { + _TESTS = [{ 'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1', - 'md5': 'ee009bafcc794541570edd44b71cbea3', 'info_dict': { 'id': '8o0n0r', - 'ext': 'mp4', 'title': '2180. díl', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 2578, }, - } + 'params': { + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + 'expected_warnings': ['DRM protected', 'Requested format is not available'], + }, { + 'url': 'https://media.cms.nova.cz/embed/KybpWYvcgOa', + 'info_dict': { + 'id': 'KybpWYvcgOa', + 'ext': 'mp4', + 'title': 'Borhyová oslavila 60? Soutěžící z pořadu odboural moderátora Ondřeje Sokola', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 114, + }, + 'params': {'skip_download': 'm3u8'}, + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) + has_drm = False duration = None formats = [] player = self._parse_json( self._search_regex( - r'Player\.init\s*\([^,]+,\s*(?:\w+\s*\?\s*{.+?}\s*:\s*)?({.+})\s*,\s*{.+?}\s*\)\s*;', - webpage, 'player', default='{}'), video_id, fatal=False) + r'Player\.init\s*\([^,]+,(?P\s*\w+\s*\?)?\s*(?P{(?(cndn).+?|.+)})\s*(?(cndn):|,\s*{.+?}\s*\)\s*;)', + webpage, 'player', default='{}', group='json'), video_id, fatal=False) if player: for format_id, format_list in player['tracks'].items(): if not isinstance(format_list, list): @@ -48,6 +63,10 @@ class NovaEmbedIE(InfoExtractor): for format_dict in format_list: if not isinstance(format_dict, dict): continue + if (not self.get_param('allow_unplayable_formats') + and traverse_obj(format_dict, ('drm', 'keySystem'))): + has_drm = True + continue format_url = 
url_or_none(format_dict.get('src')) format_type = format_dict.get('type') ext = determine_ext(format_url) @@ -104,6 +123,8 @@ class NovaEmbedIE(InfoExtractor): f['format_id'] = f_id formats.append(f) + if not formats and has_drm: + self.report_drm(video_id) self._sort_formats(formats) title = self._og_search_title( -- cgit v1.2.3 From 48e931066091fba7af1c447787685bbf7c889a25 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 12 Nov 2021 03:59:32 +0530 Subject: [nexx] Better error message for unsupported format Related: #1637 --- yt_dlp/extractor/nexx.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yt_dlp/extractor/nexx.py b/yt_dlp/extractor/nexx.py index a30108483..8aceebd49 100644 --- a/yt_dlp/extractor/nexx.py +++ b/yt_dlp/extractor/nexx.py @@ -385,8 +385,7 @@ class NexxIE(InfoExtractor): elif cdn == 'free': formats = self._extract_free_formats(video, video_id) else: - # TODO: reverse more cdns - assert False + self.raise_no_formats(f'{cdn} formats are currently not supported', video_id) self._sort_formats(formats) -- cgit v1.2.3 From df03de2c02192e43e5b51c8708619179a268b4cf Mon Sep 17 00:00:00 2001 From: MinePlayersPE Date: Fri, 12 Nov 2021 20:46:19 +0700 Subject: [RoosterTeethSeries] Fix for multiple pages (#1642) Authored by: MinePlayersPE --- yt_dlp/extractor/roosterteeth.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/roosterteeth.py b/yt_dlp/extractor/roosterteeth.py index be796804c..18672b2e3 100644 --- a/yt_dlp/extractor/roosterteeth.py +++ b/yt_dlp/extractor/roosterteeth.py @@ -12,6 +12,7 @@ from ..utils import ( url_or_none, urlencode_postdata, urljoin, + update_url_query, ) @@ -182,6 +183,13 @@ class RoosterTeethSeriesIE(RoosterTeethBaseIE): 'id': 'role-initiative', 'title': 'Role Initiative', } + }, { + 'url': 'https://roosterteeth.com/series/let-s-play-minecraft?season=9', + 'playlist_mincount': 50, + 'info_dict': { + 'id': 'let-s-play-minecraft-9', + 'title': 'Let\'s Play 
Minecraft - Season 9', + } }] def _entries(self, series_id, season_number): @@ -192,7 +200,7 @@ class RoosterTeethSeriesIE(RoosterTeethBaseIE): idx = traverse_obj(data, ('attributes', 'number')) if season_number and idx != season_number: continue - season_url = urljoin(self._API_BASE, data['links']['episodes']) + season_url = update_url_query(urljoin(self._API_BASE, data['links']['episodes']), {'per_page': 1000}) season = self._download_json(season_url, display_id, f'Downloading season {idx} JSON metadata')['data'] for episode in season: yield self.url_result( -- cgit v1.2.3 From 92775d8a40728fe045af000755f1c3eeffb2089d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 15:07:48 +0530 Subject: [CuriosityStream] Fix series Bug introduced in ed807c18376ecb61c2219b506040bc3e9464bde9 --- yt_dlp/extractor/curiositystream.py | 56 ++++++++++++++++++++++--------------- yt_dlp/extractor/extractors.py | 3 +- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 41c0f845a..628c83631 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -44,7 +44,7 @@ class CuriosityStreamBaseIE(InfoExtractor): 'password': password, })) self._handle_errors(result) - self._auth_token = result['message']['auth_token'] + CuriosityStreamBaseIE._auth_token = result['message']['auth_token'] class CuriosityStreamIE(CuriosityStreamBaseIE): @@ -142,9 +142,26 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): } -class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): - IE_NAME = 'curiositystream:collection' - _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:collections?|series)/(?P\d+)' +class CuriosityStreamCollectionBaseIE(CuriosityStreamBaseIE): + + def _real_extract(self, url): + collection_id = self._match_id(url) + collection = self._call_api(collection_id, collection_id) + entries = [] + for media in collection.get('media', []): + media_id =
compat_str(media.get('id')) + media_type, ie = ('series', CuriosityStreamSeriesIE) if media.get('is_collection') else ('video', CuriosityStreamIE) + entries.append(self.url_result( + 'https://curiositystream.com/%s/%s' % (media_type, media_id), + ie=ie.ie_key(), video_id=media_id)) + return self.playlist_result( + entries, collection_id, + collection.get('title'), collection.get('description')) + + +class CuriosityStreamCollectionsIE(CuriosityStreamCollectionBaseIE): + IE_NAME = 'curiositystream:collections' + _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/collections/(?P\d+)' _API_BASE_URL = 'https://api.curiositystream.com/v2/collections/' _TESTS = [{ 'url': 'https://curiositystream.com/collections/86', @@ -155,7 +172,17 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): }, 'playlist_mincount': 7, }, { - 'url': 'https://app.curiositystream.com/collection/2', + 'url': 'https://curiositystream.com/collections/36', + 'only_matching': True, + }] + + +class CuriosityStreamSeriesIE(CuriosityStreamCollectionBaseIE): + IE_NAME = 'curiositystream:series' + _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/(?:series|collection)/(?P\d+)' + _API_BASE_URL = 'https://api.curiositystream.com/v2/series/' + _TESTS = [{ + 'url': 'https://curiositystream.com/series/2', 'info_dict': { 'id': '2', 'title': 'Curious Minds: The Internet', @@ -163,23 +190,6 @@ class CuriosityStreamCollectionIE(CuriosityStreamBaseIE): }, 'playlist_mincount': 16, }, { - 'url': 'https://curiositystream.com/series/2', - 'only_matching': True, - }, { - 'url': 'https://curiositystream.com/collections/36', + 'url': 'https://curiositystream.com/collection/2', 'only_matching': True, }] - - def _real_extract(self, url): - collection_id = self._match_id(url) - collection = self._call_api(collection_id, collection_id) - entries = [] - for media in collection.get('media', []): - media_id = compat_str(media.get('id')) - media_type, ie = ('series', CuriosityStreamCollectionIE) if 
media.get('is_collection') else ('video', CuriosityStreamIE) - entries.append(self.url_result( - 'https://curiositystream.com/%s/%s' % (media_type, media_id), - ie=ie.ie_key(), video_id=media_id)) - return self.playlist_result( - entries, collection_id, - collection.get('title'), collection.get('description')) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4f9de71e2..2eee2a864 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -307,7 +307,8 @@ from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( CuriosityStreamIE, - CuriosityStreamCollectionIE, + CuriosityStreamCollectionsIE, + CuriosityStreamSeriesIE, ) from .cwtv import CWTVIE from .dailymail import DailyMailIE -- cgit v1.2.3 From 39c04074e7e108bc6e36f3a34ef08a163663144a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 15:11:33 +0530 Subject: [ExtractAudio] Fix conversion to `wav` Closes #1645 --- yt_dlp/postprocessor/ffmpeg.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 46e87baeb..b2f28d658 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -403,10 +403,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): 'aac': (0.1, 4), 'vorbis': (0, 10), 'libfdk_aac': (1, 5), - 'opus': None, # doesn't support -q:a - 'wav': None, - 'flac': None, - }[codec] + }.get(codec) if not limits: return [] -- cgit v1.2.3 From e339d25a0d0d5de7e237e6ff8c7676aaa2cbb8a8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 15:11:59 +0530 Subject: [youtube] Minor improvement to format sorting --- yt_dlp/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 7bcd6e7dc..3ae0f5a27 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2563,7 +2563,7 
@@ class YoutubeIE(YoutubeBaseInfoExtractor): f['quality'] = next(( q(qdict[val]) - for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)) + for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities)) if val in qdict), -1) return True -- cgit v1.2.3 From 7c7f7161fc0d778cd74d8b89162ba9df3d4e5da8 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 17:30:33 +0530 Subject: Fix `--load-info-json` of playlists with failed entries --- yt_dlp/YoutubeDL.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1b3873254..70106db7e 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1504,10 +1504,12 @@ class YoutubeDL(object): if 'entries' not in ie_result: raise EntryNotInPlaylist('There are no entries') + + MissingEntry = object() incomplete_entries = bool(ie_result.get('requested_entries')) if incomplete_entries: def fill_missing_entries(entries, indices): - ret = [None] * max(indices) + ret = [MissingEntry] * max(indices) for i, entry in zip(indices, entries): ret[i - 1] = entry return ret @@ -1561,7 +1563,7 @@ class YoutubeDL(object): entry = None try: entry = get_entry(i) - if entry is None: + if entry is MissingEntry: raise EntryNotInPlaylist() except (IndexError, EntryNotInPlaylist): if incomplete_entries: @@ -1655,7 +1657,6 @@ class YoutubeDL(object): self.report_error( 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures)) break - # TODO: skip failed (empty) entries? 
playlist_results.append(entry_result) ie_result['entries'] = playlist_results -- cgit v1.2.3 From 9ac24e235ea9ef91c711c35b0f793d17ea284a54 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 23:49:14 +0530 Subject: [curiositystream] Add more metadata Closes #1568 --- yt_dlp/extractor/common.py | 1 + yt_dlp/extractor/curiositystream.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 5c6e59901..6f0650296 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -342,6 +342,7 @@ class InfoExtractor(object): series, programme or podcast: series: Title of the series or programme the video episode belongs to. + series_id: Id of the series or programme the video episode belongs to, as a unicode string. season: Title of the season the video episode belongs to. season_number: Number of the season the video episode belongs to, as an integer. season_id: Id of the season the video episode belongs to, as a unicode string. 
diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 628c83631..286a4c6af 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -50,19 +50,23 @@ class CuriosityStreamBaseIE(InfoExtractor): class CuriosityStreamIE(CuriosityStreamBaseIE): IE_NAME = 'curiositystream' _VALID_URL = r'https?://(?:app\.)?curiositystream\.com/video/(?P\d+)' - _TEST = { + _TESTS = [{ 'url': 'https://app.curiositystream.com/video/2', 'info_dict': { 'id': '2', 'ext': 'mp4', 'title': 'How Did You Develop The Internet?', 'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.', + 'channel': 'Curiosity Stream', + 'categories': ['Technology', 'Interview'], + 'average_rating': 96.79, + 'series_id': '2', }, 'params': { # m3u8 download 'skip_download': True, }, - } + }] def _real_extract(self, url): video_id = self._match_id(url) @@ -139,6 +143,10 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): 'duration': int_or_none(media.get('duration')), 'tags': media.get('tags'), 'subtitles': subtitles, + 'channel': media.get('producer'), + 'categories': [media.get('primary_category'), media.get('type')], + 'average_rating': media.get('rating_percentage'), + 'series_id': str(media.get('collection_id') or '') or None, } -- cgit v1.2.3 From d0e6121adf4f82b266c82d7e632f7fe79f05096c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sat, 13 Nov 2021 23:55:12 +0530 Subject: [curiositystream] Fix login Bug from 92775d8a40728fe045af000755f1c3eeffb2089d --- yt_dlp/extractor/curiositystream.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/yt_dlp/extractor/curiositystream.py b/yt_dlp/extractor/curiositystream.py index 286a4c6af..485b6031f 100644 --- a/yt_dlp/extractor/curiositystream.py +++ b/yt_dlp/extractor/curiositystream.py @@ -15,7 +15,6 @@ from ..utils import ( class CuriosityStreamBaseIE(InfoExtractor): _NETRC_MACHINE = 'curiositystream' _auth_token 
= None - _API_BASE_URL = 'https://api.curiositystream.com/v1/' def _handle_errors(self, result): error = result.get('error', {}).get('message') @@ -39,7 +38,8 @@ class CuriosityStreamBaseIE(InfoExtractor): if email is None: return result = self._download_json( - self._API_BASE_URL + 'login', None, data=urlencode_postdata({ + 'https://api.curiositystream.com/v1/login', None, + note='Logging in', data=urlencode_postdata({ 'email': email, 'password': password, })) @@ -68,12 +68,14 @@ class CuriosityStreamIE(CuriosityStreamBaseIE): }, }] + _API_BASE_URL = 'https://api.curiositystream.com/v1/media/' + def _real_extract(self, url): video_id = self._match_id(url) formats = [] for encoding_format in ('m3u8', 'mpd'): - media = self._call_api('media/' + video_id, video_id, query={ + media = self._call_api(video_id, video_id, query={ 'encodingsNew': 'true', 'encodingsFormat': encoding_format, }) -- cgit v1.2.3 From f279aaee8e246f510e56fe35b163520f35085338 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 15 Nov 2021 01:25:47 +0530 Subject: Add compat-option embed-metadata --- README.md | 1 + yt_dlp/options.py | 2 +- yt_dlp/postprocessor/ffmpeg.py | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7a4ec55bb..1612bda5a 100644 --- a/README.md +++ b/README.md @@ -137,6 +137,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * `--ignore-errors` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead * When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files * `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-infojson`. 
Use `--compat-options no-attach-info-json` to revert this +* Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this * All *experiences* of a funimation episode are considered as a single video. This behavior breaks existing archives. Use `--compat-options seperate-video-versions` to extract information from only the default player diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 89401910e..209f199bd 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -278,7 +278,7 @@ def parseOpts(overrideArguments=None): 'allowed_values': { 'filename', 'format-sort', 'abort-on-error', 'format-spec', 'no-playlist-metafiles', 'multistreams', 'no-live-chat', 'playlist-index', 'list-formats', 'no-direct-merge', - 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', + 'no-youtube-channel-redirect', 'no-youtube-unavailable-videos', 'no-attach-info-json', 'embed-metadata', 'embed-thumbnail-atomicparsley', 'seperate-video-versions', 'no-clean-infojson', 'no-keep-subs', }, 'aliases': { 'youtube-dl': ['-multistreams', 'all'], diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index b2f28d658..d6734e8d9 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -721,6 +721,9 @@ class 
FFmpegMetadataPP(FFmpegPostProcessor): add('season_number') add('episode_id', ('episode', 'episode_id')) add('episode_sort', 'episode_number') + if 'embed-metadata' in self.get_param('compat_opts', []): + add('comment', 'description') + metadata.pop('synopsis', None) for key, value in info.items(): if value is not None and key != meta_prefix and key.startswith(meta_prefix): -- cgit v1.2.3 From dac5df5a988a75ed12343e4ee8fcafbc76ae847d Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 15 Nov 2021 04:03:41 +0530 Subject: Add option `--embed-info-json` to embed info-json in mkv Closes #1644 --- yt_dlp/YoutubeDL.py | 8 ++++--- yt_dlp/__init__.py | 10 ++++++++- yt_dlp/options.py | 12 ++++++++++- yt_dlp/postprocessor/ffmpeg.py | 47 +++++++++++++++++++++++++++++++----------- 4 files changed, 60 insertions(+), 17 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 70106db7e..a102ecc32 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -431,7 +431,7 @@ class YoutubeDL(object): compat_opts: Compatibility options. See "Differences in default behavior". The following options do not work when used through the API: filename, abort-on-error, multistreams, no-live-chat, format-sort - no-clean-infojson, no-playlist-metafiles, no-keep-subs. + no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json. Refer __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. 
Allowed keys are 'download', 'postprocess', @@ -2654,6 +2654,8 @@ class YoutubeDL(object): infofn = self.prepare_filename(info_dict, 'infojson') _infojson_written = self._write_info_json('video', info_dict, infofn) if _infojson_written: + info_dict['infojson_filename'] = infofn + # For backward compatability, even though it was a private field info_dict['__infojson_filename'] = infofn elif _infojson_written is None: return @@ -3012,8 +3014,8 @@ class YoutubeDL(object): keep_keys = ['_type'] # Always keep this to facilitate load-info-json if remove_private_keys: remove_keys |= { - 'requested_formats', 'requested_subtitles', 'requested_entries', - 'filepath', 'entries', 'original_url', 'playlist_autonumber', + 'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', + 'filepath', 'infojson_filename', 'original_url', 'playlist_autonumber', } empty_values = (None, {}, [], set(), tuple()) reject = lambda k, v: k not in keep_keys and ( diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index d72e08b35..63b9b6e2f 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -290,6 +290,11 @@ def _real_main(argv=None): set_default_compat('abort-on-error', 'ignoreerrors', 'only_download') set_default_compat('no-playlist-metafiles', 'allow_playlist_files') set_default_compat('no-clean-infojson', 'clean_infojson') + if 'no-attach-info-json' in compat_opts: + if opts.embed_infojson: + _unused_compat_opt('no-attach-info-json') + else: + opts.embed_infojson = False if 'format-sort' in compat_opts: opts.format_sort.extend(InfoExtractor.FormatSort.ytdl_default) _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False) @@ -526,11 +531,14 @@ def _real_main(argv=None): # By default ffmpeg preserves metadata applicable for both # source and target containers. From this point the container won't change, # so metadata can be added here. 
- if opts.addmetadata or opts.addchapters: + if opts.addmetadata or opts.addchapters or opts.embed_infojson: + if opts.embed_infojson is None: + opts.embed_infojson = 'if_exists' postprocessors.append({ 'key': 'FFmpegMetadata', 'add_chapters': opts.addchapters, 'add_metadata': opts.addmetadata, + 'add_infojson': opts.embed_infojson, }) # Note: Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 209f199bd..0843d5ff7 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -1287,7 +1287,9 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--embed-metadata', '--add-metadata', action='store_true', dest='addmetadata', default=False, - help='Embed metadata to the video file. Also adds chapters to file unless --no-add-chapters is used (Alias: --add-metadata)') + help=( + 'Embed metadata to the video file. Also embeds chapters/infojson if present ' + 'unless --no-embed-chapters/--no-embed-info-json are used (Alias: --add-metadata)')) postproc.add_option( '--no-embed-metadata', '--no-add-metadata', action='store_false', dest='addmetadata', @@ -1300,6 +1302,14 @@ def parseOpts(overrideArguments=None): '--no-embed-chapters', '--no-add-chapters', action='store_false', dest='addchapters', help='Do not add chapter markers (default) (Alias: --no-add-chapters)') + postproc.add_option( + '--embed-info-json', + action='store_true', dest='embed_infojson', default=None, + help='Embed the infojson as an attachment to mkv/mka video files') + postproc.add_option( + '--no-embed-info-json', + action='store_false', dest='embed_infojson', + help='Do not embed the infojson as an attachment to the video file') postproc.add_option( '--metadata-from-title', metavar='FORMAT', dest='metafromtitle', diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index d6734e8d9..eacee8ee9 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ 
-28,6 +28,7 @@ from ..utils import ( shell_quote, traverse_obj, variadic, + write_json_file, ) @@ -636,10 +637,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): class FFmpegMetadataPP(FFmpegPostProcessor): - def __init__(self, downloader, add_metadata=True, add_chapters=True): + def __init__(self, downloader, add_metadata=True, add_chapters=True, add_infojson='if_exists'): FFmpegPostProcessor.__init__(self, downloader) self._add_metadata = add_metadata self._add_chapters = add_chapters + self._add_infojson = add_infojson @staticmethod def _options(target_ext): @@ -652,13 +654,23 @@ class FFmpegMetadataPP(FFmpegPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): filename, metadata_filename = info['filepath'], None - options = [] + files_to_delete, options = [], [] if self._add_chapters and info.get('chapters'): metadata_filename = replace_extension(filename, 'meta') options.extend(self._get_chapter_opts(info['chapters'], metadata_filename)) + files_to_delete.append(metadata_filename) if self._add_metadata: options.extend(self._get_metadata_opts(info)) + if self._add_infojson: + if info['ext'] in ('mkv', 'mka'): + infojson_filename = info.get('infojson_filename') + options.extend(self._get_infojson_opts(info, infojson_filename)) + if not infojson_filename: + files_to_delete.append(info.get('infojson_filename')) + elif self._add_infojson is True: + self.to_screen('The info-json can only be attached to mkv/mka files') + if not options: self.to_screen('There isn\'t any metadata to add') return [], info @@ -668,8 +680,8 @@ class FFmpegMetadataPP(FFmpegPostProcessor): self.run_ffmpeg_multiple_files( (filename, metadata_filename), temp_filename, itertools.chain(self._options(info['ext']), *options)) - if metadata_filename: - os.remove(metadata_filename) + for file in filter(None, files_to_delete): + os.remove(file) # Don't obey --keep-files os.replace(temp_filename, filename) return [], info @@ -741,15 +753,26 @@ class 
FFmpegMetadataPP(FFmpegPostProcessor): yield ('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang) stream_idx += stream_count - if ('no-attach-info-json' not in self.get_param('compat_opts', []) - and '__infojson_filename' in info and info['ext'] in ('mkv', 'mka')): - old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') - if old_stream is not None: - yield ('-map', '-0:%d' % old_stream) - new_stream -= 1 + def _get_infojson_opts(self, info, infofn): + if not infofn or not os.path.exists(infofn): + if self._add_infojson is not True: + return + infofn = infofn or '%s.temp' % ( + self._downloader.prepare_filename(info, 'infojson') + or replace_extension(self._downloader.prepare_filename(info), 'info.json', info['ext'])) + if not self._downloader._ensure_dir_exists(infofn): + return + self.write_debug(f'Writing info-json to: {infofn}') + write_json_file(self._downloader.sanitize_info(info, self.get_param('clean_infojson', True)), infofn) + info['infojson_filename'] = infofn + + old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') + if old_stream is not None: + yield ('-map', '-0:%d' % old_stream) + new_stream -= 1 - yield ('-attach', info['__infojson_filename'], - '-metadata:s:%d' % new_stream, 'mimetype=application/json') + yield ('-attach', infofn, + '-metadata:s:%d' % new_stream, 'mimetype=application/json') class FFmpegMergerPP(FFmpegPostProcessor): -- cgit v1.2.3 From 013b50b7949563e445936302d6e486bab7100018 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 15 Nov 2021 04:50:11 +0530 Subject: Fix `postprocessor_hooks` Closes #1650 --- yt_dlp/YoutubeDL.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index a102ecc32..197ec11e6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -633,11 +633,14 @@ class YoutubeDL(object): pp = pp_class(self, **compat_kwargs(pp_def))
self.add_post_processor(pp, when=when) - for ph in self.params.get('post_hooks', []): - self.add_post_hook(ph) - - for ph in self.params.get('progress_hooks', []): - self.add_progress_hook(ph) + hooks = { + 'post_hooks': self.add_post_hook, + 'progress_hooks': self.add_progress_hook, + 'postprocessor_hooks': self.add_postprocessor_hook, + } + for opt, fn in hooks.items(): + for ph in self.params.get(opt, []): + fn(ph) register_socks_protocols() -- cgit v1.2.3 From d0d012d4e79cd1420e96ce5c3d509771110d3ea1 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Tue, 16 Nov 2021 14:22:01 +1300 Subject: [youtube] Add `default` player client (#1685) Authored-by: coletdjnz --- README.md | 2 +- yt_dlp/extractor/youtube.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1612bda5a..96f5d7ecb 100644 --- a/README.md +++ b/README.md @@ -1552,7 +1552,7 @@ The following extractors use this feature: #### youtube * `skip`: `hls` or `dash` (or both) to skip download of the respective manifests -* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. You can also use `all` to use all the clients +* `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios`, `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (Eg: `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly the music variants are added for `music.youtube.com` urls. 
You can also use `all` to use all the clients, and `default` for the default clients. * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `include_live_dash`: Include live dash formats (These formats don't download properly) * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 3ae0f5a27..203f4a92a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2339,18 +2339,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_requested_clients(self, url, smuggled_data): requested_clients = [] + default = ['android', 'web'] allowed_clients = sorted( [client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'], key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) for client in self._configuration_arg('player_client'): if client in allowed_clients: requested_clients.append(client) + elif client == 'default': + requested_clients.extend(default) elif client == 'all': requested_clients.extend(allowed_clients) else: self.report_warning(f'Skipping unsupported client {client}') if not requested_clients: - requested_clients = ['android', 'web'] + requested_clients = default if smuggled_data.get('is_music_url') or self.is_music_url(url): requested_clients.extend( -- cgit v1.2.3 From d8cf8d97a8dbc9602556de474af133b5ab0e0a29 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 16 Nov 2021 21:14:02 +0530 Subject: [utils] Fix `PagedList` --- yt_dlp/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 
f07eef61f..a9e066257 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4179,7 +4179,9 @@ class PagedList: self._cache = {} def getpage(self, pagenum): - page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum)) + page_results = self._cache.get(pagenum) + if page_results is None: + page_results = list(self._pagefunc(pagenum)) if self._use_cache: self._cache[pagenum] = page_results return page_results @@ -4195,7 +4197,9 @@ class PagedList: if not isinstance(idx, int) or idx < 0: raise TypeError('indices must be non-negative integers') entries = self.getslice(idx, idx + 1) - return entries[0] if entries else None + if not entries: + raise IndexError() + return entries[0] class OnDemandPagedList(PagedList): -- cgit v1.2.3 From 720c309932ea6724223d0a6b7781a0e92a74262c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 17 Nov 2021 01:26:23 +0530 Subject: [youtube] Add storyboard formats Closes: #1553, https://github.com/ytdl-org/youtube-dl/issues/9868 Related: https://github.com/ytdl-org/youtube-dl/pull/14951 --- yt_dlp/extractor/youtube.py | 53 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 203f4a92a..41e7fce10 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -9,6 +9,7 @@ import datetime import hashlib import itertools import json +import math import os.path import random import re @@ -28,6 +29,7 @@ from ..compat import ( ) from ..jsinterp import JSInterpreter from ..utils import ( + bug_reports_message, bytes_to_intlist, clean_html, datetime_from_str, @@ -66,6 +68,10 @@ from ..utils import ( ) +def get_first(obj, keys, **kwargs): + return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) + + # any clients starting with _ cannot be explicity requested by the user INNERTUBE_CLIENTS = { 'web': { @@ -2586,6 +2592,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'/clen/(\d+)', 
f.get('fragment_base_url') or f['url'], 'file size', default=None)) yield f + def _extract_storyboard(self, player_responses, duration): + spec = get_first( + player_responses, ('storyboards', 'playerStoryboardSpecRenderer', 'spec'), default='').split('|')[::-1] + if not spec: + return + base_url = spec.pop() + L = len(spec) - 1 + for i, args in enumerate(spec): + args = args.split('#') + counts = list(map(int_or_none, args[:5])) + if len(args) != 8 or not all(counts): + self.report_warning(f'Malformed storyboard {i}: {"#".join(args)}{bug_reports_message()}') + continue + width, height, frame_count, cols, rows = counts + N, sigh = args[6:] + + url = base_url.replace('$L', str(L - i)).replace('$N', N) + f'&sigh={sigh}' + fragment_count = frame_count / (cols * rows) + fragment_duration = duration / fragment_count + yield { + 'format_id': f'sb{i}', + 'format_note': 'storyboard', + 'ext': 'mhtml', + 'protocol': 'mhtml', + 'acodec': 'none', + 'vcodec': 'none', + 'url': url, + 'width': width, + 'height': height, + 'fragments': [{ + 'path': url.replace('$M', str(j)), + 'duration': min(fragment_duration, duration - (j * fragment_duration)), + } for j in range(math.ceil(fragment_count))], + } + def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) video_id = self._match_id(url) @@ -2603,8 +2644,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._get_requested_clients(url, smuggled_data), video_id, webpage, master_ytcfg) - get_first = lambda obj, keys, **kwargs: traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) - playability_statuses = traverse_obj( player_responses, (..., 'playabilityStatus'), expected_type=dict, default=[]) @@ -2700,10 +2739,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if reason: self.raise_no_formats(reason, expected=True) - # Source is given priority since formats that throttle are given lower source_preference - # When throttling issue is fully fixed, remove this - self._sort_formats(formats, ('quality', 'res', 
'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto')) - keywords = get_first(video_details, 'keywords', expected_type=list) or [] if not keywords and webpage: keywords = [ @@ -2791,6 +2826,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not duration and live_endtime and live_starttime: duration = live_endtime - live_starttime + formats.extend(self._extract_storyboard(player_responses, duration)) + + # Source is given priority since formats that throttle are given lower source_preference + # When throttling issue is fully fixed, remove this + self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto')) + info = { 'id': video_id, 'title': self._live_title(video_title) if is_live else video_title, -- cgit v1.2.3 From 450bdf69bc080d882cb4db26cde8c2f9681b7e18 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:27:50 +0530 Subject: [OneFootball] Add extractor (#1613) Closes: #1598 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/onefootball.py | 51 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 yt_dlp/extractor/onefootball.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 2eee2a864..a60e27186 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1000,6 +1000,7 @@ from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE from .on24 import On24IE from .ondemandkorea import OnDemandKoreaIE +from .onefootball import OneFootballIE from .onet import ( OnetIE, OnetChannelIE, diff --git a/yt_dlp/extractor/onefootball.py b/yt_dlp/extractor/onefootball.py new file mode 100644 index 000000000..79501003d --- /dev/null +++ b/yt_dlp/extractor/onefootball.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class 
OneFootballIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?onefootball\.com/[a-z]{2}/video/[^/&?#]+-(?P\d+)' + + _TESTS = [{ + 'url': 'https://onefootball.com/en/video/highlights-fc-zuerich-3-3-fc-basel-34012334', + 'info_dict': { + 'id': '34012334', + 'ext': 'mp4', + 'title': 'Highlights: FC Zürich 3-3 FC Basel', + 'description': 'md5:33d9855cb790702c4fe42a513700aba8', + 'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34012334', + 'timestamp': 1635874604, + 'upload_date': '20211102' + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://onefootball.com/en/video/klopp-fumes-at-var-decisions-in-west-ham-defeat-34041020', + 'info_dict': { + 'id': '34041020', + 'ext': 'mp4', + 'title': 'Klopp fumes at VAR decisions in West Ham defeat', + 'description': 'md5:9c50371095a01ad3f63311c73d8f51a5', + 'thumbnail': 'https://photobooth-api.onefootball.com/api/screenshot/https:%2F%2Fperegrine-api.onefootball.com%2Fv2%2Fphotobooth%2Fcms%2Fen%2F34041020', + 'timestamp': 1636314103, + 'upload_date': '20211107' + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + data_json = self._search_json_ld(webpage, id) + m3u8_url = self._html_search_regex(r'(https://cdn\.jwplayer\.com/manifests/.+\.m3u8)', webpage, 'm3u8_url') + formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, id) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title'), + 'description': data_json.get('description'), + 'thumbnail': data_json.get('thumbnail'), + 'timestamp': data_json.get('timestamp'), + 'formats': formats, + 'subtitles': subtitles, + } -- cgit v1.2.3 From 266a1b5d52d4a48a966d0a0b6286ca2740482409 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:28:51 +0530 Subject: [ESPNCricInfo] 
Add extractor (#1652) Closes: #1635 Authored by: Ashish0804 --- yt_dlp/extractor/espn.py | 43 ++++++++++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 44 insertions(+) diff --git a/yt_dlp/extractor/espn.py b/yt_dlp/extractor/espn.py index d4a66c29f..dc50f3b8b 100644 --- a/yt_dlp/extractor/espn.py +++ b/yt_dlp/extractor/espn.py @@ -7,7 +7,9 @@ from .once import OnceIE from ..compat import compat_str from ..utils import ( determine_ext, + dict_get, int_or_none, + unified_strdate, unified_timestamp, ) @@ -236,3 +238,44 @@ class FiveThirtyEightIE(InfoExtractor): webpage, 'embed url') return self.url_result(embed_url, 'AbcNewsVideo') + + +class ESPNCricInfoIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?espncricinfo\.com/video/[^#$&?/]+-(?P\d+)' + _TESTS = [{ + 'url': 'https://www.espncricinfo.com/video/finch-chasing-comes-with-risks-despite-world-cup-trend-1289135', + 'info_dict': { + 'id': '1289135', + 'ext': 'mp4', + 'title': 'Finch: Chasing comes with \'risks\' despite World Cup trend', + 'description': 'md5:ea32373303e25efbb146efdfc8a37829', + 'upload_date': '20211113', + 'duration': 96, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + data_json = self._download_json(f'https://hs-consumer-api.espncricinfo.com/v1/pages/video/video-details?videoId={id}', id)['video'] + formats, subtitles = [], {} + for item in data_json.get('playbacks') or []: + if item.get('type') == 'HLS' and item.get('url'): + m3u8_frmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(item['url'], id) + formats.extend(m3u8_frmts) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) + elif item.get('type') == 'AUDIO' and item.get('url'): + formats.append({ + 'url': item['url'], + 'vcodec': 'none', + }) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title'), + 'description': data_json.get('summary'), + 'upload_date': unified_strdate(dict_get(data_json, 
('publishedAt', 'recordedAt'))), + 'duration': data_json.get('duration'), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a60e27186..a3674d836 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -418,6 +418,7 @@ from .espn import ( ESPNIE, ESPNArticleIE, FiveThirtyEightIE, + ESPNCricInfoIE, ) from .esri import EsriVideoIE from .europa import EuropaIE -- cgit v1.2.3 From 9d63137eac4a5753dae775712599dc5c7adb0e8c Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:29:53 +0530 Subject: [CanalAlpha] Add extractor (#1655) Closes: #1528 Authored by: Ashish0804 --- yt_dlp/extractor/canalalpha.py | 98 ++++++++++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 99 insertions(+) create mode 100644 yt_dlp/extractor/canalalpha.py diff --git a/yt_dlp/extractor/canalalpha.py b/yt_dlp/extractor/canalalpha.py new file mode 100644 index 000000000..7287677c1 --- /dev/null +++ b/yt_dlp/extractor/canalalpha.py @@ -0,0 +1,98 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + clean_html, + dict_get, + try_get, + unified_strdate, +) + + +class CanalAlphaIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?canalalpha\.ch/play/[^/]+/[^/]+/(?P\d+)/?.*' + + _TESTS = [{ + 'url': 'https://www.canalalpha.ch/play/le-journal/episode/24520/jeudi-28-octobre-2021', + 'info_dict': { + 'id': '24520', + 'ext': 'mp4', + 'title': 'Jeudi 28 octobre 2021', + 'description': 'md5:d30c6c3e53f8ad40d405379601973b30', + 'thumbnail': 'https://static.canalalpha.ch/poster/journal/journal_20211028.jpg', + 'upload_date': '20211028', + 'duration': 1125, + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://www.canalalpha.ch/play/le-journal/topic/24512/la-poste-fait-de-neuchatel-un-pole-cryptographique', + 
'info_dict': { + 'id': '24512', + 'ext': 'mp4', + 'title': 'La Poste fait de Neuchâtel un pôle cryptographique', + 'description': 'md5:4ba63ae78a0974d1a53d6703b6e1dedf', + 'thumbnail': 'https://static.canalalpha.ch/poster/news/news_39712.jpg', + 'upload_date': '20211028', + 'duration': 138, + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://www.canalalpha.ch/play/eureka/episode/24484/ces-innovations-qui-veulent-rendre-lagriculture-plus-durable', + 'info_dict': { + 'id': '24484', + 'ext': 'mp4', + 'title': 'Ces innovations qui veulent rendre l’agriculture plus durable', + 'description': 'md5:3de3f151180684621e85be7c10e4e613', + 'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_10236.jpg', + 'upload_date': '20211026', + 'duration': 360, + }, + 'params': {'skip_download': True} + }, { + 'url': 'https://www.canalalpha.ch/play/avec-le-temps/episode/23516/redonner-de-leclat-grace-au-polissage', + 'info_dict': { + 'id': '23516', + 'ext': 'mp4', + 'title': 'Redonner de l\'éclat grâce au polissage', + 'description': 'md5:0d8fbcda1a5a4d6f6daa3165402177e1', + 'thumbnail': 'https://static.canalalpha.ch/poster/magazine/magazine_9990.png', + 'upload_date': '20210726', + 'duration': 360, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + id = self._match_id(url) + webpage = self._download_webpage(url, id) + data_json = self._parse_json(self._search_regex( + r'window\.__SERVER_STATE__\s?=\s?({(?:(?!};)[^"]|"([^"]|\\")*")+})\s?;', + webpage, 'data_json'), id)['1']['data']['data'] + manifests = try_get(data_json, lambda x: x['video']['manifests'], expected_type=dict) or {} + subtitles = {} + formats = [{ + 'url': video['$url'], + 'ext': 'mp4', + 'width': try_get(video, lambda x: x['res']['width'], expected_type=int), + 'height': try_get(video, lambda x: x['res']['height'], expected_type=int), + } for video in try_get(data_json, lambda x: x['video']['mp4'], expected_type=list) or [] if video.get('$url')] + if 
manifests.get('hls'): + m3u8_frmts, m3u8_subs = self._parse_m3u8_formats_and_subtitles(manifests['hls'], id) + formats.extend(m3u8_frmts) + subtitles = self._merge_subtitles(subtitles, m3u8_subs) + if manifests.get('dash'): + dash_frmts, dash_subs = self._parse_mpd_formats_and_subtitles(manifests['dash'], id) + formats.extend(dash_frmts) + subtitles = self._merge_subtitles(subtitles, dash_subs) + self._sort_formats(formats) + return { + 'id': id, + 'title': data_json.get('title').strip(), + 'description': clean_html(dict_get(data_json, ('longDesc', 'shortDesc'))), + 'thumbnail': data_json.get('poster'), + 'upload_date': unified_strdate(dict_get(data_json, ('webPublishAt', 'featuredAt', 'diffusionDate'))), + 'duration': try_get(data_json, lambda x: x['video']['duration'], expected_type=int), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a3674d836..2c0a885b9 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -193,6 +193,7 @@ from .camdemy import ( ) from .cammodels import CamModelsIE from .camwithher import CamWithHerIE +from .canalalpha import CanalAlphaIE from .canalplus import CanalplusIE from .canalc2 import Canalc2IE from .canvas import ( -- cgit v1.2.3 From 525d9e0c7d4e8e1ad121d75f14ae40e8ee023079 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:30:48 +0530 Subject: [HotStar] Set language field from tags (#1700) Authored by: Ashish0804 --- yt_dlp/extractor/hotstar.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/yt_dlp/extractor/hotstar.py b/yt_dlp/extractor/hotstar.py index 12e6c53d4..0bdf772a1 100644 --- a/yt_dlp/extractor/hotstar.py +++ b/yt_dlp/extractor/hotstar.py @@ -230,6 +230,11 @@ class HotStarIE(HotStarBaseIE): if tags and 'encryption:plain' not in tags: for f in current_formats: f['has_drm'] = True + if tags and 'language' in tags: + lang = 
re.search(r'language:(?P<lang>[a-z]+)', tags).group('lang') + for f in current_formats: + if not f.get('langauge'): + f['language'] = lang formats.extend(current_formats) subs = self._merge_subtitles(subs, current_subs) if not formats and geo_restricted: -- cgit v1.2.3 From 11852843e738bfdb01e1c65d3466629dc9645813 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:43:39 +0530 Subject: [AmazonStoreIE] Fix regex to not match vdp urls (#1699) Closes: #1698 Authored by: Ashish0804 --- yt_dlp/extractor/amazon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/amazon.py b/yt_dlp/extractor/amazon.py index 01d6f2a54..7c5d35f47 100644 --- a/yt_dlp/extractor/amazon.py +++ b/yt_dlp/extractor/amazon.py @@ -4,7 +4,7 @@ from ..utils import int_or_none class AmazonStoreIE(InfoExtractor): - _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/[^/]*/?(?:dp|gp/product)/(?P<id>[^/&#$?]+)' + _VALID_URL = r'(?:https?://)(?:www\.)?amazon\.(?:[a-z]{2,3})(?:\.[a-z]{2})?/(?:[^/]+/)?(?:dp|gp/product)/(?P<id>[^/&#$?]+)' _TESTS = [{ 'url': 'https://www.amazon.co.uk/dp/B098XNCHLD/', -- cgit v1.2.3 From 61be785a6700be8b9e064572ddfb6546b20cb8f9 Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:20:45 +0000 Subject: [peer.tv] Add extractor (#1499) Closes #1388 Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/peertv.py | 57 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 yt_dlp/extractor/peertv.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 2c0a885b9..458e6e2c8 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1063,6 +1063,7 @@ from .peertube import ( PeerTubeIE, PeerTubePlaylistIE, ) +from .peertv import PeerTVIE from .peloton import ( PelotonIE, PelotonLiveIE diff --git 
a/yt_dlp/extractor/peertv.py b/yt_dlp/extractor/peertv.py new file mode 100644 index 000000000..002d33a88 --- /dev/null +++ b/yt_dlp/extractor/peertv.py @@ -0,0 +1,57 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import js_to_json + + +class PeerTVIE(InfoExtractor): + IE_NAME = 'peer.tv' + _VALID_URL = r'https?://(?:www\.)?peer\.tv/(?:de|it|en)/(?P\d+)' + _TESTS = [{ + 'url': 'https://www.peer.tv/de/841', + 'info_dict': { + 'id': '841', + 'ext': 'mp4', + 'title': 'Die Brunnenburg', + 'description': 'md5:4395f6142b090338340ab88a3aae24ed', + }, + }, { + 'url': 'https://www.peer.tv/it/404', + 'info_dict': { + 'id': '404', + 'ext': 'mp4', + 'title': 'Cascate di ghiaccio in Val Gardena', + 'description': 'md5:e8e5907f236171842674e8090e3577b8', + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + video_key = self._html_search_regex(r'player\.peer\.tv/js/([a-zA-Z0-9]+)', webpage, 'video key') + + js = self._download_webpage(f'https://player.peer.tv/js/{video_key}/', video_id, + headers={'Referer': 'https://www.peer.tv/'}, note='Downloading session id') + + session_id = self._search_regex(r'["\']session_id["\']:\s*["\']([a-zA-Z0-9]+)["\']', js, 'session id') + + player_webpage = self._download_webpage( + f'https://player.peer.tv/jsc/{video_key}/{session_id}?jsr=aHR0cHM6Ly93d3cucGVlci50di9kZS84NDE=&cs=UTF-8&mq=2&ua=0&webm=p&mp4=p&hls=1', + video_id, note='Downloading player webpage') + + m3u8_url = self._search_regex(r'["\']playlist_url["\']:\s*(["\'][^"\']+["\'])', player_webpage, 'm3u8 url') + m3u8_url = self._parse_json(m3u8_url, video_id, transform_source=js_to_json) + + formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls') + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._html_search_regex(r'

<h1>(.+?)</h1>

', webpage, 'title').replace('\xa0', ' '), + 'formats': formats, + 'description': self._html_search_meta(('og:description', 'description'), webpage), + 'thumbnail': self._html_search_meta(('og:image', 'image'), webpage) + } -- cgit v1.2.3 From 22a510ff447a5d0e4c023b810d434611521b777c Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi Date: Fri, 19 Nov 2021 06:43:22 +0900 Subject: [mixch] add support for mixch.tv (#1586) Authored by: nao20010128nao --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/mixch.py | 55 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 yt_dlp/extractor/mixch.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 458e6e2c8..200c59bbe 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -795,6 +795,7 @@ from .mirrativ import ( ) from .mit import TechTVMITIE, OCWMITIE from .mitele import MiTeleIE +from .mixch import MixchIE from .mixcloud import ( MixcloudIE, MixcloudUserIE, diff --git a/yt_dlp/extractor/mixch.py b/yt_dlp/extractor/mixch.py new file mode 100644 index 000000000..a99ddd172 --- /dev/null +++ b/yt_dlp/extractor/mixch.py @@ -0,0 +1,55 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + traverse_obj, +) + + +class MixchIE(InfoExtractor): + IE_NAME = 'mixch' + _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P\d+)' + + TESTS = [{ + 'url': 'https://mixch.tv/u/16236849/live', + 'skip': 'don\'t know if this live persists', + 'info_dict': { + 'id': '16236849', + 'title': '24配信シェア⭕️投票🙏💦', + 'comment_count': 13145, + 'view_count': 28348, + 'timestamp': 1636189377, + 'uploader': '🦥伊咲👶🏻#フレアワ', + 'uploader_id': '16236849', + } + }, { + 'url': 'https://mixch.tv/u/16137876/live', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(f'https://mixch.tv/u/{video_id}/live', video_id) + + 
initial_js_state = self._parse_json(self._search_regex( + r'(?m)^\s*window\.__INITIAL_JS_STATE__\s*=\s*(\{.+?\});\s*$', webpage, 'initial JS state'), video_id) + if not initial_js_state.get('liveInfo'): + raise ExtractorError('Livestream has ended.', expected=True) + + return { + 'id': video_id, + 'title': traverse_obj(initial_js_state, ('liveInfo', 'title')), + 'comment_count': traverse_obj(initial_js_state, ('liveInfo', 'comments')), + 'view_count': traverse_obj(initial_js_state, ('liveInfo', 'visitor')), + 'timestamp': traverse_obj(initial_js_state, ('liveInfo', 'created')), + 'uploader': traverse_obj(initial_js_state, ('broadcasterInfo', 'name')), + 'uploader_id': video_id, + 'formats': [{ + 'format_id': 'hls', + 'url': traverse_obj(initial_js_state, ('liveInfo', 'hls')) or 'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_%s.m3u8' % video_id, + 'ext': 'mp4', + 'protocol': 'm3u8', + }], + 'is_live': True, + } -- cgit v1.2.3 From 402cd603a40c2115413f914ebb4dd43d9bf2449a Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Thu, 18 Nov 2021 21:57:40 +0000 Subject: [LinkedIn] Add extractor (#1597) Closes #1206 Authored by: u-spec-png --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/linkedin.py | 105 ++++++++++++++++++++++++++++++----------- 2 files changed, 78 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 200c59bbe..106006671 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -704,6 +704,7 @@ from .line import ( LineLiveChannelIE, ) from .linkedin import ( + LinkedInIE, LinkedInLearningIE, LinkedInLearningCourseIE, ) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index c2d347efd..9255b3301 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -6,21 +6,56 @@ import re from .common import InfoExtractor from ..utils import ( + clean_html, + extract_attributes, 
ExtractorError, float_or_none, + get_element_by_class, int_or_none, srt_subtitles_timecode, + strip_or_none, + mimetype2ext, try_get, urlencode_postdata, urljoin, ) -class LinkedInLearningBaseIE(InfoExtractor): +class LinkedInBaseIE(InfoExtractor): _NETRC_MACHINE = 'linkedin' - _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning' _logged_in = False + def _real_initialize(self): + if self._logged_in: + return + email, password = self._get_login_info() + if email is None: + return + + login_page = self._download_webpage( + self._LOGIN_URL, None, 'Downloading login page') + action_url = urljoin(self._LOGIN_URL, self._search_regex( + r']+action=(["\'])(?P.+?)\1', login_page, 'post url', + default='https://www.linkedin.com/uas/login-submit', group='url')) + data = self._hidden_inputs(login_page) + data.update({ + 'session_key': email, + 'session_password': password, + }) + login_submit_page = self._download_webpage( + action_url, None, 'Logging in', + data=urlencode_postdata(data)) + error = self._search_regex( + r']+class="error"[^>]*>\s*(.+?)\s*', + login_submit_page, 'error', default=None) + if error: + raise ExtractorError(error, expected=True) + LinkedInBaseIE._logged_in = True + + +class LinkedInLearningBaseIE(LinkedInBaseIE): + _LOGIN_URL = 'https://www.linkedin.com/uas/login?trk=learning' + def _call_api(self, course_slug, fields, video_slug=None, resolution=None): query = { 'courseSlug': course_slug, @@ -52,32 +87,47 @@ class LinkedInLearningBaseIE(InfoExtractor): def _get_video_id(self, video_data, course_slug, video_slug): return self._get_urn_id(video_data) or '%s/%s' % (course_slug, video_slug) - def _real_initialize(self): - if self._logged_in: - return - email, password = self._get_login_info() - if email is None: - return - login_page = self._download_webpage( - self._LOGIN_URL, None, 'Downloading login page') - action_url = urljoin(self._LOGIN_URL, self._search_regex( - r']+action=(["\'])(?P.+?)\1', login_page, 'post url', - 
default='https://www.linkedin.com/uas/login-submit', group='url')) - data = self._hidden_inputs(login_page) - data.update({ - 'session_key': email, - 'session_password': password, - }) - login_submit_page = self._download_webpage( - action_url, None, 'Logging in', - data=urlencode_postdata(data)) - error = self._search_regex( - r']+class="error"[^>]*>\s*(.+?)\s*', - login_submit_page, 'error', default=None) - if error: - raise ExtractorError(error, expected=True) - LinkedInLearningBaseIE._logged_in = True +class LinkedInIE(LinkedInBaseIE): + _VALID_URL = r'https?://(?:www\.)?linkedin\.com/posts/.+?(?P\d+)' + _TESTS = [{ + 'url': 'https://www.linkedin.com/posts/mishalkhawaja_sendinblueviews-toronto-digitalmarketing-ugcPost-6850898786781339649-mM20', + 'info_dict': { + 'id': '6850898786781339649', + 'ext': 'mp4', + 'title': 'Mishal K. on LinkedIn: #sendinblueviews #toronto #digitalmarketing', + 'description': 'md5:be125430bab1c574f16aeb186a4d5b19', + 'creator': 'Mishal K.' + }, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + title = self._html_search_regex(r'([^<]+)', webpage, 'title') + description = clean_html(get_element_by_class('share-update-card__update-text', webpage)) + like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage)) + creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage))) + + sources = self._parse_json(extract_attributes(self._search_regex(r'(]+>)', webpage, 'video'))['data-sources'], video_id) + formats = [{ + 'url': source['src'], + 'ext': mimetype2ext(source.get('type')), + 'tbr': float_or_none(source.get('data-bitrate'), scale=1000), + } for source in sources] + + self._sort_formats(formats) + + return { + 'id': video_id, + 'formats': formats, + 'title': title, + 'like_count': like_count, + 'creator': creator, + 'thumbnail': self._og_search_thumbnail(webpage), + 'description': 
description, + } class LinkedInLearningIE(LinkedInLearningBaseIE): @@ -108,7 +158,6 @@ class LinkedInLearningIE(LinkedInLearningBaseIE): def _real_extract(self, url): course_slug, video_slug = self._match_valid_url(url).groups() - video_data = None formats = [] for width, height in ((640, 360), (960, 540), (1280, 720)): video_data = self._call_api( -- cgit v1.2.3 From cfcaf64a4b10400964606804085eb975cfd2a401 Mon Sep 17 00:00:00 2001 From: Paul Wise Date: Fri, 19 Nov 2021 06:14:38 +0800 Subject: [rtrfm] Add extractor (#1628) Authored by: pabs3 --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/rtrfm.py | 67 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 yt_dlp/extractor/rtrfm.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 106006671..89c61312d 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1235,6 +1235,7 @@ from .rtl2 import ( RTL2YouSeriesIE, ) from .rtp import RTPIE +from .rtrfm import RTRFMIE from .rts import RTSIE from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE from .rtvnh import RTVNHIE diff --git a/yt_dlp/extractor/rtrfm.py b/yt_dlp/extractor/rtrfm.py new file mode 100644 index 000000000..93d51e8ed --- /dev/null +++ b/yt_dlp/extractor/rtrfm.py @@ -0,0 +1,67 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class RTRFMIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?rtrfm\.com\.au/(?:shows|show-episode)/(?P[^/?\#&]+)' + _TESTS = [ + { + 'url': 'https://rtrfm.com.au/shows/breakfast/', + 'md5': '46168394d3a5ce237cf47e85d0745413', + 'info_dict': { + 'id': 'breakfast-2021-11-16', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': r're:^Breakfast with Taylah \d{4}-\d{2}-\d{2}$', + 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611', + }, + 'skip': 'ID and md5 changes daily', + }, + { + 'url': 
'https://rtrfm.com.au/show-episode/breakfast-2021-11-11/', + 'md5': '396bedf1e40f96c62b30d4999202a790', + 'info_dict': { + 'id': 'breakfast-2021-11-11', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': 'Breakfast with Taylah 2021-11-11', + 'description': 'md5:0979c3ab1febfbec3f1ccb743633c611', + }, + }, + { + 'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/', + 'md5': '594027f513ec36a24b15d65007a24dff', + 'info_dict': { + 'id': 'breakfast-2020-06-01', + 'ext': 'mp3', + 'series': 'Breakfast with Taylah', + 'title': 'Breakfast with Taylah 2020-06-01', + 'description': r're:^Breakfast with Taylah ', + }, + 'skip': 'This audio has expired', + }, + ] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + show, date, title = self._search_regex( + r'''\.playShow(?:From)?\(['"](?P<show>[^'"]+)['"],\s*['"](?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2})['"],\s*['"](?P<title>[^'"]+)['"]''', + webpage, 'details', group=('show', 'date', 'title')) + url = self._download_json( + 'https://restreams.rtrfm.com.au/rzz', + show, 'Downloading MP3 URL', query={'n': show, 'd': date})['u'] + # This is the only indicator of an error until trying to download the URL and + # downloads of mp4 URLs always fail (403 for current episodes, 404 for missing). 
+ if '.mp4' in url: + url = None + self.raise_no_formats('Expired or no episode on this date', expected=True) + return { + 'id': '%s-%s' % (show, date), + 'title': '%s %s' % (title, date), + 'series': title, + 'url': url, + 'release_date': date, + 'description': self._og_search_description(webpage), + } -- cgit v1.2.3 From 764f5de2f48a523394558b10006b97cd0b6c7acf Mon Sep 17 00:00:00 2001 From: Paul Wise <pabs3@bonedaddy.net> Date: Fri, 19 Nov 2021 06:15:41 +0800 Subject: [blogger] Add extractor (#1629) Authored by: pabs3 --- yt_dlp/extractor/blogger.py | 54 ++++++++++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/generic.py | 17 +++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 yt_dlp/extractor/blogger.py diff --git a/yt_dlp/extractor/blogger.py b/yt_dlp/extractor/blogger.py new file mode 100644 index 000000000..dba131cb0 --- /dev/null +++ b/yt_dlp/extractor/blogger.py @@ -0,0 +1,54 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from ..utils import ( + mimetype2ext, + parse_duration, + parse_qs, + str_or_none, + traverse_obj, +) +from .common import InfoExtractor + + +class BloggerIE(InfoExtractor): + IE_NAME = 'blogger.com' + _VALID_URL = r'https?://(?:www\.)?blogger\.com/video\.g\?token=(?P<id>.+)' + _VALID_EMBED = r'''<iframe[^>]+src=["']((?:https?:)?//(?:www\.)?blogger\.com/video\.g\?token=[^"']+)["']''' + _TESTS = [{ + 'url': 'https://www.blogger.com/video.g?token=AD6v5dzEe9hfcARr5Hlq1WTkYy6t-fXH3BBahVhGvVHe5szdEUBEloSEDSTA8-b111089KbfWuBvTN7fnbxMtymsHhXAXwVvyzHH4Qch2cfLQdGxKQrrEuFpC1amSl_9GuLWODjPgw', + 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac', + 'info_dict': { + 'id': 'BLOGGER-video-3c740e3a49197e16-796', + 'title': 'BLOGGER-video-3c740e3a49197e16-796', + 'ext': 'mp4', + 'thumbnail': r're:^https?://.*', + 'duration': 76.068, + } + }] + + @staticmethod + def _extract_urls(webpage): + return re.findall(BloggerIE._VALID_EMBED, webpage) + + def _real_extract(self, url): + 
token_id = self._match_id(url) + webpage = self._download_webpage(url, token_id) + data_json = self._search_regex(r'var\s+VIDEO_CONFIG\s*=\s*(\{.*)', webpage, 'JSON data') + data = self._parse_json(data_json.encode('utf-8').decode('unicode_escape'), token_id) + streams = data['streams'] + formats = [{ + 'ext': mimetype2ext(traverse_obj(parse_qs(stream['play_url']), ('mime', 0))), + 'url': stream['play_url'], + 'format_id': str_or_none(stream.get('format_id')), + } for stream in streams] + + return { + 'id': data.get('iframe_id', token_id), + 'title': data.get('iframe_id', token_id), + 'formats': formats, + 'thumbnail': data.get('thumbnail'), + 'duration': parse_duration(traverse_obj(parse_qs(streams[0]['play_url']), ('dur', 0))), + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 89c61312d..75cb0b2ab 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -166,6 +166,7 @@ from .bleacherreport import ( BleacherReportIE, BleacherReportCMSIE, ) +from .blogger import BloggerIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bongacams import BongaCamsIE diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 3374c1c20..d6631e2f3 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -136,6 +136,7 @@ from .medialaan import MedialaanIE from .simplecast import SimplecastIE from .wimtv import WimTVIE from .tvp import TVPEmbedIE +from .blogger import BloggerIE class GenericIE(InfoExtractor): @@ -2173,6 +2174,17 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + { + # blogger embed + 'url': 'https://blog.tomeuvizoso.net/2019/01/a-panfrost-milestone.html', + 'md5': 'f1bc19b6ea1b0fd1d81e84ca9ec467ac', + 'info_dict': { + 'id': 'BLOGGER-video-3c740e3a49197e16-796', + 'ext': 'mp4', + 'title': 'Blogger', + 'thumbnail': r're:^https?://.*', + }, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -3216,6 +3228,11 @@ 
class GenericIE(InfoExtractor): if onionstudios_url: return self.url_result(onionstudios_url) + # Look for Blogger embeds + blogger_urls = BloggerIE._extract_urls(webpage) + if blogger_urls: + return self.playlist_from_matches(blogger_urls, video_id, video_title, ie=BloggerIE.ie_key()) + # Look for ViewLift embeds viewlift_url = ViewLiftEmbedIE._extract_url(webpage) if viewlift_url: -- cgit v1.2.3 From c6118ca2ccf41663e14f353a6f7e6a306525e190 Mon Sep 17 00:00:00 2001 From: zulaport <70630440+zulaport@users.noreply.github.com> Date: Thu, 18 Nov 2021 14:45:13 -0800 Subject: [Stripchat] Add extractor (#1668) Authored by: zulaport --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/stripchat.py | 66 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 yt_dlp/extractor/stripchat.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 75cb0b2ab..6bad1f40c 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1407,6 +1407,7 @@ from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE +from .stripchat import StripchatIE from .stv import STVPlayerIE from .sunporno import SunPornoIE from .sverigesradio import ( diff --git a/yt_dlp/extractor/stripchat.py b/yt_dlp/extractor/stripchat.py new file mode 100644 index 000000000..efd0afc75 --- /dev/null +++ b/yt_dlp/extractor/stripchat.py @@ -0,0 +1,66 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_str, +) +from ..utils import ( + ExtractorError, + lowercase_escape, + try_get, +) + + +class StripchatIE(InfoExtractor): + _VALID_URL = r'https?://stripchat\.com/(?P<id>[0-9A-Za-z-_]+)' + _TESTS = [{ + 'url': 'https://stripchat.com/feel_me', + 'info_dict': { + 'id': 'feel_me', + 'ext': 'mp4', + 'title': 're:^feel_me [0-9]{4}-[0-9]{2}-[0-9]{2} 
[0-9]{2}:[0-9]{2}$', + 'description': str, + 'is_live': True, + 'age_limit': 18, + }, + 'skip': 'Room is offline', + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage( + 'https://stripchat.com/%s/' % video_id, video_id, + headers=self.geo_verification_headers()) + + data = self._parse_json( + self._search_regex( + r'<script\b[^>]*>\s*window\.__PRELOADED_STATE__\s*=(?P<value>.*?)<\/script>', + webpage, 'data', default='{}', group='value'), + video_id, transform_source=lowercase_escape, fatal=False) + if not data: + raise ExtractorError('Unable to find configuration for stream.') + + if try_get(data, lambda x: x['viewCam']['show'], dict): + raise ExtractorError('Model is in private show', expected=True) + elif not try_get(data, lambda x: x['viewCam']['model']['isLive'], bool): + raise ExtractorError('Model is offline', expected=True) + + server = try_get(data, lambda x: x['viewCam']['viewServers']['flashphoner-hls'], compat_str) + host = try_get(data, lambda x: x['config']['data']['hlsStreamHost'], compat_str) + model_id = try_get(data, lambda x: x['viewCam']['model']['id'], int) + + formats = self._extract_m3u8_formats( + 'https://b-%s.%s/hls/%d/%d.m3u8' % (server, host, model_id, model_id), + video_id, ext='mp4', m3u8_id='hls', fatal=False, live=True) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._live_title(video_id), + 'description': self._og_search_description(webpage), + 'is_live': True, + 'formats': formats, + # Stripchat declares the RTA meta-tag, but in an non-standard format so _rta_search() can't be used + 'age_limit': 18, + } -- cgit v1.2.3 From e16fefd8699c56d7a565e933ed1f55112ad399b4 Mon Sep 17 00:00:00 2001 From: Joshua Lochner <admin@xenova.com> Date: Fri, 19 Nov 2021 00:48:48 +0200 Subject: [Reddit] Add support for 1080p videos (#1682) Fixes: https://github.com/ytdl-org/youtube-dl/issues/29565 Authored by: xenova --- yt_dlp/extractor/extractors.py | 5 +-- 
yt_dlp/extractor/generic.py | 28 +++++++++++++++ yt_dlp/extractor/reddit.py | 82 +++++++++++++++++++++--------------------- 3 files changed, 71 insertions(+), 44 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 6bad1f40c..d19c67243 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1206,10 +1206,7 @@ from .redbulltv import ( RedBullTVRrnContentIE, RedBullIE, ) -from .reddit import ( - RedditIE, - RedditRIE, -) +from .reddit import RedditIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index d6631e2f3..9c7fa4a21 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2344,6 +2344,34 @@ class GenericIE(InfoExtractor): 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', } }, + { + # Reddit-hosted video that will redirect and be processed by RedditIE + # Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ + 'url': 'https://v.redd.it/zv89llsvexdz', + 'md5': '87f5f02f6c1582654146f830f21f8662', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'timestamp': 1501941939.0, + 'title': 'That small heart attack.', + 'upload_date': '20170805', + 'uploader': 'Antw87' + } + }, + { + # 1080p Reddit-hosted video that will redirect and be processed by RedditIE + 'url': 'https://v.redd.it/33hgok7dfbz71/', + 'md5': '7a1d587940242c9bb3bd6eb320b39258', + 'info_dict': { + 'id': '33hgok7dfbz71', + 'ext': 'mp4', + 'title': "The game Didn't want me to Knife that Guy I guess", + 'uploader': 'paraf1ve', + 'timestamp': 1636788683.0, + 'upload_date': '20211113' + } + } + # ] def report_following_redirect(self, new_url): diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 3ea750aeb..a042a59cc 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -8,43 +8,11 
@@ from ..utils import ( try_get, unescapeHTML, url_or_none, + traverse_obj ) class RedditIE(InfoExtractor): - _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)' - _TEST = { - # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ - 'url': 'https://v.redd.it/zv89llsvexdz', - 'md5': '0a070c53eba7ec4534d95a5a1259e253', - 'info_dict': { - 'id': 'zv89llsvexdz', - 'ext': 'mp4', - 'title': 'zv89llsvexdz', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - formats = self._extract_m3u8_formats( - 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id, - 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - - formats.extend(self._extract_mpd_formats( - 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, - mpd_id='dash', fatal=False)) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': video_id, - 'formats': formats, - } - - -class RedditRIE(InfoExtractor): _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', @@ -147,19 +115,53 @@ class RedditRIE(InfoExtractor): for resolution in resolutions: add_thumbnail(resolution) - return { - '_type': 'url_transparent', - 'url': video_url, + info = { 'title': data.get('title'), 'thumbnails': thumbnails, 'timestamp': float_or_none(data.get('created_utc')), 'uploader': data.get('author'), - 'duration': int_or_none(try_get( - data, - (lambda x: x['media']['reddit_video']['duration'], - lambda x: x['secure_media']['reddit_video']['duration']))), 'like_count': int_or_none(data.get('ups')), 'dislike_count': int_or_none(data.get('downs')), 'comment_count': int_or_none(data.get('num_comments')), 'age_limit': age_limit, } + + # Check if media is hosted on reddit: + reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False) + if reddit_video: + 
playlist_urls = [ + try_get(reddit_video, lambda x: unescapeHTML(x[y])) + for y in ('dash_url', 'hls_url') + ] + + # Update video_id + display_id = video_id + video_id = self._search_regex( + r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'], + 'video_id', default=display_id) + + dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd' + hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8' + + formats = self._extract_m3u8_formats( + hls_playlist_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + formats.extend(self._extract_mpd_formats( + dash_playlist_url, display_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + + return { + **info, + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'duration': int_or_none(reddit_video.get('duration')), + } + + # Not hosted on reddit, must continue extraction + return { + **info, + 'display_id': video_id, + '_type': 'url_transparent', + 'url': video_url, + } -- cgit v1.2.3 From 8863c8f09ee0bf36a83f428adca58b373d2c8358 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Thu, 18 Nov 2021 22:38:00 +0530 Subject: [soundcloud:search] Fix pagination --- yt_dlp/extractor/soundcloud.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/soundcloud.py b/yt_dlp/extractor/soundcloud.py index 824528474..2bb449220 100644 --- a/yt_dlp/extractor/soundcloud.py +++ b/yt_dlp/extractor/soundcloud.py @@ -893,5 +893,6 @@ class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE): break def _get_n_results(self, query, n): - tracks = self._get_collection('search/tracks', query, limit=n, q=query) - return self.playlist_result(tracks, query, query) + return self.playlist_result(itertools.islice( + self._get_collection('search/tracks', query, limit=n, q=query), + 0, None if n == float('inf') else n), query, query) -- cgit v1.2.3 From 
467b6b838737c0907bbc331f96352dda3019afb7 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 05:20:13 +0530 Subject: [ExtractAudio] Support `alac` Closes #1707 --- yt_dlp/postprocessor/ffmpeg.py | 44 +++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index eacee8ee9..1bde170ce 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -53,6 +53,7 @@ ACODECS = { 'opus': 'libopus', 'vorbis': 'libvorbis', 'wav': None, + 'alac': None, } @@ -383,7 +384,7 @@ class FFmpegPostProcessor(PostProcessor): class FFmpegExtractAudioPP(FFmpegPostProcessor): COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma') - SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav') + SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac') def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) @@ -399,10 +400,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): limits = { 'libmp3lame': (10, 0), + 'libvorbis': (0, 10), # FFmpeg's AAC encoder does not have an upper limit for the value of -q:a. 
# Experimentally, with values over 4, bitrate changes were minimal or non-existent 'aac': (0.1, 4), - 'vorbis': (0, 10), 'libfdk_aac': (1, 5), }.get(codec) if not limits: @@ -426,7 +427,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, information): - path = information['filepath'] + orig_path = path = information['filepath'] orig_ext = information['ext'] if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS: @@ -452,6 +453,10 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): more_opts = ['-f', 'adts'] if filecodec == 'vorbis': extension = 'ogg' + elif filecodec == 'alac': + acodec = None + extension = 'm4a' + more_opts += ['-acodec', 'alac'] else: # MP3 otherwise. acodec = 'libmp3lame' @@ -466,42 +471,49 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): more_opts = self._quality_args(acodec) if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] - if self._preferredcodec == 'm4a': + elif self._preferredcodec == 'm4a': more_opts += ['-bsf:a', 'aac_adtstoasc'] - if self._preferredcodec == 'vorbis': + elif self._preferredcodec == 'vorbis': extension = 'ogg' - if self._preferredcodec == 'wav': + elif self._preferredcodec == 'wav': extension = 'wav' more_opts += ['-f', 'wav'] + elif self._preferredcodec == 'alac': + extension = 'm4a' + more_opts += ['-acodec', 'alac'] prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups - new_path = prefix + sep + extension - - information['filepath'] = new_path - information['ext'] = extension + temp_path = new_path = prefix + sep + extension - # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly. 
- if (new_path == path - or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))): + if new_path == path: + orig_path = prepend_extension(path, 'orig') + temp_path = prepend_extension(path, 'temp') + if (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)) + and os.path.exists(encodeFilename(orig_path))): self.to_screen('Post-process file %s exists, skipping' % new_path) return [], information try: - self.to_screen('Destination: ' + new_path) - self.run_ffmpeg(path, new_path, acodec, more_opts) + self.to_screen(f'Destination: {new_path}') + self.run_ffmpeg(path, temp_path, acodec, more_opts) except AudioConversionError as e: raise PostProcessingError( 'audio conversion failed: ' + e.msg) except Exception: raise PostProcessingError('error running ' + self.basename) + os.replace(path, orig_path) + os.replace(temp_path, new_path) + information['filepath'] = new_path + information['ext'] = extension + # Try to update the date time for extracted audio file. if information.get('filetime') is not None: self.try_utime( new_path, time.time(), information['filetime'], errnote='Cannot update utime of audio file') - return [path], information + return [orig_path], information class FFmpegVideoConvertorPP(FFmpegPostProcessor): -- cgit v1.2.3 From 9222c38182604d0a9826291509e0719b45b3faac Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 05:36:28 +0530 Subject: [cleanup] Minor cleanup Closes #1696, Closes #1673 --- CONTRIBUTING.md | 2 +- Changelog.md | 2 +- README.md | 26 +++++++++++++------- test/test_youtube_signature.py | 4 +++ yt_dlp/YoutubeDL.py | 7 +++--- yt_dlp/__init__.py | 56 +++++++++++++++++------------------------- yt_dlp/extractor/francetv.py | 2 +- yt_dlp/extractor/funimation.py | 2 +- yt_dlp/extractor/linkedin.py | 2 +- yt_dlp/extractor/pbs.py | 2 +- yt_dlp/extractor/tenplay.py | 2 +- yt_dlp/extractor/youtube.py | 27 ++++++++++++++------ 12 files changed, 74 insertions(+), 60 deletions(-) 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd22afed9..8a0178d94 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -209,7 +209,7 @@ After you have ensured this site is distributing its content legally, you can fo ``` 1. Add an import in [`yt_dlp/extractor/extractors.py`](yt_dlp/extractor/extractors.py). 1. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all` -1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the purticular test is disabled from running. +1. Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running. 1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want. 1. 
Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): diff --git a/Changelog.md b/Changelog.md index 5ac2aa615..7bb8c7888 100644 --- a/Changelog.md +++ b/Changelog.md @@ -40,7 +40,7 @@ * [fragment] Fix progress display in fragmented downloads * [downloader/ffmpeg] Fix vtt download with ffmpeg * [ffmpeg] Detect presence of setts and libavformat version -* [ExtractAudio] Rescale --audio-quality correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan) +* [ExtractAudio] Rescale `--audio-quality` correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan) * [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal) * [FormatSort] `eac3` is better than `ac3` * [FormatSort] Fix some fields' defaults diff --git a/README.md b/README.md index 96f5d7ecb..1a5f84cc9 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ The major new features from the latest release of [blackjack4494/yt-dlc](https:/ * **New playlist extractors**: bilibili categories, eroprofile albums, hotstar series, hungama albums, newgrounds user, niconico search/users, paramountplus series, patreon user, peertube playlist/channels, roosterteeth series, sonyliv series, tiktok user, trovo channels, voot series -* **Fixed/improved extractors**: 7plus, 9now, afreecatv, akamai, aljazeera, amcnetworks, animalplanet, archive.org, arte, atv, bbc, bilibili, bitchute, bravotv, camtube, cbc, cda, ceskatelevize, chingari, comedycentral, coub, crackle, crunchyroll, curiositystream, diynetwork, dw, eroprofile, facebook, francetv, funimation, globo, hearthisatie, hidive, hotstar, hungama, imdb, ina, instagram, iprima, itv, iwara, kakao, la7, linkedinlearning, linuxacadamy, mediaset, mediasite, motherless, mxplayer, nbcolympics, ndr, newgrounds, niconico, nitter, nova, 
nrk, nuvid, oreilly, paramountplus, parliamentlive, patreon, pbs, peertube, plutotv, polskieradio, pornhub, reddit, reddit, redtube, rmcdecouverte, roosterteeth, rtp, rumble, saml verizon login, skyit, sonyliv, soundcloud, southparkde, spankbang, spreaker, streamable, tagesschau, tbs, tennistv, tenplay, tiktok, tubi, tv2, tv2hu, tv5mondeplus, tvp, twitcasting, vh1, viafree, videa, vidio, vidme, viewlift, viki, vimeo, viu, vk, vlive, vrt, wakanim, xhamster, yahoo +* **Fixed/improved extractors**: 7plus, 9now, afreecatv, akamai, aljazeera, amcnetworks, animalplanet, archive.org, arte, atv, bbc, bilibili, bitchute, bravotv, camtube, cbc, cda, ceskatelevize, chingari, comedycentral, coub, crackle, crunchyroll, curiositystream, diynetwork, dw, eroprofile, facebook, francetv, funimation, globo, hearthisatie, hidive, hotstar, hungama, imdb, ina, instagram, iprima, itv, iwara, kakao, la7, linkedinlearning, linuxacadamy, mediaset, mediasite, motherless, mxplayer, nbcolympics, ndr, newgrounds, niconico, nitter, nova, nrk, nuvid, oreilly, paramountplus, parliamentlive, patreon, pbs, peertube, plutotv, polskieradio, pornhub, reddit, redtube, rmcdecouverte, roosterteeth, rtp, rumble, saml verizon login, skyit, sonyliv, soundcloud, southparkde, spankbang, spreaker, streamable, tagesschau, tbs, tennistv, tenplay, tiktok, tubi, tv2, tv2hu, tv5mondeplus, tvp, twitcasting, vh1, viafree, videa, vidio, vidme, viewlift, viki, vimeo, viu, vk, vlive, vrt, wakanim, xhamster, yahoo * **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN @@ -136,7 +136,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. 
You can also use `--compat-options multistreams` to enable both * `--ignore-errors` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead * When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files -* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-infojson`. Use `--compat-options no-attach-info-json` to revert this +* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this * Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior * The output of `-F` is listed in a new format. 
Use `--compat-options list-formats` to revert this @@ -196,7 +196,7 @@ python3 -m pip install --no-deps -U yt-dlp If you want to be on the cutting edge, you can also install the master branch with: ``` -python3 -m pip3 install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip +python3 -m pip install --force-reinstall https://github.com/yt-dlp/yt-dlp/archive/master.zip ``` Note that on some systems, you may need to use `py` or `python` instead of `python3` @@ -793,7 +793,7 @@ You can also fork the project on github and push it to a release branch in your --audio-format FORMAT Specify audio format to convert the audio to when -x is used. Currently supported formats are: best (default) or one of - best|aac|flac|mp3|m4a|opus|vorbis|wav + best|aac|flac|mp3|m4a|opus|vorbis|wav|alac --audio-quality QUALITY Specify ffmpeg audio quality, insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K @@ -844,15 +844,20 @@ You can also fork the project on github and push it to a release branch in your --no-embed-subs Do not embed subtitles (default) --embed-thumbnail Embed thumbnail in the video as cover art --no-embed-thumbnail Do not embed thumbnail (default) - --embed-metadata Embed metadata to the video file. Also adds - chapters to file unless --no-add-chapters - is used (Alias: --add-metadata) + --embed-metadata Embed metadata to the video file. 
Also + embeds chapters/infojson if present unless + --no-embed-chapters/--no-embed-info-json + are used (Alias: --add-metadata) --no-embed-metadata Do not add metadata to file (default) (Alias: --no-add-metadata) --embed-chapters Add chapter markers to the video file (Alias: --add-chapters) --no-embed-chapters Do not add chapter markers (default) (Alias: --no-add-chapters) + --embed-info-json Embed the infojson as an attachment to + mkv/mka video files + --no-embed-info-json Do not embed the infojson as an attachment + to the video file --parse-metadata FROM:TO Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" for details @@ -1210,11 +1215,14 @@ If you are using an output template inside a Windows batch file then you must es Note that on Windows you need to use double quotes instead of single. ```bash +$ yt-dlp --get-filename -o 'test video.%(ext)s' BaW_jenozKc +test video.webm # Literal name with correct extension + $ yt-dlp --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc -youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters +youtube-dl test video ''_ä↭𝕐.webm # All kinds of weird characters $ yt-dlp --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames -youtube-dl_test_video_.mp4 # A simple file name +youtube-dl_test_video_.webm # Restricted file name # Download YouTube playlist videos in separate directory indexed by video order in a playlist $ yt-dlp -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 60d8eabf5..df4c36047 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -74,6 +74,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js', 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN', ), + ( + 'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js', + 
'oBo2h5euWy6osrUt', '3DIBbn3qdQ', + ), ] diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 197ec11e6..e078e62ef 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -528,7 +528,6 @@ class YoutubeDL(object): self.cache = Cache(self) windows_enable_vt_mode() - # FIXME: This will break if we ever print color to stdout self._allow_colors = { 'screen': not self.params.get('no_color') and supports_terminal_sequences(self._screen_file), 'err': not self.params.get('no_color') and supports_terminal_sequences(self._err_file), @@ -2012,10 +2011,10 @@ class YoutubeDL(object): # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector if format_spec == 'all': def selector_function(ctx): - yield from _check_formats(ctx['formats']) + yield from _check_formats(ctx['formats'][::-1]) elif format_spec == 'mergeall': def selector_function(ctx): - formats = list(_check_formats(ctx['formats'])) + formats = list(_check_formats(ctx['formats'][::-1])) if not formats: return merged_format = formats[-1] @@ -3163,7 +3162,7 @@ class YoutubeDL(object): return 'images' else: return default - return f'{res} images' if is_images else res + return f'img {res}' if is_images else res def _format_note(self, fdict): res = '' diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 63b9b6e2f..7960d3b03 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -378,8 +378,6 @@ def _real_main(argv=None): opts.sponsorblock_remove = set() sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove - if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: - opts.addchapters = True opts.remove_chapters = opts.remove_chapters or [] if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: @@ -400,40 +398,32 @@ def _real_main(argv=None): opts.remuxvideo = False if opts.allow_unplayable_formats: - if opts.extractaudio: - report_conflict('--allow-unplayable-formats', '--extract-audio') - opts.extractaudio = 
False - if opts.remuxvideo: - report_conflict('--allow-unplayable-formats', '--remux-video') - opts.remuxvideo = False - if opts.recodevideo: - report_conflict('--allow-unplayable-formats', '--recode-video') - opts.recodevideo = False - if opts.addmetadata: - report_conflict('--allow-unplayable-formats', '--add-metadata') - opts.addmetadata = False - if opts.embedsubtitles: - report_conflict('--allow-unplayable-formats', '--embed-subs') - opts.embedsubtitles = False - if opts.embedthumbnail: - report_conflict('--allow-unplayable-formats', '--embed-thumbnail') - opts.embedthumbnail = False - if opts.xattrs: - report_conflict('--allow-unplayable-formats', '--xattrs') - opts.xattrs = False - if opts.fixup and opts.fixup.lower() not in ('never', 'ignore'): - report_conflict('--allow-unplayable-formats', '--fixup') + def report_unplayable_conflict(opt_name, arg, default=False, allowed=None): + val = getattr(opts, opt_name) + if (not allowed and val) or not allowed(val): + report_conflict('--allow-unplayable-formats', arg) + setattr(opts, opt_name, default) + + report_unplayable_conflict('extractaudio', '--extract-audio') + report_unplayable_conflict('remuxvideo', '--remux-video') + report_unplayable_conflict('recodevideo', '--recode-video') + report_unplayable_conflict('addmetadata', '--embed-metadata') + report_unplayable_conflict('addchapters', '--embed-chapters') + report_unplayable_conflict('embed_infojson', '--embed-info-json') + opts.embed_infojson = False + report_unplayable_conflict('embedsubtitles', '--embed-subs') + report_unplayable_conflict('embedthumbnail', '--embed-thumbnail') + report_unplayable_conflict('xattrs', '--xattrs') + report_unplayable_conflict('fixup', '--fixup', default='never', allowed=lambda x: x in (None, 'never', 'ignore')) opts.fixup = 'never' - if opts.remove_chapters: - report_conflict('--allow-unplayable-formats', '--remove-chapters') - opts.remove_chapters = [] - if opts.sponsorblock_remove: - 
report_conflict('--allow-unplayable-formats', '--sponsorblock-remove') - opts.sponsorblock_remove = set() - if opts.sponskrub: - report_conflict('--allow-unplayable-formats', '--sponskrub') + report_unplayable_conflict('remove_chapters', '--remove-chapters', default=[]) + report_unplayable_conflict('sponsorblock_remove', '--sponsorblock-remove', default=set()) + report_unplayable_conflict('sponskrub', '--sponskrub', default=set()) opts.sponskrub = False + if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: + opts.addchapters = True + # PostProcessors postprocessors = list(opts.add_postprocessors) if sponsorblock_query: diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 3bbab69e6..bc5ef4df9 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -185,7 +185,7 @@ class FranceTVIE(InfoExtractor): 'vcodec': 'none', 'ext': 'mhtml', 'protocol': 'mhtml', - 'url': 'about:dummy', + 'url': 'about:invalid', 'fragments': [{ 'path': sheet, # XXX: not entirely accurate; each spritesheet seems to be diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 42711083e..96dad2ca3 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -276,7 +276,7 @@ class FunimationIE(FunimationBaseIE): def _get_subtitles(self, subtitles, experience_id, episode, display_id, format_name): if isinstance(episode, str): webpage = self._download_webpage( - f'https://www.funimation.com/player/{experience_id}', display_id, + f'https://www.funimation.com/player/{experience_id}/', display_id, fatal=False, note=f'Downloading player webpage for {format_name}') episode, _, _ = self._get_episode(webpage, episode_id=episode, fatal=False) diff --git a/yt_dlp/extractor/linkedin.py b/yt_dlp/extractor/linkedin.py index 9255b3301..bd76ae166 100644 --- a/yt_dlp/extractor/linkedin.py +++ b/yt_dlp/extractor/linkedin.py @@ -109,7 +109,7 @@ class LinkedInIE(LinkedInBaseIE): description = 
clean_html(get_element_by_class('share-update-card__update-text', webpage)) like_count = int_or_none(get_element_by_class('social-counts-reactions__social-counts-numRections', webpage)) creator = strip_or_none(clean_html(get_element_by_class('comment__actor-name', webpage))) - + sources = self._parse_json(extract_attributes(self._search_regex(r'(<video[^>]+>)', webpage, 'video'))['data-sources'], video_id) formats = [{ 'url': source['src'], diff --git a/yt_dlp/extractor/pbs.py b/yt_dlp/extractor/pbs.py index 0eabf9bee..ffaa6bf92 100644 --- a/yt_dlp/extractor/pbs.py +++ b/yt_dlp/extractor/pbs.py @@ -193,7 +193,7 @@ class PBSIE(InfoExtractor): # Article with embedded player (or direct video) (?:www\.)?pbs\.org/(?:[^/]+/){1,5}(?P<presumptive_id>[^/]+?)(?:\.html)?/?(?:$|[?\#]) | # Player - (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+)/ + (?:video|player)\.pbs\.org/(?:widget/)?partnerplayer/(?P<player_id>[^/]+) ) ''' % '|'.join(list(zip(*_STATIONS))[0]) diff --git a/yt_dlp/extractor/tenplay.py b/yt_dlp/extractor/tenplay.py index c810cfd0d..5b3222ecf 100644 --- a/yt_dlp/extractor/tenplay.py +++ b/yt_dlp/extractor/tenplay.py @@ -58,7 +58,7 @@ class TenPlayIE(InfoExtractor): 'email': username, 'password': password, })) - return "Bearer " + data['jwt']['accessToken'] + return 'Bearer ' + data['jwt']['accessToken'] def _real_extract(self, url): content_id = self._match_id(url) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 41e7fce10..1fbdcd98b 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -508,9 +508,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): Extracts visitorData from an API response or ytcfg Appears to be used to track session state """ - return traverse_obj( - args, (..., ('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))), - expected_type=compat_str, get_all=False) + return get_first( + args, (('VISITOR_DATA', 
('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))), + expected_type=str) @property def is_authenticated(self): @@ -1674,7 +1674,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # shorts 'url': 'https://www.youtube.com/shorts/BGQWPY4IigY', 'only_matching': True, - }, + }, { + 'note': 'Storyboards', + 'url': 'https://www.youtube.com/watch?v=5KLPxDtMqe8', + 'info_dict': { + 'id': '5KLPxDtMqe8', + 'ext': 'mhtml', + 'format_id': 'sb0', + 'title': 'Your Brain is Plastic', + 'uploader_id': 'scishow', + 'description': 'md5:89cd86034bdb5466cd87c6ba206cd2bc', + 'upload_date': '20140324', + 'uploader': 'SciShow', + }, 'params': {'format': 'mhtml', 'skip_download': True} + } ] @classmethod @@ -1920,9 +1933,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return sts def _mark_watched(self, video_id, player_responses): - playback_url = traverse_obj( - player_responses, (..., 'playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'), - expected_type=url_or_none, get_all=False) + playback_url = get_first( + player_responses, ('playbackTracking', 'videostatsPlaybackUrl', 'baseUrl'), + expected_type=url_or_none) if not playback_url: self.report_warning('Unable to mark watched') return -- cgit v1.2.3 From dd2a987d3f412dc61422ad13cf7b60920be8af6e Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 06:30:25 +0530 Subject: [tests] Fix tests --- test/test_YoutubeDL.py | 4 ++-- test/test_all_urls.py | 1 - test/test_youtube_lists.py | 22 ++++++++++++---------- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/extractor/youtube.py | 3 --- 5 files changed, 15 insertions(+), 17 deletions(-) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 5a0dabeb6..63ef50e1a 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -137,7 +137,7 @@ class TestFormatSelection(unittest.TestCase): test('webm/mp4', '47') test('3gp/40/mp4', '35') test('example-with-dashes', 'example-with-dashes') - test('all', '35', 
'example-with-dashes', '45', '47', '2') # Order doesn't actually matter for this + test('all', '2', '47', '45', 'example-with-dashes', '35') test('mergeall', '2+47+45+example-with-dashes+35', multi=True) def test_format_selection_audio(self): @@ -520,7 +520,7 @@ class TestFormatSelection(unittest.TestCase): ydl = YDL({'format': 'all[width>=400][width<=600]'}) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] - self.assertEqual(downloaded_ids, ['B', 'C', 'D']) + self.assertEqual(downloaded_ids, ['D', 'C', 'B']) ydl = YDL({'format': 'best[height<40]'}) try: diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 68c1c68d3..2d89366d4 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -38,7 +38,6 @@ class TestAllURLsMatching(unittest.TestCase): assertTab('https://www.youtube.com/AsapSCIENCE') assertTab('https://www.youtube.com/embedded') assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') - assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index e831393e4..d9638658d 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -26,29 +26,31 @@ class TestYoutubeLists(unittest.TestCase): def test_youtube_playlist_noplaylist(self): dl = FakeYDL() dl.params['noplaylist'] = True - ie = YoutubePlaylistIE(dl) + ie = YoutubeTabIE(dl) result = ie.extract('https://www.youtube.com/watch?v=FXxLjLQi3Fg&list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') self.assertEqual(result['_type'], 'url') - self.assertEqual(YoutubeIE().extract_id(result['url']), 'FXxLjLQi3Fg') + self.assertEqual(YoutubeIE.extract_id(result['url']), 
'FXxLjLQi3Fg') def test_youtube_course(self): + print('Skipping: Course URLs no longer exists') + return dl = FakeYDL() ie = YoutubePlaylistIE(dl) # TODO find a > 100 (paginating?) videos course result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') entries = list(result['entries']) - self.assertEqual(YoutubeIE().extract_id(entries[0]['url']), 'j9WZyLZCBzs') + self.assertEqual(YoutubeIE.extract_id(entries[0]['url']), 'j9WZyLZCBzs') self.assertEqual(len(entries), 25) - self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') + self.assertEqual(YoutubeIE.extract_id(entries[-1]['url']), 'rYefUsYuEp0') def test_youtube_mix(self): dl = FakeYDL() - ie = YoutubePlaylistIE(dl) - result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w') - entries = result['entries'] + ie = YoutubeTabIE(dl) + result = ie.extract('https://www.youtube.com/watch?v=tyITL_exICo&list=RDCLAK5uy_kLWIr9gv1XLlPbaDS965-Db4TrBoUTxQ8') + entries = list(result['entries']) self.assertTrue(len(entries) >= 50) original_video = entries[0] - self.assertEqual(original_video['id'], 'OQpdSVF_k_w') + self.assertEqual(original_video['id'], 'tyITL_exICo') def test_youtube_toptracks(self): print('Skipping: The playlist page gives error 500') @@ -68,10 +70,10 @@ class TestYoutubeLists(unittest.TestCase): entries = list(result['entries']) self.assertTrue(len(entries) == 1) video = entries[0] - self.assertEqual(video['_type'], 'url_transparent') + self.assertEqual(video['_type'], 'url') self.assertEqual(video['ie_key'], 'Youtube') self.assertEqual(video['id'], 'BaW_jenozKc') - self.assertEqual(video['url'], 'BaW_jenozKc') + self.assertEqual(video['url'], 'https://www.youtube.com/watch?v=BaW_jenozKc') self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐') self.assertEqual(video['duration'], 10) self.assertEqual(video['uploader'], 'Philipp Hagemeister') diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 
e078e62ef..1f1b4ccd4 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2014,7 +2014,7 @@ class YoutubeDL(object): yield from _check_formats(ctx['formats'][::-1]) elif format_spec == 'mergeall': def selector_function(ctx): - formats = list(_check_formats(ctx['formats'][::-1])) + formats = list(_check_formats(ctx['formats'])) if not formats: return merged_format = formats[-1] diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 1fbdcd98b..632129bc6 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3467,9 +3467,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'note': 'inline playlist with not always working continuations', 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C', 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8', - 'only_matching': True, }, { 'url': 'https://www.youtube.com/course', 'only_matching': True, -- cgit v1.2.3 From 6b993ca765753e0b04d65ec70cf787a2e9f94639 Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Fri, 19 Nov 2021 02:49:51 +0100 Subject: [hls] Better FairPlay DRM detection (#1661) Authored by: nyuszika7h --- yt_dlp/downloader/hls.py | 9 +++++++++ yt_dlp/extractor/common.py | 8 ++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py index 61312c5ba..e932fd6ae 100644 --- a/yt_dlp/downloader/hls.py +++ b/yt_dlp/downloader/hls.py @@ -77,6 +77,15 @@ class HlsFD(FragmentFD): message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' 'Decryption will be performed natively, but will be extremely slow') if not can_download: + has_drm = re.search('|'.join([ + r'#EXT-X-FAXS-CM:', # Adobe Flash Access + r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay + ]), s) + if has_drm and not self.params.get('allow_unplayable_formats'): + self.report_error( + 'This video 
is DRM protected; Try selecting another format with --format or ' + 'add --check-formats to automatically fallback to the next best format') + return False message = message or 'Unsupported features have been detected' fd = FFmpegFD(self.ydl, self.params) self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 6f0650296..a47364d07 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2035,10 +2035,10 @@ class InfoExtractor(object): video_id=None): formats, subtitles = [], {} - if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access - return formats, subtitles - - has_drm = re.search(r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', m3u8_doc) + has_drm = re.search('|'.join([ + r'#EXT-X-FAXS-CM:', # Adobe Flash Access + r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay + ]), m3u8_doc) def format_url(url): return url if re.match(r'^https?://', url) else compat_urlparse.urljoin(m3u8_url, url) -- cgit v1.2.3 From a04e005521ecf2eb0c4979e234ff0c4f23a3caa0 Mon Sep 17 00:00:00 2001 From: The Hatsune Daishi <nao20010128@gmail.com> Date: Fri, 19 Nov 2021 10:54:10 +0900 Subject: [AES] Add ECB mode (#1686) Needed for #1688 Authored by: nao20010128nao --- test/test_aes.py | 18 +++++++++++++++++- yt_dlp/aes.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/test/test_aes.py b/test/test_aes.py index 46db59e57..5c9273f8a 100644 --- a/test/test_aes.py +++ b/test/test_aes.py @@ -10,6 +10,8 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.aes import ( aes_decrypt, aes_encrypt, + aes_ecb_encrypt, + aes_ecb_decrypt, aes_cbc_decrypt, aes_cbc_decrypt_bytes, aes_cbc_encrypt, @@ -17,7 +19,8 @@ from yt_dlp.aes import ( aes_ctr_encrypt, aes_gcm_decrypt_and_verify, aes_gcm_decrypt_and_verify_bytes, - aes_decrypt_text + aes_decrypt_text, + BLOCK_SIZE_BYTES, ) 
from yt_dlp.compat import compat_pycrypto_AES from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes @@ -94,6 +97,19 @@ class TestAES(unittest.TestCase): decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) + def test_ecb_encrypt(self): + data = bytes_to_intlist(self.secret_msg) + data += [0x08] * (BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES) + encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key, self.iv)) + self.assertEqual( + encrypted, + b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + + def test_ecb_decrypt(self): + data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') + decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv)) + self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/aes.py b/yt_dlp/aes.py index 60cdeb74e..8503e3dfd 100644 --- a/yt_dlp/aes.py +++ b/yt_dlp/aes.py @@ -28,6 +28,48 @@ else: BLOCK_SIZE_BYTES = 16 +def aes_ecb_encrypt(data, key, iv=None): + """ + Encrypt with aes in ECB mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} encrypted data + """ + expanded_key = key_expansion(key) + block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_encrypt(block, expanded_key) + encrypted_data = encrypted_data[:len(data)] + + return encrypted_data + + +def aes_ecb_decrypt(data, key, iv=None): + """ + Decrypt with aes in ECB mode + + @param {int[]} data cleartext + @param {int[]} key 16/24/32-Byte cipher key + @param {int[]} iv Unused for this mode + @returns {int[]} decrypted data + """ + expanded_key = key_expansion(key) + block_count = 
int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) + + encrypted_data = [] + for i in range(block_count): + block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] + encrypted_data += aes_decrypt(block, expanded_key) + encrypted_data = encrypted_data[:len(data)] + + return encrypted_data + + def aes_ctr_decrypt(data, key, iv): """ Decrypt with aes in counter mode -- cgit v1.2.3 From 7333296ff5386efcd13a9db780170350e1924389 Mon Sep 17 00:00:00 2001 From: Paper <37962225+mrpapersonic@users.noreply.github.com> Date: Fri, 19 Nov 2021 01:11:36 -0500 Subject: [VidLii] Add 720p support (#1681) Authored by: mrpapersonic --- yt_dlp/extractor/vidlii.py | 47 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 8 deletions(-) diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py index f4774256b..ce7487ec1 100644 --- a/yt_dlp/extractor/vidlii.py +++ b/yt_dlp/extractor/vidlii.py @@ -5,9 +5,11 @@ import re from .common import InfoExtractor from ..utils import ( + HEADRequest, float_or_none, get_element_by_id, int_or_none, + str_to_int, strip_or_none, unified_strdate, urljoin, @@ -35,6 +37,25 @@ class VidLiiIE(InfoExtractor): 'categories': ['News & Politics'], 'tags': ['Vidlii', 'Jan', 'Videogames'], } + }, { + 'url': 'https://www.vidlii.com/watch?v=zTAtaAgOLKt', + 'md5': '5778f7366aa4c569b77002f8bf6b614f', + 'info_dict': { + 'id': 'zTAtaAgOLKt', + 'ext': 'mp4', + 'title': 'FULPTUBE SUCKS.', + 'description': 'md5:087b2ca355d4c8f8f77e97c43e72d711', + 'thumbnail': 'https://www.vidlii.com/usfi/thmp/zTAtaAgOLKt.jpg', + 'uploader': 'Homicide', + 'uploader_url': 'https://www.vidlii.com/user/Homicide', + 'upload_date': '20210612', + 'duration': 89, + 'view_count': int, + 'comment_count': int, + 'average_rating': float, + 'categories': ['News & Politics'], + 'tags': ['fulp', 'tube', 'sucks', 'bad', 'fulptube'], + }, }, { 'url': 'https://www.vidlii.com/embed?v=tJluaH4BJ3v&a=0', 'only_matching': True, @@ -45,10 +66,20 @@ class 
VidLiiIE(InfoExtractor): webpage = self._download_webpage( 'https://www.vidlii.com/watch?v=%s' % video_id, video_id) - - video_url = self._search_regex( - r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', webpage, - 'video url', group='url') + formats = [] + + sources = [source[1] for source in re.findall( + r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1', + webpage) or []] + for source in sources: + height = int(self._search_regex(r'(\d+).mp4', source, 'height', default=360)) + if self._request_webpage(HEADRequest(source), video_id, f'Checking {height}p url', errnote=False): + formats.append({ + 'url': source, + 'format_id': f'{height}p', + 'height': height, + }) + self._sort_formats(formats) title = self._search_regex( (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage, @@ -82,9 +113,9 @@ class VidLiiIE(InfoExtractor): default=None) or self._search_regex( r'duration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) - view_count = int_or_none(self._search_regex( - (r'<strong>(\d+)</strong> views', - r'Views\s*:\s*<strong>(\d+)</strong>'), + view_count = str_to_int(self._search_regex( + (r'<strong>([,0-9]+)</strong> views', + r'Views\s*:\s*<strong>([,0-9]+)</strong>'), webpage, 'view count', fatal=False)) comment_count = int_or_none(self._search_regex( @@ -109,11 +140,11 @@ class VidLiiIE(InfoExtractor): return { 'id': video_id, - 'url': video_url, 'title': title, 'description': description, 'thumbnail': thumbnail, 'uploader': uploader, + 'formats': formats, 'uploader_url': uploader_url, 'upload_date': upload_date, 'duration': duration, -- cgit v1.2.3 From c45b87419f86b5c513a3135ea17e93b3deea6e29 Mon Sep 17 00:00:00 2001 From: nyuszika7h <nyuszika7h@gmail.com> Date: Fri, 19 Nov 2021 15:57:01 +0100 Subject: [bbc] Get all available formats (#1717) Authored by: nyuszika7h --- yt_dlp/extractor/bbc.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py index 
4e2dcd76b..672ed1ffe 100644 --- a/yt_dlp/extractor/bbc.py +++ b/yt_dlp/extractor/bbc.py @@ -451,9 +451,10 @@ class BBCCoUkIE(InfoExtractor): playlist = self._download_json( 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id, playlist_id, 'Downloading playlist JSON') + formats = [] + subtitles = {} - version = playlist.get('defaultAvailableVersion') - if version: + for version in playlist.get('allAvailableVersions', []): smp_config = version['smpConfig'] title = smp_config['title'] description = smp_config['summary'] @@ -463,8 +464,18 @@ class BBCCoUkIE(InfoExtractor): continue programme_id = item.get('vpid') duration = int_or_none(item.get('duration')) - formats, subtitles = self._download_media_selector(programme_id) - return programme_id, title, description, duration, formats, subtitles + version_formats, version_subtitles = self._download_media_selector(programme_id) + types = version['types'] + for f in version_formats: + f['format_note'] = ', '.join(types) + if any('AudioDescribed' in x for x in types): + f['language_preference'] = -10 + formats += version_formats + for tag, subformats in (version_subtitles or {}).items(): + subtitles.setdefault(tag, []) + subtitles[tag] += subformats + + return programme_id, title, description, duration, formats, subtitles except ExtractorError as ee: if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404): raise -- cgit v1.2.3 From c5e3f84972f19e8f5c99ca358cf30bb105294e20 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 20 Nov 2021 08:33:51 +0530 Subject: [utils] Allow alignment in `render_table` and add tests --- test/test_utils.py | 39 ++++++++++++++++++++++++++++++++++++++- yt_dlp/YoutubeDL.py | 35 +++++++++++++++++------------------ yt_dlp/utils.py | 22 +++++++++++++--------- 3 files changed, 68 insertions(+), 28 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 810ed3de4..b918ae2b6 100644 --- a/test/test_utils.py +++ b/test/test_utils.py 
@@ -1220,14 +1220,51 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00')) def test_render_table(self): + self.assertEqual( + render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]]), + 'a empty bcd\n' + '123 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['a', 'empty', 'bcd'], + [[123, '', 4], [9999, '', 51]], + hide_empty=True), + 'a bcd\n' + '123 4\n' + '9999 51') + + self.assertEqual( + render_table( + ['\ta', 'bcd'], + [['1\t23', 4], ['\t9999', 51]]), + ' a bcd\n' + '1 23 4\n' + '9999 51') + self.assertEqual( render_table( ['a', 'bcd'], - [[123, 4], [9999, 51]]), + [[123, 4], [9999, 51]], + delim='-'), 'a bcd\n' + '--------\n' '123 4\n' '9999 51') + self.assertEqual( + render_table( + ['a', 'bcd'], + [[123, 4], [9999, 51]], + delim='-', extra_gap=2), + 'a bcd\n' + '----------\n' + '123 4\n' + '9999 51') + def test_match_str(self): # Unary self.assertFalse(match_str('xy', {'x': 1200})) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1f1b4ccd4..4bd6dcc4c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3229,37 +3229,36 @@ class YoutubeDL(object): formats = info_dict.get('formats', [info_dict]) new_format = self.params.get('listformats_table', True) is not False if new_format: - tbr_digits = number_of_digits(max(f.get('tbr') or 0 for f in formats)) - vbr_digits = number_of_digits(max(f.get('vbr') or 0 for f in formats)) - abr_digits = number_of_digits(max(f.get('abr') or 0 for f in formats)) delim = self._format_screen('\u2502', self.Styles.DELIM, '|', test_encoding=True) table = [ [ self._format_screen(format_field(f, 'format_id'), self.Styles.ID), format_field(f, 'ext'), self.format_resolution(f), - format_field(f, 'fps', '%3d'), + format_field(f, 'fps', '\t%d'), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), delim, - format_field(f, 
'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes), - format_field(f, 'tbr', f'%{tbr_digits}dk'), - shorten_protocol_name(f.get('protocol', '').replace("native", "n")), + format_field(f, 'filesize', ' \t%s', func=format_bytes) + format_field(f, 'filesize_approx', '~\t%s', func=format_bytes), + format_field(f, 'tbr', '\t%dk'), + shorten_protocol_name(f.get('protocol', '').replace('native', 'n')), delim, format_field(f, 'vcodec', default='unknown').replace('none', ''), - format_field(f, 'vbr', f'%{vbr_digits}dk'), + format_field(f, 'vbr', '\t%dk'), format_field(f, 'acodec', default='unknown').replace('none', ''), - format_field(f, 'abr', f'%{abr_digits}dk'), - format_field(f, 'asr', '%5dHz'), + format_field(f, 'abr', '\t%dk'), + format_field(f, 'asr', '\t%dHz'), join_nonempty( self._format_screen('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None, format_field(f, 'language', '[%s]'), - format_field(f, 'format_note'), - format_field(f, 'container', ignore=(None, f.get('ext'))), - delim=', '), + join_nonempty( + format_field(f, 'format_note'), + format_field(f, 'container', ignore=(None, f.get('ext'))), + delim=', '), + delim=' '), ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] header_line = self._list_format_headers( - 'ID', 'EXT', 'RESOLUTION', 'FPS', 'HDR', delim, ' FILESIZE', ' TBR', 'PROTO', - delim, 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'MORE INFO') + 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', delim, '\tFILESIZE', '\tTBR', 'PROTO', + delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') else: table = [ [ @@ -3275,8 +3274,8 @@ class YoutubeDL(object): '[info] Available formats for %s:' % info_dict['id']) self.to_stdout(render_table( header_line, table, - extraGap=(0 if new_format else 1), - hideEmpty=new_format, + extra_gap=(0 if new_format else 1), + hide_empty=new_format, delim=new_format and self._format_screen('\u2500', self.Styles.DELIM, 
'-', test_encoding=True))) def list_thumbnails(self, info_dict): @@ -3307,7 +3306,7 @@ class YoutubeDL(object): self.to_stdout(render_table( self._list_format_headers('Language', 'Name', 'Formats'), [_row(lang, formats) for lang, formats in subtitles.items()], - hideEmpty=True)) + hide_empty=True)) def urlopen(self, req): """ Start an HTTP download """ diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a9e066257..282ed1f93 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4805,10 +4805,11 @@ def determine_protocol(info_dict): return compat_urllib_parse_urlparse(url).scheme -def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): - """ Render a list of rows, each as a list of values """ +def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False): + """ Render a list of rows, each as a list of values. + Text after a \t will be right aligned """ def width(string): - return len(remove_terminal_sequences(string)) + return len(remove_terminal_sequences(string).replace('\t', '')) def get_max_lens(table): return [max(width(str(v)) for v in col) for col in zip(*table)] @@ -4816,21 +4817,24 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): def filter_using_list(row, filterArray): return [col for (take, col) in zip(filterArray, row) if take] - if hideEmpty: + if hide_empty: max_lens = get_max_lens(data) header_row = filter_using_list(header_row, max_lens) data = [filter_using_list(row, max_lens) for row in data] table = [header_row] + data max_lens = get_max_lens(table) - extraGap += 1 + extra_gap += 1 if delim: - table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data - max_lens[-1] = 0 + table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data + table[1][-1] = table[1][-1][:-extra_gap] # Remove extra_gap from end of delimiter for row in table: for pos, text in enumerate(map(str, row)): - row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap)) - 
ret = '\n'.join(''.join(row) for row in table) + if '\t' in text: + row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap + else: + row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap) + ret = '\n'.join(''.join(row).rstrip() for row in table) return ret -- cgit v1.2.3 From c07a39ae8e3e3b71ec8c7c0fa3e91b6908584316 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Fri, 19 Nov 2021 20:45:52 +0530 Subject: [utils] Fix `PagedList` Bug in d8cf8d97a8dbc9602556de474af133b5ab0e0a29 --- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/utils.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 4bd6dcc4c..62ec087b8 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1328,7 +1328,7 @@ class YoutubeDL(object): self.to_stderr('\r') self.report_warning(f'{e}; Re-extracting data') return wrapper(self, *args, **kwargs) - except (DownloadCancelled, LazyList.IndexError): + except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError): raise except Exception as e: if self.params.get('ignoreerrors'): diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 282ed1f93..2d5b9892d 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4168,6 +4168,10 @@ class LazyList(collections.abc.Sequence): class PagedList: + + class IndexError(IndexError): + pass + def __len__(self): # This is only useful for tests return len(self.getslice()) @@ -4198,7 +4202,7 @@ class PagedList: raise TypeError('indices must be non-negative integers') entries = self.getslice(idx, idx + 1) if not entries: - raise IndexError() + raise self.IndexError() return entries[0] -- cgit v1.2.3 From 282f570918f936a3aa9f57d4c85de4693da882c9 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 20 Nov 2021 08:05:57 +0530 Subject: [utils] Fix error when copying `LazyList` --- test/test_utils.py | 10 +++++----- yt_dlp/YoutubeDL.py | 4 ++-- yt_dlp/utils.py | 20 
++++++++++++++------ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index b918ae2b6..22dda4f37 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1657,9 +1657,9 @@ Line 1 self.assertEqual(repr(LazyList(it)), repr(it)) self.assertEqual(str(LazyList(it)), str(it)) - self.assertEqual(list(LazyList(it).reverse()), it[::-1]) - self.assertEqual(list(LazyList(it).reverse()[1:3:7]), it[::-1][1:3:7]) - self.assertEqual(list(LazyList(it).reverse()[::-1]), it) + self.assertEqual(list(LazyList(it, reverse=True)), it[::-1]) + self.assertEqual(list(reversed(LazyList(it))[::-1]), it) + self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7]) def test_LazyList_laziness(self): @@ -1672,13 +1672,13 @@ Line 1 test(ll, 5, 5, range(6)) test(ll, -3, 7, range(10)) - ll = LazyList(range(10)).reverse() + ll = LazyList(range(10), reverse=True) test(ll, -1, 0, range(1)) test(ll, 3, 6, range(10)) ll = LazyList(itertools.count()) test(ll, 10, 10, range(11)) - ll.reverse() + ll = reversed(ll) test(ll, -15, 14, range(15)) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 62ec087b8..fb7e12624 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2166,7 +2166,7 @@ class YoutubeDL(object): t['url'] = sanitize_url(t['url']) if self.params.get('check_formats') is True: - info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1])).reverse() + info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True) else: info_dict['thumbnails'] = thumbnails @@ -2361,7 +2361,7 @@ class YoutubeDL(object): # TODO Central sorting goes here if self.params.get('check_formats') is True: - formats = LazyList(self._check_formats(formats[::-1])).reverse() + formats = LazyList(self._check_formats(formats[::-1]), reverse=True) if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict list them diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 
2d5b9892d..ade2bbff1 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4086,10 +4086,10 @@ class LazyList(collections.abc.Sequence): class IndexError(IndexError): pass - def __init__(self, iterable): + def __init__(self, iterable, *, reverse=False, _cache=None): self.__iterable = iter(iterable) - self.__cache = [] - self.__reversed = False + self.__cache = [] if _cache is None else _cache + self.__reversed = reverse def __iter__(self): if self.__reversed: @@ -4155,9 +4155,17 @@ class LazyList(collections.abc.Sequence): self.__exhaust() return len(self.__cache) - def reverse(self): - self.__reversed = not self.__reversed - return self + def __reversed__(self): + return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache) + + def __copy__(self): + return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache) + + def __deepcopy__(self, memo): + # FIXME: This is actually just a shallow copy + id_ = id(self) + memo[id_] = self.__copy__() + return memo[id_] def __repr__(self): # repr and str should mimic a list. 
So we exhaust the iterable -- cgit v1.2.3 From d76991ab0743a1e855bd44be597a40c89d5a814a Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Sat, 20 Nov 2021 08:27:47 +0530 Subject: Fix `--check-formats` for `mhtml` Closes #1709 --- yt_dlp/downloader/mhtml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/downloader/mhtml.py b/yt_dlp/downloader/mhtml.py index b75db18a8..1477f65a6 100644 --- a/yt_dlp/downloader/mhtml.py +++ b/yt_dlp/downloader/mhtml.py @@ -114,8 +114,8 @@ body > figure > img { fragment_base_url = info_dict.get('fragment_base_url') fragments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] - title = info_dict['title'] - origin = info_dict['webpage_url'] + title = info_dict.get('title', info_dict['format_id']) + origin = info_dict.get('webpage_url', info_dict['url']) ctx = { 'filename': filename, -- cgit v1.2.3 From 545ad64988d03b8c38e51004cd6941236f529e66 Mon Sep 17 00:00:00 2001 From: aarubui <aarubui@users.noreply.github.com> Date: Sat, 20 Nov 2021 15:03:43 +1100 Subject: [willow] Add extractor (#1723) Authored by: aarubui --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/willow.py | 58 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 yt_dlp/extractor/willow.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index d19c67243..fdcd60e2d 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1789,6 +1789,7 @@ from .weibo import ( WeiboMobileIE ) from .weiqitv import WeiqiTVIE +from .willow import WillowIE from .wimtv import WimTVIE from .whowatch import WhoWatchIE from .wistia import ( diff --git a/yt_dlp/extractor/willow.py b/yt_dlp/extractor/willow.py new file mode 100644 index 000000000..4d3d62f95 --- /dev/null +++ b/yt_dlp/extractor/willow.py @@ -0,0 +1,58 @@ +# coding: utf-8 +from ..utils import ExtractorError +from .common import 
InfoExtractor + + +class WillowIE(InfoExtractor): + _VALID_URL = r'https?://(www\.)?willow\.tv/videos/(?P<id>[0-9a-z-_]+)' + _GEO_COUNTRIES = ['US'] + + _TESTS = [{ + 'url': 'http://willow.tv/videos/d5winning-moment-eng-vs-ind-streaming-online-4th-test-india-tour-of-england-2021', + 'info_dict': { + 'id': '169662', + 'display_id': 'd5winning-moment-eng-vs-ind-streaming-online-4th-test-india-tour-of-england-2021', + 'ext': 'mp4', + 'title': 'Winning Moment: 4th Test, England vs India', + 'thumbnail': 'https://aimages.willow.tv/ytThumbnails/6748_D5winning_moment.jpg', + 'duration': 233, + 'timestamp': 1630947954, + 'upload_date': '20210906', + 'location': 'Kennington Oval, London', + 'series': 'India tour of England 2021', + }, + 'params': { + 'skip_download': True, # AES-encrypted m3u8 + }, + }, { + 'url': 'http://willow.tv/videos/highlights-short-ind-vs-nz-streaming-online-2nd-t20i-new-zealand-tour-of-india-2021', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + video_data = self._parse_json(self._html_search_regex( + r'var\s+data_js\s*=\s*JSON\.parse\(\'(.+)\'\)', webpage, + 'data_js'), video_id) + + video = next((v for v in video_data.get('trending_videos') or [] + if v.get('secureurl')), None) + if not video: + raise ExtractorError('No videos found') + + formats = self._extract_m3u8_formats(video['secureurl'], video_id, 'mp4') + self._sort_formats(formats) + + return { + 'id': str(video.get('content_id')), + 'display_id': video.get('video_slug'), + 'title': video.get('video_name') or self._html_search_meta('twitter:title', webpage), + 'formats': formats, + 'thumbnail': video.get('yt_thumb_url') or self._html_search_meta( + 'twitter:image', webpage, default=None), + 'duration': video.get('duration_seconds'), + 'timestamp': video.get('created_date'), + 'location': video.get('venue'), + 'series': video.get('series_name'), + } -- cgit v1.2.3 From 
77fcc6515852bc2e1c6960a6e010ab2ff1caf1ee Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 20 Nov 2021 14:55:14 +0530 Subject: [CozyTV] Add extractor (#1727) Authored by: Ashish0804 --- yt_dlp/extractor/cozytv.py | 40 ++++++++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 41 insertions(+) create mode 100644 yt_dlp/extractor/cozytv.py diff --git a/yt_dlp/extractor/cozytv.py b/yt_dlp/extractor/cozytv.py new file mode 100644 index 000000000..868d8d27d --- /dev/null +++ b/yt_dlp/extractor/cozytv.py @@ -0,0 +1,40 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..utils import unified_strdate + + +class CozyTVIE(InfoExtractor): + _VALID_URL = r'(?:https?://)(?:www\.)?cozy\.tv/(?P<uploader>[^/]+)/replays/(?P<id>[^/$#&?]+)' + + _TESTS = [{ + 'url': 'https://cozy.tv/beardson/replays/2021-11-19_1', + 'info_dict': { + 'id': 'beardson-2021-11-19_1', + 'ext': 'mp4', + 'title': 'pokemon pt2', + 'uploader': 'beardson', + 'upload_date': '20211119', + 'was_live': True, + 'duration': 7981, + }, + 'params': {'skip_download': True} + }] + + def _real_extract(self, url): + uploader, date = self._match_valid_url(url).groups() + id = f'{uploader}-{date}' + data_json = self._download_json(f'https://api.cozy.tv/cache/{uploader}/replay/{date}', id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + f'https://cozycdn.foxtrotstream.xyz/replays/{uploader}/{date}/index.m3u8', id, ext='mp4') + return { + 'id': id, + 'title': data_json.get('title'), + 'uploader': data_json.get('user') or uploader, + 'upload_date': unified_strdate(data_json.get('date')), + 'was_live': True, + 'duration': data_json.get('duration'), + 'formats': formats, + 'subtitles': subtitles, + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index fdcd60e2d..a0f4908f0 100644 --- a/yt_dlp/extractor/extractors.py +++ 
b/yt_dlp/extractor/extractors.py @@ -293,6 +293,7 @@ from .commonprotocols import ( from .condenast import CondeNastIE from .contv import CONtvIE from .corus import CorusIE +from .cozytv import CozyTVIE from .cracked import CrackedIE from .crackle import CrackleIE from .crooksandliars import CrooksAndLiarsIE -- cgit v1.2.3 From 849d699a8b2d36a9aab6c3a34073c9d1c5088a29 Mon Sep 17 00:00:00 2001 From: 4a1e2y5 <66421735+4a1e2y5@users.noreply.github.com> Date: Sun, 21 Nov 2021 00:24:05 +0100 Subject: [xvideos] Detect embed URLs (#1729) Authored by: 4a1e2y5 --- yt_dlp/extractor/xvideos.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index 8fc64914c..ef45eb929 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -19,7 +19,7 @@ class XVideosIE(InfoExtractor): (?: (?:[^/]+\.)?xvideos2?\.com/video| (?:www\.)?xvideos\.es/video| - flashservice\.xvideos\.com/embedframe/| + (?:www|flashservice)\.xvideos\.com/embedframe/| static-hw\.xvideos\.com/swf/xv-player\.swf\?.*?\bid_video= ) (?P<id>[0-9]+) @@ -37,6 +37,9 @@ class XVideosIE(InfoExtractor): }, { 'url': 'https://flashservice.xvideos.com/embedframe/4588838', 'only_matching': True, + }, { + 'url': 'https://www.xvideos.com/embedframe/4588838', + 'only_matching': True, }, { 'url': 'http://static-hw.xvideos.com/swf/xv-player.swf?id_video=4588838', 'only_matching': True, -- cgit v1.2.3 From c98d4df23bfba30fc38f2614bd96db67644e7ddf Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 22 Nov 2021 13:41:57 +0530 Subject: [WDR] Expand valid URL Closes #1749 --- yt_dlp/extractor/wdr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/wdr.py b/yt_dlp/extractor/wdr.py index f54aa6ff9..d3229d8af 100644 --- a/yt_dlp/extractor/wdr.py +++ b/yt_dlp/extractor/wdr.py @@ -22,7 +22,11 @@ from ..utils import ( class WDRIE(InfoExtractor): - _VALID_URL = 
r'https?://deviceids-medp\.wdr\.de/ondemand/\d+/(?P<id>\d+)\.js' + _VALID_URL = r'''(?x)https?:// + (?:deviceids-medp\.wdr\.de/ondemand/\d+/| + kinder\.wdr\.de/(?!mediathek/)[^#?]+-) + (?P<id>\d+)\.(?:js|assetjsonp) + ''' _GEO_COUNTRIES = ['DE'] _TEST = { 'url': 'http://deviceids-medp.wdr.de/ondemand/155/1557833.js', -- cgit v1.2.3 From 234416e4bf39d442e7abd036b7c59b8934a4086b Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Mon, 22 Nov 2021 23:32:14 +0530 Subject: [downloader/ffmpeg] Fix for direct videos inside mpd manifests Closes #1751 --- yt_dlp/downloader/external.py | 3 +-- yt_dlp/extractor/common.py | 9 +++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py index 1efbb2fab..da69423f7 100644 --- a/yt_dlp/downloader/external.py +++ b/yt_dlp/downloader/external.py @@ -443,8 +443,7 @@ class FFmpegFD(ExternalFD): if info_dict.get('requested_formats') or protocol == 'http_dash_segments': for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]): stream_number = fmt.get('manifest_stream_number', 0) - a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v' - args.extend(['-map', f'{i}:{a_or_v}:{stream_number}']) + args.extend(['-map', f'{i}:{stream_number}']) if self.params.get('test', False): args += ['-fs', compat_str(self._TEST_FILE_SIZE)] diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a47364d07..1565ba5c3 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import base64 +import collections import datetime import hashlib import itertools @@ -2649,7 +2650,7 @@ class InfoExtractor(object): mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) formats, subtitles = [], {} - stream_numbers = {'audio': 0, 'video': 0} + stream_numbers = collections.defaultdict(int) for period in mpd_doc.findall(_add_ns('Period')): period_duration 
= parse_duration(period.get('duration')) or mpd_duration period_ms_info = extract_multisegment_info(period, { @@ -2715,10 +2716,8 @@ class InfoExtractor(object): 'format_note': 'DASH %s' % content_type, 'filesize': filesize, 'container': mimetype2ext(mime_type) + '_dash', - 'manifest_stream_number': stream_numbers[content_type] } f.update(parse_codecs(codecs)) - stream_numbers[content_type] += 1 elif content_type == 'text': f = { 'ext': mimetype2ext(mime_type), @@ -2885,7 +2884,9 @@ class InfoExtractor(object): else: # Assuming direct URL to unfragmented media. f['url'] = base_url - if content_type in ('video', 'audio') or mime_type == 'image/jpeg': + if content_type in ('video', 'audio', 'image/jpeg'): + f['manifest_stream_number'] = stream_numbers[f['url']] + stream_numbers[f['url']] += 1 formats.append(f) elif content_type == 'text': subtitles.setdefault(lang or 'und', []).append(f) -- cgit v1.2.3 From 1ee34c76bb6e3a74d5a4d76475469e64dc201063 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 01:09:17 +0530 Subject: [vimeo] Add fallback for config URL Closes #1662 --- yt_dlp/extractor/vimeo.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/vimeo.py b/yt_dlp/extractor/vimeo.py index 7df4116f3..e2b86662b 100644 --- a/yt_dlp/extractor/vimeo.py +++ b/yt_dlp/extractor/vimeo.py @@ -604,6 +604,20 @@ class VimeoIE(VimeoBaseInfoExtractor): 'format': 'Original', }, }, + { + 'url': 'https://vimeo.com/channels/staffpicks/143603739', + 'info_dict': { + 'id': '143603739', + 'ext': 'mp4', + 'uploader': 'Karim Huu Do', + 'timestamp': 1445846953, + 'upload_date': '20151026', + 'title': 'The Shoes - Submarine Feat. 
Blaine Harrison', + 'uploader_id': 'karimhd', + 'description': 'md5:8e2eea76de4504c2e8020a9bcfa1e843', + }, + 'params': {'skip_download': 'm3u8'}, + }, { # requires passing unlisted_hash(a52724358e) to load_download_config request 'url': 'https://vimeo.com/392479337/a52724358e', @@ -798,18 +812,19 @@ class VimeoIE(VimeoBaseInfoExtractor): timestamp = None video_description = None info_dict = {} + config_url = None channel_id = self._search_regex( r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None) if channel_id: config_url = self._html_search_regex( - r'\bdata-config-url="([^"]+)"', webpage, 'config URL') + r'\bdata-config-url="([^"]+)"', webpage, 'config URL', default=None) video_description = clean_html(get_element_by_class('description', webpage)) info_dict.update({ 'channel_id': channel_id, 'channel_url': 'https://vimeo.com/channels/' + channel_id, }) - else: + if not config_url: page_config = self._parse_json(self._search_regex( r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});', webpage, 'page config', default='{}'), video_id, fatal=False) -- cgit v1.2.3 From f7b558df4d76fae77a5bbac62364195891673738 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 01:14:25 +0530 Subject: [mediaklikk] Expand valid URL Partial fix for #1409 --- yt_dlp/extractor/mediaklikk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/mediaklikk.py b/yt_dlp/extractor/mediaklikk.py index b9b6d739f..18ff3befa 100644 --- a/yt_dlp/extractor/mediaklikk.py +++ b/yt_dlp/extractor/mediaklikk.py @@ -12,8 +12,8 @@ from ..compat import ( class MediaKlikkIE(InfoExtractor): - _VALID_URL = r'''(?x)^https?:\/\/(?:www\.)? - (?:mediaklikk|m4sport|hirado|petofilive)\.hu\/.*?videok?\/ + _VALID_URL = r'''(?x)https?://(?:www\.)? + (?:mediaklikk|m4sport|hirado|petofilive)\.hu/.*?(?:videok?|cikk)/ (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)? 
(?P<id>[^/#?_]+)''' -- cgit v1.2.3 From 0e6b018a10e751bc6da59cdf5d55e61cdf975efa Mon Sep 17 00:00:00 2001 From: Zirro <code@zirro.se> Date: Tue, 23 Nov 2021 01:40:53 +0530 Subject: Ensure path for link files exists (#1755) Authored by: Zirro --- yt_dlp/YoutubeDL.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index fb7e12624..5c2d64598 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -2690,6 +2690,8 @@ class YoutubeDL(object): self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information') return False linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) + if not self._ensure_dir_exists(encodeFilename(linkfn)): + return False if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') return True -- cgit v1.2.3 From 14a086058a30a0748b5b716e9b21481f993518f3 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 02:33:41 +0530 Subject: [ARDBetaMediathek] Handle new URLs Adapted from https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf Closes #1601 --- yt_dlp/extractor/ard.py | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 048d30f27..f8d57109e 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -388,7 +388,13 @@ class ARDIE(InfoExtractor): class ARDBetaMediathekIE(ARDMediathekBaseIE): - _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?P<mode>player|live|video|sendung|sammlung)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)' + _VALID_URL = r'''(?x)https:// + (?:(?:beta|www)\.)?ardmediathek\.de/ + (?:(?P<client>[^/]+)/)? 
+ (?:player|live|video|(?P<playlist>sendung|sammlung))/ + (?:(?P<display_id>[^?#]+)/)? + (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+)''' + _TESTS = [{ 'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/', 'md5': 'a1dc75a39c61601b980648f7c9f9f71d', @@ -403,6 +409,18 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): 'upload_date': '20200805', 'ext': 'mp4', }, + 'skip': 'Error', + }, { + 'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll', + 'md5': 'f1837e563323b8a642a8ddeff0131f51', + 'info_dict': { + 'id': '10049223', + 'ext': 'mp4', + 'title': 'tagesschau, 20:00 Uhr', + 'timestamp': 1636398000, + 'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b', + 'upload_date': '20211108', + }, }, { 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', 'only_matching': True, @@ -426,6 +444,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): # playlist of type 'sammlung' 'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', 'only_matching': True, + }, { + 'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/', + 'only_matching': True, + }, { + 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet', + 'only_matching': True, }] def _ARD_load_playlist_snipped(self, playlist_id, display_id, client, mode, pageNumber): @@ -525,20 +549,12 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): return self.playlist_result(entries, playlist_title=display_id) def _real_extract(self, url): - mobj = 
self._match_valid_url(url) - video_id = mobj.group('video_id') - display_id = mobj.group('display_id') - if display_id: - display_id = display_id.rstrip('/') - if not display_id: - display_id = video_id - - if mobj.group('mode') in ('sendung', 'sammlung'): - # this is a playlist-URL - return self._ARD_extract_playlist( - url, video_id, display_id, - mobj.group('client'), - mobj.group('mode')) + video_id, display_id, playlist_type, client = self._match_valid_url(url).group( + 'id', 'display_id', 'playlist', 'client') + display_id, client = display_id or video_id, client or 'ard' + + if playlist_type: + return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type) player_page = self._download_json( 'https://api.ardmediathek.de/public-gateway', @@ -574,7 +590,7 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): } } } -}''' % (mobj.group('client'), video_id), +}''' % (client, video_id), }).encode(), headers={ 'Content-Type': 'application/json' })['data']['playerPage'] -- cgit v1.2.3 From 8f122fa070dee737077059747731896a603c9e0b Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 13:11:28 +0530 Subject: [extractor] Extract `average_rating` from JSON-LD Eg: Crunchyroll --- yt_dlp/extractor/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 1565ba5c3..fc28bca2e 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1452,6 +1452,9 @@ class InfoExtractor(object): item_type = e.get('@type') if expected_type is not None and expected_type != item_type: continue + rating = traverse_obj(e, ('aggregateRating', 'ratingValue'), expected_type=float_or_none) + if rating is not None: + info['average_rating'] = rating if item_type in ('TVEpisode', 'Episode'): episode_name = unescapeHTML(e.get('name')) info.update({ -- cgit v1.2.3 From bc8ab44ea08995bd4345c9ca149ba82591b600bb Mon Sep 17 00:00:00 2001 From: pukkandan 
<pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 13:13:27 +0530 Subject: [itv] Fix for Python 3.6/3.7 Closes #1758 --- yt_dlp/extractor/itv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 6e6a3673c..5f1d306f6 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -117,7 +117,7 @@ class ITVIE(InfoExtractor): # See: https://github.com/yt-dlp/yt-dlp/issues/986 platform_tag_subs, featureset_subs = next( ((platform_tag, featureset) - for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets + for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets if try_get(featureset, lambda x: x[2]) == 'outband-webvtt'), (None, None)) @@ -146,7 +146,7 @@ class ITVIE(InfoExtractor): # See: https://github.com/yt-dlp/yt-dlp/issues/986 platform_tag_video, featureset_video = next( ((platform_tag, featureset) - for platform_tag, featuresets in reversed(variants.items()) for featureset in featuresets + for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets if try_get(featureset, lambda x: x[:2]) == ['hls', 'aes']), (None, None)) if not platform_tag_video or not featureset_video: -- cgit v1.2.3 From d52cd2f5cd54bd100a51fca8e4044b4f2a89fade Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 13:15:49 +0530 Subject: [sbs] Fix for movies and livestreams Closes #1640 --- yt_dlp/extractor/sbs.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/sbs.py b/yt_dlp/extractor/sbs.py index 0a806ee4e..4090f6385 100644 --- a/yt_dlp/extractor/sbs.py +++ b/yt_dlp/extractor/sbs.py @@ -10,7 +10,14 @@ from ..utils import ( class SBSIE(InfoExtractor): IE_DESC = 'sbs.com.au' - _VALID_URL = r'https?://(?:www\.)?sbs\.com\.au/(?:ondemand(?:/video/(?:single/)?|.*?\bplay=|/watch/)|news/(?:embeds/)?video/)(?P<id>[0-9]+)' 
+ _VALID_URL = r'''(?x) + https?://(?:www\.)?sbs\.com\.au/(?: + ondemand(?: + /video/(?:single/)?| + /movie/[^/]+/| + .*?\bplay=|/watch/ + )|news/(?:embeds/)?video/ + )(?P<id>[0-9]+)''' _TESTS = [{ # Original URL is handled by the generic IE which finds the iframe: @@ -46,6 +53,13 @@ class SBSIE(InfoExtractor): }, { 'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971', 'only_matching': True, + }, { + 'url': 'https://www.sbs.com.au/ondemand/movie/coherence/1469404227931', + 'only_matching': True, + }, { + 'note': 'Live stream', + 'url': 'https://www.sbs.com.au/ondemand/video/1726824003663/sbs-24x7-live-stream-nsw', + 'only_matching': True, }] def _real_extract(self, url): @@ -75,4 +89,5 @@ class SBSIE(InfoExtractor): 'ie_key': 'ThePlatform', 'id': video_id, 'url': smuggle_url(self._proto_relative_url(theplatform_url), {'force_smil_url': True}), + 'is_live': player_params.get('streamType') == 'live', } -- cgit v1.2.3 From e5d731f35dce2e0eb82d7877d6e1001d5e18ced9 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 17:15:41 +0530 Subject: [tv2] Expand valid URL Closes #1764 --- yt_dlp/extractor/tv2.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/tv2.py b/yt_dlp/extractor/tv2.py index e0851531c..da351eeb0 100644 --- a/yt_dlp/extractor/tv2.py +++ b/yt_dlp/extractor/tv2.py @@ -19,7 +19,7 @@ from ..utils import ( class TV2IE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)' + _VALID_URL = r'https?://(?:www\.)?tv2\.no/v\d*/(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.tv2.no/v/916509/', 'info_dict': { @@ -33,6 +33,9 @@ class TV2IE(InfoExtractor): 'view_count': int, 'categories': list, }, + }, { + 'url': 'http://www.tv2.no/v2/916509', + 'only_matching': True, }] _PROTOCOLS = ('HLS', 'DASH') _GEO_COUNTRIES = ['NO'] -- cgit v1.2.3 From 57dbe8077f8d00e0fffac53669f40cd7d584474f Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 
20:33:55 +0530 Subject: [jsinterp] Fix splice to handle float Needed for new youtube js player f1ca6900 Closes #1767 --- test/test_youtube_signature.py | 4 ++++ yt_dlp/jsinterp.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index df4c36047..3359ac457 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -78,6 +78,10 @@ _NSIG_TESTS = [ 'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js', 'oBo2h5euWy6osrUt', '3DIBbn3qdQ', ), + ( + 'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js', + 'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q', + ), ] diff --git a/yt_dlp/jsinterp.py b/yt_dlp/jsinterp.py index bb2a0ae0b..a6084ab82 100644 --- a/yt_dlp/jsinterp.py +++ b/yt_dlp/jsinterp.py @@ -397,7 +397,7 @@ class JSInterpreter(object): elif member == 'splice': assertion(isinstance(obj, list), 'must be applied on a list') assertion(argvals, 'takes one or more arguments') - index, howMany = (argvals + [len(obj)])[:2] + index, howMany = map(int, (argvals + [len(obj)])[:2]) if index < 0: index += len(obj) add_items = argvals[2:] -- cgit v1.2.3 From ff51ed588fa75256b98ead67bdef7edda08b66f0 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Tue, 23 Nov 2021 20:38:30 +0530 Subject: Clarify video/audio-only formats in -F Related: #1759 --- yt_dlp/YoutubeDL.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5c2d64598..b983b1775 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -847,6 +847,7 @@ class YoutubeDL(object): DELIM = 'blue' ERROR = 'red' WARNING = 'yellow' + SUPPRESS = 'light black' def __format_text(self, out, text, f, fallback=None, *, test_encoding=False): assert out in ('screen', 'err') @@ -3149,22 +3150,17 @@ class YoutubeDL(object): @staticmethod def format_resolution(format, default='unknown'): 
- is_images = format.get('vcodec') == 'none' and format.get('acodec') == 'none' if format.get('vcodec') == 'none' and format.get('acodec') != 'none': return 'audio only' if format.get('resolution') is not None: return format['resolution'] if format.get('width') and format.get('height'): - res = '%dx%d' % (format['width'], format['height']) + return '%dx%d' % (format['width'], format['height']) elif format.get('height'): - res = '%sp' % format['height'] + return '%sp' % format['height'] elif format.get('width'): - res = '%dx?' % format['width'] - elif is_images: - return 'images' - else: - return default - return f'img {res}' if is_images else res + return '%dx?' % format['width'] + return default def _format_note(self, fdict): res = '' @@ -3236,7 +3232,7 @@ class YoutubeDL(object): [ self._format_screen(format_field(f, 'format_id'), self.Styles.ID), format_field(f, 'ext'), - self.format_resolution(f), + format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), format_field(f, 'fps', '\t%d'), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), delim, @@ -3244,9 +3240,15 @@ class YoutubeDL(object): format_field(f, 'tbr', '\t%dk'), shorten_protocol_name(f.get('protocol', '').replace('native', 'n')), delim, - format_field(f, 'vcodec', default='unknown').replace('none', ''), + format_field(f, 'vcodec', default='unknown').replace( + 'none', + 'images' if f.get('acodec') == 'none' + else self._format_screen('audio only', self.Styles.SUPPRESS)), format_field(f, 'vbr', '\t%dk'), - format_field(f, 'acodec', default='unknown').replace('none', ''), + format_field(f, 'acodec', default='unknown').replace( + 'none', + '' if f.get('vcodec') == 'none' + else self._format_screen('video only', self.Styles.SUPPRESS)), format_field(f, 'abr', '\t%dk'), format_field(f, 'asr', '\t%dHz'), join_nonempty( -- cgit v1.2.3 From 9941a1e12750c3df1350c505250ee88a230a208c Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: 
Wed, 24 Nov 2021 08:28:36 +0530 Subject: [PatreonUser] Do not capture RSS URLs Closes #1777 --- yt_dlp/extractor/patreon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/patreon.py b/yt_dlp/extractor/patreon.py index c7d316efc..d3ee071e0 100644 --- a/yt_dlp/extractor/patreon.py +++ b/yt_dlp/extractor/patreon.py @@ -191,7 +191,7 @@ class PatreonIE(InfoExtractor): class PatreonUserIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?P<id>[-_\w\d]+)/?(?:posts/?)?' + _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?!rss)(?P<id>[-\w]+)' _TESTS = [{ 'url': 'https://www.patreon.com/dissonancepod/', -- cgit v1.2.3 From a6213a49250129f25e8f435ff3fadf4a3237f6e1 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 24 Nov 2021 08:31:52 +0530 Subject: [cleanup,youtube] Reorganize Tab and Search extractor inheritances --- yt_dlp/extractor/youtube.py | 1219 ++++++++++++++++++++++--------------------- 1 file changed, 610 insertions(+), 609 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 632129bc6..a8d515f5c 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -44,6 +44,7 @@ from ..utils import ( join_nonempty, mimetype2ext, network_exceptions, + NO_DEFAULT, orderedSet, parse_codecs, parse_count, @@ -3116,508 +3117,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return info +class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): -class YoutubeTabIE(YoutubeBaseInfoExtractor): - IE_DESC = 'YouTube Tabs' - _VALID_URL = r'''(?x) - https?:// - (?:\w+\.)? 
- (?: - youtube(?:kids)?\.com| - %(invidious)s - )/ - (?: - (?P<channel_type>channel|c|user|browse)/| - (?P<not_channel> - feed/|hashtag/| - (?:playlist|watch)\?.*?\blist= - )| - (?!(?:%(reserved_names)s)\b) # Direct URLs - ) - (?P<id>[^/?\#&]+) - ''' % { - 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES, - 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), - } - IE_NAME = 'youtube:tab' - - _TESTS = [{ - 'note': 'playlists, multipage', - 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid', - 'playlist_mincount': 94, - 'info_dict': { - 'id': 'UCqj7Cz7revf5maW9g5pgNcg', - 'title': 'Игорь Клейнер - Playlists', - 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', - 'uploader': 'Игорь Клейнер', - 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg', - }, - }, { - 'note': 'playlists, multipage, different order', - 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', - 'playlist_mincount': 94, - 'info_dict': { - 'id': 'UCqj7Cz7revf5maW9g5pgNcg', - 'title': 'Игорь Клейнер - Playlists', - 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', - 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg', - 'uploader': 'Игорь Клейнер', - }, - }, { - 'note': 'playlists, series', - 'url': 'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', - 'playlist_mincount': 5, - 'info_dict': { - 'id': 'UCYO_jab_esuFRV4b17AJtAw', - 'title': '3Blue1Brown - Playlists', - 'description': 'md5:e1384e8a133307dd10edee76e875d62f', - 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', - 'uploader': '3Blue1Brown', - }, - }, { - 'note': 'playlists, singlepage', - 'url': 'https://www.youtube.com/user/ThirstForScience/playlists', - 'playlist_mincount': 4, - 'info_dict': { - 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ', - 'title': 'ThirstForScience - Playlists', - 'description': 'md5:609399d937ea957b0f53cbffb747a14c', - 'uploader': 'ThirstForScience', - 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ', - } - }, { - 'url': 
'https://www.youtube.com/c/ChristophLaimer/playlists', - 'only_matching': True, - }, { - 'note': 'basic, single video playlist', - 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', - 'info_dict': { - 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', - 'uploader': 'Sergey M.', - 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', - 'title': 'youtube-dl public playlist', - }, - 'playlist_count': 1, - }, { - 'note': 'empty playlist', - 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', - 'info_dict': { - 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', - 'uploader': 'Sergey M.', - 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', - 'title': 'youtube-dl empty playlist', - }, - 'playlist_count': 0, - }, { - 'note': 'Home tab', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured', - 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Home', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 2, - }, { - 'note': 'Videos tab', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos', - 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Videos', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 975, - }, { - 'note': 'Videos tab, sorted by popular', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid', - 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Videos', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 199, - }, { - 'note': 'Playlists tab', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists', - 'info_dict': { - 'id': 
'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Playlists', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 17, - }, { - 'note': 'Community tab', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', - 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Community', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 18, - }, { - 'note': 'Channels tab', - 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels', - 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'title': 'lex will - Channels', - 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', - 'uploader': 'lex will', - 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - }, - 'playlist_mincount': 12, - }, { - 'note': 'Search tab', - 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra', - 'playlist_mincount': 40, - 'info_dict': { - 'id': 'UCYO_jab_esuFRV4b17AJtAw', - 'title': '3Blue1Brown - Search - linear algebra', - 'description': 'md5:e1384e8a133307dd10edee76e875d62f', - 'uploader': '3Blue1Brown', - 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', - }, - }, { - 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', - 'only_matching': True, - }, { - 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', - 'only_matching': True, - }, { - 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', - 'only_matching': True, - }, { - 'note': 'Playlist with deleted videos (#651). 
As a bonus, the video #51 is also twice in this list.', - 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', - 'info_dict': { - 'title': '29C3: Not my department', - 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', - 'uploader': 'Christiaan008', - 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg', - 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268', - }, - 'playlist_count': 96, - }, { - 'note': 'Large playlist', - 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', - 'info_dict': { - 'title': 'Uploads from Cauchemar', - 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', - 'uploader': 'Cauchemar', - 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q', - }, - 'playlist_mincount': 1123, - }, { - 'note': 'even larger playlist, 8832 videos', - 'url': 'http://www.youtube.com/user/NASAgovVideo/videos', - 'only_matching': True, - }, { - 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', - 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', - 'info_dict': { - 'title': 'Uploads from Interstellar Movie', - 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', - 'uploader': 'Interstellar Movie', - 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA', - }, - 'playlist_mincount': 21, - }, { - 'note': 'Playlist with "show unavailable videos" button', - 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q', - 'info_dict': { - 'title': 'Uploads from Phim Siêu Nhân Nhật Bản', - 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q', - 'uploader': 'Phim Siêu Nhân Nhật Bản', - 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q', - }, - 'playlist_mincount': 200, - }, { - 'note': 'Playlist with unavailable videos in page 7', - 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w', - 'info_dict': { - 'title': 'Uploads from BlankTV', - 'id': 'UU8l9frL61Yl5KFOl87nIm2w', - 'uploader': 'BlankTV', - 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w', - }, - 'playlist_mincount': 1000, - }, { - 'note': 
'https://github.com/ytdl-org/youtube-dl/issues/21844', - 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', - 'info_dict': { - 'title': 'Data Analysis with Dr Mike Pound', - 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', - 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA', - 'uploader': 'Computerphile', - 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487', - }, - 'playlist_mincount': 11, - }, { - 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', - 'only_matching': True, - }, { - 'note': 'Playlist URL that does not actually serve a playlist', - 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', - 'info_dict': { - 'id': 'FqZTN594JQw', - 'ext': 'webm', - 'title': "Smiley's People 01 detective, Adventure Series, Action", - 'uploader': 'STREEM', - 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', - 'upload_date': '20150526', - 'license': 'Standard YouTube License', - 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', - 'categories': ['People & Blogs'], - 'tags': list, - 'view_count': int, - 'like_count': int, - 'dislike_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'This video is not available.', - 'add_ie': [YoutubeIE.ie_key()], - }, { - 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', - 'info_dict': { - 'id': '3yImotZU3tw', # This will keep changing - 'ext': 'mp4', - 'title': compat_str, - 'uploader': 'Sky News', - 'uploader_id': 'skynews', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews', - 'upload_date': r're:\d{8}', - 'description': compat_str, - 'categories': ['News & Politics'], - 'tags': list, - 
'like_count': int, - 'dislike_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '], - }, { - 'url': 'https://www.youtube.com/user/TheYoungTurks/live', - 'info_dict': { - 'id': 'a48o2S1cPoo', - 'ext': 'mp4', - 'title': 'The Young Turks - Live Main Show', - 'uploader': 'The Young Turks', - 'uploader_id': 'TheYoungTurks', - 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', - 'upload_date': '20150715', - 'license': 'Standard YouTube License', - 'description': 'md5:438179573adcdff3c97ebb1ee632b891', - 'categories': ['News & Politics'], - 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], - 'like_count': int, - 'dislike_count': int, - }, - 'params': { - 'skip_download': True, - }, - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', - 'only_matching': True, - }, { - 'note': 'A channel that is not live. 
Should raise error', - 'url': 'https://www.youtube.com/user/numberphile/live', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/feed/trending', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/feed/library', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/feed/history', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/feed/subscriptions', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/feed/watch_later', - 'only_matching': True, - }, { - 'note': 'Recommended - redirects to home page.', - 'url': 'https://www.youtube.com/feed/recommended', - 'only_matching': True, - }, { - 'note': 'inline playlist with not always working continuations', - 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/course', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/zsecurity', - 'only_matching': True, - }, { - 'url': 'http://www.youtube.com/NASAgovVideo/videos', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/TheYoungTurks/live', - 'only_matching': True, - }, { - 'url': 'https://www.youtube.com/hashtag/cctv9', - 'info_dict': { - 'id': 'cctv9', - 'title': '#cctv9', - }, - 'playlist_mincount': 350, - }, { - 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU', - 'only_matching': True, - }, { - 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist', - 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', - 'only_matching': True - }, { - 'note': '/browse/ should redirect to /channel/', - 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', - 'only_matching': True - }, { - 'note': 'VLPL, should redirect to playlist?list=PL...', - 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', - 
'info_dict': { - 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', - 'uploader': 'NoCopyrightSounds', - 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!', - 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', - 'title': 'NCS Releases', - }, - 'playlist_mincount': 166, - }, { - 'note': 'Topic, should redirect to playlist?list=UU...', - 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', - 'info_dict': { - 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', - 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', - 'title': 'Uploads from Royalty Free Music - Topic', - 'uploader': 'Royalty Free Music - Topic', - }, - 'expected_warnings': [ - 'A channel/user page was given', - 'The URL does not have a videos tab', - ], - 'playlist_mincount': 101, - }, { - 'note': 'Topic without a UU playlist', - 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg', - 'info_dict': { - 'id': 'UCtFRv9O2AHqOZjjynzrv-xg', - 'title': 'UCtFRv9O2AHqOZjjynzrv-xg', - }, - 'expected_warnings': [ - 'A channel/user page was given', - 'The URL does not have a videos tab', - 'Falling back to channel URL', - ], - 'playlist_mincount': 9, - }, { - 'note': 'Youtube music Album', - 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE', - 'info_dict': { - 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0', - 'title': 'Album - Royalty Free Music Library V2 (50 Songs)', - }, - 'playlist_count': 50, - }, { - 'note': 'unlisted single video playlist', - 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', - 'info_dict': { - 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q', - 'uploader': 'colethedj', - 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', - 'title': 'yt-dlp unlisted playlist test', - 'availability': 'unlisted' - }, - 'playlist_count': 1, - }, { - 'note': 'API Fallback: Recommended - redirects to home page. 
Requires visitorData', - 'url': 'https://www.youtube.com/feed/recommended', - 'info_dict': { - 'id': 'recommended', - 'title': 'recommended', - }, - 'playlist_mincount': 50, - 'params': { - 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} - }, - }, { - 'note': 'API Fallback: /videos tab, sorted by oldest first', - 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid', - 'info_dict': { - 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', - 'title': 'Cody\'sLab - Videos', - 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa', - 'uploader': 'Cody\'sLab', - 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', - }, - 'playlist_mincount': 650, - 'params': { - 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} - }, - }, { - 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', - 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', - 'info_dict': { - 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', - 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', - 'title': 'Uploads from Royalty Free Music - Topic', - 'uploader': 'Royalty Free Music - Topic', - }, - 'expected_warnings': [ - 'A channel/user page was given', - 'The URL does not have a videos tab', - ], - 'playlist_mincount': 101, - 'params': { - 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} - }, - }] - - @classmethod - def suitable(cls, url): - return False if YoutubeIE.suitable(url) else super( - YoutubeTabIE, cls).suitable(url) - - def _extract_channel_id(self, webpage): - channel_id = self._html_search_meta( - 'channelId', webpage, 'channel id', default=None) - if channel_id: - return channel_id - channel_url = self._html_search_meta( - ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url', - 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad', - 'twitter:app:url:googleplay'), webpage, 'channel url') - return self._search_regex( - r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+', - 
channel_url, 'channel id') + def _extract_channel_id(self, webpage): + channel_id = self._html_search_meta( + 'channelId', webpage, 'channel id', default=None) + if channel_id: + return channel_id + channel_url = self._html_search_meta( + ('og:url', 'al:ios:url', 'al:android:url', 'al:web:url', + 'twitter:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad', + 'twitter:app:url:googleplay'), webpage, 'channel url') + return self._search_regex( + r'https?://(?:www\.)?youtube\.com/channel/([^/?#&])+', + channel_url, 'channel id') @staticmethod def _extract_basic_item_renderer(item): @@ -3787,49 +3300,51 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if entry: yield entry ''' - def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): - - def extract_entries(parent_renderer): # this needs to called again for continuation to work with feeds - contents = try_get(parent_renderer, lambda x: x['contents'], list) or [] - for content in contents: - if not isinstance(content, dict): - continue - is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict) - if not is_renderer: - renderer = content.get('richItemRenderer') - if renderer: - for entry in self._rich_entries(renderer): - yield entry - continuation_list[0] = self._extract_continuation(parent_renderer) + def _extract_entries(self, parent_renderer, continuation_list): + # continuation_list is modified in-place with continuation_list = [continuation_token] + continuation_list[:] = [None] + contents = try_get(parent_renderer, lambda x: x['contents'], list) or [] + for content in contents: + if not isinstance(content, dict): + continue + is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict) + if not is_renderer: + renderer = content.get('richItemRenderer') + if renderer: + for entry in self._rich_entries(renderer): + yield entry + continuation_list[0] = self._extract_continuation(parent_renderer) + continue + isr_contents = try_get(is_renderer, lambda x: x['contents'], list) 
or [] + for isr_content in isr_contents: + if not isinstance(isr_content, dict): continue - isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or [] - for isr_content in isr_contents: - if not isinstance(isr_content, dict): - continue - known_renderers = { - 'playlistVideoListRenderer': self._playlist_entries, - 'gridRenderer': self._grid_entries, - 'shelfRenderer': lambda x: self._shelf_entries(x, tab.get('title') != 'Channels'), - 'backstagePostThreadRenderer': self._post_thread_entries, - 'videoRenderer': lambda x: [self._video_entry(x)], - } - for key, renderer in isr_content.items(): - if key not in known_renderers: - continue - for entry in known_renderers[key](renderer): - if entry: - yield entry - continuation_list[0] = self._extract_continuation(renderer) - break - - if not continuation_list[0]: - continuation_list[0] = self._extract_continuation(is_renderer) + known_renderers = { + 'playlistVideoListRenderer': self._playlist_entries, + 'gridRenderer': self._grid_entries, + 'shelfRenderer': lambda x: self._shelf_entries(x), + 'backstagePostThreadRenderer': self._post_thread_entries, + 'videoRenderer': lambda x: [self._video_entry(x)], + } + for key, renderer in isr_content.items(): + if key not in known_renderers: + continue + for entry in known_renderers[key](renderer): + if entry: + yield entry + continuation_list[0] = self._extract_continuation(renderer) + break if not continuation_list[0]: - continuation_list[0] = self._extract_continuation(parent_renderer) + continuation_list[0] = self._extract_continuation(is_renderer) + + if not continuation_list[0]: + continuation_list[0] = self._extract_continuation(parent_renderer) - continuation_list = [None] # Python 2 does not support nonlocal + def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): + continuation_list = [None] + extract_entries = lambda x: self._extract_entries(x, continuation_list) tab_content = try_get(tab, lambda x: x['content'], dict) if not tab_content: 
return @@ -4214,12 +3729,556 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): raise ExtractorError(err_note, expected=True) self.report_warning(err_note, item_id) - @staticmethod - def _smuggle_data(entries, data): - for entry in entries: - if data: - entry['url'] = smuggle_url(entry['url'], data) - yield entry + @staticmethod + def _smuggle_data(entries, data): + for entry in entries: + if data: + entry['url'] = smuggle_url(entry['url'], data) + yield entry + + _SEARCH_PARAMS = None + + def _search_results(self, query, params=NO_DEFAULT): + data = {'query': query} + if params is NO_DEFAULT: + params = self._SEARCH_PARAMS + if params: + data['params'] = params + continuation = {} + for page_num in itertools.count(1): + data.update(continuation) + search = self._extract_response( + item_id='query "%s" page %s' % (query, page_num), ep='search', query=data, + check_get_keys=('contents', 'onResponseReceivedCommands') + ) + if not search: + break + slr_contents = try_get( + search, + (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], + lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), + list) + if not slr_contents: + break + + # Youtube sometimes adds promoted content to searches, + # changing the index location of videos and token. + # So we search through all entries till we find them. 
+ continuation = None + for slr_content in slr_contents: + if not continuation: + continuation = self._extract_continuation({'contents': [slr_content]}) + + isr_contents = try_get( + slr_content, + lambda x: x['itemSectionRenderer']['contents'], + list) + if not isr_contents: + continue + for content in isr_contents: + if not isinstance(content, dict): + continue + video = content.get('videoRenderer') + if not isinstance(video, dict): + continue + video_id = video.get('videoId') + if not video_id: + continue + + yield self._extract_video(video) + + if not continuation: + break + + +class YoutubeTabIE(YoutubeTabBaseInfoExtractor): + IE_DESC = 'YouTube Tabs' + _VALID_URL = r'''(?x: + https?:// + (?:\w+\.)? + (?: + youtube(?:kids)?\.com| + %(invidious)s + )/ + (?: + (?P<channel_type>channel|c|user|browse)/| + (?P<not_channel> + feed/|hashtag/| + (?:playlist|watch)\?.*?\blist= + )| + (?!(?:%(reserved_names)s)\b) # Direct URLs + ) + (?P<id>[^/?\#&]+) + )''' % { + 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES, + 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), + } + IE_NAME = 'youtube:tab' + + _TESTS = [{ + 'note': 'playlists, multipage', + 'url': 'https://www.youtube.com/c/ИгорьКлейнер/playlists?view=1&flow=grid', + 'playlist_mincount': 94, + 'info_dict': { + 'id': 'UCqj7Cz7revf5maW9g5pgNcg', + 'title': 'Игорь Клейнер - Playlists', + 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', + 'uploader': 'Игорь Клейнер', + 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg', + }, + }, { + 'note': 'playlists, multipage, different order', + 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', + 'playlist_mincount': 94, + 'info_dict': { + 'id': 'UCqj7Cz7revf5maW9g5pgNcg', + 'title': 'Игорь Клейнер - Playlists', + 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', + 'uploader_id': 'UCqj7Cz7revf5maW9g5pgNcg', + 'uploader': 'Игорь Клейнер', + }, + }, { + 'note': 'playlists, series', + 'url': 
'https://www.youtube.com/c/3blue1brown/playlists?view=50&sort=dd&shelf_id=3', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'UCYO_jab_esuFRV4b17AJtAw', + 'title': '3Blue1Brown - Playlists', + 'description': 'md5:e1384e8a133307dd10edee76e875d62f', + 'uploader_id': 'UCYO_jab_esuFRV4b17AJtAw', + 'uploader': '3Blue1Brown', + }, + }, { + 'note': 'playlists, singlepage', + 'url': 'https://www.youtube.com/user/ThirstForScience/playlists', + 'playlist_mincount': 4, + 'info_dict': { + 'id': 'UCAEtajcuhQ6an9WEzY9LEMQ', + 'title': 'ThirstForScience - Playlists', + 'description': 'md5:609399d937ea957b0f53cbffb747a14c', + 'uploader': 'ThirstForScience', + 'uploader_id': 'UCAEtajcuhQ6an9WEzY9LEMQ', + } + }, { + 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', + 'only_matching': True, + }, { + 'note': 'basic, single video playlist', + 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', + 'info_dict': { + 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', + 'uploader': 'Sergey M.', + 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', + 'title': 'youtube-dl public playlist', + }, + 'playlist_count': 1, + }, { + 'note': 'empty playlist', + 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', + 'info_dict': { + 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA', + 'uploader': 'Sergey M.', + 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf', + 'title': 'youtube-dl empty playlist', + }, + 'playlist_count': 0, + }, { + 'note': 'Home tab', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/featured', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - Home', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 2, + }, { + 'note': 'Videos tab', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - 
Videos', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 975, + }, { + 'note': 'Videos tab, sorted by popular', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/videos?view=0&sort=p&flow=grid', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - Videos', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 199, + }, { + 'note': 'Playlists tab', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/playlists', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - Playlists', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 17, + }, { + 'note': 'Community tab', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/community', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - Community', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 18, + }, { + 'note': 'Channels tab', + 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w/channels', + 'info_dict': { + 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'lex will - Channels', + 'description': 'md5:2163c5d0ff54ed5f598d6a7e6211e488', + 'uploader': 'lex will', + 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + }, + 'playlist_mincount': 12, + }, { + 'note': 'Search tab', + 'url': 'https://www.youtube.com/c/3blue1brown/search?query=linear%20algebra', + 'playlist_mincount': 40, + 'info_dict': { + 'id': 'UCYO_jab_esuFRV4b17AJtAw', + 'title': '3Blue1Brown - Search - linear algebra', + 'description': 'md5:e1384e8a133307dd10edee76e875d62f', + 'uploader': '3Blue1Brown', + 'uploader_id': 
'UCYO_jab_esuFRV4b17AJtAw', + }, + }, { + 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', + 'only_matching': True, + }, { + 'url': 'https://www.youtubekids.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', + 'only_matching': True, + }, { + 'url': 'https://music.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', + 'only_matching': True, + }, { + 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.', + 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', + 'info_dict': { + 'title': '29C3: Not my department', + 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC', + 'uploader': 'Christiaan008', + 'uploader_id': 'UCEPzS1rYsrkqzSLNp76nrcg', + 'description': 'md5:a14dc1a8ef8307a9807fe136a0660268', + }, + 'playlist_count': 96, + }, { + 'note': 'Large playlist', + 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q', + 'info_dict': { + 'title': 'Uploads from Cauchemar', + 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q', + 'uploader': 'Cauchemar', + 'uploader_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q', + }, + 'playlist_mincount': 1123, + }, { + 'note': 'even larger playlist, 8832 videos', + 'url': 'http://www.youtube.com/user/NASAgovVideo/videos', + 'only_matching': True, + }, { + 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos', + 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA', + 'info_dict': { + 'title': 'Uploads from Interstellar Movie', + 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', + 'uploader': 'Interstellar Movie', + 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA', + }, + 'playlist_mincount': 21, + }, { + 'note': 'Playlist with "show unavailable videos" button', + 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q', + 'info_dict': { + 'title': 'Uploads from Phim Siêu Nhân Nhật Bản', + 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q', + 'uploader': 'Phim Siêu Nhân Nhật Bản', + 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q', + }, + 'playlist_mincount': 200, 
+ }, { + 'note': 'Playlist with unavailable videos in page 7', + 'url': 'https://www.youtube.com/playlist?list=UU8l9frL61Yl5KFOl87nIm2w', + 'info_dict': { + 'title': 'Uploads from BlankTV', + 'id': 'UU8l9frL61Yl5KFOl87nIm2w', + 'uploader': 'BlankTV', + 'uploader_id': 'UC8l9frL61Yl5KFOl87nIm2w', + }, + 'playlist_mincount': 1000, + }, { + 'note': 'https://github.com/ytdl-org/youtube-dl/issues/21844', + 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', + 'info_dict': { + 'title': 'Data Analysis with Dr Mike Pound', + 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', + 'uploader_id': 'UC9-y-6csu5WGm29I7JiwpnA', + 'uploader': 'Computerphile', + 'description': 'md5:7f567c574d13d3f8c0954d9ffee4e487', + }, + 'playlist_mincount': 11, + }, { + 'url': 'https://invidio.us/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc', + 'only_matching': True, + }, { + 'note': 'Playlist URL that does not actually serve a playlist', + 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', + 'info_dict': { + 'id': 'FqZTN594JQw', + 'ext': 'webm', + 'title': "Smiley's People 01 detective, Adventure Series, Action", + 'uploader': 'STREEM', + 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', + 'upload_date': '20150526', + 'license': 'Standard YouTube License', + 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', + 'categories': ['People & Blogs'], + 'tags': list, + 'view_count': int, + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'skip': 'This video is not available.', + 'add_ie': [YoutubeIE.ie_key()], + }, { + 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 
+ 'info_dict': { + 'id': '3yImotZU3tw', # This will keep changing + 'ext': 'mp4', + 'title': compat_str, + 'uploader': 'Sky News', + 'uploader_id': 'skynews', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/skynews', + 'upload_date': r're:\d{8}', + 'description': compat_str, + 'categories': ['News & Politics'], + 'tags': list, + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'expected_warnings': ['Downloading just video ', 'Ignoring subtitle tracks found in '], + }, { + 'url': 'https://www.youtube.com/user/TheYoungTurks/live', + 'info_dict': { + 'id': 'a48o2S1cPoo', + 'ext': 'mp4', + 'title': 'The Young Turks - Live Main Show', + 'uploader': 'The Young Turks', + 'uploader_id': 'TheYoungTurks', + 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', + 'upload_date': '20150715', + 'license': 'Standard YouTube License', + 'description': 'md5:438179573adcdff3c97ebb1ee632b891', + 'categories': ['News & Politics'], + 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/c/CommanderVideoHq/live', + 'only_matching': True, + }, { + 'note': 'A channel that is not live. 
Should raise error', + 'url': 'https://www.youtube.com/user/numberphile/live', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/feed/trending', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/feed/library', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/feed/history', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/feed/subscriptions', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/feed/watch_later', + 'only_matching': True, + }, { + 'note': 'Recommended - redirects to home page.', + 'url': 'https://www.youtube.com/feed/recommended', + 'only_matching': True, + }, { + 'note': 'inline playlist with not always working continuations', + 'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/course', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/zsecurity', + 'only_matching': True, + }, { + 'url': 'http://www.youtube.com/NASAgovVideo/videos', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/TheYoungTurks/live', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/hashtag/cctv9', + 'info_dict': { + 'id': 'cctv9', + 'title': '#cctv9', + }, + 'playlist_mincount': 350, + }, { + 'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU', + 'only_matching': True, + }, { + 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist', + 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'only_matching': True + }, { + 'note': '/browse/ should redirect to /channel/', + 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', + 'only_matching': True + }, { + 'note': 'VLPL, should redirect to playlist?list=PL...', + 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 
'info_dict': { + 'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', + 'uploader': 'NoCopyrightSounds', + 'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!', + 'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg', + 'title': 'NCS Releases', + }, + 'playlist_mincount': 166, + }, { + 'note': 'Topic, should redirect to playlist?list=UU...', + 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', + 'info_dict': { + 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', + 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', + 'title': 'Uploads from Royalty Free Music - Topic', + 'uploader': 'Royalty Free Music - Topic', + }, + 'expected_warnings': [ + 'A channel/user page was given', + 'The URL does not have a videos tab', + ], + 'playlist_mincount': 101, + }, { + 'note': 'Topic without a UU playlist', + 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg', + 'info_dict': { + 'id': 'UCtFRv9O2AHqOZjjynzrv-xg', + 'title': 'UCtFRv9O2AHqOZjjynzrv-xg', + }, + 'expected_warnings': [ + 'A channel/user page was given', + 'The URL does not have a videos tab', + 'Falling back to channel URL', + ], + 'playlist_mincount': 9, + }, { + 'note': 'Youtube music Album', + 'url': 'https://music.youtube.com/browse/MPREb_gTAcphH99wE', + 'info_dict': { + 'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0', + 'title': 'Album - Royalty Free Music Library V2 (50 Songs)', + }, + 'playlist_count': 50, + }, { + 'note': 'unlisted single video playlist', + 'url': 'https://www.youtube.com/playlist?list=PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', + 'info_dict': { + 'uploader_id': 'UC9zHu_mHU96r19o-wV5Qs1Q', + 'uploader': 'colethedj', + 'id': 'PLwL24UFy54GrB3s2KMMfjZscDi1x5Dajf', + 'title': 'yt-dlp unlisted playlist test', + 'availability': 'unlisted' + }, + 'playlist_count': 1, + }, { + 'note': 'API Fallback: Recommended - redirects to home page. 
Requires visitorData', + 'url': 'https://www.youtube.com/feed/recommended', + 'info_dict': { + 'id': 'recommended', + 'title': 'recommended', + }, + 'playlist_mincount': 50, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, + }, { + 'note': 'API Fallback: /videos tab, sorted by oldest first', + 'url': 'https://www.youtube.com/user/theCodyReeder/videos?view=0&sort=da&flow=grid', + 'info_dict': { + 'id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', + 'title': 'Cody\'sLab - Videos', + 'description': 'md5:d083b7c2f0c67ee7a6c74c3e9b4243fa', + 'uploader': 'Cody\'sLab', + 'uploader_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', + }, + 'playlist_mincount': 650, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, + }, { + 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', + 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', + 'info_dict': { + 'id': 'UU9ALqqC4aIeG5iDs7i90Bfw', + 'uploader_id': 'UC9ALqqC4aIeG5iDs7i90Bfw', + 'title': 'Uploads from Royalty Free Music - Topic', + 'uploader': 'Royalty Free Music - Topic', + }, + 'expected_warnings': [ + 'A channel/user page was given', + 'The URL does not have a videos tab', + ], + 'playlist_mincount': 101, + 'params': { + 'skip_download': True, + 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + }, + }] + + @classmethod + def suitable(cls, url): + return False if YoutubeIE.suitable(url) else super( + YoutubeTabIE, cls).suitable(url) def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {}) @@ -4506,77 +4565,24 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): ie=YoutubeTabIE.ie_key()) -class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE): - IE_DESC = 'YouTube searches' +class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): + IE_DESC = 'YouTube search' IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' _SEARCH_PARAMS = None _TESTS = [] - def _search_results(self, query): 
- data = {'query': query} - if self._SEARCH_PARAMS: - data['params'] = self._SEARCH_PARAMS - continuation = {} - for page_num in itertools.count(1): - data.update(continuation) - search = self._extract_response( - item_id='query "%s" page %s' % (query, page_num), ep='search', query=data, - check_get_keys=('contents', 'onResponseReceivedCommands') - ) - if not search: - break - slr_contents = try_get( - search, - (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], - lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), - list) - if not slr_contents: - break - - # Youtube sometimes adds promoted content to searches, - # changing the index location of videos and token. - # So we search through all entries till we find them. - continuation = None - for slr_content in slr_contents: - if not continuation: - continuation = self._extract_continuation({'contents': [slr_content]}) - - isr_contents = try_get( - slr_content, - lambda x: x['itemSectionRenderer']['contents'], - list) - if not isr_contents: - continue - for content in isr_contents: - if not isinstance(content, dict): - continue - video = content.get('videoRenderer') - if not isinstance(video, dict): - continue - video_id = video.get('videoId') - if not video_id: - continue - - yield self._extract_video(video) - - if not continuation: - break - - -class YoutubeSearchDateIE(YoutubeSearchIE): +class YoutubeSearchDateIE(SearchInfoExtractor, YoutubeTabBaseInfoExtractor): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _SEARCH_KEY = 'ytsearchdate' - IE_DESC = 'YouTube searches, newest videos first' + IE_DESC = 'YouTube search, newest videos first' _SEARCH_PARAMS = 'CAI%3D' -class YoutubeSearchURLIE(YoutubeSearchIE): +class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): IE_DESC = 'YouTube search URLs with sorting and filter support' IE_NAME = YoutubeSearchIE.IE_NAME + '_url' - _SEARCH_KEY = None _VALID_URL = 
r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)' - # _MAX_RESULTS = 100 _TESTS = [{ 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', 'playlist_mincount': 5, @@ -4589,15 +4595,10 @@ class YoutubeSearchURLIE(YoutubeSearchIE): 'only_matching': True, }] - @classmethod - def _make_valid_url(cls): - return cls._VALID_URL - def _real_extract(self, url): qs = parse_qs(url) query = (qs.get('search_query') or qs.get('q'))[0] - self._SEARCH_PARAMS = qs.get('sp', ('',))[0] - return self._get_n_results(query, self._MAX_RESULTS) + return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query) class YoutubeFeedsInfoExtractor(YoutubeTabIE): -- cgit v1.2.3 From a61fd4cf6fa23b05729396ae342a5fe9785c231f Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 24 Nov 2021 09:27:59 +0530 Subject: [youtube:search_url] Add playlist/channel support Closes #1213, #1214 --- yt_dlp/extractor/youtube.py | 57 +++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index a8d515f5c..ba135613b 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -3117,6 +3117,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return info + class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): def _extract_channel_id(self, webpage): @@ -3326,6 +3327,8 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): 'shelfRenderer': lambda x: self._shelf_entries(x), 'backstagePostThreadRenderer': self._post_thread_entries, 'videoRenderer': lambda x: [self._video_entry(x)], + 'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}), + 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}), } for key, renderer in isr_content.items(): if key not in known_renderers: @@ 
-3744,50 +3747,19 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): params = self._SEARCH_PARAMS if params: data['params'] = params - continuation = {} + continuation_list = [None] for page_num in itertools.count(1): - data.update(continuation) + data.update(continuation_list[0] or {}) search = self._extract_response( item_id='query "%s" page %s' % (query, page_num), ep='search', query=data, - check_get_keys=('contents', 'onResponseReceivedCommands') - ) - if not search: - break + check_get_keys=('contents', 'onResponseReceivedCommands')) slr_contents = try_get( search, (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), list) - if not slr_contents: - break - - # Youtube sometimes adds promoted content to searches, - # changing the index location of videos and token. - # So we search through all entries till we find them. 
- continuation = None - for slr_content in slr_contents: - if not continuation: - continuation = self._extract_continuation({'contents': [slr_content]}) - - isr_contents = try_get( - slr_content, - lambda x: x['itemSectionRenderer']['contents'], - list) - if not isr_contents: - continue - for content in isr_contents: - if not isinstance(content, dict): - continue - video = content.get('videoRenderer') - if not isinstance(video, dict): - continue - video_id = video.get('videoId') - if not video_id: - continue - - yield self._extract_video(video) - - if not continuation: + yield from self._extract_entries({'contents': slr_contents}, continuation_list) + if not continuation_list[0]: break @@ -4569,14 +4541,15 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): IE_DESC = 'YouTube search' IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' - _SEARCH_PARAMS = None + _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only _TESTS = [] + class YoutubeSearchDateIE(SearchInfoExtractor, YoutubeTabBaseInfoExtractor): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _SEARCH_KEY = 'ytsearchdate' IE_DESC = 'YouTube search, newest videos first' - _SEARCH_PARAMS = 'CAI%3D' + _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): @@ -4590,6 +4563,14 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', } + }, { + 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'python', + 'title': 'python', + } + }, { 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB', 'only_matching': True, -- cgit v1.2.3 From fec41d17a587ff18f375c9ec96ee8bc748b57236 Mon Sep 17 00:00:00 2001 From: Sipherdrakon <64430430+Sipherdrakon@users.noreply.github.com> Date: Wed, 24 Nov 2021 03:01:49 -0500 Subject: [MTV] Improve mgid extraction (#1713) Original PR: 
https://github.com/ytdl-org/youtube-dl/pull/30149 Fixes: #713, #1580, https://github.com/ytdl-org/youtube-dl/issues/30139 Authored by: Sipherdrakon, kikuyan --- yt_dlp/extractor/mtv.py | 20 +++++++++++--------- yt_dlp/extractor/southpark.py | 17 ++++++++--------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py index 4812f11cc..be5de0a70 100644 --- a/yt_dlp/extractor/mtv.py +++ b/yt_dlp/extractor/mtv.py @@ -306,21 +306,23 @@ class MTVServicesInfoExtractor(InfoExtractor): if not mgid: mgid = self._extract_triforce_mgid(webpage) - if not mgid: - mgid = self._search_regex( - r'"videoConfig":{"videoId":"(mgid:.*?)"', webpage, 'mgid', default=None) - - if not mgid: - mgid = self._search_regex( - r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None) - if not mgid: data = self._parse_json(self._search_regex( r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None) main_container = self._extract_child_with_type(data, 'MainContainer') ab_testing = self._extract_child_with_type(main_container, 'ABTesting') video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer') - mgid = video_player['props']['media']['video']['config']['uri'] + if video_player: + mgid = try_get(video_player, lambda x: x['props']['media']['video']['config']['uri']) + else: + flex_wrapper = self._extract_child_with_type(ab_testing or main_container, 'FlexWrapper') + auth_suite_wrapper = self._extract_child_with_type(flex_wrapper, 'AuthSuiteWrapper') + player = self._extract_child_with_type(auth_suite_wrapper or flex_wrapper, 'Player') + if player: + mgid = try_get(player, lambda x: x['props']['videoDetail']['mgid']) + + if not mgid: + raise ExtractorError('Could not extract mgid') return mgid diff --git a/yt_dlp/extractor/southpark.py b/yt_dlp/extractor/southpark.py index d49749467..942a52dcf 100644 --- a/yt_dlp/extractor/southpark.py +++ b/yt_dlp/extractor/southpark.py @@ -6,19 +6,18 @@ 
from .mtv import MTVServicesInfoExtractor class SouthParkIE(MTVServicesInfoExtractor): IE_NAME = 'southpark.cc.com' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/((?:video-)?clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))' _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed' _TESTS = [{ - 'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured', + 'url': 'https://southpark.cc.com/video-clips/d7wr06/south-park-you-all-agreed-to-counseling', 'info_dict': { - 'id': 'a7bff6c2-ed00-11e0-aca6-0026b9414f30', 'ext': 'mp4', - 'title': 'South Park|Bat Daded', - 'description': 'Randy disqualifies South Park by getting into a fight with Bat Dad.', - 'timestamp': 1112760000, - 'upload_date': '20050406', + 'title': 'You All Agreed to Counseling', + 'description': 'Kenny, Cartman, Stan, and Kyle visit Mr. Mackey and ask for his help getting Mrs. Nelson to come back. Mr. 
Mackey reveals the only way to get things back to normal is to get the teachers vaccinated.', + 'timestamp': 1615352400, + 'upload_date': '20210310', }, }, { 'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1', @@ -40,11 +39,11 @@ class SouthParkIE(MTVServicesInfoExtractor): class SouthParkEsIE(SouthParkIE): IE_NAME = 'southpark.cc.com:español' - _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/episodios-en-espanol/(?P<id>.+?)(\?|#|$))' + _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/es/episodios/(?P<id>.+?)(\?|#|$))' _LANG = 'es' _TESTS = [{ - 'url': 'http://southpark.cc.com/episodios-en-espanol/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', + 'url': 'http://southpark.cc.com/es/episodios/s01e01-cartman-consigue-una-sonda-anal#source=351c1323-0b96-402d-a8b9-40d01b2e9bde&position=1&sort=!airdate', 'info_dict': { 'title': 'Cartman Consigue Una Sonda Anal', 'description': 'Cartman Consigue Una Sonda Anal', -- cgit v1.2.3 From da27aeea5c4eb8e381b8cb34d3ead8c6487d1e67 Mon Sep 17 00:00:00 2001 From: Tim <staubichsauger@t-online.de> Date: Wed, 24 Nov 2021 11:08:58 +0100 Subject: [ITV] Fix extractor (#1776) Closes #1775 Authored by: staubichsauger --- yt_dlp/extractor/itv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/itv.py b/yt_dlp/extractor/itv.py index 5f1d306f6..bdd6af688 100644 --- a/yt_dlp/extractor/itv.py +++ b/yt_dlp/extractor/itv.py @@ -147,7 +147,7 @@ class ITVIE(InfoExtractor): platform_tag_video, featureset_video = next( ((platform_tag, featureset) for platform_tag, featuresets in reversed(list(variants.items())) for featureset in featuresets - if try_get(featureset, lambda x: x[:2]) == ['hls', 'aes']), + if set(try_get(featureset, lambda x: x[:2]) or []) == {'aes', 'hls'}), (None, None)) if not platform_tag_video or not featureset_video: raise ExtractorError('No downloads available', expected=True, video_id=video_id) 
-- cgit v1.2.3 From 17b454066224453b0adc795c5a990b35b97c9ffb Mon Sep 17 00:00:00 2001 From: Aurora <nyaurora@disroot.org> Date: Wed, 24 Nov 2021 10:47:53 +0000 Subject: [radiozet] Add extractor (#1593) Authored by: 0xA7404A (Aurora) --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/radiozet.py | 51 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 yt_dlp/extractor/radiozet.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a0f4908f0..4dda3705a 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1168,6 +1168,7 @@ from .radiode import RadioDeIE from .radiojavan import RadioJavanIE from .radiobremen import RadioBremenIE from .radiofrance import RadioFranceIE +from .radiozet import RadioZetPodcastIE from .radiokapital import ( RadioKapitalIE, RadioKapitalShowIE, diff --git a/yt_dlp/extractor/radiozet.py b/yt_dlp/extractor/radiozet.py new file mode 100644 index 000000000..2e1ff36c2 --- /dev/null +++ b/yt_dlp/extractor/radiozet.py @@ -0,0 +1,51 @@ +# coding: utf-8 +from .common import InfoExtractor +from ..utils import ( + traverse_obj, + strip_or_none, +) + + +class RadioZetPodcastIE(InfoExtractor): + _VALID_URL = r'https?://player\.radiozet\.pl\/Podcasty/.*?/(?P<id>.+)' + _TEST = { + 'url': 'https://player.radiozet.pl/Podcasty/Nie-Ma-Za-Co/O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', + 'md5': 'e03665c316b4fbc5f6a8f232948bbba3', + 'info_dict': { + 'id': '42154', + 'display_id': 'O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', + 'title': 'O przedmiotach szkolnych, które przydają się w życiu', + 'description': 'md5:fa72bed49da334b09e5b2f79851f185c', + 'release_timestamp': 1592985480, + 'ext': 'mp3', + 'thumbnail': r're:^https?://.*\.png$', + 'duration': 83, + 'series': 'Nie Ma Za Co', + 'creator': 'Katarzyna Pakosińska', + } + } + + def _call_api(self, podcast_id, display_id): + return self._download_json( + 
f'https://player.radiozet.pl/api/podcasts/getPodcast/(node)/{podcast_id}/(station)/radiozet', + display_id) + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + podcast_id = self._html_search_regex(r'<div.*?\sid="player".*?\sdata-id=[\'"]([^\'"]+)[\'"]', + webpage, 'podcast id') + data = self._call_api(podcast_id, display_id)['data'][0] + + return { + 'id': podcast_id, + 'display_id': display_id, + 'title': strip_or_none(data.get('title')), + 'description': strip_or_none(traverse_obj(data, ('program', 'desc'))), + 'release_timestamp': data.get('published_date'), + 'url': traverse_obj(data, ('player', 'stream')), + 'thumbnail': traverse_obj(data, ('program', 'image', 'original')), + 'duration': traverse_obj(data, ('player', 'duration')), + 'series': strip_or_none(traverse_obj(data, ('program', 'title'))), + 'creator': strip_or_none(traverse_obj(data, ('presenter', 0, 'title'))), + } -- cgit v1.2.3 From eb56d132d21752fa50e0dd2c3bfa3d983ad48655 Mon Sep 17 00:00:00 2001 From: pukkandan <pukkandan.ytdlp@gmail.com> Date: Wed, 24 Nov 2021 18:22:42 +0530 Subject: [cleanup,instagram] Refactor extractors Closes #1561 --- yt_dlp/extractor/instagram.py | 285 +++++++++++++++++------------------------- 1 file changed, 114 insertions(+), 171 deletions(-) diff --git a/yt_dlp/extractor/instagram.py b/yt_dlp/extractor/instagram.py index 0e726423e..1fcf97a19 100644 --- a/yt_dlp/extractor/instagram.py +++ b/yt_dlp/extractor/instagram.py @@ -1,5 +1,4 @@ # coding: utf-8 -from __future__ import unicode_literals import itertools import hashlib @@ -9,7 +8,6 @@ import time from .common import InfoExtractor from ..compat import ( - compat_str, compat_HTTPError, ) from ..utils import ( @@ -19,9 +17,8 @@ from ..utils import ( int_or_none, lowercase_escape, std_headers, - try_get, + traverse_obj, url_or_none, - variadic, urlencode_postdata, ) @@ -72,6 +69,58 @@ class InstagramBaseIE(InfoExtractor): def _real_initialize(self): 
self._login() + def _get_count(self, media, kind, *keys): + return traverse_obj( + media, (kind, 'count'), *((f'edge_media_{key}', 'count') for key in keys), + expected_type=int_or_none) + + def _get_dimension(self, name, media, webpage=None): + return ( + traverse_obj(media, ('dimensions', name), expected_type=int_or_none) + or int_or_none(self._html_search_meta( + (f'og:video:{name}', f'video:{name}'), webpage or '', default=None))) + + def _extract_nodes(self, nodes, is_direct=False): + for idx, node in enumerate(nodes, start=1): + if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True: + continue + + video_id = node.get('shortcode') + + if is_direct: + info = { + 'id': video_id or node['id'], + 'url': node.get('video_url'), + 'width': self._get_dimension('width', node), + 'height': self._get_dimension('height', node), + 'http_headers': { + 'Referer': 'https://www.instagram.com/', + } + } + elif not video_id: + continue + else: + info = { + '_type': 'url', + 'ie_key': 'Instagram', + 'id': video_id, + 'url': f'https://instagram.com/p/{video_id}', + } + + yield { + **info, + 'title': node.get('title') or (f'Video {idx}' if is_direct else None), + 'description': traverse_obj( + node, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str), + 'thumbnail': traverse_obj( + node, 'display_url', 'thumbnail_src', 'display_src', expected_type=url_or_none), + 'duration': float_or_none(node.get('video_duration')), + 'timestamp': int_or_none(node.get('taken_at_timestamp')), + 'view_count': int_or_none(node.get('video_view_count')), + 'comment_count': self._get_count(node, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'), + 'like_count': self._get_count(node, 'likes', 'preview_like'), + } + class InstagramIOSIE(InfoExtractor): IE_DESC = 'IOS instagram:// URL' @@ -234,29 +283,22 @@ class InstagramIE(InstagramBaseIE): return mobj.group('link') def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id 
= mobj.group('id') - url = mobj.group('url') - + video_id, url = self._match_valid_url(url).group('id', 'url') webpage, urlh = self._download_webpage_handle(url, video_id) - if 'www.instagram.com/accounts/login' in urlh.geturl().rstrip('/'): + if 'www.instagram.com/accounts/login' in urlh.geturl(): self.raise_login_required('You need to log in to access this content') - (media, video_url, description, thumbnails, timestamp, uploader, - uploader_id, like_count, comment_count, comments, height, - width) = [None] * 12 - shared_data = self._parse_json( self._search_regex( r'window\._sharedData\s*=\s*({.+?});', webpage, 'shared data', default='{}'), video_id, fatal=False) - if shared_data: - media = try_get( - shared_data, - (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'], - lambda x: x['entry_data']['PostPage'][0]['media']), - dict) + media = traverse_obj( + shared_data, + ('entry_data', 'PostPage', 0, 'graphql', 'shortcode_media'), + ('entry_data', 'PostPage', 0, 'media'), + expected_type=dict) + # _sharedData.entry_data.PostPage is empty when authenticated (see # https://github.com/ytdl-org/youtube-dl/pull/22880) if not media: @@ -265,125 +307,71 @@ class InstagramIE(InstagramBaseIE): r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;', webpage, 'additional data', default='{}'), video_id, fatal=False) - if additional_data: - media = try_get( - additional_data, lambda x: x['graphql']['shortcode_media'], - dict) - if media: - video_url = media.get('video_url') - height = int_or_none(self._html_search_meta(('og:video:height', 'video:height'), webpage)) or try_get(media, lambda x: x['dimensions']['height']) - width = int_or_none(self._html_search_meta(('og:video:width', 'video:width'), webpage)) or try_get(media, lambda x: x['dimensions']['width']) - description = try_get( - media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], - compat_str) or media.get('caption') - title = media.get('title') - display_resources 
= media.get('display_resources') - if not display_resources: - display_resources = [{'src': media.get('display_src')}, {'src': media.get('display_url')}] - duration = float_or_none(media.get('video_duration')) - timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date')) - uploader = try_get(media, lambda x: x['owner']['full_name']) - uploader_id = try_get(media, lambda x: x['owner']['username']) - - def get_count(keys, kind): - for key in variadic(keys): - count = int_or_none(try_get( - media, (lambda x: x['edge_media_%s' % key]['count'], - lambda x: x['%ss' % kind]['count']))) - if count is not None: - return count - - like_count = get_count('preview_like', 'like') - comment_count = get_count( - ('preview_comment', 'to_comment', 'to_parent_comment'), 'comment') - - thumbnails = [{ - 'url': thumbnail['src'], - 'width': thumbnail.get('config_width'), - 'height': thumbnail.get('config_height'), - } for thumbnail in display_resources if thumbnail.get('src')] - - comments = [] - for comment in try_get(media, lambda x: x['edge_media_to_parent_comment']['edges']): - comment_dict = comment.get('node', {}) - comment_text = comment_dict.get('text') - if comment_text: - comments.append({ - 'author': try_get(comment_dict, lambda x: x['owner']['username']), - 'author_id': try_get(comment_dict, lambda x: x['owner']['id']), - 'id': comment_dict.get('id'), - 'text': comment_text, - 'timestamp': int_or_none(comment_dict.get('created_at')), - }) - if not video_url: - edges = try_get( - media, lambda x: x['edge_sidecar_to_children']['edges'], - list) or [] - if edges: - entries = [] - for edge_num, edge in enumerate(edges, start=1): - node = try_get(edge, lambda x: x['node'], dict) - if not node: - continue - node_video_url = url_or_none(node.get('video_url')) - if not node_video_url: - continue - entries.append({ - 'id': node.get('shortcode') or node['id'], - 'title': node.get('title') or 'Video %d' % edge_num, - 'url': node_video_url, - 'thumbnail': 
node.get('display_url'), - 'duration': float_or_none(node.get('video_duration')), - 'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])), - 'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])), - 'view_count': int_or_none(node.get('video_view_count')), - }) - return self.playlist_result( - entries, video_id, - 'Post by %s' % uploader_id if uploader_id else None, - description) + media = traverse_obj(additional_data, ('graphql', 'shortcode_media'), expected_type=dict) or {} + + uploader_id = traverse_obj(media, ('owner', 'username')) or self._search_regex( + r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', webpage, 'uploader id', fatal=False) + + description = ( + traverse_obj(media, ('edge_media_to_caption', 'edges', 0, 'node', 'text'), expected_type=str) + or media.get('caption')) + if not description: + description = self._search_regex( + r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None) + if description is not None: + description = lowercase_escape(description) + video_url = media.get('video_url') if not video_url: + nodes = traverse_obj(media, ('edge_sidecar_to_children', 'edges', ..., 'node'), expected_type=dict) or [] + if nodes: + return self.playlist_result( + self._extract_nodes(nodes, True), video_id, + 'Post by %s' % uploader_id if uploader_id else None, description) + video_url = self._og_search_video_url(webpage, secure=False) formats = [{ 'url': video_url, - 'width': width, - 'height': height, + 'width': self._get_dimension('width', media, webpage), + 'height': self._get_dimension('height', media, webpage), }] - dash = try_get(media, lambda x: x['dash_info']['video_dash_manifest']) + dash = traverse_obj(media, ('dash_info', 'video_dash_manifest')) if dash: formats.extend(self._parse_mpd_formats(self._parse_xml(dash, video_id), mpd_id='dash')) self._sort_formats(formats) - if not uploader_id: - uploader_id = self._search_regex( - r'"owner"\s*:\s*{\s*"username"\s*:\s*"(.+?)"', - webpage, 'uploader 
id', fatal=False) - - if not description: - description = self._search_regex( - r'"caption"\s*:\s*"(.+?)"', webpage, 'description', default=None) - if description is not None: - description = lowercase_escape(description) - - if not thumbnails: - thumbnails = self._og_search_thumbnail(webpage) + comments = [{ + 'author': traverse_obj(comment_dict, ('node', 'owner', 'username')), + 'author_id': traverse_obj(comment_dict, ('node', 'owner', 'id')), + 'id': traverse_obj(comment_dict, ('node', 'id')), + 'text': traverse_obj(comment_dict, ('node', 'text')), + 'timestamp': traverse_obj(comment_dict, ('node', 'created_at'), expected_type=int_or_none), + } for comment_dict in traverse_obj(media, ('edge_media_to_parent_comment', 'edges'))] + + display_resources = ( + media.get('display_resources') + or [{'src': media.get(key)} for key in ('display_src', 'display_url')] + or [{'src': self._og_search_thumbnail(webpage)}]) + thumbnails = [{ + 'url': thumbnail['src'], + 'width': thumbnail.get('config_width'), + 'height': thumbnail.get('config_height'), + } for thumbnail in display_resources if thumbnail.get('src')] return { 'id': video_id, 'formats': formats, - 'ext': 'mp4', - 'title': title or 'Video by %s' % uploader_id, + 'title': media.get('title') or 'Video by %s' % uploader_id, 'description': description, - 'duration': duration, - 'thumbnails': thumbnails, - 'timestamp': timestamp, + 'duration': float_or_none(media.get('video_duration')), + 'timestamp': traverse_obj(media, 'taken_at_timestamp', 'date', expected_type=int_or_none), 'uploader_id': uploader_id, - 'uploader': uploader, - 'like_count': like_count, - 'comment_count': comment_count, + 'uploader': traverse_obj(media, ('owner', 'full_name')), + 'like_count': self._get_count(media, 'likes', 'preview_like'), + 'comment_count': self._get_count(media, 'comments', 'preview_comment', 'to_comment', 'to_parent_comment'), 'comments': comments, + 'thumbnails': thumbnails, 'http_headers': { 'Referer': 
'https://www.instagram.com/', } @@ -402,10 +390,6 @@ class InstagramPlaylistBaseIE(InstagramBaseIE): def _extract_graphql(self, data, url): # Parses GraphQL queries containing videos and generates a playlist. - def get_count(suffix): - return int_or_none(try_get( - node, lambda x: x['edge_media_' + suffix]['count'])) - uploader_id = self._match_id(url) csrf_token = data['config']['csrf_token'] rhx_gis = data.get('rhx_gis') or '3c7ca9dcefcf966d11dacf1f151335e8' @@ -454,55 +438,14 @@ class InstagramPlaylistBaseIE(InstagramBaseIE): continue raise - edges = media.get('edges') - if not edges or not isinstance(edges, list): - break - - for edge in edges: - node = edge.get('node') - if not node or not isinstance(node, dict): - continue - if node.get('__typename') != 'GraphVideo' and node.get('is_video') is not True: - continue - video_id = node.get('shortcode') - if not video_id: - continue - - info = self.url_result( - 'https://instagram.com/p/%s/' % video_id, - ie=InstagramIE.ie_key(), video_id=video_id) - - description = try_get( - node, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'], - compat_str) - thumbnail = node.get('thumbnail_src') or node.get('display_src') - timestamp = int_or_none(node.get('taken_at_timestamp')) - - comment_count = get_count('to_comment') - like_count = get_count('preview_like') - view_count = int_or_none(node.get('video_view_count')) - - info.update({ - 'description': description, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'comment_count': comment_count, - 'like_count': like_count, - 'view_count': view_count, - }) - - yield info - - page_info = media.get('page_info') - if not page_info or not isinstance(page_info, dict): - break - - has_next_page = page_info.get('has_next_page') - if not has_next_page: + nodes = traverse_obj(media, ('edges', ..., 'node'), expected_type=dict) or [] + if not nodes: break + yield from self._extract_nodes(nodes) - cursor = page_info.get('end_cursor') - if not cursor or not 
isinstance(cursor, compat_str): + has_next_page = traverse_obj(media, ('page_info', 'has_next_page')) + cursor = traverse_obj(media, ('page_info', 'end_cursor'), expected_type=str) + if not has_next_page or not cursor: break def _real_extract(self, url): -- cgit v1.2.3 From 883ecd54949fa90174094628bf002f179edf6767 Mon Sep 17 00:00:00 2001 From: cntrl-s <65956966+cntrl-s@users.noreply.github.com> Date: Sat, 27 Nov 2021 00:05:39 +0530 Subject: Streamff extractor (#1736) Closes #1359 Authored by: cntrl-s --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/streamff.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 yt_dlp/extractor/streamff.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 4dda3705a..163efc748 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1404,6 +1404,7 @@ from .streamable import StreamableIE from .streamanity import StreamanityIE from .streamcloud import StreamcloudIE from .streamcz import StreamCZIE +from .streamff import StreamFFIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE from .stripchat import StripchatIE diff --git a/yt_dlp/extractor/streamff.py b/yt_dlp/extractor/streamff.py new file mode 100644 index 000000000..6b190bb3b --- /dev/null +++ b/yt_dlp/extractor/streamff.py @@ -0,0 +1,31 @@ +# coding: utf-8 +from .common import InfoExtractor +from ..utils import int_or_none, parse_iso8601 + + +class StreamFFIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?streamff\.com/v/(?P<id>[a-zA-Z0-9]+)' + + _TESTS = [{ + 'url': 'https://streamff.com/v/55cc94', + 'md5': '8745a67bb5e5c570738efe7983826370', + 'info_dict': { + 'id': '55cc94', + 'ext': 'mp4', + 'title': '55cc94', + 'timestamp': 1634764643, + 'upload_date': '20211020', + 'view_count': int, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + json_data = 
self._download_json(f'https://streamff.com/api/videos/{video_id}', video_id) + return { + 'id': video_id, + 'title': json_data.get('name') or video_id, + 'url': 'https://streamff.com/%s' % json_data['videoLink'], + 'view_count': int_or_none(json_data.get('views')), + 'timestamp': parse_iso8601(json_data.get('date')), + } -- cgit v1.2.3 From 18d6dd4e0194211c4f3238fe441ebe0c1fdbc167 Mon Sep 17 00:00:00 2001 From: Grabien <60237587+Grabien@users.noreply.github.com> Date: Fri, 26 Nov 2021 21:00:04 +0200 Subject: [extractor/breitbart] Breitbart.com website support (#1434) Authored by: Grabien --- yt_dlp/extractor/breitbart.py | 39 +++++++++++++++++++++++++++++++++++++++ yt_dlp/extractor/extractors.py | 1 + 2 files changed, 40 insertions(+) create mode 100644 yt_dlp/extractor/breitbart.py diff --git a/yt_dlp/extractor/breitbart.py b/yt_dlp/extractor/breitbart.py new file mode 100644 index 000000000..f50f719dc --- /dev/null +++ b/yt_dlp/extractor/breitbart.py @@ -0,0 +1,39 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class BreitBartIE(InfoExtractor): + _VALID_URL = r'https?:\/\/(?:www\.)breitbart.com/videos/v/(?P<id>[^/]+)' + _TESTS = [{ + 'url': 'https://www.breitbart.com/videos/v/5cOz1yup/?pl=Ij6NDOji', + 'md5': '0aa6d1d6e183ac5ca09207fe49f17ade', + 'info_dict': { + 'id': '5cOz1yup', + 'ext': 'mp4', + 'title': 'Watch \u2013 Clyburn: Statues in Congress Have to Go Because they Are Honoring Slavery', + 'description': 'md5:bac35eb0256d1cb17f517f54c79404d5', + 'thumbnail': 'https://cdn.jwplayer.com/thumbs/5cOz1yup-1920.jpg', + 'age_limit': 0, + } + }, { + 'url': 'https://www.breitbart.com/videos/v/eaiZjVOn/', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + + formats = self._extract_m3u8_formats(f'https://cdn.jwplayer.com/manifests/{video_id}.m3u8', video_id, ext='mp4') + self._sort_formats(formats) + return { + 'id': video_id, + 
'title': self._og_search_title( + webpage, default=None) or self._html_search_regex( + r'(?s)<title>(.*?)', webpage, 'video title'), + 'description': self._og_search_description(webpage), + 'thumbnail': self._og_search_thumbnail(webpage), + 'age_limit': self._rta_search(webpage), + 'formats': formats + } diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 163efc748..ed8a23e72 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -179,6 +179,7 @@ from .br import ( ) from .bravotv import BravoTVIE from .breakcom import BreakIE +from .breitbart import BreitBartIE from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, -- cgit v1.2.3 From cf1f13b817d88eb7d4b449f20cbad3215030e35f Mon Sep 17 00:00:00 2001 From: shirt <2660574+shirt-dev@users.noreply.github.com> Date: Sat, 27 Nov 2021 00:15:59 -0500 Subject: [generic] Support mpd manifests without extension (#1806) Authored by: shirt-dev --- yt_dlp/extractor/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 9c7fa4a21..ae0ebb14a 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2601,6 +2601,8 @@ class GenericIE(InfoExtractor): subtitles = {} if format_id.endswith('mpegurl'): formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4') + elif format_id.endswith('mpd') or format_id.endswith('dash+xml'): + formats, subtitles = self._extract_mpd_formats_and_subtitles(url, video_id) elif format_id == 'f4m': formats = self._extract_f4m_formats(url, video_id) else: -- cgit v1.2.3 From 3938a9212c3d1aa30a7f6db12b997d94afd8b646 Mon Sep 17 00:00:00 2001 From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com> Date: Sat, 27 Nov 2021 12:01:42 +0530 Subject: [CPTwentyFour] Add extractor (#1769) Closes #1768 Authored by: Ashish0804 --- yt_dlp/extractor/extractors.py | 5 ++++- yt_dlp/extractor/ninecninemedia.py | 35 
++++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index ed8a23e72..a277bf722 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -943,7 +943,10 @@ from .niconico import ( NicovideoSearchIE, NicovideoSearchURLIE, ) -from .ninecninemedia import NineCNineMediaIE +from .ninecninemedia import ( + NineCNineMediaIE, + CPTwentyFourIE, +) from .ninegag import NineGagIE from .ninenow import NineNowIE from .nintendo import NintendoIE diff --git a/yt_dlp/extractor/ninecninemedia.py b/yt_dlp/extractor/ninecninemedia.py index 4aaf21a12..781842721 100644 --- a/yt_dlp/extractor/ninecninemedia.py +++ b/yt_dlp/extractor/ninecninemedia.py @@ -1,7 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals - from .common import InfoExtractor from ..utils import ( float_or_none, @@ -99,3 +98,37 @@ class NineCNineMediaIE(InfoExtractor): } return info + + +class CPTwentyFourIE(InfoExtractor): + IE_NAME = 'cp24' + _GEO_COUNTRIES = ['CA'] + _VALID_URL = r'https?://(?:www\.)?cp24\.com/news/(?P[^?#]+)' + + _TESTS = [{ + 'url': 'https://www.cp24.com/news/video-shows-atm-being-ripped-out-of-business-by-pickup-truck-driver-in-mississauga-1.5676877', + 'info_dict': { + 'id': '2328005', + 'ext': 'mp4', + 'title': 'WATCH: Truck rips ATM from Mississauga business', + 'description': 'md5:cf7498480885f080a754389a2b2f7073', + 'timestamp': 1637618377, + 'episode_number': None, + 'season': 'Season 0', + 'season_number': 0, + 'season_id': 57974, + 'series': 'CTV News Toronto', + 'duration': 26.86, + 'thumbnail': 'http://images2.9c9media.com/image_asset/2014_11_5_2eb609a0-475b-0132-fbd6-34b52f6f1279_jpg_2000x1125.jpg', + 'upload_date': '20211122', + }, + 'params': {'skip_download': True, 'format': 'bv'} + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + id, destination = 
self._search_regex( + r'getAuthStates\("(?P[^"]+)",\s?"(?P[^"]+)"\);', + webpage, 'video id and destination', group=('id', 'destination')) + return self.url_result(f'9c9media:{destination}:{id}', ie=NineCNineMediaIE.ie_key(), video_id=id) -- cgit v1.2.3 From 359df0fc423b4a5d5af8113d42648fdea22e81ea Mon Sep 17 00:00:00 2001 From: Henrik Heimbuerger Date: Sat, 27 Nov 2021 07:51:32 +0100 Subject: [nebula] Add NebulaCollectionIE and rewrite extractor (#1694) Closes #1690 Authored by: hheimbuerger --- yt_dlp/extractor/extractors.py | 5 +- yt_dlp/extractor/nebula.py | 370 +++++++++++++++++++++++------------------ 2 files changed, 215 insertions(+), 160 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a277bf722..2fb9515c0 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -889,7 +889,10 @@ from .ndr import ( NJoyEmbedIE, ) from .ndtv import NDTVIE -from .nebula import NebulaIE +from .nebula import ( + NebulaIE, + NebulaCollectionIE, +) from .nerdcubed import NerdCubedFeedIE from .netzkino import NetzkinoIE from .neteasemusic import ( diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index 9698a358e..d235805c3 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -1,22 +1,163 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import json import time +import urllib -from urllib.error import HTTPError -from .common import InfoExtractor -from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_quote from ..utils import ( ExtractorError, parse_iso8601, try_get, - urljoin, ) +from .common import InfoExtractor + + +class NebulaBaseIE(InfoExtractor): + _NETRC_MACHINE = 'watchnebula' + + _nebula_api_token = None + _nebula_bearer_token = None + _zype_access_token = None + + def _perform_nebula_auth(self): + username, password = self._get_login_info() + if not (username and password): + self.raise_login_required() 
+ + data = json.dumps({'email': username, 'password': password}).encode('utf8') + response = self._download_json( + 'https://api.watchnebula.com/api/v1/auth/login/', + data=data, fatal=False, video_id=None, + headers={ + 'content-type': 'application/json', + # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint + 'cookie': '' + }, + note='Logging in to Nebula with supplied credentials', + errnote='Authentication failed or rejected') + if not response or not response.get('key'): + self.raise_login_required() + + # save nebula token as cookie + self._set_cookie( + 'nebula.app', 'nebula-auth', + urllib.parse.quote( + json.dumps({ + "apiToken": response["key"], + "isLoggingIn": False, + "isLoggingOut": False, + }, separators=(",", ":"))), + expire_time=int(time.time()) + 86400 * 365, + ) + + return response['key'] + + def _retrieve_nebula_api_token(self): + """ + Check cookie jar for valid token. Try to authenticate using credentials if no valid token + can be found in the cookie jar. 
+ """ + nebula_cookies = self._get_cookies('https://nebula.app') + nebula_cookie = nebula_cookies.get('nebula-auth') + if nebula_cookie: + self.to_screen('Authenticating to Nebula with token from cookie jar') + nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value) + nebula_api_token = self._parse_json(nebula_cookie_value, None).get('apiToken') + if nebula_api_token: + return nebula_api_token + + return self._perform_nebula_auth() + def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''): + assert method in ('GET', 'POST',) + assert auth_type in ('api', 'bearer',) -class NebulaIE(InfoExtractor): + def inner_call(): + authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}' + return self._download_json( + url, video_id, note=note, headers={'Authorization': authorization}, + data=b'' if method == 'POST' else None) + + try: + return inner_call() + except ExtractorError as exc: + # if 401 or 403, attempt credential re-auth and retry + if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403): + self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}') + self._login() + return inner_call() + else: + raise + + def _fetch_nebula_bearer_token(self): + """ + Get a Bearer token for the Nebula API. This will be required to fetch video meta data. + """ + response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/', + method='POST', + note='Authorizing to Nebula') + return response['token'] + def _fetch_zype_access_token(self): + """ + Get a Zype access token, which is required to access video streams -- in our case: to + generate video URLs. 
+ """ + user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token') + + access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str) + if not access_token: + if try_get(user_object, lambda x: x['is_subscribed'], bool): + # TODO: Reimplement the same Zype token polling the Nebula frontend implements + # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532 + raise ExtractorError( + 'Unable to extract Zype access token from Nebula API authentication endpoint. ' + 'Open an arbitrary video in a browser with this account to generate a token', + expected=True) + raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint') + return access_token + + def _build_video_info(self, episode): + zype_id = episode['zype_id'] + zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}' + channel_slug = episode['channel_slug'] + return { + 'id': episode['zype_id'], + 'display_id': episode['slug'], + '_type': 'url_transparent', + 'ie_key': 'Zype', + 'url': zype_video_url, + 'title': episode['title'], + 'description': episode['description'], + 'timestamp': parse_iso8601(episode['published_at']), + 'thumbnails': [{ + # 'id': tn.get('name'), # this appears to be null + 'url': tn['original'], + 'height': key, + } for key, tn in episode['assets']['thumbnail'].items()], + 'duration': episode['duration'], + 'channel': episode['channel_title'], + 'channel_id': channel_slug, + 'channel_url': f'https://nebula.app/{channel_slug}', + 'uploader': episode['channel_title'], + 'uploader_id': channel_slug, + 'uploader_url': f'https://nebula.app/{channel_slug}', + 'series': episode['channel_title'], + 'creator': episode['channel_title'], + } + + def _login(self): + self._nebula_api_token = self._retrieve_nebula_api_token() + self._nebula_bearer_token = self._fetch_nebula_bearer_token() + 
self._zype_access_token = self._fetch_zype_access_token() + + def _real_initialize(self): + self._login() + + +class NebulaIE(NebulaBaseIE): _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P[-\w]+)' _TESTS = [ { @@ -30,12 +171,13 @@ class NebulaIE(InfoExtractor): 'upload_date': '20180731', 'timestamp': 1533009600, 'channel': 'Lindsay Ellis', + 'channel_id': 'lindsayellis', 'uploader': 'Lindsay Ellis', + 'uploader_id': 'lindsayellis', }, 'params': { 'usenetrc': True, }, - 'skip': 'All Nebula content requires authentication', }, { 'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore', @@ -47,13 +189,14 @@ class NebulaIE(InfoExtractor): 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.', 'upload_date': '20200327', 'timestamp': 1585348140, - 'channel': 'The Logistics of D-Day', - 'uploader': 'The Logistics of D-Day', + 'channel': 'Real Engineering', + 'channel_id': 'realengineering', + 'uploader': 'Real Engineering', + 'uploader_id': 'realengineering', }, 'params': { 'usenetrc': True, }, - 'skip': 'All Nebula content requires authentication', }, { 'url': 'https://nebula.app/videos/money-episode-1-the-draw', @@ -66,173 +209,82 @@ class NebulaIE(InfoExtractor): 'upload_date': '20200323', 'timestamp': 1584980400, 'channel': 'Tom Scott Presents: Money', + 'channel_id': 'tom-scott-presents-money', 'uploader': 'Tom Scott Presents: Money', + 'uploader_id': 'tom-scott-presents-money', }, 'params': { 'usenetrc': True, }, - 'skip': 'All Nebula content requires authentication', }, { 'url': 'https://watchnebula.com/videos/money-episode-1-the-draw', 'only_matching': True, }, ] - _NETRC_MACHINE = 'watchnebula' - _nebula_token = None + def _fetch_video_metadata(self, slug): + return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/', + video_id=slug, + auth_type='bearer', + note='Fetching video meta data') - def _retrieve_nebula_auth(self): - """ - 
Log in to Nebula, and returns a Nebula API token - """ + def _real_extract(self, url): + slug = self._match_id(url) + video = self._fetch_video_metadata(slug) + return self._build_video_info(video) - username, password = self._get_login_info() - if not (username and password): - self.raise_login_required() - self.report_login() - data = json.dumps({'email': username, 'password': password}).encode('utf8') - response = self._download_json( - 'https://api.watchnebula.com/api/v1/auth/login/', - data=data, fatal=False, video_id=None, - headers={ - 'content-type': 'application/json', - # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint - 'cookie': '' +class NebulaCollectionIE(NebulaBaseIE): + IE_NAME = 'nebula:collection' + _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P[-\w]+)' + _TESTS = [ + { + 'url': 'https://nebula.app/tom-scott-presents-money', + 'info_dict': { + 'id': 'tom-scott-presents-money', + 'title': 'Tom Scott Presents: Money', + 'description': 'Tom Scott hosts a series all about trust, negotiation and money.', }, - note='Authenticating to Nebula with supplied credentials', - errnote='Authentication failed or rejected') - if not response or not response.get('key'): - self.raise_login_required() - - # save nebula token as cookie - self._set_cookie( - 'nebula.app', 'nebula-auth', - compat_urllib_parse_quote( - json.dumps({ - "apiToken": response["key"], - "isLoggingIn": False, - "isLoggingOut": False, - }, separators=(",", ":"))), - expire_time=int(time.time()) + 86400 * 365, - ) - - return response['key'] - - def _retrieve_zype_api_key(self, page_url, display_id): - """ - Retrieves the Zype API key - """ - - # Find the js that has the API key from the webpage and download it - webpage = self._download_webpage(page_url, video_id=display_id) - main_script_relpath = self._search_regex( - r']*src="(?P[^"]*main.[0-9a-f]*.chunk.js)"[^>]*>', webpage, - group='script_relpath', name='script relative path', 
fatal=True) - main_script_abspath = urljoin(page_url, main_script_relpath) - main_script = self._download_webpage(main_script_abspath, video_id=display_id, - note='Retrieving Zype API key') - - api_key = self._search_regex( - r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P[\w-]*)"', main_script, - group='api_key', name='API key', fatal=True) - - return api_key - - def _call_zype_api(self, path, params, video_id, api_key, note): - """ - A helper for making calls to the Zype API. - """ - query = {'api_key': api_key, 'per_page': 1} - query.update(params) - return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note) - - def _call_nebula_api(self, path, video_id, access_token, note): - """ - A helper for making calls to the Nebula API. - """ - return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={ - 'Authorization': 'Token {access_token}'.format(access_token=access_token) - }, note=note) - - def _fetch_zype_access_token(self, video_id): - try: - user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token') - except ExtractorError as exc: - # if 401, attempt credential auth and retry - if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.code == 401: - self._nebula_token = self._retrieve_nebula_auth() - user_object = self._call_nebula_api('/auth/user/', video_id, self._nebula_token, note='Retrieving Zype access token') - else: - raise - - access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str) - if not access_token: - if try_get(user_object, lambda x: x['is_subscribed'], bool): - # TODO: Reimplement the same Zype token polling the Nebula frontend implements - # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532 - raise ExtractorError( - 'Unable to extract Zype access token from Nebula API authentication endpoint. 
' - 'Open an arbitrary video in a browser with this account to generate a token', - expected=True) - raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint') - return access_token - - def _extract_channel_title(self, video_meta): - # TODO: Implement the API calls giving us the channel list, - # so that we can do the title lookup and then figure out the channel URL - categories = video_meta.get('categories', []) if video_meta else [] - # the channel name is the value of the first category - for category in categories: - if category.get('value'): - return category['value'][0] - - def _real_initialize(self): - # check cookie jar for valid token - nebula_cookies = self._get_cookies('https://nebula.app') - nebula_cookie = nebula_cookies.get('nebula-auth') - if nebula_cookie: - self.to_screen('Authenticating to Nebula with token from cookie jar') - nebula_cookie_value = compat_urllib_parse_unquote(nebula_cookie.value) - self._nebula_token = self._parse_json(nebula_cookie_value, None).get('apiToken') + 'playlist_count': 5, + 'params': { + 'usenetrc': True, + }, + }, { + 'url': 'https://nebula.app/lindsayellis', + 'info_dict': { + 'id': 'lindsayellis', + 'title': 'Lindsay Ellis', + 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.', + }, + 'playlist_mincount': 100, + 'params': { + 'usenetrc': True, + }, + }, + ] - # try to authenticate using credentials if no valid token has been found - if not self._nebula_token: - self._nebula_token = self._retrieve_nebula_auth() + def _generate_playlist_entries(self, collection_id, channel): + episodes = channel['episodes']['results'] + for page_num in itertools.count(2): + for episode in episodes: + yield self._build_video_info(episode) + next_url = channel['episodes']['next'] + if not next_url: + break + channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer', + note=f'Retrieving channel page {page_num}') + episodes = 
channel['episodes']['results'] def _real_extract(self, url): - display_id = self._match_id(url) - api_key = self._retrieve_zype_api_key(url, display_id) - - response = self._call_zype_api('/videos', {'friendly_title': display_id}, - display_id, api_key, note='Retrieving metadata from Zype') - if len(response.get('response') or []) != 1: - raise ExtractorError('Unable to find video on Zype API') - video_meta = response['response'][0] - - video_id = video_meta['_id'] - zype_access_token = self._fetch_zype_access_token(display_id) + collection_id = self._match_id(url) + channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/' + channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel') + channel_details = channel['details'] - channel_title = self._extract_channel_title(video_meta) - - return { - 'id': video_id, - 'display_id': display_id, - '_type': 'url_transparent', - 'ie_key': 'Zype', - 'url': 'https://player.zype.com/embed/%s.html?access_token=%s' % (video_id, zype_access_token), - 'title': video_meta.get('title'), - 'description': video_meta.get('description'), - 'timestamp': parse_iso8601(video_meta.get('published_at')), - 'thumbnails': [{ - 'id': tn.get('name'), # this appears to be null - 'url': tn['url'], - 'width': tn.get('width'), - 'height': tn.get('height'), - } for tn in video_meta.get('thumbnails', [])], - 'duration': video_meta.get('duration'), - 'channel': channel_title, - 'uploader': channel_title, # we chose uploader = channel name - # TODO: uploader_url, channel_id, channel_url - } + return self.playlist_result( + entries=self._generate_playlist_entries(collection_id, channel), + playlist_id=collection_id, + playlist_title=channel_details['title'], + playlist_description=channel_details['description'] + ) -- cgit v1.2.3 From 2abf0815542dd44724b577752fb9339e76816057 Mon Sep 17 00:00:00 2001 From: Yakabuff Date: Sat, 27 Nov 2021 02:04:51 -0500 Subject: [xvideos] Fix extractor 
(#1799) Closes #1788 Authored by: Yakabuff --- yt_dlp/extractor/xvideos.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/yt_dlp/extractor/xvideos.py b/yt_dlp/extractor/xvideos.py index ef45eb929..ab07f01af 100644 --- a/yt_dlp/extractor/xvideos.py +++ b/yt_dlp/extractor/xvideos.py @@ -83,9 +83,7 @@ class XVideosIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - - webpage = self._download_webpage( - 'https://www.xvideos.com/video%s/' % video_id, video_id) + webpage = self._download_webpage(url, video_id) mobj = re.search(r'

(.+?)

', webpage) if mobj: -- cgit v1.2.3 From 4e4ba1d75f250240725c0012edbd88cc0a7ead4b Mon Sep 17 00:00:00 2001 From: chio0hai <94094996+chio0hai@users.noreply.github.com> Date: Sat, 27 Nov 2021 02:10:29 -0500 Subject: [redgifs] Add extractor (#1631) Closes #1504 Authored by: chio0hai --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/redgifs.py | 94 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 yt_dlp/extractor/redgifs.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 2fb9515c0..dd9edff0e 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1216,6 +1216,7 @@ from .redbulltv import ( RedBullIE, ) from .reddit import RedditIE +from .redgifs import RedGifsIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py new file mode 100644 index 000000000..1257d1344 --- /dev/null +++ b/yt_dlp/extractor/redgifs.py @@ -0,0 +1,94 @@ +# coding: utf-8 + +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + int_or_none, + qualities, + try_get, +) + + +class RedGifsIE(InfoExtractor): + _VALID_URL = r'https?://(?:(?:www|thumbs2?)\.)?redgifs\.com/(?:watch/)?(?P[^-/?#\.]+)' + _FORMATS = { + 'gif': 250, + 'sd': 480, + 'hd': None, + } + _TESTS = [{ + 'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent', + 'info_dict': { + 'id': 'squeakyhelplesswisent', + 'ext': 'mp4', + 'title': 'Hotwife Legs Thick', + 'timestamp': 1636287915, + 'upload_date': '20211107', + 'uploader': 'ignored52', + 'duration': 16, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + } + }, { + 'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0', + 'info_dict': { + 'id': 'squeakyhelplesswisent', + 'ext': 'mp4', + 'title': 'Hotwife Legs Thick', + 'timestamp': 1636287915, + 'upload_date': '20211107', + 'uploader': 
'ignored52', + 'duration': 16, + 'view_count': int, + 'like_count': int, + 'categories': list, + 'age_limit': 18, + } + }] + + def _real_extract(self, url): + video_id = self._match_id(url).lower() + + video_info = self._download_json( + 'https://api.redgifs.com/v2/gifs/%s' % video_id, + video_id, 'Downloading video info') + if 'error' in video_info: + raise ExtractorError(f'RedGifs said: {video_info["error"]}', expected=True) + + gif = video_info['gif'] + urls = gif['urls'] + + quality = qualities(tuple(self._FORMATS.keys())) + + orig_height = int_or_none(gif.get('height')) + aspect_ratio = try_get(gif, lambda x: orig_height / x['width']) + + formats = [] + for format_id, height in self._FORMATS.items(): + video_url = urls.get(format_id) + if not video_url: + continue + height = min(orig_height, height or orig_height) + formats.append({ + 'url': video_url, + 'format_id': format_id, + 'width': height * aspect_ratio if aspect_ratio else None, + 'height': height, + 'quality': quality(format_id), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': ' '.join(gif.get('tags') or []) or 'RedGifs', + 'timestamp': int_or_none(gif.get('createDate')), + 'uploader': gif.get('userName'), + 'duration': int_or_none(gif.get('duration')), + 'view_count': int_or_none(gif.get('views')), + 'like_count': int_or_none(gif.get('likes')), + 'categories': gif.get('tags') or [], + 'age_limit': 18, + 'formats': formats, + } -- cgit v1.2.3 From 896a88c5c61a5431222a9b3a75c2c9c5129b1bbe Mon Sep 17 00:00:00 2001 From: gustaf <86112802+18928172992817182@users.noreply.github.com> Date: Sat, 27 Nov 2021 08:24:48 +0100 Subject: [Tvplayhome] Fix extractor (#1357) Authored by: pukkandan, 18928172992817182 (gustaf) --- yt_dlp/extractor/tvplay.py | 113 ++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 47 deletions(-) diff --git a/yt_dlp/extractor/tvplay.py b/yt_dlp/extractor/tvplay.py index 9771d9108..b5dbc5526 100644 --- a/yt_dlp/extractor/tvplay.py 
+++ b/yt_dlp/extractor/tvplay.py @@ -12,9 +12,9 @@ from ..utils import ( determine_ext, ExtractorError, int_or_none, - parse_duration, parse_iso8601, qualities, + traverse_obj, try_get, update_url_query, url_or_none, @@ -431,77 +431,96 @@ class ViafreeIE(InfoExtractor): class TVPlayHomeIE(InfoExtractor): - _VALID_URL = r'https?://(?:tv3?)?play\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/(?:[^/]+/)*[^/?#&]+-(?P\d+)' + _VALID_URL = r'''(?x) + https?:// + (?:tv3?)? + play\.(?:tv3|skaties)\.(?Plv|lt|ee)/ + (?Plives/)? + [^?#&]+(?:episode|programme|clip)-(?P\d+) + ''' _TESTS = [{ - 'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/', + 'url': 'https://play.tv3.lt/series/gauju-karai-karveliai,serial-2343791/serija-8,episode-2343828', 'info_dict': { - 'id': '366367', + 'id': '2343828', 'ext': 'mp4', - 'title': 'Aferistai', - 'description': 'Aferistai. Kalėdinė pasaka.', - 'series': 'Aferistai [N-7]', - 'season': '1 sezonas', + 'title': 'Gaujų karai. Karveliai (2021) | S01E08: Serija 8', + 'description': 'md5:f6fcfbb236429f05531131640dfa7c81', + 'duration': 2710, + 'season': 'Gaujų karai. 
Karveliai', 'season_number': 1, - 'duration': 464, - 'timestamp': 1394209658, - 'upload_date': '20140307', - 'age_limit': 18, + 'release_year': 2021, + 'episode': 'Serija 8', + 'episode_number': 8, }, 'params': { - 'skip_download': True, + 'skip_download': 'm3u8', }, }, { - 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/', - 'only_matching': True, + 'url': 'https://play.tv3.lt/series/moterys-meluoja-geriau-n-7,serial-2574652/serija-25,episode-3284937', + 'info_dict': { + 'id': '3284937', + 'ext': 'mp4', + 'season': 'Moterys meluoja geriau [N-7]', + 'season_number': 14, + 'release_year': 2021, + 'episode': 'Serija 25', + 'episode_number': 25, + 'title': 'Moterys meluoja geriau [N-7] (2021) | S14|E25: Serija 25', + 'description': 'md5:c6926e9710f1a126f028fbe121eddb79', + 'duration': 2440, + }, + 'skip': '404' }, { - 'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/', + 'url': 'https://play.tv3.lt/lives/tv6-lt,live-2838694/optibet-a-lygos-rungtynes-marijampoles-suduva--vilniaus-riteriai,programme-3422014', 'only_matching': True, }, { - 'url': 'https://play.tv3.lt/aferistai-10047125', + 'url': 'https://tv3play.skaties.lv/series/women-lie-better-lv,serial-1024464/women-lie-better-lv,episode-1038762', 'only_matching': True, }, { - 'url': 'https://tv3play.skaties.lv/vinas-melo-labak-10280317', + 'url': 'https://play.tv3.ee/series/_,serial-2654462/_,episode-2654474', 'only_matching': True, }, { - 'url': 'https://play.tv3.ee/cool-d-ga-mehhikosse-10044354', + 'url': 'https://tv3play.skaties.lv/clips/tv3-zinas-valsti-lidz-15novembrim-bus-majsede,clip-3464509', 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) + country, is_live, video_id = self._match_valid_url(url).groups() - asset = self._download_json( - urljoin(url, '/sb/public/asset/' + video_id), video_id) + api_path = 'lives/programmes' if is_live else 'vods' + data = self._download_json( + urljoin(url, 
f'/api/products/{api_path}/{video_id}?platform=BROWSER&lang={country.upper()}'), + video_id) - m3u8_url = asset['movie']['contentUrl'] - video_id = asset['assetId'] - asset_title = asset['title'] - title = asset_title['title'] - - formats = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') + video_type = 'CATCHUP' if is_live else 'MOVIE' + stream_id = data['programRecordingId'] if is_live else video_id + stream = self._download_json( + urljoin(url, f'/api/products/{stream_id}/videos/playlist?videoType={video_type}&platform=BROWSER'), video_id) + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + stream['sources']['HLS'][0]['src'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls') self._sort_formats(formats) - thumbnails = None - image_url = asset.get('imageUrl') - if image_url: - thumbnails = [{ - 'url': urljoin(url, image_url), - 'ext': 'jpg', - }] - - metadata = asset.get('metadata') or {} + thumbnails = set(traverse_obj( + data, (('galary', 'images', 'artworks'), ..., ..., ('miniUrl', 'mainUrl')), expected_type=url_or_none)) return { 'id': video_id, - 'title': title, - 'description': asset_title.get('summaryLong') or asset_title.get('summaryShort'), - 'thumbnails': thumbnails, - 'duration': parse_duration(asset_title.get('runTime')), - 'series': asset.get('tvSeriesTitle'), - 'season': asset.get('tvSeasonTitle'), - 'season_number': int_or_none(metadata.get('seasonNumber')), - 'episode': asset_title.get('titleBrief'), - 'episode_number': int_or_none(metadata.get('episodeNumber')), + 'title': self._resolve_title(data), + 'description': traverse_obj(data, 'description', 'lead'), + 'duration': int_or_none(data.get('duration')), + 'season': traverse_obj(data, ('season', 'serial', 'title')), + 'season_number': int_or_none(traverse_obj(data, ('season', 'number'))), + 'episode': data.get('title'), + 'episode_number': int_or_none(data.get('episode')), + 'release_year': int_or_none(traverse_obj(data, ('season', 'serial', 
'year'))), + 'thumbnails': [{'url': url, 'ext': 'jpg'} for url in thumbnails], 'formats': formats, + 'subtitles': subtitles, } + + @staticmethod + def _resolve_title(data): + return try_get(data, lambda x: ( + f'{data["season"]["serial"]["title"]} ({data["season"]["serial"]["year"]}) | ' + f'S{data["season"]["number"]:02d}E{data["episode"]:02d}: {data["title"]}' + )) or data.get('title') -- cgit v1.2.3 From 639f80c1f9feca69509ede153c28f8651213f7fc Mon Sep 17 00:00:00 2001 From: mpeter50 <83356418+mpeter50@users.noreply.github.com> Date: Sat, 27 Nov 2021 09:00:58 +0100 Subject: [Twitch:vod] Add chapters (#1515) Authored by: mpeter50 --- yt_dlp/extractor/twitch.py | 71 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index be70beed4..c5b16f2b0 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -24,6 +24,8 @@ from ..utils import ( parse_iso8601, parse_qs, qualities, + str_or_none, + traverse_obj, try_get, unified_timestamp, update_url_query, @@ -52,6 +54,7 @@ class TwitchBaseIE(InfoExtractor): 'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11', 'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c', 'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687', + 'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41', } def _real_initialize(self): @@ -249,6 +252,38 @@ class TwitchVodIE(TwitchBaseIE): }, { 'url': 'https://player.twitch.tv/?video=480452374', 'only_matching': True, + }, { + 'url': 'https://www.twitch.tv/videos/635475444', + 'info_dict': { + 'id': 'v635475444', + 'ext': 'mp4', + 'title': 'Riot Games', + 'duration': 11643, + 'uploader': 'Riot Games', + 'uploader_id': 'riotgames', + 'timestamp': 1590770569, + 'upload_date': '20200529', + 'chapters': [ + { + 'start_time': 
0, + 'end_time': 573, + 'title': 'League of Legends' + }, + { + 'start_time': 573, + 'end_time': 3922, + 'title': 'Legends of Runeterra' + }, + { + 'start_time': 3922, + 'end_time': 11643, + 'title': 'Art' + } + ], + }, + 'params': { + 'skip_download': True + } }] def _download_info(self, item_id): @@ -259,16 +294,24 @@ class TwitchVodIE(TwitchBaseIE): 'channelLogin': '', 'videoID': item_id, }, + }, { + 'operationName': 'VideoPlayer_ChapterSelectButtonVideo', + 'variables': { + 'includePrivate': False, + 'videoID': item_id, + }, }], - 'Downloading stream metadata GraphQL')[0]['data'] - video = data.get('video') + 'Downloading stream metadata GraphQL') + + video = traverse_obj(data, (0, 'data', 'video')) + video['moments'] = traverse_obj(data, (1, 'data', 'video', 'moments', 'edges', ..., 'node')) + if video is None: raise ExtractorError( 'Video %s does not exist' % item_id, expected=True) return self._extract_info_gql(video, item_id) - @staticmethod - def _extract_info(info): + def _extract_info(self, info): status = info.get('status') if status == 'recording': is_live = True @@ -304,8 +347,22 @@ class TwitchVodIE(TwitchBaseIE): 'is_live': is_live, } - @staticmethod - def _extract_info_gql(info, item_id): + def _extract_moments(self, info, item_id): + for moment in info.get('moments') or []: + start_time = int_or_none(moment.get('positionMilliseconds'), 1000) + duration = int_or_none(moment.get('durationMilliseconds'), 1000) + name = str_or_none(moment.get('description')) + + if start_time is None or duration is None: + self.report_warning(f'Important chapter information missing for chapter {name}', item_id) + continue + yield { + 'start_time': start_time, + 'end_time': start_time + duration, + 'title': name, + } + + def _extract_info_gql(self, info, item_id): vod_id = info.get('id') or item_id # id backward compatibility for download archives if vod_id[0] != 'v': @@ -314,6 +371,7 @@ class TwitchVodIE(TwitchBaseIE): if thumbnail: for p in ('width', 'height'): 
thumbnail = thumbnail.replace('{%s}' % p, '0') + return { 'id': vod_id, 'title': info.get('title') or 'Untitled Broadcast', @@ -324,6 +382,7 @@ class TwitchVodIE(TwitchBaseIE): 'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str), 'timestamp': unified_timestamp(info.get('publishedAt')), 'view_count': int_or_none(info.get('viewCount')), + 'chapters': list(self._extract_moments(info, item_id)), } def _real_extract(self, url): -- cgit v1.2.3 From dfd78699f59d66fe7cd109c2534240ea0254426c Mon Sep 17 00:00:00 2001 From: u-spec-png <54671367+u-spec-png@users.noreply.github.com> Date: Sat, 27 Nov 2021 08:12:56 +0000 Subject: [Aljazeera] Fix extractor (#1577) Closes #1518 Authored by: u-spec-png --- yt_dlp/extractor/aljazeera.py | 87 +++++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 28 deletions(-) diff --git a/yt_dlp/extractor/aljazeera.py b/yt_dlp/extractor/aljazeera.py index e829b45e4..7bcdb7afb 100644 --- a/yt_dlp/extractor/aljazeera.py +++ b/yt_dlp/extractor/aljazeera.py @@ -1,55 +1,86 @@ +# coding: utf-8 from __future__ import unicode_literals import json from .common import InfoExtractor +from ..utils import ( + try_get, +) class AlJazeeraIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?aljazeera\.com/(?Pprogram/[^/]+|(?:feature|video)s)/\d{4}/\d{1,2}/\d{1,2}/(?P[^/?&#]+)' + _VALID_URL = r'https?://(?P\w+\.aljazeera\.\w+)/(?Pprograms?/[^/]+|(?:feature|video|new)s)?/\d{4}/\d{1,2}/\d{1,2}/(?P[^/?&#]+)' _TESTS = [{ - 'url': 'https://www.aljazeera.com/program/episode/2014/9/19/deliverance', + 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/pojedini-domovi-u-sarajevu-jos-pod-vodom-mjestanima-se-dostavlja-hrana', 'info_dict': { - 'id': '3792260579001', + 'id': '6280641530001', 'ext': 'mp4', - 'title': 'The Slum - Episode 1: Deliverance', - 'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.', - 'uploader_id': '665003303001', - 
'timestamp': 1411116829, - 'upload_date': '20140919', - }, - 'add_ie': ['BrightcoveNew'], - 'skip': 'Not accessible from Travis CI server', - }, { - 'url': 'https://www.aljazeera.com/videos/2017/5/11/sierra-leone-709-carat-diamond-to-be-auctioned-off', - 'only_matching': True, + 'title': 'Pojedini domovi u Sarajevu još pod vodom, mještanima se dostavlja hrana', + 'timestamp': 1636219149, + 'description': 'U sarajevskim naseljima Rajlovac i Reljevo stambeni objekti, ali i industrijska postrojenja i dalje su pod vodom.', + 'upload_date': '20211106', + } }, { - 'url': 'https://www.aljazeera.com/features/2017/8/21/transforming-pakistans-buses-into-art', - 'only_matching': True, + 'url': 'https://balkans.aljazeera.net/videos/2021/11/6/djokovic-usao-u-finale-mastersa-u-parizu', + 'info_dict': { + 'id': '6280654936001', + 'ext': 'mp4', + 'title': 'Đoković ušao u finale Mastersa u Parizu', + 'timestamp': 1636221686, + 'description': 'Novak Đoković je u polufinalu Mastersa u Parizu nakon preokreta pobijedio Poljaka Huberta Hurkacza.', + 'upload_date': '20211106', + }, }] - BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s' + BRIGHTCOVE_URL_RE = r'https?://players.brightcove.net/(?P\d+)/(?P[a-zA-Z0-9]+)_(?P[^/]+)/index.html\?videoId=(?P\d+)' def _real_extract(self, url): - post_type, name = self._match_valid_url(url).groups() + base, post_type, id = self._match_valid_url(url).groups() + wp = { + 'balkans.aljazeera.net': 'ajb', + 'chinese.aljazeera.net': 'chinese', + 'mubasher.aljazeera.net': 'ajm', + }.get(base) or 'aje' post_type = { 'features': 'post', 'program': 'episode', + 'programs': 'episode', 'videos': 'video', + 'news': 'news', }[post_type.split('/')[0]] video = self._download_json( - 'https://www.aljazeera.com/graphql', name, query={ + f'https://{base}/graphql', id, query={ + 'wp-site': wp, 'operationName': 'ArchipelagoSingleArticleQuery', 'variables': json.dumps({ - 'name': name, + 'name': id, 'postType': post_type, }), 
}, headers={ - 'wp-site': 'aje', - })['data']['article']['video'] - video_id = video['id'] - account_id = video.get('accountId') or '665003303001' - player_id = video.get('playerId') or 'BkeSH5BDb' - return self.url_result( - self.BRIGHTCOVE_URL_TEMPLATE % (account_id, player_id, video_id), - 'BrightcoveNew', video_id) + 'wp-site': wp, + }) + video = try_get(video, lambda x: x['data']['article']['video']) or {} + video_id = video.get('id') + account = video.get('accountId') or '911432371001' + player_id = video.get('playerId') or 'csvTfAlKW' + embed = 'default' + + if video_id is None: + webpage = self._download_webpage(url, id) + + account, player_id, embed, video_id = self._search_regex(self.BRIGHTCOVE_URL_RE, webpage, 'video id', + group=(1, 2, 3, 4), default=(None, None, None, None)) + + if video_id is None: + return { + '_type': 'url_transparent', + 'url': url, + 'ie_key': 'Generic' + } + + return { + '_type': 'url_transparent', + 'url': f'https://players.brightcove.net/{account}/{player_id}_{embed}/index.html?videoId={video_id}', + 'ie_key': 'BrightcoveNew' + } -- cgit v1.2.3 From 909b0d66f47c4fb73ee320f512f0c12502f16294 Mon Sep 17 00:00:00 2001 From: Grabien <60237587+Grabien@users.noreply.github.com> Date: Sat, 27 Nov 2021 12:37:45 +0200 Subject: [Senate.gov] Add SenateGovIE and fix SenateISVPIE (#1435) Authored by: Grabien, pukkandan --- yt_dlp/extractor/cspan.py | 2 +- yt_dlp/extractor/extractors.py | 2 +- yt_dlp/extractor/generic.py | 2 +- yt_dlp/extractor/senategov.py | 213 +++++++++++++++++++++++++++++++++++++++++ yt_dlp/extractor/senateisvp.py | 153 ----------------------------- 5 files changed, 216 insertions(+), 156 deletions(-) create mode 100644 yt_dlp/extractor/senategov.py delete mode 100644 yt_dlp/extractor/senateisvp.py diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py index 2e01aff48..c717aec3a 100644 --- a/yt_dlp/extractor/cspan.py +++ b/yt_dlp/extractor/cspan.py @@ -18,7 +18,7 @@ from ..utils import ( str_to_int, 
unescapeHTML, ) -from .senateisvp import SenateISVPIE +from .senategov import SenateISVPIE from .ustream import UstreamIE diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index dd9edff0e..a4baad2da 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1285,7 +1285,7 @@ from .scte import ( SCTECourseIE, ) from .seeker import SeekerIE -from .senateisvp import SenateISVPIE +from .senategov import SenateISVPIE, SenateGovIE from .sendtonews import SendtoNewsIE from .servus import ServusIE from .sevenplus import SevenPlusIE diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index ae0ebb14a..51557f0f1 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -56,7 +56,7 @@ from .sportbox import SportBoxIE from .myvi import MyviIE from .condenast import CondeNastIE from .udn import UDNEmbedIE -from .senateisvp import SenateISVPIE +from .senategov import SenateISVPIE from .svt import SVTIE from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE diff --git a/yt_dlp/extractor/senategov.py b/yt_dlp/extractor/senategov.py new file mode 100644 index 000000000..6f4240422 --- /dev/null +++ b/yt_dlp/extractor/senategov.py @@ -0,0 +1,213 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import ( + compat_parse_qs, + compat_urlparse, +) +from ..utils import ( + ExtractorError, + parse_qs, + unsmuggle_url, +) + +_COMMITTEES = { + 'ag': ('76440', 'http://ag-f.akamaihd.net'), + 'aging': ('76442', 'http://aging-f.akamaihd.net'), + 'approps': ('76441', 'http://approps-f.akamaihd.net'), + 'arch': ('', 'http://ussenate-f.akamaihd.net'), + 'armed': ('76445', 'http://armed-f.akamaihd.net'), + 'banking': ('76446', 'http://banking-f.akamaihd.net'), + 'budget': ('76447', 'http://budget-f.akamaihd.net'), + 'cecc': ('76486', 'http://srs-f.akamaihd.net'), + 'commerce': ('80177', 'http://commerce1-f.akamaihd.net'), 
+ 'csce': ('75229', 'http://srs-f.akamaihd.net'), + 'dpc': ('76590', 'http://dpc-f.akamaihd.net'), + 'energy': ('76448', 'http://energy-f.akamaihd.net'), + 'epw': ('76478', 'http://epw-f.akamaihd.net'), + 'ethics': ('76449', 'http://ethics-f.akamaihd.net'), + 'finance': ('76450', 'http://finance-f.akamaihd.net'), + 'foreign': ('76451', 'http://foreign-f.akamaihd.net'), + 'govtaff': ('76453', 'http://govtaff-f.akamaihd.net'), + 'help': ('76452', 'http://help-f.akamaihd.net'), + 'indian': ('76455', 'http://indian-f.akamaihd.net'), + 'intel': ('76456', 'http://intel-f.akamaihd.net'), + 'intlnarc': ('76457', 'http://intlnarc-f.akamaihd.net'), + 'jccic': ('85180', 'http://jccic-f.akamaihd.net'), + 'jec': ('76458', 'http://jec-f.akamaihd.net'), + 'judiciary': ('76459', 'http://judiciary-f.akamaihd.net'), + 'rpc': ('76591', 'http://rpc-f.akamaihd.net'), + 'rules': ('76460', 'http://rules-f.akamaihd.net'), + 'saa': ('76489', 'http://srs-f.akamaihd.net'), + 'smbiz': ('76461', 'http://smbiz-f.akamaihd.net'), + 'srs': ('75229', 'http://srs-f.akamaihd.net'), + 'uscc': ('76487', 'http://srs-f.akamaihd.net'), + 'vetaff': ('76462', 'http://vetaff-f.akamaihd.net'), +} + + +class SenateISVPIE(InfoExtractor): + _IE_NAME = 'senate.gov:isvp' + _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P.+)' + + _TESTS = [{ + 'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png', + 'info_dict': { + 'id': 'judiciary031715', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player', + 'thumbnail': r're:^https?://.*\.(?:jpg|png)$', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false', + 'info_dict': { + 'id': 'commerce011514', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player' + }, 
+ 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi', + # checksum differs each time + 'info_dict': { + 'id': 'intel090613', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player' + } + }, { + # From http://www.c-span.org/video/?96791-1 + 'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715', + 'only_matching': True, + }] + + @staticmethod + def _search_iframe_url(webpage): + mobj = re.search( + r"]+src=['\"](?Phttps?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]", + webpage) + if mobj: + return mobj.group('url') + + def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + + qs = compat_parse_qs(self._match_valid_url(url).group('qs')) + if not qs.get('filename') or not qs.get('type') or not qs.get('comm'): + raise ExtractorError('Invalid URL', expected=True) + + video_id = re.sub(r'.mp4$', '', qs['filename'][0]) + + webpage = self._download_webpage(url, video_id) + + if smuggled_data.get('force_title'): + title = smuggled_data['force_title'] + else: + title = self._html_search_regex(r'([^<]+)', webpage, video_id) + poster = qs.get('poster') + thumbnail = poster[0] if poster else None + + video_type = qs['type'][0] + committee = video_type if video_type == 'arch' else qs['comm'][0] + + stream_num, domain = _COMMITTEES[committee] + + formats = [] + if video_type == 'arch': + filename = video_id if '.' 
in video_id else video_id + '.mp4' + m3u8_url = compat_urlparse.urljoin(domain, 'i/' + filename + '/master.m3u8') + formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8') + else: + hdcore_sign = 'hdcore=3.1.0' + url_params = (domain, video_id, stream_num) + f4m_url = f'%s/z/%s_1@%s/manifest.f4m?{hdcore_sign}' % url_params + m3u8_url = '%s/i/%s_1@%s/master.m3u8' % url_params + for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'): + # URLs without the extra param induce an 404 error + entry.update({'extra_param_to_segment_url': hdcore_sign}) + formats.append(entry) + for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'): + mobj = re.search(r'(?P(?:-p|-b)).m3u8', entry['url']) + if mobj: + entry['format_id'] += mobj.group('tag') + formats.append(entry) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'formats': formats, + 'thumbnail': thumbnail, + } + + +class SenateGovIE(InfoExtractor): + _IE_NAME = 'senate.gov' + _VALID_URL = r'https?:\/\/(?:www\.)?(help|appropriations|judiciary|banking|armed-services|finance)\.senate\.gov' + _TESTS = [{ + 'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health', + 'info_dict': { + 'id': 'help090920', + 'display_id': 'vaccines-saving-lives-ensuring-confidence-and-protecting-public-health', + 'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health', + 'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.appropriations.senate.gov/hearings/watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD', + 'info_dict': { + 'id': 'appropsA051518', + 'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD', + 'title': 'Review of the FY2019 Budget Request for the U.S. 
Army', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }, { + 'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization', + 'info_dict': { + 'id': 'banking041521', + 'display_id': '21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization', + 'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization', + 'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs', + 'ext': 'mp4', + }, + 'params': {'skip_download': 'm3u8'}, + }] + + def _real_extract(self, url): + display_id = self._generic_id(url) + webpage = self._download_webpage(url, display_id) + parse_info = parse_qs(self._search_regex( + r'