-rw-r--r-- | test/test_youtube_signature.py | 4
-rw-r--r-- | yt_dlp/YoutubeDL.py | 27
-rw-r--r-- | yt_dlp/downloader/external.py | 2
-rw-r--r-- | yt_dlp/extractor/abc.py | 2
-rw-r--r-- | yt_dlp/extractor/bandcamp.py | 68
-rw-r--r-- | yt_dlp/extractor/bbc.py | 8
-rw-r--r-- | yt_dlp/extractor/biqle.py | 93
-rw-r--r-- | yt_dlp/extractor/common.py | 8
-rw-r--r-- | yt_dlp/extractor/cspan.py | 50
-rw-r--r-- | yt_dlp/extractor/dropbox.py | 4
-rw-r--r-- | yt_dlp/extractor/extractors.py | 5
-rw-r--r-- | yt_dlp/extractor/peekvids.py | 48
-rw-r--r-- | yt_dlp/extractor/piapro.py | 100
-rw-r--r-- | yt_dlp/extractor/rtvs.py | 74
-rw-r--r-- | yt_dlp/extractor/twitcasting.py | 11
-rw-r--r-- | yt_dlp/extractor/washingtonpost.py | 21
-rw-r--r-- | yt_dlp/extractor/youtube.py | 32
-rw-r--r-- | yt_dlp/postprocessor/common.py | 4
-rw-r--r-- | yt_dlp/postprocessor/ffmpeg.py | 17
-rw-r--r-- | yt_dlp/postprocessor/modify_chapters.py | 2
20 files changed, 418 insertions, 162 deletions
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index cb07d3e23..bbbba073f 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -90,6 +90,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
         'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
     ),
+    (
+        'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
+        'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
+    ),
 ]
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 2043614ed..a96fc0bdd 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -887,7 +887,8 @@ class YoutubeDL(object):
     def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
         if test_encoding:
             original_text = text
-            encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
+            # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
+            encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
             text = text.encode(encoding, 'ignore').decode(encoding)
             if fallback is not None and text != original_text:
                 text = fallback
@@ -2661,12 +2662,15 @@ class YoutubeDL(object):
         # given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
         requested_langs = []
         for lang_re in self.params.get('subtitleslangs'):
-            if lang_re == 'all':
-                requested_langs.extend(all_sub_langs)
-                continue
             discard = lang_re[0] == '-'
             if discard:
                 lang_re = lang_re[1:]
+            if lang_re == 'all':
+                if discard:
+                    requested_langs = []
+                else:
+                    requested_langs.extend(all_sub_langs)
+                continue
             current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
             if discard:
                 for lang in current_langs:
@@ -2730,8 +2734,9 @@ class YoutubeDL(object):
             filename = self.evaluate_outtmpl(file_tmpl, info_dict)
             tmpl = format_tmpl(tmpl)
             self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
-            with io.open(filename, 'a', encoding='utf-8') as f:
-                f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
+            if self._ensure_dir_exists(filename):
+                with io.open(filename, 'a', encoding='utf-8') as f:
+                    f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')

     def __forced_printings(self, info_dict, filename, incomplete):
         def print_mandatory(field, actual_field=None):
@@ -2902,9 +2907,11 @@ class YoutubeDL(object):
         # Write internet shortcut files
         def _write_link_file(link_type):
-            if 'webpage_url' not in info_dict:
-                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
-                return False
+            url = try_get(info_dict['webpage_url'], iri_to_uri)
+            if not url:
+                self.report_warning(
+                    f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
+                return True
             linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
             if not self._ensure_dir_exists(encodeFilename(linkfn)):
                 return False
@@ -2915,7 +2922,7 @@ class YoutubeDL(object):
                 self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
                 with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
                              newline='\r\n' if link_type == 'url' else '\n') as linkfile:
-                    template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
+                    template_vars = {'url': url}
                     if link_type == 'desktop':
                         template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
                     linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index f4fdcf120..03ae3a00e 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -253,7 +253,7 @@ class Aria2cFD(ExternalFD):
     def _make_cmd(self, tmpfilename, info_dict):
         cmd = [self.exe, '-c',
                '--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
-               '--file-allocation=none', '-x16', '-j16', '-s16']
+               '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
         if 'fragments' in info_dict:
             cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
         else:
diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py
index 9d6f5a435..6fe195e82 100644
--- a/yt_dlp/extractor/abc.py
+++ b/yt_dlp/extractor/abc.py
@@ -213,7 +213,7 @@ class ABCIViewIE(InfoExtractor):
                 'hdnea': token,
             })

-        for sd in ('720', 'sd', 'sd-low'):
+        for sd in ('1080', '720', 'sd', 'sd-low'):
             sd_url = try_get(
                 stream, lambda x: x['streams']['hls'][sd], compat_str)
             if not sd_url:
diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py
index b664145a1..42223dab7 100644
--- a/yt_dlp/extractor/bandcamp.py
+++ b/yt_dlp/extractor/bandcamp.py
@@ -212,7 +212,7 @@ class BandcampIE(InfoExtractor):

 class BandcampAlbumIE(BandcampIE):
     IE_NAME = 'Bandcamp:album'
-    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?!/music)(?:/album/(?P<id>[^/?#&]+))?'
+    _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<id>[^/?#&]+)'

     _TESTS = [{
         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -258,14 +258,6 @@ class BandcampAlbumIE(BandcampIE):
         },
         'playlist_mincount': 9,
     }, {
-        'url': 'http://dotscale.bandcamp.com',
-        'info_dict': {
-            'title': 'Loom',
-            'id': 'dotscale',
-            'uploader_id': 'dotscale',
-        },
-        'playlist_mincount': 7,
-    }, {
         # with escaped quote in title
         'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
         'info_dict': {
@@ -391,41 +383,63 @@ class BandcampWeeklyIE(BandcampIE):
         }


-class BandcampMusicIE(InfoExtractor):
-    _VALID_URL = r'https?://(?P<id>[^/]+)\.bandcamp\.com/music'
+class BandcampUserIE(InfoExtractor):
+    IE_NAME = 'Bandcamp:user'
+    _VALID_URL = r'https?://(?!www\.)(?P<id>[^.]+)\.bandcamp\.com(?:/music)?/?(?:[#?]|$)'
+
     _TESTS = [{
+        # Type 1 Bandcamp user page.
+        'url': 'https://adrianvonziegler.bandcamp.com',
+        'info_dict': {
+            'id': 'adrianvonziegler',
+            'title': 'Discography of adrianvonziegler',
+        },
+        'playlist_mincount': 23,
+    }, {
+        # Bandcamp user page with only one album
+        'url': 'http://dotscale.bandcamp.com',
+        'info_dict': {
+            'id': 'dotscale',
+            'title': 'Discography of dotscale'
+        },
+        'playlist_count': 1,
+    }, {
+        # Type 2 Bandcamp user page.
+        'url': 'https://nightcallofficial.bandcamp.com',
+        'info_dict': {
+            'id': 'nightcallofficial',
+            'title': 'Discography of nightcallofficial',
+        },
+        'playlist_count': 4,
+    }, {
         'url': 'https://steviasphere.bandcamp.com/music',
         'playlist_mincount': 47,
         'info_dict': {
             'id': 'steviasphere',
+            'title': 'Discography of steviasphere',
         },
     }, {
         'url': 'https://coldworldofficial.bandcamp.com/music',
         'playlist_mincount': 10,
         'info_dict': {
             'id': 'coldworldofficial',
+            'title': 'Discography of coldworldofficial',
         },
     }, {
         'url': 'https://nuclearwarnowproductions.bandcamp.com/music',
         'playlist_mincount': 399,
         'info_dict': {
             'id': 'nuclearwarnowproductions',
+            'title': 'Discography of nuclearwarnowproductions',
         },
-    }
-    ]
-
-    _TYPE_IE_DICT = {
-        'album': BandcampAlbumIE.ie_key(),
-        'track': BandcampIE.ie_key()
-    }
+    }]

     def _real_extract(self, url):
-        id = self._match_id(url)
-        webpage = self._download_webpage(url, id)
-        items = re.findall(r'href\=\"\/(?P<path>(?P<type>album|track)+/[^\"]+)', webpage)
-        entries = [
-            self.url_result(
-                f'https://{id}.bandcamp.com/{item[0]}',
-                ie=self._TYPE_IE_DICT[item[1]])
-            for item in items]
-        return self.playlist_result(entries, id)
+        uploader = self._match_id(url)
+        webpage = self._download_webpage(url, uploader)
+
+        discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage)
+                            or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
+
+        return self.playlist_from_matches(
+            discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index 85ab478a6..199a3f8e2 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -1171,9 +1171,9 @@ class BBCIE(BBCCoUkIE):
             return self.playlist_result(
                 entries, playlist_id, playlist_title, playlist_description)

-        initial_data = self._parse_json(self._search_regex(
-            r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
-            'preload state', default='{}'), playlist_id, fatal=False)
+        initial_data = self._parse_json(self._parse_json(self._search_regex(
+            r'window\.__INITIAL_DATA__\s*=\s*("{.+?}");', webpage,
+            'preload state', default='"{}"'), playlist_id, fatal=False), playlist_id, fatal=False)
         if initial_data:
             def parse_media(media):
                 if not media:
@@ -1214,7 +1214,7 @@ class BBCIE(BBCCoUkIE):
                     if name == 'media-experience':
                         parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
                     elif name == 'article':
-                        for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
+                        for block in (try_get(resp, lambda x: x['data']['content']['model']['blocks'], list) or []):
                             if block.get('type') != 'media':
                                 continue
                             parse_media(block.get('model'))
diff --git a/yt_dlp/extractor/biqle.py b/yt_dlp/extractor/biqle.py
index 17ebbb257..2b57bade3 100644
--- a/yt_dlp/extractor/biqle.py
+++ b/yt_dlp/extractor/biqle.py
@@ -3,27 +3,28 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
 from .vk import VKIE
-from ..compat import (
-    compat_b64decode,
-    compat_urllib_parse_unquote,
+from ..compat import compat_b64decode
+from ..utils import (
+    int_or_none,
+    js_to_json,
+    traverse_obj,
+    unified_timestamp,
 )
-from ..utils import int_or_none


 class BIQLEIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
     _TESTS = [{
-        # Youtube embed
-        'url': 'https://biqle.ru/watch/-115995369_456239081',
-        'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
+        'url': 'https://biqle.ru/watch/-2000421746_85421746',
+        'md5': 'ae6ef4f04d19ac84e4658046d02c151c',
         'info_dict': {
-            'id': '8v4f-avW-VI',
+            'id': '-2000421746_85421746',
             'ext': 'mp4',
-            'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
-            'description': 'Passe-Partout',
-            'uploader_id': 'mrsimpsonstef3',
-            'uploader': 'Phanolito',
-            'upload_date': '20120822',
+            'title': 'Forsaken By Hope Studio Clip',
+            'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн',
+            'upload_date': '19700101',
+            'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb',
+            'timestamp': 0,
         },
     }, {
         'url': 'http://biqle.org/watch/-44781847_168547604',
@@ -32,53 +33,62 @@ class BIQLEIE(InfoExtractor):
             'id': '-44781847_168547604',
             'ext': 'mp4',
             'title': 'Ребенок в шоке от автоматической мойки',
+            'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',
             'timestamp': 1396633454,
-            'uploader': 'Dmitry Kotov',
             'upload_date': '20140404',
-            'uploader_id': '47850140',
+            'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',
         },
     }]

     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        embed_url = self._proto_relative_url(self._search_regex(
-            r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
-            webpage, 'embed url'))
+
+        title = self._html_search_meta('name', webpage, 'Title', fatal=False)
+        timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
+        description = self._html_search_meta('description', webpage, 'Description', default=None)
+
+        global_embed_url = self._search_regex(
+            r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'',
+            webpage, 'global Embed url')
+        hash = self._search_regex(
+            r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash')
+
+        embed_url = global_embed_url + hash
+
         if VKIE.suitable(embed_url):
             return self.url_result(embed_url, VKIE.ie_key(), video_id)

         embed_page = self._download_webpage(
-            embed_url, video_id, headers={'Referer': url})
-        video_ext = self._get_cookies(embed_url).get('video_ext')
-        if video_ext:
-            video_ext = compat_urllib_parse_unquote(video_ext.value)
-        if not video_ext:
-            video_ext = compat_b64decode(self._search_regex(
-                r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
-                embed_page, 'video_ext')).decode()
-        video_id, sig, _, access_token = video_ext.split(':')
+            embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url})
+
+        glob_params = self._parse_json(self._search_regex(
+            r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>',
+            embed_page, 'Global Parameters'), video_id, transform_source=js_to_json)
+        host_name = compat_b64decode(glob_params['server'][::-1]).decode()
+
         item = self._download_json(
-            'https://api.vk.com/method/video.get', video_id,
-            headers={'User-Agent': 'okhttp/3.4.1'}, query={
-                'access_token': access_token,
-                'sig': sig,
-                'v': 5.44,
+            f'https://{host_name}/method/video.get/{video_id}', video_id,
+            headers={'Referer': url}, query={
+                'token': glob_params['video']['access_token'],
                 'videos': video_id,
+                'ckey': glob_params['c_key'],
+                'credentials': glob_params['video']['credentials'],
             })['response']['items'][0]
-        title = item['title']

         formats = []
         for f_id, f_url in item.get('files', {}).items():
             if f_id == 'external':
                 return self.url_result(f_url)
             ext, height = f_id.split('_')
-            formats.append({
-                'format_id': height + 'p',
-                'url': f_url,
-                'height': int_or_none(height),
-                'ext': ext,
-            })
+            height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height))
+            if height_extra_key:
+                formats.append({
+                    'format_id': f'{height}p',
+                    'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
+                    'height': int_or_none(height),
+                    'ext': ext,
+                })
         self._sort_formats(formats)

         thumbnails = []
@@ -96,10 +106,9 @@ class BIQLEIE(InfoExtractor):
             'title': title,
             'formats': formats,
             'comment_count': int_or_none(item.get('comments')),
-            'description': item.get('description'),
+            'description': description,
             'duration': int_or_none(item.get('duration')),
             'thumbnails': thumbnails,
-            'timestamp': int_or_none(item.get('date')),
-            'uploader': item.get('owner_id'),
+            'timestamp': timestamp,
             'view_count': int_or_none(item.get('views')),
         }
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 37c8be5f6..04d4c0733 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -75,6 +75,7 @@ from ..utils import (
     str_to_int,
     strip_or_none,
     traverse_obj,
+    try_get,
     unescapeHTML,
     UnsupportedError,
     unified_strdate,
@@ -2878,7 +2879,8 @@ class InfoExtractor(object):
                     segment_duration = None
                     if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
                         segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
-                        representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
+                        representation_ms_info['total_number'] = int(math.ceil(
+                            float_or_none(period_duration, segment_duration, default=0)))
                     representation_ms_info['fragments'] = [{
                         media_location_key: media_template % {
                             'Number': segment_number,
@@ -2969,6 +2971,10 @@ class InfoExtractor(object):
                         f['url'] = initialization_url
                     f['fragments'].append({location_key(initialization_url): initialization_url})
                     f['fragments'].extend(representation_ms_info['fragments'])
+                    if not period_duration:
+                        period_duration = try_get(
+                            representation_ms_info,
+                            lambda r: sum(frag['duration'] for frag in r['fragments']), float)
                 else:
                     # Assuming direct URL to unfragmented media.
                     f['url'] = base_url
diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py
index c717aec3a..d29b58ba6 100644
--- a/yt_dlp/extractor/cspan.py
+++ b/yt_dlp/extractor/cspan.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 import re

 from .common import InfoExtractor
+from ..compat import compat_HTMLParseError
 from ..utils import (
     determine_ext,
     ExtractorError,
@@ -11,9 +12,11 @@ from ..utils import (
     get_element_by_attribute,
     get_element_by_class,
     int_or_none,
+    join_nonempty,
     js_to_json,
     merge_dicts,
     parse_iso8601,
+    parse_qs,
     smuggle_url,
     str_to_int,
     unescapeHTML,
@@ -126,8 +129,12 @@ class CSpanIE(InfoExtractor):
                     ext = 'vtt'
                 subtitle['ext'] = ext
         ld_info = self._search_json_ld(webpage, video_id, default={})
-        title = get_element_by_class('video-page-title', webpage) or \
-            self._og_search_title(webpage)
+        try:
+            title = get_element_by_class('video-page-title', webpage)
+        except compat_HTMLParseError:
+            title = None
+        if title is None:
+            title = self._og_search_title(webpage)
         description = get_element_by_attribute('itemprop', 'description', webpage) or \
             self._html_search_meta(['og:description', 'description'], webpage)
         return merge_dicts(info, ld_info, {
@@ -242,3 +249,42 @@ class CSpanIE(InfoExtractor):
             'title': title,
             'id': 'c' + video_id if video_type == 'clip' else video_id,
         }
+
+
+class CSpanCongressIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?c-span\.org/congress/'
+    _TESTS = [{
+        'url': 'https://www.c-span.org/congress/?chamber=house&date=2017-12-13&t=1513208380',
+        'info_dict': {
+            'id': 'house_2017-12-13',
+            'title': 'Congressional Chronicle - Members of Congress, Hearings and More',
+            'description': 'md5:54c264b7a8f219937987610243305a84',
+            'thumbnail': r're:https://ximage.c-spanvideo.org/.+',
+            'ext': 'mp4'
+        }
+    }]
+
+    def _real_extract(self, url):
+        query = parse_qs(url)
+        video_date = query.get('date', [None])[0]
+        video_id = join_nonempty(query.get('chamber', ['senate'])[0], video_date, delim='_')
+        webpage = self._download_webpage(url, video_id)
+        if not video_date:
+            jwp_date = re.search(r'jwsetup.clipprogdate = \'(?P<date>\d{4}-\d{2}-\d{2})\';', webpage)
+            if jwp_date:
+                video_id = f'{video_id}_{jwp_date.group("date")}'
+        jwplayer_data = self._parse_json(
+            self._search_regex(r'jwsetup\s*=\s*({(?:.|\n)[^;]+});', webpage, 'player config'),
+            video_id, transform_source=js_to_json)
+
+        title = (self._og_search_title(webpage, default=None)
+                 or self._html_search_regex(r'(?s)<title>(.*?)</title>', webpage, 'video title'))
+        description = (self._og_search_description(webpage, default=None)
+                       or self._html_search_meta('description', webpage, 'description', default=None))
+
+        return {
+            **self._parse_jwplayer_data(jwplayer_data, video_id, False),
+            'title': re.sub(r'\s+', ' ', title.split('|')[0]).strip(),
+            'description': description,
+            'http_headers': {'Referer': 'https://www.c-span.org/'},
+        }
diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py
index 3ae3a8d3d..2559657ad 100644
--- a/yt_dlp/extractor/dropbox.py
+++ b/yt_dlp/extractor/dropbox.py
@@ -56,8 +56,8 @@ class DropboxIE(InfoExtractor):
             else:
                 raise ExtractorError('Password protected video, use --video-password <password>', expected=True)

-        json_string = self._html_search_regex(r'InitReact\.mountComponent.+ "props":(.+), "elem_id"', webpage, 'Info JSON')
-        info_json = self._parse_json(json_string, video_id)
+        json_string = self._html_search_regex(r'InitReact\.mountComponent\(.*?,\s*(\{.+\})\s*?\)', webpage, 'Info JSON')
+        info_json = self._parse_json(json_string, video_id).get('props')
         transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
         formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index c3f3eb974..15bc74915 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -118,7 +118,7 @@ from .bandcamp import (
     BandcampIE,
     BandcampAlbumIE,
     BandcampWeeklyIE,
-    BandcampMusicIE,
+    BandcampUserIE,
 )
 from .bannedvideo import BannedVideoIE
 from .bbc import (
@@ -316,7 +316,7 @@ from .crunchyroll import (
     CrunchyrollBetaIE,
     CrunchyrollBetaShowIE,
 )
-from .cspan import CSpanIE
+from .cspan import CSpanIE, CSpanCongressIE
 from .ctsnews import CtsNewsIE
 from .ctv import CTVIE
 from .ctvnews import CTVNewsIE
@@ -1162,6 +1162,7 @@ from .periscope import (
 from .philharmoniedeparis import PhilharmonieDeParisIE
 from .phoenix import PhoenixIE
 from .photobucket import PhotobucketIE
+from .piapro import PiaproIE
 from .picarto import (
     PicartoIE,
     PicartoVodIE,
diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py
index 62050a8e4..4bf68559a 100644
--- a/yt_dlp/extractor/peekvids.py
+++ b/yt_dlp/extractor/peekvids.py
@@ -2,7 +2,6 @@ from __future__ import unicode_literals

 from .common import InfoExtractor
-from ..utils import remove_end


 class PeekVidsIE(InfoExtractor):
@@ -13,11 +12,17 @@ class PeekVidsIE(InfoExtractor):
     '''
     _TESTS = [{
         'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
-        'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
+        'md5': 'a00940646c428e232407e3e62f0e8ef5',
         'info_dict': {
             'id': 'BSyLMbN0YCd',
+            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp, SEXYhub',
             'ext': 'mp4',
-            'title': 'Dane Jones - Cute redhead with perfect tits with Mini Vamp',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'Watch Dane Jones - Cute redhead with perfect tits with Mini Vamp (7 min), uploaded by SEXYhub.com',
+            'timestamp': 1642579329,
+            'upload_date': '20220119',
+            'duration': 416,
+            'view_count': int,
             'age_limit': 18,
         },
     }]
@@ -40,46 +45,37 @@ class PeekVidsIE(InfoExtractor):
         formats = [{'url': url} for url in srcs.values()]
         self._sort_formats(formats)

-        title = remove_end(self._html_search_regex(
-            (r'<h1.*?>\s*(.+?)\s*</h1>', r'<title>\s*(.+?)\s*</title>'),
-            webpage, 'video title', default=None), ' - PeekVids')
-
-        return {
+        info = self._search_json_ld(webpage, video_id, expected_type='VideoObject')
+        info.update({
             'id': video_id,
-            'title': title,
             'age_limit': 18,
             'formats': formats,
-        }
+        })
+        return info


 class PlayVidsIE(PeekVidsIE):
     _VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|[^/]{2}/)?(?P<id>[^/?#]*)'
     _TESTS = [{
         'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
-        'md5': '2f12e50213dd65f142175da633c4564c',
+        'md5': 'cd7dfd8a2e815a45402369c76e3c1825',
         'info_dict': {
             'id': 'U3pBrYhsjXM',
+            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp, SEXYhub',
             'ext': 'mp4',
-            'title': 'Dane Jones - Cute redhead with perfect tits with Mini Vamp',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'description': 'Watch Dane Jones - Cute redhead with perfect tits with Mini Vamp video in HD, uploaded by SEXYhub.com',
+            'timestamp': 1640435839,
+            'upload_date': '20211225',
+            'duration': 416,
+            'view_count': int,
             'age_limit': 18,
         },
     }, {
         'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
-        'md5': '2f12e50213dd65f142175da633c4564c',
-        'info_dict': {
-            'id': 'U3pBrYhsjXM',
-            'ext': 'mp4',
-            'title': 'Dane Jones - Cute redhead with perfect tits with Mini Vamp',
-            'age_limit': 18,
-        },
+        'only_matching': True,
     }, {
         'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
-        'md5': '2f12e50213dd65f142175da633c4564c',
-        'info_dict': {
-            'id': 'U3pBrYhsjXM',
-            'ext': 'mp4',
-            'title': 'U3pBrYhsjXM',
-            'age_limit': 18,
-        },
+        'only_matching': True,
     }]
     _DOMAIN = 'www.playvids.com'
diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py
new file mode 100644
index 000000000..497e1edbc
--- /dev/null
+++ b/yt_dlp/extractor/piapro.py
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+    ExtractorError,
+    parse_duration,
+    parse_filesize,
+    str_to_int,
+    unified_timestamp,
+    urlencode_postdata,
+)
+
+
+class PiaproIE(InfoExtractor):
+    _NETRC_MACHINE = 'piapro'
+    _VALID_URL = r'https?://piapro\.jp/t/(?P<id>\w+)/?'
+    _TESTS = [{
+        'url': 'https://piapro.jp/t/NXYR',
+        'md5': 'a9d52f27d13bafab7ee34116a7dcfa77',
+        'info_dict': {
+            'id': 'NXYR',
+            'ext': 'mp3',
+            'uploader': 'wowaka',
+            'uploader_id': 'wowaka',
+            'title': '裏表ラバーズ',
+            'thumbnail': r're:^https?://.*\.jpg$',
+        }
+    }]
+
+    def _real_initialize(self):
+        self._login_status = self._login()
+
+    def _login(self):
+        username, password = self._get_login_info()
+        if not username:
+            return False
+        login_ok = True
+        login_form_strs = {
+            '_username': username,
+            '_password': password,
+            '_remember_me': 'on',
+            'login': 'ログイン'
+        }
+        self._request_webpage('https://piapro.jp/login/', None)
+        urlh = self._request_webpage(
+            'https://piapro.jp/login/exe', None,
+            note='Logging in', errnote='Unable to log in',
+            data=urlencode_postdata(login_form_strs))
+        if urlh is False:
+            login_ok = False
+        else:
+            parts = compat_urlparse.urlparse(urlh.geturl())
+            if parts.path != '/':
+                login_ok = False
+        if not login_ok:
+            self.report_warning(
+                'unable to log in: bad username or password')
+        return login_ok
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+
+        category_id = self._search_regex(r'categoryId=(.+)">', webpage, 'category ID')
+        if category_id not in ('1', '2', '21', '22', '23', '24', '25'):
+            raise ExtractorError('The URL does not contain audio.', expected=True)
+
+        str_duration, str_filesize = self._search_regex(
+            r'サイズ:</span>(.+?)/\(([0-9,]+?[KMG]?B)\)', webpage, 'duration and size',
+            group=(1, 2), default=(None, None))
+        str_viewcount = self._search_regex(r'閲覧数:</span>([0-9,]+)\s+', webpage, 'view count', fatal=False)
+
+        uploader_id, uploader = self._search_regex(
+            r'<a\s+class="cd_user-name"\s+href="/(.*)">([^<]+)さん<', webpage, 'uploader',
+            group=(1, 2), default=(None, None))
+        content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID')
+        create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp')
+
+        player_webpage = self._download_webpage(
+            f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}',
+            video_id, note='Downloading player webpage')
+
+        return {
+            'id': video_id,
+            'title': self._html_search_regex(r'<h1\s+class="cd_works-title">(.+?)</h1>', webpage, 'title', fatal=False),
+            'description': self._html_search_regex(r'<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False),
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'timestamp': unified_timestamp(create_date, False),
+            'duration': parse_duration(str_duration),
+            'view_count': str_to_int(str_viewcount),
+            'thumbnail': self._html_search_meta('twitter:image', webpage),
+
+            'filesize_approx': parse_filesize(str_filesize.replace(',', '')),
+            'url': self._search_regex(r'mp3:\s*\'(.*?)\'\}', player_webpage, 'url'),
+            'ext': 'mp3',
+            'vcodec': 'none',
+        }
diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py
index 6573b260d..3ea0f1883 100644
--- a/yt_dlp/extractor/rtvs.py
+++ b/yt_dlp/extractor/rtvs.py
@@ -1,11 +1,19 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    traverse_obj,
+    unified_timestamp,
+)


 class RTVSIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv/\d+/(?P<id>\d+)'
+    _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv(?:/\d+)?/(?P<id>\d+)/?(?:[#?]|$)'
     _TESTS = [{
         # radio archive
         'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
@@ -13,23 +21,37 @@ class RTVSIE(InfoExtractor):
         'info_dict': {
             'id': '414872',
             'ext': 'mp3',
-            'title': 'Ostrov pokladov 1 časť.mp3'
-        },
-        'params': {
-            'skip_download': True,
+            'title': 'Ostrov pokladov 1 časť.mp3',
+            'duration': 2854,
+            'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0000/b1R8.rtvs.jpg',
+            'display_id': '135331',
         }
     }, {
         # tv archive
         'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
-        'md5': '85e2c55cf988403b70cac24f5c086dc6',
         'info_dict': {
             'id': '63118',
             'ext': 'mp4',
             'title': 'Amaro Džives - Náš deň',
-            'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.'
-        },
-        'params': {
-            'skip_download': True,
+            'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.',
+            'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0031/L7Qm.amaro_dzives_png.jpg',
+            'timestamp': 1428555900,
+            'upload_date': '20150409',
+            'duration': 4986,
+        }
+    }, {
+        # tv archive
+        'url': 'https://www.rtvs.sk/televizia/archiv/18083?utm_source=web&utm_medium=rozcestnik&utm_campaign=Robin',
+        'info_dict': {
+            'id': '18083',
+            'ext': 'mp4',
+            'title': 'Robin',
+            'description': 'md5:2f70505a7b8364491003d65ff7a0940a',
+            'timestamp': 1636652760,
+            'display_id': '307655',
+            'duration': 831,
+            'upload_date': '20211111',
+            'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0916/robin.jpg',
         }
     }]

@@ -37,11 +59,31 @@ class RTVSIE(InfoExtractor):
         video_id = self._match_id(url)

         webpage = self._download_webpage(url, video_id)
+        iframe_id = self._search_regex(
+            r'<iframe[^>]+id\s*=\s*"player_[^_]+_([0-9]+)"', webpage, 'Iframe ID')
+        iframe_url = self._search_regex(
+            fr'<iframe[^>]+id\s*=\s*"player_[^_]+_{re.escape(iframe_id)}"[^>]+src\s*=\s*"([^"]+)"', webpage, 'Iframe URL')
+
+        webpage = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
+        json_url = self._search_regex(r'var\s+url\s*=\s*"([^"]+)"\s*\+\s*ruurl', webpage, 'json URL')
+        data = self._download_json(f'https:{json_url}b=mozilla&p=win&v=97&f=0&d=1', video_id)

-        playlist_url = self._search_regex(
-            r'playlist["\']?\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
-            'playlist url', group='url')
+        if data.get('clip'):
+            data['playlist'] = [data['clip']]

-        data = self._download_json(
-            playlist_url, video_id, 'Downloading playlist')[0]
-        return self._parse_jwplayer_data(data, video_id=video_id)
+        if traverse_obj(data, ('playlist', 0, 'sources', 0, 'type')) == 'audio/mp3':
+            formats = [{'url': traverse_obj(data, ('playlist', 0, 'sources', 0, 'src'))}]
+        else:
+            formats = self._extract_m3u8_formats(traverse_obj(data, ('playlist', 0, 'sources', 0, 'src')), video_id)
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'display_id': iframe_id,
+            'title': traverse_obj(data, ('playlist', 0, 'title')),
+            'description': traverse_obj(data, ('playlist', 0, 'description')),
+            'duration': parse_duration(traverse_obj(data, ('playlist', 0, 'length'))),
+            'thumbnail': traverse_obj(data, ('playlist', 0, 'image')),
+            'timestamp': unified_timestamp(traverse_obj(data, ('playlist', 0, 'datetime_create'))),
+            'formats': formats
+        }
diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py
index 98ef330cb..08222df95 100644
--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@@ -221,6 +221,17 @@ class TwitCastingLiveIE(InfoExtractor):
              r'tw-sound-flag-open-link" data-id="(\d+)" style=',),
             webpage, 'current live ID', default=None)
         if not current_live:
+            # fetch unfiltered /show to find running livestreams; we can't get ID of the password-protected livestream above
+            webpage = self._download_webpage(
+                f'https://twitcasting.tv/{uploader_id}/show/', uploader_id,
+                note='Downloading live history')
+            is_live = self._search_regex(r'(?s)(<span\s*class="tw-movie-thumbnail-badge"\s*data-status="live">\s*LIVE)', webpage, 'is live?', default=None)
+            if is_live:
+                # get the first live; running live is always at the first
+                current_live = self._search_regex(
+                    r'(?s)<a\s+class="tw-movie-thumbnail"\s*href="/[^/]+/movie/(?P<video_id>\d+)"\s*>.+?</a>',
+                    webpage, 'current live ID 2', default=None, group='video_id')
+        if not current_live:
             raise ExtractorError('The user is not currently live')
         return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live))
diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py
index 8afb1af83..9d6ae2870 100644
--- a/yt_dlp/extractor/washingtonpost.py
+++ b/yt_dlp/extractor/washingtonpost.py
@@ -5,6 +5,8 @@ import re

 from .common import InfoExtractor

+from ..utils import traverse_obj
+

 class WashingtonPostIE(InfoExtractor):
     IE_NAME = 'washingtonpost'
@@ -50,7 +52,7 @@ class WashingtonPostArticleIE(InfoExtractor):
             'title': 'Sinkhole of bureaucracy',
         },
         'playlist': [{
-            'md5': 'b9be794ceb56c7267d410a13f99d801a',
+            'md5': '7ccf53ea8cbb77de5f570242b3b21a59',
             'info_dict': {
                 'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
                 'ext': 'mp4',
@@ -59,9 +61,10 @@ class WashingtonPostArticleIE(InfoExtractor):
                 'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
                 'timestamp': 1395440416,
                 'upload_date': '20140321',
+                'thumbnail': r're:https://[^\.]+.cloudfront\.net/PAPERMINESplash\.jpg',
             },
         }, {
-            'md5': '1fff6a689d8770966df78c8cb6c8c17c',
+            'md5': '7ccf53ea8cbb77de5f570242b3b21a59',
             'info_dict': {
                 'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
                 'ext': 'mp4',
@@ -70,6 +73,7 @@ class WashingtonPostArticleIE(InfoExtractor):
                 'duration': 2220,
                 'timestamp': 1395441819,
                 'upload_date': '20140321',
+                'thumbnail': r're:https://[^\.]+.cloudfront\.net/BoyersSplash\.jpeg',
             },
         }],
     }, {
@@ -88,7 +92,11 @@ class WashingtonPostArticleIE(InfoExtractor):
             'timestamp': 1419972442,
             'title': 'Why black boxes don’t transmit data in real time',
-        }
+        },
+        'skip': 'Doesnt have a video anymore',
+    }, {
+        'url': 'https://www.washingtonpost.com/nation/2021/08/05/dixie-river-fire-california-climate/',
+        'only_matching': True,
     }]

     @classmethod
@@ -106,6 +114,13 @@ class WashingtonPostArticleIE(InfoExtractor):
             <div\s+class="posttv-video-embed[^>]*?data-uuid=|
             data-video-uuid=
         )"([^"]+)"''', webpage)
+
+        if not uuids:
+            json_data = self._search_nextjs_data(webpage, page_id)
+            for content_element in traverse_obj(json_data, ('props', 'pageProps', 'globalContent', 'content_elements')):
+                if content_element.get('type') == 'video':
+                    uuids.append(content_element.get('_id'))
+
         entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]

         return {
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index d5f9b6962..c03637f5f 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -225,28 +225,28 @@ INNERTUBE_CLIENTS = {

 def build_innertube_clients():
-    third_party = {
+    THIRD_PARTY = {
         'embedUrl': 'https://google.com',  # Can be any valid URL
     }
-    base_clients = ('android', 'web', 'ios', 'mweb')
-    priority = qualities(base_clients[::-1])
+    BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
+    priority = qualities(BASE_CLIENTS[::-1])

     for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
         ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
         ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
         ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
         ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
-        ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])

-        if client in base_clients:
-            INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
+        base_client, *variant = client.split('_')
+        ytcfg['priority'] = 10 * priority(base_client)
+
+        if variant == ['embedded']:
+            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
+            INNERTUBE_CLIENTS[f'{base_client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
             agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
-            agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
             agegate_ytcfg['priority'] -= 1
-        elif client.endswith('_embedded'):
-            ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
             ytcfg['priority'] -= 2
-        else:
+        elif variant:
             ytcfg['priority'] -= 3
@@ -2413,7 +2413,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

     def _extract_n_function_name(self, jscode):
         nfunc, idx = self._search_regex(
-            r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
+            r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
             jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
         if not idx:
             return nfunc
@@ -2936,6 +2936,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
         ])
         streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
+        approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None

         for fmt in streaming_formats:
             if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
@@ -2995,12 +2996,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     itags[itag] = 'https'
                     stream_ids.append(stream_id)

-            tbr = float_or_none(
-                fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
+            tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
             language_preference = (
                 10 if audio_track.get('audioIsDefault') and 10
                 else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
                 else -1)
+            # Some formats may have much smaller duration than others (possibly damaged during encoding)
+            # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
+            is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
             dct = {
                 'asr': int_or_none(fmt.get('audioSampleRate')),
                 'filesize': int_or_none(fmt.get('contentLength')),
@@ -3009,7 +3012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     '%s%s' % (audio_track.get('displayName') or '',
                               ' (default)' if language_preference > 0 else ''),
                     fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
-                    throttled and 'THROTTLED', delim=', '),
+                    throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
                 'source_preference': -10 if throttled else -1,
                 'fps': int_or_none(fmt.get('fps')) or None,
                 'height': height,
@@ -3020,6 +3023,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'language': join_nonempty(audio_track.get('id', '').split('.')[0],
                                           'desc' if language_preference < -1 else ''),
                 'language_preference': language_preference,
+                'preference': -10 if is_damaged else None,
             }
             mime_mobj = re.match(
                 r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py
index f2467c542..d761c9303 100644
--- a/yt_dlp/postprocessor/common.py
+++ b/yt_dlp/postprocessor/common.py
@@ -103,12 +103,14 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
         return getattr(self._downloader, '_copy_infodict', dict)(info_dict)

     @staticmethod
-    def _restrict_to(*, video=True, audio=True, images=True):
+    def _restrict_to(*, video=True, audio=True, images=True, simulated=True):
         allowed = {'video': video, 'audio': audio, 'images': images}

         def decorator(func):
             @functools.wraps(func)
             def wrapper(self, info):
+                if not simulated and (self.get_param('simulate') or self.get_param('skip_download')):
+                    return [], info
                 format_type = (
                     'video' if info.get('vcodec') != 'none'
                     else 'audio' if info.get('acodec') != 'none'
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 42e9d12a7..d4495b4a2 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -384,12 +384,10 @@ class FFmpegPostProcessor(PostProcessor):

         out_flags = list(self.stream_copy_opts(ext=determine_ext(out_file)))

-        try:
-            self.real_run_ffmpeg(
-                [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
-                [(out_file, out_flags)])
-        finally:
-            os.remove(concat_file)
+        self.real_run_ffmpeg(
+            [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
+            [(out_file, out_flags)])
+        os.remove(concat_file)

     @classmethod
     def _concat_spec(cls, in_files, concat_opts=None):
@@ -1147,16 +1145,15 @@ class FFmpegConcatPP(FFmpegPostProcessor):
             super().concat_files(in_files, out_file)
         return in_files

-    @PostProcessor._restrict_to(images=False)
+    @PostProcessor._restrict_to(images=False, simulated=False)
     def run(self, info):
         entries = info.get('entries') or []
-        if (self.get_param('skip_download') or not any(entries)
-                or self._only_multi_video and info['_type'] != 'multi_video'):
+        if not any(entries) or (self._only_multi_video and info['_type'] != 'multi_video'):
             return [], info
         elif any(len(entry) > 1 for entry in traverse_obj(entries, (..., 'requested_downloads')) or []):
             raise PostProcessingError('Concatenation is not supported when downloading multiple separate formats')

-        in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath'))
+        in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath')) or []
         if len(in_files) < len(entries):
             raise PostProcessingError('Aborting concatenation because some downloads failed')
diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py
index 435a144e2..22506bc21 100644
--- a/yt_dlp/postprocessor/modify_chapters.py
+++ b/yt_dlp/postprocessor/modify_chapters.py
@@ -68,9 +68,11 @@ class ModifyChaptersPP(FFmpegPostProcessor):
         # Renaming should only happen after all files are processed
         files_to_remove = []
         for in_file, out_file in in_out_files:
+            mtime = os.stat(in_file).st_mtime
             uncut_file = prepend_extension(in_file, 'uncut')
             os.replace(in_file, uncut_file)
             os.replace(out_file, in_file)
+            self.try_utime(in_file, mtime, mtime)
             files_to_remove.append(uncut_file)
         return files_to_remove, info
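
Notes on selected changes:

The subtitleslangs hunk in yt_dlp/YoutubeDL.py moves the 'all' check so it runs after the '-' prefix is stripped, which is what makes '-all' mean "reset the selection" instead of being compiled as a regex. A minimal standalone sketch of the resulting selection semantics; select_sub_langs is an illustrative name, not part of yt-dlp's API, and the real method also de-duplicates the final list:

    import re

    def select_sub_langs(requested, all_sub_langs):
        # A leading '-' discards what the pattern matches; 'all' extends the
        # selection with every available language; '-all' (the new case in the
        # hunk above) resets the selection instead of being treated as a regex.
        selected = []
        for lang_re in requested:
            discard = lang_re.startswith('-')
            if discard:
                lang_re = lang_re[1:]
            if lang_re == 'all':
                if discard:
                    selected = []
                else:
                    selected.extend(all_sub_langs)
                continue
            matched = list(filter(re.compile(lang_re + '$').match, all_sub_langs))
            if discard:
                selected = [lang for lang in selected if lang not in matched]
            else:
                selected.extend(matched)
        return list(dict.fromkeys(selected))  # de-duplicate, keep order

    print(select_sub_langs(['all', '-live_chat'], ['en', 'fr', 'live_chat']))  # ['en', 'fr']
    print(select_sub_langs(['en.*'], ['en', 'en-US', 'fr']))                   # ['en', 'en-US']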
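The yt_dlp/extractor/youtube.py hunks add a damaged-format heuristic: the longest approxDurationMs reported across all formats serves as the reference, and any format falling more than 10 seconds short of it is labeled DAMAGED and deprioritized ('preference': -10). A hedged sketch of the same check, with simplified dicts standing in for InnerTube format objects and flag_damaged_formats as a hypothetical helper name:

    def flag_damaged_formats(streaming_formats, tolerance_ms=10000):
        # Take the longest reported approxDurationMs as the reference duration;
        # formats falling more than ~10 s short are treated as damaged.
        durations = [float(f['approxDurationMs']) for f in streaming_formats
                     if f.get('approxDurationMs')]
        approx_duration = max(durations, default=None)
        if approx_duration is None:
            return []
        return [f['itag'] for f in streaming_formats
                if f.get('approxDurationMs')
                and float(f['approxDurationMs']) < approx_duration - tolerance_ms]

    print(flag_damaged_formats([
        {'itag': 18, 'approxDurationMs': '300000'},
        {'itag': 137, 'approxDurationMs': '123000'},  # ~3 minutes short
    ]))  # -> [137]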