-rw-r--r--  test/test_youtube_signature.py           |   4
-rw-r--r--  yt_dlp/YoutubeDL.py                      |  27
-rw-r--r--  yt_dlp/downloader/external.py            |   2
-rw-r--r--  yt_dlp/extractor/abc.py                  |   2
-rw-r--r--  yt_dlp/extractor/bandcamp.py             |  68
-rw-r--r--  yt_dlp/extractor/bbc.py                  |   8
-rw-r--r--  yt_dlp/extractor/biqle.py                |  93
-rw-r--r--  yt_dlp/extractor/common.py               |   8
-rw-r--r--  yt_dlp/extractor/cspan.py                |  50
-rw-r--r--  yt_dlp/extractor/dropbox.py              |   4
-rw-r--r--  yt_dlp/extractor/extractors.py           |   5
-rw-r--r--  yt_dlp/extractor/peekvids.py             |  48
-rw-r--r--  yt_dlp/extractor/piapro.py               | 100
-rw-r--r--  yt_dlp/extractor/rtvs.py                 |  74
-rw-r--r--  yt_dlp/extractor/twitcasting.py          |  11
-rw-r--r--  yt_dlp/extractor/washingtonpost.py       |  21
-rw-r--r--  yt_dlp/extractor/youtube.py              |  32
-rw-r--r--  yt_dlp/postprocessor/common.py           |   4
-rw-r--r--  yt_dlp/postprocessor/ffmpeg.py           |  17
-rw-r--r--  yt_dlp/postprocessor/modify_chapters.py  |   2
20 files changed, 418 insertions(+), 162 deletions(-)
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index cb07d3e23..bbbba073f 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -90,6 +90,10 @@ _NSIG_TESTS = [
'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js',
'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw',
),
+ (
+ 'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js',
+ 'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA',
+ ),
]
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 2043614ed..a96fc0bdd 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -887,7 +887,8 @@ class YoutubeDL(object):
def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False):
if test_encoding:
original_text = text
- encoding = self.params.get('encoding') or getattr(handle, 'encoding', 'ascii')
+ # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711
+ encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii'
text = text.encode(encoding, 'ignore').decode(encoding)
if fallback is not None and text != original_text:
text = fallback
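
Worth spelling out why the extra `or` is needed: getattr's default only applies when the attribute is missing entirely, not when it exists with a None value. A minimal illustration (hypothetical Handle class):

```python
class Handle:
    encoding = None  # attribute exists, but its value is unknown

h = Handle()
getattr(h, 'encoding', 'ascii')          # -> None: the default is never consulted
getattr(h, 'encoding', None) or 'ascii'  # -> 'ascii': the patched fallback chain
```
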
@@ -2661,12 +2662,15 @@ class YoutubeDL(object):
# given in subtitleslangs. See https://github.com/yt-dlp/yt-dlp/issues/1041
requested_langs = []
for lang_re in self.params.get('subtitleslangs'):
- if lang_re == 'all':
- requested_langs.extend(all_sub_langs)
- continue
discard = lang_re[0] == '-'
if discard:
lang_re = lang_re[1:]
+ if lang_re == 'all':
+ if discard:
+ requested_langs = []
+ else:
+ requested_langs.extend(all_sub_langs)
+ continue
current_langs = filter(re.compile(lang_re + '$').match, all_sub_langs)
if discard:
for lang in current_langs:
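
With the reordering, `-all` now clears the selection instead of being compiled as a literal regex pattern. Roughly, the loop behaves like this simplified standalone sketch (hypothetical helper name):

```python
import re

def select_subtitle_langs(requested_patterns, all_sub_langs):
    """Simplified model of the patched selection loop."""
    selected = []
    for lang_re in requested_patterns:
        discard = lang_re.startswith('-')
        if discard:
            lang_re = lang_re[1:]
        if lang_re == 'all':
            # '-all' resets everything selected so far; 'all' adds every language
            selected = [] if discard else selected + list(all_sub_langs)
            continue
        matches = filter(re.compile(lang_re + '$').match, all_sub_langs)
        if discard:
            for lang in matches:
                while lang in selected:
                    selected.remove(lang)
        else:
            selected.extend(matches)
    return selected

# e.g. ['all', '-en.*'] keeps everything except English variants
print(select_subtitle_langs(['all', '-en.*'], ['en', 'en-US', 'fr']))  # ['fr']
```
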
@@ -2730,8 +2734,9 @@ class YoutubeDL(object):
filename = self.evaluate_outtmpl(file_tmpl, info_dict)
tmpl = format_tmpl(tmpl)
self.to_screen(f'[info] Writing {tmpl!r} to: {filename}')
- with io.open(filename, 'a', encoding='utf-8') as f:
- f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
+ if self._ensure_dir_exists(filename):
+ with io.open(filename, 'a', encoding='utf-8') as f:
+ f.write(self.evaluate_outtmpl(tmpl, info_copy) + '\n')
def __forced_printings(self, info_dict, filename, incomplete):
def print_mandatory(field, actual_field=None):
@@ -2902,9 +2907,11 @@ class YoutubeDL(object):
# Write internet shortcut files
def _write_link_file(link_type):
- if 'webpage_url' not in info_dict:
- self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
- return False
+ url = try_get(info_dict['webpage_url'], iri_to_uri)
+ if not url:
+ self.report_warning(
+ f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown')
+ return True
linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext'))
if not self._ensure_dir_exists(encodeFilename(linkfn)):
return False
@@ -2915,7 +2922,7 @@ class YoutubeDL(object):
self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}')
with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8',
newline='\r\n' if link_type == 'url' else '\n') as linkfile:
- template_vars = {'url': iri_to_uri(info_dict['webpage_url'])}
+ template_vars = {'url': url}
if link_type == 'desktop':
template_vars['filename'] = linkfn[:-(len(link_type) + 1)]
linkfile.write(LINK_TEMPLATES[link_type] % template_vars)
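
The rewrite leans on try_get from yt_dlp.utils, which returns None instead of letting the getter's exception propagate, so a failing iri_to_uri conversion degrades into the warning above rather than a crash:

```python
from yt_dlp.utils import try_get

try_get({'a': 1}, lambda x: x['a'])  # -> 1
try_get({'a': 1}, lambda x: x['b'])  # -> None: the KeyError is swallowed
```
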
diff --git a/yt_dlp/downloader/external.py b/yt_dlp/downloader/external.py
index f4fdcf120..03ae3a00e 100644
--- a/yt_dlp/downloader/external.py
+++ b/yt_dlp/downloader/external.py
@@ -253,7 +253,7 @@ class Aria2cFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
- '--file-allocation=none', '-x16', '-j16', '-s16']
+ '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
if 'fragments' in info_dict:
cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
else:
diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py
index 9d6f5a435..6fe195e82 100644
--- a/yt_dlp/extractor/abc.py
+++ b/yt_dlp/extractor/abc.py
@@ -213,7 +213,7 @@ class ABCIViewIE(InfoExtractor):
'hdnea': token,
})
- for sd in ('720', 'sd', 'sd-low'):
+ for sd in ('1080', '720', 'sd', 'sd-low'):
sd_url = try_get(
stream, lambda x: x['streams']['hls'][sd], compat_str)
if not sd_url:
diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py
index b664145a1..42223dab7 100644
--- a/yt_dlp/extractor/bandcamp.py
+++ b/yt_dlp/extractor/bandcamp.py
@@ -212,7 +212,7 @@ class BandcampIE(InfoExtractor):
class BandcampAlbumIE(BandcampIE):
IE_NAME = 'Bandcamp:album'
- _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?!/music)(?:/album/(?P<id>[^/?#&]+))?'
+ _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com/album/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
@@ -258,14 +258,6 @@ class BandcampAlbumIE(BandcampIE):
},
'playlist_mincount': 9,
}, {
- 'url': 'http://dotscale.bandcamp.com',
- 'info_dict': {
- 'title': 'Loom',
- 'id': 'dotscale',
- 'uploader_id': 'dotscale',
- },
- 'playlist_mincount': 7,
- }, {
# with escaped quote in title
'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
'info_dict': {
@@ -391,41 +383,63 @@ class BandcampWeeklyIE(BandcampIE):
}
-class BandcampMusicIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<id>[^/]+)\.bandcamp\.com/music'
+class BandcampUserIE(InfoExtractor):
+ IE_NAME = 'Bandcamp:user'
+ _VALID_URL = r'https?://(?!www\.)(?P<id>[^.]+)\.bandcamp\.com(?:/music)?/?(?:[#?]|$)'
+
_TESTS = [{
+ # Type 1 Bandcamp user page.
+ 'url': 'https://adrianvonziegler.bandcamp.com',
+ 'info_dict': {
+ 'id': 'adrianvonziegler',
+ 'title': 'Discography of adrianvonziegler',
+ },
+ 'playlist_mincount': 23,
+ }, {
+ # Bandcamp user page with only one album
+ 'url': 'http://dotscale.bandcamp.com',
+ 'info_dict': {
+ 'id': 'dotscale',
+ 'title': 'Discography of dotscale'
+ },
+ 'playlist_count': 1,
+ }, {
+ # Type 2 Bandcamp user page.
+ 'url': 'https://nightcallofficial.bandcamp.com',
+ 'info_dict': {
+ 'id': 'nightcallofficial',
+ 'title': 'Discography of nightcallofficial',
+ },
+ 'playlist_count': 4,
+ }, {
'url': 'https://steviasphere.bandcamp.com/music',
'playlist_mincount': 47,
'info_dict': {
'id': 'steviasphere',
+ 'title': 'Discography of steviasphere',
},
}, {
'url': 'https://coldworldofficial.bandcamp.com/music',
'playlist_mincount': 10,
'info_dict': {
'id': 'coldworldofficial',
+ 'title': 'Discography of coldworldofficial',
},
}, {
'url': 'https://nuclearwarnowproductions.bandcamp.com/music',
'playlist_mincount': 399,
'info_dict': {
'id': 'nuclearwarnowproductions',
+ 'title': 'Discography of nuclearwarnowproductions',
},
- }
- ]
-
- _TYPE_IE_DICT = {
- 'album': BandcampAlbumIE.ie_key(),
- 'track': BandcampIE.ie_key()
- }
+ }]
def _real_extract(self, url):
- id = self._match_id(url)
- webpage = self._download_webpage(url, id)
- items = re.findall(r'href\=\"\/(?P<path>(?P<type>album|track)+/[^\"]+)', webpage)
- entries = [
- self.url_result(
- f'https://{id}.bandcamp.com/{item[0]}',
- ie=self._TYPE_IE_DICT[item[1]])
- for item in items]
- return self.playlist_result(entries, id)
+ uploader = self._match_id(url)
+ webpage = self._download_webpage(url, uploader)
+
+ discography_data = (re.findall(r'<li data-item-id=["\'][^>]+>\s*<a href=["\']([^"\']+)', webpage)
+ or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
+
+ return self.playlist_from_matches(
+ discography_data, uploader, f'Discography of {uploader}', getter=lambda x: urljoin(url, x))
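
The matched hrefs are root-relative, so the getter resolves each one against the page URL before playlist_from_matches wraps it in a url_result. For example (illustrative album path):

```python
from yt_dlp.utils import urljoin

urljoin('https://dotscale.bandcamp.com', '/album/loom')
# -> 'https://dotscale.bandcamp.com/album/loom'
```
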
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
index 85ab478a6..199a3f8e2 100644
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -1171,9 +1171,9 @@ class BBCIE(BBCCoUkIE):
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
- initial_data = self._parse_json(self._search_regex(
- r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
- 'preload state', default='{}'), playlist_id, fatal=False)
+ initial_data = self._parse_json(self._parse_json(self._search_regex(
+ r'window\.__INITIAL_DATA__\s*=\s*("{.+?}");', webpage,
+ 'preload state', default='"{}"'), playlist_id, fatal=False), playlist_id, fatal=False)
if initial_data:
def parse_media(media):
if not media:
@@ -1214,7 +1214,7 @@ class BBCIE(BBCCoUkIE):
if name == 'media-experience':
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
elif name == 'article':
- for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
+ for block in (try_get(resp, lambda x: x['data']['content']['model']['blocks'], list) or []):
if block.get('type') != 'media':
continue
parse_media(block.get('model'))
diff --git a/yt_dlp/extractor/biqle.py b/yt_dlp/extractor/biqle.py
index 17ebbb257..2b57bade3 100644
--- a/yt_dlp/extractor/biqle.py
+++ b/yt_dlp/extractor/biqle.py
@@ -3,27 +3,28 @@ from __future__ import unicode_literals
from .common import InfoExtractor
from .vk import VKIE
-from ..compat import (
- compat_b64decode,
- compat_urllib_parse_unquote,
+from ..compat import compat_b64decode
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ traverse_obj,
+ unified_timestamp,
)
-from ..utils import int_or_none
class BIQLEIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
_TESTS = [{
- # Youtube embed
- 'url': 'https://biqle.ru/watch/-115995369_456239081',
- 'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
+ 'url': 'https://biqle.ru/watch/-2000421746_85421746',
+ 'md5': 'ae6ef4f04d19ac84e4658046d02c151c',
'info_dict': {
- 'id': '8v4f-avW-VI',
+ 'id': '-2000421746_85421746',
'ext': 'mp4',
- 'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
- 'description': 'Passe-Partout',
- 'uploader_id': 'mrsimpsonstef3',
- 'uploader': 'Phanolito',
- 'upload_date': '20120822',
+ 'title': 'Forsaken By Hope Studio Clip',
+ 'description': 'Forsaken By Hope Studio Clip — Смотреть онлайн',
+ 'upload_date': '19700101',
+ 'thumbnail': r're:https://[^/]+/impf/7vN3ACwSTgChP96OdOfzFjUCzFR6ZglDQgWsIw/KPaACiVJJxM\.jpg\?size=800x450&quality=96&keep_aspect_ratio=1&background=000000&sign=b48ea459c4d33dbcba5e26d63574b1cb&type=video_thumb',
+ 'timestamp': 0,
},
}, {
'url': 'http://biqle.org/watch/-44781847_168547604',
@@ -32,53 +33,62 @@ class BIQLEIE(InfoExtractor):
'id': '-44781847_168547604',
'ext': 'mp4',
'title': 'Ребенок в шоке от автоматической мойки',
+ 'description': 'Ребенок в шоке от автоматической мойки — Смотреть онлайн',
'timestamp': 1396633454,
- 'uploader': 'Dmitry Kotov',
'upload_date': '20140404',
- 'uploader_id': '47850140',
+ 'thumbnail': r're:https://[^/]+/c535507/u190034692/video/l_b84df002\.jpg',
},
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- embed_url = self._proto_relative_url(self._search_regex(
- r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
- webpage, 'embed url'))
+
+ title = self._html_search_meta('name', webpage, 'Title', fatal=False)
+ timestamp = unified_timestamp(self._html_search_meta('uploadDate', webpage, 'Upload Date', default=None))
+ description = self._html_search_meta('description', webpage, 'Description', default=None)
+
+ global_embed_url = self._search_regex(
+ r'<script[^<]+?window.globEmbedUrl\s*=\s*\'((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^\']+)\'',
+ webpage, 'global Embed url')
+ hash = self._search_regex(
+ r'<script id="data-embed-video[^<]+?hash: "([^"]+)"[^<]*</script>', webpage, 'Hash')
+
+ embed_url = global_embed_url + hash
+
if VKIE.suitable(embed_url):
return self.url_result(embed_url, VKIE.ie_key(), video_id)
embed_page = self._download_webpage(
- embed_url, video_id, headers={'Referer': url})
- video_ext = self._get_cookies(embed_url).get('video_ext')
- if video_ext:
- video_ext = compat_urllib_parse_unquote(video_ext.value)
- if not video_ext:
- video_ext = compat_b64decode(self._search_regex(
- r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
- embed_page, 'video_ext')).decode()
- video_id, sig, _, access_token = video_ext.split(':')
+ embed_url, video_id, 'Downloading embed webpage', headers={'Referer': url})
+
+ glob_params = self._parse_json(self._search_regex(
+ r'<script id="globParams">[^<]*window.globParams = ([^;]+);[^<]+</script>',
+ embed_page, 'Global Parameters'), video_id, transform_source=js_to_json)
+ host_name = compat_b64decode(glob_params['server'][::-1]).decode()
+
item = self._download_json(
- 'https://api.vk.com/method/video.get', video_id,
- headers={'User-Agent': 'okhttp/3.4.1'}, query={
- 'access_token': access_token,
- 'sig': sig,
- 'v': 5.44,
+ f'https://{host_name}/method/video.get/{video_id}', video_id,
+ headers={'Referer': url}, query={
+ 'token': glob_params['video']['access_token'],
'videos': video_id,
+ 'ckey': glob_params['c_key'],
+ 'credentials': glob_params['video']['credentials'],
})['response']['items'][0]
- title = item['title']
formats = []
for f_id, f_url in item.get('files', {}).items():
if f_id == 'external':
return self.url_result(f_url)
ext, height = f_id.split('_')
- formats.append({
- 'format_id': height + 'p',
- 'url': f_url,
- 'height': int_or_none(height),
- 'ext': ext,
- })
+ height_extra_key = traverse_obj(glob_params, ('video', 'partial', 'quality', height))
+ if height_extra_key:
+ formats.append({
+ 'format_id': f'{height}p',
+ 'url': f'https://{host_name}/{f_url[8:]}&videos={video_id}&extra_key={height_extra_key}',
+ 'height': int_or_none(height),
+ 'ext': ext,
+ })
self._sort_formats(formats)
thumbnails = []
@@ -96,10 +106,9 @@ class BIQLEIE(InfoExtractor):
'title': title,
'formats': formats,
'comment_count': int_or_none(item.get('comments')),
- 'description': item.get('description'),
+ 'description': description,
'duration': int_or_none(item.get('duration')),
'thumbnails': thumbnails,
- 'timestamp': int_or_none(item.get('date')),
- 'uploader': item.get('owner_id'),
+ 'timestamp': timestamp,
'view_count': int_or_none(item.get('views')),
}
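
The embed page obfuscates the API host as a reversed Base64 string, so recovering it is a slice-reversal followed by b64decode. A sketch with a made-up host value:

```python
import base64

obfuscated = base64.b64encode(b'api.example-host.com').decode()[::-1]  # as shipped in globParams['server']
host_name = base64.b64decode(obfuscated[::-1]).decode()                # -> 'api.example-host.com'
```
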
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 37c8be5f6..04d4c0733 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -75,6 +75,7 @@ from ..utils import (
str_to_int,
strip_or_none,
traverse_obj,
+ try_get,
unescapeHTML,
UnsupportedError,
unified_strdate,
@@ -2878,7 +2879,8 @@ class InfoExtractor(object):
segment_duration = None
if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
- representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
+ representation_ms_info['total_number'] = int(math.ceil(
+ float_or_none(period_duration, segment_duration, default=0)))
representation_ms_info['fragments'] = [{
media_location_key: media_template % {
'Number': segment_number,
@@ -2969,6 +2971,10 @@ class InfoExtractor(object):
f['url'] = initialization_url
f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
+ if not period_duration:
+ period_duration = try_get(
+ representation_ms_info,
+ lambda r: sum(frag['duration'] for frag in r['fragments']), float)
else:
# Assuming direct URL to unfragmented media.
f['url'] = base_url
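
float_or_none(value, scale, default=...) computes float(value) / scale and falls back to the default when value is None, so the patched line is a None-safe spelling of the old float(period_duration) / segment_duration. A quick illustration:

```python
from yt_dlp.utils import float_or_none

float_or_none('120.0', 4.0)          # -> 30.0, i.e. float('120.0') / 4.0
float_or_none(None, 4.0, default=0)  # -> 0 instead of a TypeError
```
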
diff --git a/yt_dlp/extractor/cspan.py b/yt_dlp/extractor/cspan.py
index c717aec3a..d29b58ba6 100644
--- a/yt_dlp/extractor/cspan.py
+++ b/yt_dlp/extractor/cspan.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
import re
from .common import InfoExtractor
+from ..compat import compat_HTMLParseError
from ..utils import (
determine_ext,
ExtractorError,
@@ -11,9 +12,11 @@ from ..utils import (
get_element_by_attribute,
get_element_by_class,
int_or_none,
+ join_nonempty,
js_to_json,
merge_dicts,
parse_iso8601,
+ parse_qs,
smuggle_url,
str_to_int,
unescapeHTML,
@@ -126,8 +129,12 @@ class CSpanIE(InfoExtractor):
ext = 'vtt'
subtitle['ext'] = ext
ld_info = self._search_json_ld(webpage, video_id, default={})
- title = get_element_by_class('video-page-title', webpage) or \
- self._og_search_title(webpage)
+ try:
+ title = get_element_by_class('video-page-title', webpage)
+ except compat_HTMLParseError:
+ title = None
+ if title is None:
+ title = self._og_search_title(webpage)
description = get_element_by_attribute('itemprop', 'description', webpage) or \
self._html_search_meta(['og:description', 'description'], webpage)
return merge_dicts(info, ld_info, {
@@ -242,3 +249,42 @@ class CSpanIE(InfoExtractor):
'title': title,
'id': 'c' + video_id if video_type == 'clip' else video_id,
}
+
+
+class CSpanCongressIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?c-span\.org/congress/'
+ _TESTS = [{
+ 'url': 'https://www.c-span.org/congress/?chamber=house&date=2017-12-13&t=1513208380',
+ 'info_dict': {
+ 'id': 'house_2017-12-13',
+ 'title': 'Congressional Chronicle - Members of Congress, Hearings and More',
+ 'description': 'md5:54c264b7a8f219937987610243305a84',
+ 'thumbnail': r're:https://ximage.c-spanvideo.org/.+',
+ 'ext': 'mp4'
+ }
+ }]
+
+ def _real_extract(self, url):
+ query = parse_qs(url)
+ video_date = query.get('date', [None])[0]
+ video_id = join_nonempty(query.get('chamber', ['senate'])[0], video_date, delim='_')
+ webpage = self._download_webpage(url, video_id)
+ if not video_date:
+ jwp_date = re.search(r'jwsetup.clipprogdate = \'(?P<date>\d{4}-\d{2}-\d{2})\';', webpage)
+ if jwp_date:
+ video_id = f'{video_id}_{jwp_date.group("date")}'
+ jwplayer_data = self._parse_json(
+ self._search_regex(r'jwsetup\s*=\s*({(?:.|\n)[^;]+});', webpage, 'player config'),
+ video_id, transform_source=js_to_json)
+
+ title = (self._og_search_title(webpage, default=None)
+ or self._html_search_regex(r'(?s)<title>(.*?)</title>', webpage, 'video title'))
+ description = (self._og_search_description(webpage, default=None)
+ or self._html_search_meta('description', webpage, 'description', default=None))
+
+ return {
+ **self._parse_jwplayer_data(jwplayer_data, video_id, False),
+ 'title': re.sub(r'\s+', ' ', title.split('|')[0]).strip(),
+ 'description': description,
+ 'http_headers': {'Referer': 'https://www.c-span.org/'},
+ }
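
join_nonempty drops falsy parts before joining, which is how the video ID stays well-formed when the query string carries no date. For example:

```python
from yt_dlp.utils import join_nonempty, parse_qs

parse_qs('https://www.c-span.org/congress/?chamber=house&date=2017-12-13')
# -> {'chamber': ['house'], 'date': ['2017-12-13']}
join_nonempty('house', '2017-12-13', delim='_')  # -> 'house_2017-12-13'
join_nonempty('senate', None, delim='_')         # -> 'senate'
```
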
diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py
index 3ae3a8d3d..2559657ad 100644
--- a/yt_dlp/extractor/dropbox.py
+++ b/yt_dlp/extractor/dropbox.py
@@ -56,8 +56,8 @@ class DropboxIE(InfoExtractor):
else:
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
- json_string = self._html_search_regex(r'InitReact\.mountComponent.+ "props":(.+), "elem_id"', webpage, 'Info JSON')
- info_json = self._parse_json(json_string, video_id)
+ json_string = self._html_search_regex(r'InitReact\.mountComponent\(.*?,\s*(\{.+\})\s*?\)', webpage, 'Info JSON')
+ info_json = self._parse_json(json_string, video_id).get('props')
transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index c3f3eb974..15bc74915 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -118,7 +118,7 @@ from .bandcamp import (
BandcampIE,
BandcampAlbumIE,
BandcampWeeklyIE,
- BandcampMusicIE,
+ BandcampUserIE,
)
from .bannedvideo import BannedVideoIE
from .bbc import (
@@ -316,7 +316,7 @@ from .crunchyroll import (
CrunchyrollBetaIE,
CrunchyrollBetaShowIE,
)
-from .cspan import CSpanIE
+from .cspan import CSpanIE, CSpanCongressIE
from .ctsnews import CtsNewsIE
from .ctv import CTVIE
from .ctvnews import CTVNewsIE
@@ -1162,6 +1162,7 @@ from .periscope import (
from .philharmoniedeparis import PhilharmonieDeParisIE
from .phoenix import PhoenixIE
from .photobucket import PhotobucketIE
+from .piapro import PiaproIE
from .picarto import (
PicartoIE,
PicartoVodIE,
diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py
index 62050a8e4..4bf68559a 100644
--- a/yt_dlp/extractor/peekvids.py
+++ b/yt_dlp/extractor/peekvids.py
@@ -2,7 +2,6 @@
from __future__ import unicode_literals
from .common import InfoExtractor
-from ..utils import remove_end
class PeekVidsIE(InfoExtractor):
@@ -13,11 +12,17 @@ class PeekVidsIE(InfoExtractor):
'''
_TESTS = [{
'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
- 'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
+ 'md5': 'a00940646c428e232407e3e62f0e8ef5',
'info_dict': {
'id': 'BSyLMbN0YCd',
+ 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp, SEXYhub',
'ext': 'mp4',
- 'title': 'Dane Jones - Cute redhead with perfect tits with Mini Vamp',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Watch Dane Jones - Cute redhead with perfect tits with Mini Vamp (7 min), uploaded by SEXYhub.com',
+ 'timestamp': 1642579329,
+ 'upload_date': '20220119',
+ 'duration': 416,
+ 'view_count': int,
'age_limit': 18,
},
}]
@@ -40,46 +45,37 @@ class PeekVidsIE(InfoExtractor):
formats = [{'url': url} for url in srcs.values()]
self._sort_formats(formats)
- title = remove_end(self._html_search_regex(
- (r'<h1.*?>\s*(.+?)\s*</h1>', r'<title>\s*(.+?)\s*</title>'),
- webpage, 'video title', default=None), ' - PeekVids')
-
- return {
+ info = self._search_json_ld(webpage, video_id, expected_type='VideoObject')
+ info.update({
'id': video_id,
- 'title': title,
'age_limit': 18,
'formats': formats,
- }
+ })
+ return info
class PlayVidsIE(PeekVidsIE):
_VALID_URL = r'https?://(?:www\.)?playvids\.com/(?:embed/|[^/]{2}/)?(?P<id>[^/?#]*)'
_TESTS = [{
'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
- 'md5': '2f12e50213dd65f142175da633c4564c',
+ 'md5': 'cd7dfd8a2e815a45402369c76e3c1825',
'info_dict': {
'id': 'U3pBrYhsjXM',
+ 'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp, SEXYhub',
'ext': 'mp4',
- 'title': 'Dane Jones - Cute redhead with perfect tits with Mini Vamp',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Watch Dane Jones - Cute redhead with perfect tits with Mini Vamp video in HD, uploaded by SEXYhub.com',
+ 'timestamp': 1640435839,
+ 'upload_date': '20211225',
+ 'duration': 416,
+ 'view_count': int,
'age_limit': 18,
},
}, {
'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
- 'md5': '2f12e50213dd65f142175da633c4564c',
- 'info_dict': {
- 'id': 'U3pBrYhsjXM',
- 'ext': 'mp4',
- 'title': 'Dane Jones - Cute redhead with perfect tits with Mini Vamp',
- 'age_limit': 18,
- },
+ 'only_matching': True,
}, {
'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
- 'md5': '2f12e50213dd65f142175da633c4564c',
- 'info_dict': {
- 'id': 'U3pBrYhsjXM',
- 'ext': 'mp4',
- 'title': 'U3pBrYhsjXM',
- 'age_limit': 18,
- },
+ 'only_matching': True,
}]
_DOMAIN = 'www.playvids.com'
diff --git a/yt_dlp/extractor/piapro.py b/yt_dlp/extractor/piapro.py
new file mode 100644
index 000000000..497e1edbc
--- /dev/null
+++ b/yt_dlp/extractor/piapro.py
@@ -0,0 +1,100 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ ExtractorError,
+ parse_duration,
+ parse_filesize,
+ str_to_int,
+ unified_timestamp,
+ urlencode_postdata,
+)
+
+
+class PiaproIE(InfoExtractor):
+ _NETRC_MACHINE = 'piapro'
+ _VALID_URL = r'https?://piapro\.jp/t/(?P<id>\w+)/?'
+ _TESTS = [{
+ 'url': 'https://piapro.jp/t/NXYR',
+ 'md5': 'a9d52f27d13bafab7ee34116a7dcfa77',
+ 'info_dict': {
+ 'id': 'NXYR',
+ 'ext': 'mp3',
+ 'uploader': 'wowaka',
+ 'uploader_id': 'wowaka',
+ 'title': '裏表ラバーズ',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }]
+
+ def _real_initialize(self):
+ self._login_status = self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if not username:
+ return False
+ login_ok = True
+ login_form_strs = {
+ '_username': username,
+ '_password': password,
+ '_remember_me': 'on',
+ 'login': 'ログイン'
+ }
+ self._request_webpage('https://piapro.jp/login/', None)
+ urlh = self._request_webpage(
+ 'https://piapro.jp/login/exe', None,
+ note='Logging in', errnote='Unable to log in',
+ data=urlencode_postdata(login_form_strs))
+ if urlh is False:
+ login_ok = False
+ else:
+ parts = compat_urlparse.urlparse(urlh.geturl())
+ if parts.path != '/':
+ login_ok = False
+ if not login_ok:
+ self.report_warning(
+ 'unable to log in: bad username or password')
+ return login_ok
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ category_id = self._search_regex(r'categoryId=(.+)">', webpage, 'category ID')
+ if category_id not in ('1', '2', '21', '22', '23', '24', '25'):
+ raise ExtractorError('The URL does not contain audio.', expected=True)
+
+ str_duration, str_filesize = self._search_regex(
+ r'サイズ:</span>(.+?)/\(([0-9,]+?[KMG]?B)\)', webpage, 'duration and size',
+ group=(1, 2), default=(None, None))
+ str_viewcount = self._search_regex(r'閲覧数:</span>([0-9,]+)\s+', webpage, 'view count', fatal=False)
+
+ uploader_id, uploader = self._search_regex(
+ r'<a\s+class="cd_user-name"\s+href="/(.*)">([^<]+)さん<', webpage, 'uploader',
+ group=(1, 2), default=(None, None))
+ content_id = self._search_regex(r'contentId\:\'(.+)\'', webpage, 'content ID')
+ create_date = self._search_regex(r'createDate\:\'(.+)\'', webpage, 'timestamp')
+
+ player_webpage = self._download_webpage(
+ f'https://piapro.jp/html5_player_popup/?id={content_id}&cdate={create_date}',
+ video_id, note='Downloading player webpage')
+
+ return {
+ 'id': video_id,
+ 'title': self._html_search_regex(r'<h1\s+class="cd_works-title">(.+?)</h1>', webpage, 'title', fatal=False),
+ 'description': self._html_search_regex(r'<p\s+class="cd_dtl_cap">(.+?)</p>\s*<div', webpage, 'description', fatal=False),
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'timestamp': unified_timestamp(create_date, False),
+ 'duration': parse_duration(str_duration),
+ 'view_count': str_to_int(str_viewcount),
+ 'thumbnail': self._html_search_meta('twitter:image', webpage),
+
+ 'filesize_approx': parse_filesize(str_filesize.replace(',', '')),
+ 'url': self._search_regex(r'mp3:\s*\'(.*?)\'\}', player_webpage, 'url'),
+ 'ext': 'mp3',
+ 'vcodec': 'none',
+ }
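
The scraped strings are normalised by yt_dlp.utils helpers; with inputs shaped like the page's values (illustrative):

```python
from yt_dlp.utils import parse_duration, parse_filesize, str_to_int

parse_duration('3:14')    # -> 194 (seconds)
parse_filesize('4644KB')  # -> 4644000, after the ',' has been stripped
str_to_int('12,345')      # -> 12345
```
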
diff --git a/yt_dlp/extractor/rtvs.py b/yt_dlp/extractor/rtvs.py
index 6573b260d..3ea0f1883 100644
--- a/yt_dlp/extractor/rtvs.py
+++ b/yt_dlp/extractor/rtvs.py
@@ -1,11 +1,19 @@
# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ traverse_obj,
+ unified_timestamp,
+)
+
class RTVSIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv/\d+/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv(?:/\d+)?/(?P<id>\d+)/?(?:[#?]|$)'
_TESTS = [{
# radio archive
'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
@@ -13,23 +21,37 @@ class RTVSIE(InfoExtractor):
'info_dict': {
'id': '414872',
'ext': 'mp3',
- 'title': 'Ostrov pokladov 1 časť.mp3'
- },
- 'params': {
- 'skip_download': True,
+ 'title': 'Ostrov pokladov 1 časť.mp3',
+ 'duration': 2854,
+ 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0000/b1R8.rtvs.jpg',
+ 'display_id': '135331',
}
}, {
# tv archive
'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
- 'md5': '85e2c55cf988403b70cac24f5c086dc6',
'info_dict': {
'id': '63118',
'ext': 'mp4',
'title': 'Amaro Džives - Náš deň',
- 'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.'
- },
- 'params': {
- 'skip_download': True,
+ 'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.',
+ 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0031/L7Qm.amaro_dzives_png.jpg',
+ 'timestamp': 1428555900,
+ 'upload_date': '20150409',
+ 'duration': 4986,
+ }
+ }, {
+ # tv archive
+ 'url': 'https://www.rtvs.sk/televizia/archiv/18083?utm_source=web&utm_medium=rozcestnik&utm_campaign=Robin',
+ 'info_dict': {
+ 'id': '18083',
+ 'ext': 'mp4',
+ 'title': 'Robin',
+ 'description': 'md5:2f70505a7b8364491003d65ff7a0940a',
+ 'timestamp': 1636652760,
+ 'display_id': '307655',
+ 'duration': 831,
+ 'upload_date': '20211111',
+ 'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0916/robin.jpg',
}
}]
@@ -37,11 +59,31 @@ class RTVSIE(InfoExtractor):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
+ iframe_id = self._search_regex(
+ r'<iframe[^>]+id\s*=\s*"player_[^_]+_([0-9]+)"', webpage, 'Iframe ID')
+ iframe_url = self._search_regex(
+ fr'<iframe[^>]+id\s*=\s*"player_[^_]+_{re.escape(iframe_id)}"[^>]+src\s*=\s*"([^"]+)"', webpage, 'Iframe URL')
+
+ webpage = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
+ json_url = self._search_regex(r'var\s+url\s*=\s*"([^"]+)"\s*\+\s*ruurl', webpage, 'json URL')
+ data = self._download_json(f'https:{json_url}b=mozilla&p=win&v=97&f=0&d=1', video_id)
- playlist_url = self._search_regex(
- r'playlist["\']?\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'playlist url', group='url')
+ if data.get('clip'):
+ data['playlist'] = [data['clip']]
- data = self._download_json(
- playlist_url, video_id, 'Downloading playlist')[0]
- return self._parse_jwplayer_data(data, video_id=video_id)
+ if traverse_obj(data, ('playlist', 0, 'sources', 0, 'type')) == 'audio/mp3':
+ formats = [{'url': traverse_obj(data, ('playlist', 0, 'sources', 0, 'src'))}]
+ else:
+ formats = self._extract_m3u8_formats(traverse_obj(data, ('playlist', 0, 'sources', 0, 'src')), video_id)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': iframe_id,
+ 'title': traverse_obj(data, ('playlist', 0, 'title')),
+ 'description': traverse_obj(data, ('playlist', 0, 'description')),
+ 'duration': parse_duration(traverse_obj(data, ('playlist', 0, 'length'))),
+ 'thumbnail': traverse_obj(data, ('playlist', 0, 'image')),
+ 'timestamp': unified_timestamp(traverse_obj(data, ('playlist', 0, 'datetime_create'))),
+ 'formats': formats
+ }
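
traverse_obj walks the nested playlist JSON along the given path and yields None when any step is missing, which keeps the branching above free of try/except. For example:

```python
from yt_dlp.utils import traverse_obj

data = {'playlist': [{'sources': [{'type': 'audio/mp3', 'src': 'https://example.invalid/a.mp3'}]}]}
traverse_obj(data, ('playlist', 0, 'sources', 0, 'type'))  # -> 'audio/mp3'
traverse_obj(data, ('playlist', 0, 'image'))               # -> None, no KeyError
```
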
diff --git a/yt_dlp/extractor/twitcasting.py b/yt_dlp/extractor/twitcasting.py
index 98ef330cb..08222df95 100644
--- a/yt_dlp/extractor/twitcasting.py
+++ b/yt_dlp/extractor/twitcasting.py
@@ -221,6 +221,17 @@ class TwitCastingLiveIE(InfoExtractor):
r'tw-sound-flag-open-link" data-id="(\d+)" style=',),
webpage, 'current live ID', default=None)
if not current_live:
+ # fetch unfiltered /show to find running livestreams; we can't get ID of the password-protected livestream above
+ webpage = self._download_webpage(
+ f'https://twitcasting.tv/{uploader_id}/show/', uploader_id,
+ note='Downloading live history')
+ is_live = self._search_regex(r'(?s)(<span\s*class="tw-movie-thumbnail-badge"\s*data-status="live">\s*LIVE)', webpage, 'is live?', default=None)
+ if is_live:
+ # use the first entry; a currently running live is always listed first
+ current_live = self._search_regex(
+ r'(?s)<a\s+class="tw-movie-thumbnail"\s*href="/[^/]+/movie/(?P<video_id>\d+)"\s*>.+?</a>',
+ webpage, 'current live ID 2', default=None, group='video_id')
+ if not current_live:
raise ExtractorError('The user is not currently live')
return self.url_result('https://twitcasting.tv/%s/movie/%s' % (uploader_id, current_live))
diff --git a/yt_dlp/extractor/washingtonpost.py b/yt_dlp/extractor/washingtonpost.py
index 8afb1af83..9d6ae2870 100644
--- a/yt_dlp/extractor/washingtonpost.py
+++ b/yt_dlp/extractor/washingtonpost.py
@@ -5,6 +5,8 @@ import re
from .common import InfoExtractor
+from ..utils import traverse_obj
+
class WashingtonPostIE(InfoExtractor):
IE_NAME = 'washingtonpost'
@@ -50,7 +52,7 @@ class WashingtonPostArticleIE(InfoExtractor):
'title': 'Sinkhole of bureaucracy',
},
'playlist': [{
- 'md5': 'b9be794ceb56c7267d410a13f99d801a',
+ 'md5': '7ccf53ea8cbb77de5f570242b3b21a59',
'info_dict': {
'id': 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
@@ -59,9 +61,10 @@ class WashingtonPostArticleIE(InfoExtractor):
'description': 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.',
'timestamp': 1395440416,
'upload_date': '20140321',
+ 'thumbnail': r're:https://[^\.]+.cloudfront\.net/PAPERMINESplash\.jpg',
},
}, {
- 'md5': '1fff6a689d8770966df78c8cb6c8c17c',
+ 'md5': '7ccf53ea8cbb77de5f570242b3b21a59',
'info_dict': {
'id': '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f',
'ext': 'mp4',
@@ -70,6 +73,7 @@ class WashingtonPostArticleIE(InfoExtractor):
'duration': 2220,
'timestamp': 1395441819,
'upload_date': '20140321',
+ 'thumbnail': r're:https://[^\.]+.cloudfront\.net/BoyersSplash\.jpeg',
},
}],
}, {
@@ -88,7 +92,11 @@ class WashingtonPostArticleIE(InfoExtractor):
'timestamp': 1419972442,
'title': 'Why black boxes don’t transmit data in real time',
}
- }]
+ }],
+ 'skip': "Doesn't have a video anymore",
+ }, {
+ 'url': 'https://www.washingtonpost.com/nation/2021/08/05/dixie-river-fire-california-climate/',
+ 'only_matching': True,
}]
@classmethod
@@ -106,6 +114,13 @@ class WashingtonPostArticleIE(InfoExtractor):
<div\s+class="posttv-video-embed[^>]*?data-uuid=|
data-video-uuid=
)"([^"]+)"''', webpage)
+
+ if not uuids:
+ json_data = self._search_nextjs_data(webpage, page_id)
+ for content_element in traverse_obj(json_data, ('props', 'pageProps', 'globalContent', 'content_elements')):
+ if content_element.get('type') == 'video':
+ uuids.append(content_element.get('_id'))
+
entries = [self.url_result('washingtonpost:%s' % uuid, 'WashingtonPost', uuid) for uuid in uuids]
return {
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index d5f9b6962..c03637f5f 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -225,28 +225,28 @@ INNERTUBE_CLIENTS = {
def build_innertube_clients():
- third_party = {
+ THIRD_PARTY = {
'embedUrl': 'https://google.com', # Can be any valid URL
}
- base_clients = ('android', 'web', 'ios', 'mweb')
- priority = qualities(base_clients[::-1])
+ BASE_CLIENTS = ('android', 'web', 'ios', 'mweb')
+ priority = qualities(BASE_CLIENTS[::-1])
for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8')
ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com')
ytcfg.setdefault('REQUIRE_JS_PLAYER', True)
ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en')
- ytcfg['priority'] = 10 * priority(client.split('_', 1)[0])
- if client in base_clients:
- INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
+ base_client, *variant = client.split('_')
+ ytcfg['priority'] = 10 * priority(base_client)
+
+ if variant == ['embedded']:
+ ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY
+ INNERTUBE_CLIENTS[f'{base_client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg)
agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED'
- agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
agegate_ytcfg['priority'] -= 1
- elif client.endswith('_embedded'):
- ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party
ytcfg['priority'] -= 2
- else:
+ elif variant:
ytcfg['priority'] -= 3
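
The starred unpacking splits each client name into its base and a possibly-empty variant list, which is what the new branches key on:

```python
base_client, *variant = 'web_embedded'.split('_')
# base_client == 'web', variant == ['embedded']

base_client, *variant = 'android'.split('_')
# base_client == 'android', variant == []
```
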
@@ -2413,7 +2413,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_n_function_name(self, jscode):
nfunc, idx = self._search_regex(
- r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
+ r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
if not idx:
return nfunc
@@ -2936,6 +2936,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
])
streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), default=[])
+ approx_duration = max(traverse_obj(streaming_formats, (..., 'approxDurationMs'), expected_type=float_or_none) or [0]) or None
for fmt in streaming_formats:
if fmt.get('targetDurationSec') or fmt.get('drmFamilies'):
@@ -2995,12 +2996,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
itags[itag] = 'https'
stream_ids.append(stream_id)
- tbr = float_or_none(
- fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
+ tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000)
language_preference = (
10 if audio_track.get('audioIsDefault') and 10
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
else -1)
+ # Some formats may have much smaller duration than others (possibly damaged during encoding)
+ # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
+ is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) < approx_duration - 10000)
dct = {
'asr': int_or_none(fmt.get('audioSampleRate')),
'filesize': int_or_none(fmt.get('contentLength')),
@@ -3009,7 +3012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'%s%s' % (audio_track.get('displayName') or '',
' (default)' if language_preference > 0 else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
- throttled and 'THROTTLED', delim=', '),
+ throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
'source_preference': -10 if throttled else -1,
'fps': int_or_none(fmt.get('fps')) or None,
'height': height,
@@ -3020,6 +3023,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'language': join_nonempty(audio_track.get('id', '').split('.')[0],
'desc' if language_preference < -1 else ''),
'language_preference': language_preference,
+ 'preference': -10 if is_damaged else None,
}
mime_mobj = re.match(
r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '')
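
In effect, a format is flagged DAMAGED when its own approxDurationMs falls more than 10 seconds short of the longest duration any format reports. A condensed sketch of that check:

```python
def max_approx_duration(streaming_formats):
    durations = [float(f['approxDurationMs']) for f in streaming_formats
                 if f.get('approxDurationMs')]
    return max(durations, default=None)

def is_damaged(fmt, approx_duration):
    ms = fmt.get('approxDurationMs')  # may be absent; the real code guards via try_get
    return bool(ms and approx_duration and float(ms) < approx_duration - 10000)
```
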
diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py
index f2467c542..d761c9303 100644
--- a/yt_dlp/postprocessor/common.py
+++ b/yt_dlp/postprocessor/common.py
@@ -103,12 +103,14 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
return getattr(self._downloader, '_copy_infodict', dict)(info_dict)
@staticmethod
- def _restrict_to(*, video=True, audio=True, images=True):
+ def _restrict_to(*, video=True, audio=True, images=True, simulated=True):
allowed = {'video': video, 'audio': audio, 'images': images}
def decorator(func):
@functools.wraps(func)
def wrapper(self, info):
+ if not simulated and (self.get_param('simulate') or self.get_param('skip_download')):
+ return [], info
format_type = (
'video' if info.get('vcodec') != 'none'
else 'audio' if info.get('acodec') != 'none'
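
The new keyword lets a postprocessor skip itself entirely under --simulate or --skip-download; FFmpegConcatPP below is the first caller. A usage sketch (hypothetical postprocessor):

```python
from yt_dlp.postprocessor.common import PostProcessor

class ExamplePP(PostProcessor):  # hypothetical postprocessor
    @PostProcessor._restrict_to(images=False, simulated=False)
    def run(self, info):
        # never reached in simulated runs: the wrapper returns ([], info) first
        return [], info
```
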
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 42e9d12a7..d4495b4a2 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -384,12 +384,10 @@ class FFmpegPostProcessor(PostProcessor):
out_flags = list(self.stream_copy_opts(ext=determine_ext(out_file)))
- try:
- self.real_run_ffmpeg(
- [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
- [(out_file, out_flags)])
- finally:
- os.remove(concat_file)
+ self.real_run_ffmpeg(
+ [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])],
+ [(out_file, out_flags)])
+ os.remove(concat_file)
@classmethod
def _concat_spec(cls, in_files, concat_opts=None):
@@ -1147,16 +1145,15 @@ class FFmpegConcatPP(FFmpegPostProcessor):
super().concat_files(in_files, out_file)
return in_files
- @PostProcessor._restrict_to(images=False)
+ @PostProcessor._restrict_to(images=False, simulated=False)
def run(self, info):
entries = info.get('entries') or []
- if (self.get_param('skip_download') or not any(entries)
- or self._only_multi_video and info['_type'] != 'multi_video'):
+ if not any(entries) or (self._only_multi_video and info['_type'] != 'multi_video'):
return [], info
elif any(len(entry) > 1 for entry in traverse_obj(entries, (..., 'requested_downloads')) or []):
raise PostProcessingError('Concatenation is not supported when downloading multiple separate formats')
- in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath'))
+ in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath')) or []
if len(in_files) < len(entries):
raise PostProcessingError('Aborting concatenation because some downloads failed')
diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py
index 435a144e2..22506bc21 100644
--- a/yt_dlp/postprocessor/modify_chapters.py
+++ b/yt_dlp/postprocessor/modify_chapters.py
@@ -68,9 +68,11 @@ class ModifyChaptersPP(FFmpegPostProcessor):
# Renaming should only happen after all files are processed
files_to_remove = []
for in_file, out_file in in_out_files:
+ mtime = os.stat(in_file).st_mtime
uncut_file = prepend_extension(in_file, 'uncut')
os.replace(in_file, uncut_file)
os.replace(out_file, in_file)
+ self.try_utime(in_file, mtime, mtime)
files_to_remove.append(uncut_file)
return files_to_remove, info
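
This is the usual stat-before, utime-after pattern, so the cut file keeps its original modification time across the os.replace shuffle. Stripped of the renaming, it amounts to (hypothetical helper):

```python
import os

def rewrite_preserving_mtime(path, transform):
    """Apply an in-place transform (hypothetical callable) while keeping the file's mtime."""
    mtime = os.stat(path).st_mtime
    transform(path)
    os.utime(path, (mtime, mtime))  # (atime, mtime); yt-dlp's try_utime ignores failures
```
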