diff options
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | yt_dlp/YoutubeDL.py | 95 | ||||
-rw-r--r-- | yt_dlp/__init__.py | 5 | ||||
-rw-r--r-- | yt_dlp/downloader/dash.py | 2 | ||||
-rw-r--r-- | yt_dlp/extractor/extractors.py | 8 | ||||
-rw-r--r-- | yt_dlp/extractor/rtve.py | 88 |
6 files changed, 160 insertions, 40 deletions
@@ -656,6 +656,8 @@ The simplest case is requesting a specific format, for example with `-f 22` you You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file. +You can use `-f -` to interactively provide the format selector *for each video* + You can also use special names to select particular edge case formats: - `best`: Select the best quality format represented by a single file with video and audio. diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 165f2ecc3..d542d22e6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -623,7 +623,7 @@ class YoutubeDL(object): # Creating format selector here allows us to catch syntax errors before the extraction self.format_selector = ( - None if self.params.get('format') is None + self.params.get('format') if self.params.get('format') in (None, '-') else self.params['format'] if callable(self.params['format']) else self.build_format_selector(self.params['format'])) @@ -817,14 +817,15 @@ class YoutubeDL(object): if self.params.get('cookiefile') is not None: self.cookiejar.save(ignore_discard=True, ignore_expires=True) - def trouble(self, message=None, tb=None): + def trouble(self, message=None, tb=None, is_error=True): """Determine action to take when a download problem appears. Depending on if the downloader has been configured to ignore download errors or not, this method may throw an exception or not when errors are found, after printing the message. - tb, if given, is additional traceback information. 
+ @param tb If given, is additional traceback information + @param is_error Whether to raise error according to ignoreerrors """ if message is not None: self.to_stderr(message) @@ -840,6 +841,8 @@ class YoutubeDL(object): tb = ''.join(tb_data) if tb: self.to_stderr(tb) + if not is_error: + return if not self.params.get('ignoreerrors'): if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: exc_info = sys.exc_info()[1].exc_info @@ -899,12 +902,12 @@ class YoutubeDL(object): else: self.to_stderr(f'{self._format_err("DeprecationWarning:", self.Styles.ERROR)} {message}', True) - def report_error(self, message, tb=None): + def report_error(self, message, *args, **kwargs): ''' Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. ''' - self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', tb) + self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs) def write_debug(self, message, only_once=False): '''Log debug message or Print message to stderr''' @@ -2447,20 +2450,21 @@ class YoutubeDL(object): # The pre-processors may have modified the formats formats = info_dict.get('formats', [info_dict]) + list_only = self.params.get('simulate') is None and ( + self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')) + interactive_format_selection = not list_only and self.format_selector == '-' if self.params.get('list_thumbnails'): self.list_thumbnails(info_dict) - if self.params.get('listformats'): - if not info_dict.get('formats') and not info_dict.get('url'): - self.to_screen('%s has no formats' % info_dict['id']) - else: - self.list_formats(info_dict) if self.params.get('listsubtitles'): if 'automatic_captions' in info_dict: self.list_subtitles( info_dict['id'], automatic_captions, 'automatic captions') self.list_subtitles(info_dict['id'], subtitles, 'subtitles') - list_only = 
self.params.get('simulate') is None and ( - self.params.get('list_thumbnails') or self.params.get('listformats') or self.params.get('listsubtitles')) + if self.params.get('listformats') or interactive_format_selection: + if not info_dict.get('formats') and not info_dict.get('url'): + self.to_screen('%s has no formats' % info_dict['id']) + else: + self.list_formats(info_dict) if list_only: # Without this printing, -F --print-json will not work self.__forced_printings(info_dict, self.prepare_filename(info_dict), incomplete=True) @@ -2472,33 +2476,48 @@ class YoutubeDL(object): self.write_debug('Default format spec: %s' % req_format) format_selector = self.build_format_selector(req_format) - # While in format selection we may need to have an access to the original - # format set in order to calculate some metrics or do some processing. - # For now we need to be able to guess whether original formats provided - # by extractor are incomplete or not (i.e. whether extractor provides only - # video-only or audio-only formats) for proper formats selection for - # extractors with such incomplete formats (see - # https://github.com/ytdl-org/youtube-dl/pull/5556). - # Since formats may be filtered during format selection and may not match - # the original formats the results may be incorrect. Thus original formats - # or pre-calculated metrics should be passed to format selection routines - # as well. - # We will pass a context object containing all necessary additional data - # instead of just formats. - # This fixes incorrect format selection issue (see - # https://github.com/ytdl-org/youtube-dl/issues/10083). 
- incomplete_formats = ( - # All formats are video-only or - all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) - # all formats are audio-only - or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) - - ctx = { - 'formats': formats, - 'incomplete_formats': incomplete_formats, - } + while True: + if interactive_format_selection: + req_format = input( + self._format_screen('\nEnter format selector: ', self.Styles.EMPHASIS)) + try: + format_selector = self.build_format_selector(req_format) + except SyntaxError as err: + self.report_error(err, tb=False, is_error=False) + continue + + # While in format selection we may need to have an access to the original + # format set in order to calculate some metrics or do some processing. + # For now we need to be able to guess whether original formats provided + # by extractor are incomplete or not (i.e. whether extractor provides only + # video-only or audio-only formats) for proper formats selection for + # extractors with such incomplete formats (see + # https://github.com/ytdl-org/youtube-dl/pull/5556). + # Since formats may be filtered during format selection and may not match + # the original formats the results may be incorrect. Thus original formats + # or pre-calculated metrics should be passed to format selection routines + # as well. + # We will pass a context object containing all necessary additional data + # instead of just formats. + # This fixes incorrect format selection issue (see + # https://github.com/ytdl-org/youtube-dl/issues/10083). 
+ incomplete_formats = ( + # All formats are video-only or + all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) + # all formats are audio-only + or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats)) + + ctx = { + 'formats': formats, + 'incomplete_formats': incomplete_formats, + } + + formats_to_download = list(format_selector(ctx)) + if interactive_format_selection and not formats_to_download: + self.report_error('Requested format is not available', tb=False, is_error=False) + continue + break - formats_to_download = list(format_selector(ctx)) if not formats_to_download: if not self.params.get('ignore_no_formats_error'): raise ExtractorError('Requested format is not available', expected=True, diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 2a13f61c5..198962aa5 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -133,6 +133,11 @@ def _real_main(argv=None): sys.exit(0) # Conflicting, missing and erroneous options + if opts.format == 'best': + warnings.append('.\n '.join(( + '"-f best" selects the best pre-merged format which is often not the best option', + 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', + 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error('using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: diff --git a/yt_dlp/downloader/dash.py b/yt_dlp/downloader/dash.py index 8dd43f4fa..4c23edd32 100644 --- a/yt_dlp/downloader/dash.py +++ b/yt_dlp/downloader/dash.py @@ -47,7 +47,7 @@ class DashSegmentsFD(FragmentFD): if real_downloader: self.to_screen( '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename())) - info_dict['fragments'] = fragments_to_download +
info_dict['fragments'] = list(fragments_to_download) fd = real_downloader(self.ydl, self.params) return fd.real_download(filename, info_dict) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index ee5ea533f..0b359a253 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1260,7 +1260,13 @@ from .rtl2 import ( from .rtp import RTPIE from .rtrfm import RTRFMIE from .rts import RTSIE -from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE +from .rtve import ( + RTVEALaCartaIE, + RTVEAudioIE, + RTVELiveIE, + RTVEInfantilIE, + RTVETelevisionIE, +) from .rtvnh import RTVNHIE from .rtvs import RTVSIE from .ruhd import RUHDIE diff --git a/yt_dlp/extractor/rtve.py b/yt_dlp/extractor/rtve.py index 0654fb08b..af1bb943d 100644 --- a/yt_dlp/extractor/rtve.py +++ b/yt_dlp/extractor/rtve.py @@ -18,6 +18,7 @@ from ..utils import ( remove_end, remove_start, std_headers, + try_get, ) _bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x)) @@ -178,6 +179,93 @@ class RTVEALaCartaIE(InfoExtractor): for s in subs) +class RTVEAudioIE(RTVEALaCartaIE): + IE_NAME = 'rtve.es:audio' + IE_DESC = 'RTVE audio' + _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)' + + _TESTS = [{ + 'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/', + 'md5': 'ae06d27bff945c4e87a50f89f6ce48ce', + 'info_dict': { + 'id': '5889192', + 'ext': 'mp3', + 'title': 'Códigos informáticos', + 'thumbnail': r're:https?://.+/1598856591583.jpg', + 'duration': 349.440, + 'series': 'A hombros de gigantes', + }, + }, { + 'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/', + 'md5': '072855ab89a9450e0ba314c717fa5ebc', + 'info_dict': { + 'id': '5791165', + 'ext': 'mp3', + 'title': 'Ignatius Farray', + 'thumbnail': r're:https?://.+/1613243011863.jpg', + 'duration': 
3559.559, + 'series': 'En Radio 3' + }, + }, { + 'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/', + 'md5': '0eadab248cc8dd193fa5765712e84d5c', + 'info_dict': { + 'id': '6082623', + 'ext': 'mp3', + 'title': 'Capítulo 26 y último: La muerte de Victor', + 'thumbnail': r're:https?://.+/1632147445707.jpg', + 'duration': 3174.086, + 'series': 'Frankenstein o el moderno Prometeo' + }, + }] + + def _extract_png_formats(self, audio_id): + """ + This function retrieves media related png thumbnail which obfuscate + valuable information about the media. This information is decrypted + via base class _decrypt_url function providing media quality and + media url + """ + png = self._download_webpage( + 'http://www.rtve.es/ztnr/movil/thumbnail/%s/audios/%s.png' % + (self._manager, audio_id), + audio_id, 'Downloading url information', query={'q': 'v2'}) + q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL']) + formats = [] + for quality, audio_url in self._decrypt_url(png): + ext = determine_ext(audio_url) + if ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + audio_url, audio_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + elif ext == 'mpd': + formats.extend(self._extract_mpd_formats( + audio_url, audio_id, 'dash', fatal=False)) + else: + formats.append({ + 'format_id': quality, + 'quality': q(quality), + 'url': audio_url, + }) + self._sort_formats(formats) + return formats + + def _real_extract(self, url): + audio_id = self._match_id(url) + info = self._download_json( + 'https://www.rtve.es/api/audios/%s.json' % audio_id, + audio_id)['page']['items'][0] + + return { + 'id': audio_id, + 'title': info['title'].strip(), + 'thumbnail': info.get('thumbnail'), + 'duration': float_or_none(info.get('duration'), 1000), + 'series': try_get(info, lambda x: x['programInfo']['title']), + 'formats': self._extract_png_formats(audio_id), + } + + class 
RTVEInfantilIE(RTVEALaCartaIE): IE_NAME = 'rtve.es:infantil' IE_DESC = 'RTVE infantil' |