diff options
Diffstat (limited to 'hypervideo_dl/postprocessor')
-rw-r--r-- | hypervideo_dl/postprocessor/__init__.py | 15 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/common.py | 64 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/embedthumbnail.py | 79 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/exec.py | 8 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/execafterdownload.py | 31 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/ffmpeg.py | 521 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/metadatafromtitle.py | 48 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/metadataparser.py | 28 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/modify_chapters.py | 37 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/movefilesafterdownload.py | 7 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/sponskrub.py | 25 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/sponsorblock.py | 29 | ||||
-rw-r--r-- | hypervideo_dl/postprocessor/xattrpp.py | 85 |
13 files changed, 433 insertions, 544 deletions
diff --git a/hypervideo_dl/postprocessor/__init__.py b/hypervideo_dl/postprocessor/__init__.py index e47631e..f168be4 100644 --- a/hypervideo_dl/postprocessor/__init__.py +++ b/hypervideo_dl/postprocessor/__init__.py @@ -1,27 +1,25 @@ # flake8: noqa: F401 -from ..utils import load_plugins - from .common import PostProcessor from .embedthumbnail import EmbedThumbnailPP -from .exec import ExecPP, ExecAfterDownloadPP +from .exec import ExecAfterDownloadPP, ExecPP from .ffmpeg import ( - FFmpegPostProcessor, - FFmpegCopyStreamPP, FFmpegConcatPP, + FFmpegCopyStreamPP, FFmpegEmbedSubtitlePP, FFmpegExtractAudioPP, FFmpegFixupDuplicateMoovPP, FFmpegFixupDurationPP, - FFmpegFixupStretchedPP, - FFmpegFixupTimestampPP, FFmpegFixupM3u8PP, FFmpegFixupM4aPP, + FFmpegFixupStretchedPP, + FFmpegFixupTimestampPP, FFmpegMergerPP, FFmpegMetadataPP, + FFmpegPostProcessor, + FFmpegSplitChaptersPP, FFmpegSubtitlesConvertorPP, FFmpegThumbnailsConvertorPP, - FFmpegSplitChaptersPP, FFmpegVideoConvertorPP, FFmpegVideoRemuxerPP, ) @@ -35,6 +33,7 @@ from .movefilesafterdownload import MoveFilesAfterDownloadPP from .sponskrub import SponSkrubPP from .sponsorblock import SponsorBlockPP from .xattrpp import XAttrMetadataPP +from ..utils import load_plugins _PLUGIN_CLASSES = load_plugins('postprocessor', 'PP', globals()) diff --git a/hypervideo_dl/postprocessor/common.py b/hypervideo_dl/postprocessor/common.py index 3899646..c3fca35 100644 --- a/hypervideo_dl/postprocessor/common.py +++ b/hypervideo_dl/postprocessor/common.py @@ -1,19 +1,16 @@ -from __future__ import unicode_literals - import functools -import itertools import json import os -import time import urllib.error from ..utils import ( + PostProcessingError, + RetryManager, _configuration_args, + deprecation_warning, encodeFilename, network_exceptions, - PostProcessingError, sanitized_Request, - write_string, ) @@ -47,9 +44,6 @@ class PostProcessor(metaclass=PostProcessorMetaClass): an initial argument and then with the returned value of the previous PostProcessor. - The chain will be stopped if one of them ever returns None or the end - of the chain is reached. - PostProcessor objects follow a "mutual registration" process similar to InfoExtractor objects. @@ -71,21 +65,26 @@ class PostProcessor(metaclass=PostProcessorMetaClass): return name[6:] if name[:6].lower() == 'ffmpeg' else name def to_screen(self, text, prefix=True, *args, **kwargs): - tag = '[%s] ' % self.PP_NAME if prefix else '' if self._downloader: - return self._downloader.to_screen('%s%s' % (tag, text), *args, **kwargs) + tag = '[%s] ' % self.PP_NAME if prefix else '' + return self._downloader.to_screen(f'{tag}{text}', *args, **kwargs) def report_warning(self, text, *args, **kwargs): if self._downloader: return self._downloader.report_warning(text, *args, **kwargs) - def deprecation_warning(self, text): + def deprecation_warning(self, msg): + warn = getattr(self._downloader, 'deprecation_warning', deprecation_warning) + return warn(msg, stacklevel=1) + + def deprecated_feature(self, msg): if self._downloader: - return self._downloader.deprecation_warning(text) - write_string(f'DeprecationWarning: {text}') + return self._downloader.deprecated_feature(msg) + return deprecation_warning(msg, stacklevel=1) def report_error(self, text, *args, **kwargs): - # Exists only for compatibility. Do not use + self.deprecation_warning('"hypervideo_dl.postprocessor.PostProcessor.report_error" is deprecated. ' + 'raise "hypervideo_dl.utils.PostProcessingError" instead') if self._downloader: return self._downloader.report_error(text, *args, **kwargs) @@ -93,6 +92,12 @@ class PostProcessor(metaclass=PostProcessorMetaClass): if self._downloader: return self._downloader.write_debug(text, *args, **kwargs) + def _delete_downloaded_files(self, *files_to_delete, **kwargs): + if self._downloader: + return self._downloader._delete_downloaded_files(*files_to_delete, **kwargs) + for filename in set(filter(None, files_to_delete)): + os.remove(filename) + def get_param(self, name, default=None, *args, **kwargs): if self._downloader: return self._downloader.params.get(name, default, *args, **kwargs) @@ -171,6 +176,8 @@ class PostProcessor(metaclass=PostProcessorMetaClass): def report_progress(self, s): s['_default_template'] = '%(postprocessor)s %(status)s' % s + if not self._downloader: + return progress_dict = s.copy() progress_dict.pop('info_dict') @@ -179,34 +186,31 @@ class PostProcessor(metaclass=PostProcessorMetaClass): progress_template = self.get_param('progress_template', {}) tmpl = progress_template.get('postprocess') if tmpl: - self._downloader.to_stdout(self._downloader.evaluate_outtmpl(tmpl, progress_dict)) + self._downloader.to_screen( + self._downloader.evaluate_outtmpl(tmpl, progress_dict), skip_eol=True, quiet=False) self._downloader.to_console_title(self._downloader.evaluate_outtmpl( progress_template.get('postprocess-title') or 'hypervideo %(progress._default_template)s', progress_dict)) - def _download_json(self, url, *, expected_http_errors=(404,)): + def _retry_download(self, err, count, retries): # While this is not an extractor, it behaves similar to one and - # so obey extractor_retries and sleep_interval_requests - max_retries = self.get_param('extractor_retries', 3) - sleep_interval = self.get_param('sleep_interval_requests') or 0 + # so obey extractor_retries and "--retry-sleep extractor" + RetryManager.report_retry(err, count, retries, info=self.to_screen, warn=self.report_warning, + sleep_func=self.get_param('retry_sleep_functions', {}).get('extractor')) + def _download_json(self, url, *, expected_http_errors=(404,)): self.write_debug(f'{self.PP_NAME} query: {url}') - for retries in itertools.count(): + for retry in RetryManager(self.get_param('extractor_retries', 3), self._retry_download): try: rsp = self._downloader.urlopen(sanitized_Request(url)) - return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8')) except network_exceptions as e: if isinstance(e, urllib.error.HTTPError) and e.code in expected_http_errors: return None - if retries < max_retries: - self.report_warning(f'{e}. Retrying...') - if sleep_interval > 0: - self.to_screen(f'Sleeping {sleep_interval} seconds ...') - time.sleep(sleep_interval) - continue - raise PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}') + retry.error = PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}') + continue + return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8')) -class AudioConversionError(PostProcessingError): +class AudioConversionError(PostProcessingError): # Deprecated pass diff --git a/hypervideo_dl/postprocessor/embedthumbnail.py b/hypervideo_dl/postprocessor/embedthumbnail.py index 815221d..7cd3952 100644 --- a/hypervideo_dl/postprocessor/embedthumbnail.py +++ b/hypervideo_dl/postprocessor/embedthumbnail.py @@ -1,37 +1,29 @@ -# coding: utf-8 -from __future__ import unicode_literals - import base64 -import imghdr import os -import subprocess import re - -try: - from mutagen.flac import Picture, FLAC - from mutagen.mp4 import MP4, MP4Cover - from mutagen.oggopus import OggOpus - from mutagen.oggvorbis import OggVorbis - has_mutagen = True -except ImportError: - has_mutagen = False +import subprocess from .common import PostProcessor -from .ffmpeg import ( - FFmpegPostProcessor, - FFmpegThumbnailsConvertorPP, -) +from .ffmpeg import FFmpegPostProcessor, FFmpegThumbnailsConvertorPP +from ..compat import imghdr +from ..dependencies import mutagen from ..utils import ( + Popen, + PostProcessingError, check_executable, encodeArgument, encodeFilename, error_to_compat_str, - Popen, - PostProcessingError, prepend_extension, shell_quote, ) +if mutagen: + from mutagen.flac import FLAC, Picture + from mutagen.mp4 import MP4, MP4Cover + from mutagen.oggopus import OggOpus + from mutagen.oggvorbis import OggVorbis + class EmbedThumbnailPPError(PostProcessingError): pass @@ -61,7 +53,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): return int(mobj.group('w')), int(mobj.group('h')) def _report_run(self, exe, filename): - self.to_screen('%s: Adding thumbnail to "%s"' % (exe, filename)) + self.to_screen(f'{exe}: Adding thumbnail to "{filename}"') @PostProcessor._restrict_to(images=False) def run(self, info): @@ -87,12 +79,10 @@ class EmbedThumbnailPP(FFmpegPostProcessor): original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath'] - # Convert unsupported thumbnail formats to PNG (see #25687, #25717) - # Original behavior was to convert to JPG, but since JPG is a lossy - # format, there will be some additional data loss. - # PNG, on the other hand, is lossless. + # Convert unsupported thumbnail formats (see #25687, #25717) + # PNG is preferred since JPEG is lossy thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:] - if thumbnail_ext not in ('jpg', 'jpeg', 'png'): + if info['ext'] not in ('mkv', 'mka') and thumbnail_ext not in ('jpg', 'jpeg', 'png'): thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png') thumbnail_ext = 'png' @@ -101,8 +91,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor): success = True if info['ext'] == 'mp3': options = [ - '-c', 'copy', '-map', '0:0', '-map', '1:0', '-id3v2_version', '3', - '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (front)"'] + '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3', + '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)'] self._report_run('ffmpeg', filename) self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) @@ -110,7 +100,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): elif info['ext'] in ['mkv', 'mka']: options = list(self.stream_copy_opts()) - mimetype = 'image/%s' % ('png' if thumbnail_ext == 'png' else 'jpeg') + mimetype = f'image/{thumbnail_ext.replace("jpg", "jpeg")}' old_stream, new_stream = self.get_stream_number( filename, ('tags', 'mimetype'), mimetype) if old_stream is not None: @@ -127,7 +117,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): elif info['ext'] in ['m4a', 'mp4', 'mov']: prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', []) # Method 1: Use mutagen - if not has_mutagen or prefer_atomicparsley: + if not mutagen or prefer_atomicparsley: success = False else: try: @@ -149,7 +139,8 @@ class EmbedThumbnailPP(FFmpegPostProcessor): if not success: success = True atomicparsley = next(( - x for x in ['AtomicParsley', 'atomicparsley'] + # libatomicparsley.so : See https://github.com/xibr/ytdlp-lazy/issues/1 + x for x in ['AtomicParsley', 'atomicparsley', 'libatomicparsley.so'] if check_executable(x, ['-v'])), None) if atomicparsley is None: self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg') @@ -167,14 +158,12 @@ class EmbedThumbnailPP(FFmpegPostProcessor): self._report_run('atomicparsley', filename) self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd)) - p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout, stderr = p.communicate_or_kill() - if p.returncode != 0: - msg = stderr.decode('utf-8', 'replace').strip() - self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {msg}') + stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if returncode: + self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}') # for formats that don't support thumbnails (like 3gp) AtomicParsley # won't create to the temporary file - if b'No changes' in stdout: + if 'No changes' in stdout: self.report_warning('The file format doesn\'t support embedding a thumbnail') success = False @@ -200,7 +189,7 @@ class EmbedThumbnailPP(FFmpegPostProcessor): raise EmbedThumbnailPPError(f'Unable to embed using ffprobe & ffmpeg; {err}') elif info['ext'] in ['ogg', 'opus', 'flac']: - if not has_mutagen: + if not mutagen: raise EmbedThumbnailPPError('module mutagen was not found. Please install using `python -m pip install mutagen`') self._report_run('mutagen', filename) @@ -230,11 +219,9 @@ class EmbedThumbnailPP(FFmpegPostProcessor): os.replace(temp_filename, filename) self.try_utime(filename, mtime, mtime) - - files_to_delete = [thumbnail_filename] - if self._already_have_thumbnail: - if original_thumbnail == thumbnail_filename: - files_to_delete = [] - elif original_thumbnail != thumbnail_filename: - files_to_delete.append(original_thumbnail) - return files_to_delete, info + converted = original_thumbnail != thumbnail_filename + self._delete_downloaded_files( + thumbnail_filename if converted or not self._already_have_thumbnail else None, + original_thumbnail if converted and not self._already_have_thumbnail else None, + info=info) + return [], info diff --git a/hypervideo_dl/postprocessor/exec.py b/hypervideo_dl/postprocessor/exec.py index c0bd6df..65fe6d4 100644 --- a/hypervideo_dl/postprocessor/exec.py +++ b/hypervideo_dl/postprocessor/exec.py @@ -1,14 +1,8 @@ -from __future__ import unicode_literals - import subprocess from .common import PostProcessor from ..compat import compat_shlex_quote -from ..utils import ( - encodeArgument, - PostProcessingError, - variadic, -) +from ..utils import PostProcessingError, encodeArgument, variadic class ExecPP(PostProcessor): diff --git a/hypervideo_dl/postprocessor/execafterdownload.py b/hypervideo_dl/postprocessor/execafterdownload.py deleted file mode 100644 index 64dabe7..0000000 --- a/hypervideo_dl/postprocessor/execafterdownload.py +++ /dev/null @@ -1,31 +0,0 @@ -from __future__ import unicode_literals - -import subprocess - -from .common import PostProcessor -from ..compat import compat_shlex_quote -from ..utils import ( - encodeArgument, - PostProcessingError, -) - - -class ExecAfterDownloadPP(PostProcessor): - def __init__(self, downloader, exec_cmd): - super(ExecAfterDownloadPP, self).__init__(downloader) - self.exec_cmd = exec_cmd - - def run(self, information): - cmd = self.exec_cmd - if '{}' not in cmd: - cmd += ' {}' - - cmd = cmd.replace('{}', compat_shlex_quote(information['filepath'])) - - self._downloader.to_screen('[exec] Executing command: %s' % cmd) - retCode = subprocess.call(encodeArgument(cmd), shell=True) - if retCode != 0: - raise PostProcessingError( - 'Command returned error code %d' % retCode) - - return [], information diff --git a/hypervideo_dl/postprocessor/ffmpeg.py b/hypervideo_dl/postprocessor/ffmpeg.py index 3e6edcf..0471594 100644 --- a/hypervideo_dl/postprocessor/ffmpeg.py +++ b/hypervideo_dl/postprocessor/ffmpeg.py @@ -1,30 +1,30 @@ -from __future__ import unicode_literals - import collections -import io +import contextvars import itertools +import json import os +import re import subprocess import time -import re -import json -from .common import AudioConversionError, PostProcessor - -from ..compat import compat_str +from .common import PostProcessor +from ..compat import functools, imghdr from ..utils import ( + MEDIA_EXTENSIONS, + ISO639Utils, + Popen, + PostProcessingError, + _get_exe_version_output, + deprecation_warning, + detect_exe_version, determine_ext, dfxp2srt, encodeArgument, encodeFilename, + filter_dict, float_or_none, - _get_exe_version_output, - detect_exe_version, is_outdated_version, - ISO639Utils, orderedSet, - Popen, - PostProcessingError, prepend_extension, replace_extension, shell_quote, @@ -33,7 +33,6 @@ from ..utils import ( write_json_file, ) - EXT_TO_OUT_FORMATS = { 'aac': 'adts', 'flac': 'flac', @@ -48,36 +47,48 @@ EXT_TO_OUT_FORMATS = { 'vtt': 'webvtt', } ACODECS = { - 'mp3': 'libmp3lame', - 'aac': 'aac', - 'flac': 'flac', - 'm4a': 'aac', - 'opus': 'libopus', - 'vorbis': 'libvorbis', - 'wav': None, - 'alac': None, + # name: (ext, encoder, opts) + 'mp3': ('mp3', 'libmp3lame', ()), + 'aac': ('m4a', 'aac', ('-f', 'adts')), + 'm4a': ('m4a', 'aac', ('-bsf:a', 'aac_adtstoasc')), + 'opus': ('opus', 'libopus', ()), + 'vorbis': ('ogg', 'libvorbis', ()), + 'flac': ('flac', 'flac', ()), + 'alac': ('m4a', None, ('-acodec', 'alac')), + 'wav': ('wav', None, ('-f', 'wav')), } +def create_mapping_re(supported): + return re.compile(r'{0}(?:/{0})*$'.format(r'(?:\s*\w+\s*>)?\s*(?:%s)\s*' % '|'.join(supported))) + + +def resolve_mapping(source, mapping): + """ + Get corresponding item from a mapping string like 'A>B/C>D/E' + @returns (target, error_message) + """ + for pair in mapping.lower().split('/'): + kv = pair.split('>', 1) + if len(kv) == 1 or kv[0].strip() == source: + target = kv[-1].strip() + if target == source: + return target, f'already is in target format {source}' + return target, None + return None, f'could not find a mapping for {source}' + + class FFmpegPostProcessorError(PostProcessingError): pass class FFmpegPostProcessor(PostProcessor): + _ffmpeg_location = contextvars.ContextVar('ffmpeg_location', default=None) + def __init__(self, downloader=None): PostProcessor.__init__(self, downloader) - self._determine_executables() - - def check_version(self): - if not self.available: - raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location') - - required_version = '10-0' if self.basename == 'avconv' else '1.0' - if is_outdated_version( - self._versions[self.basename], required_version): - warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( - self.basename, self.basename, required_version) - self.report_warning(warning) + self._prefer_ffmpeg = self.get_param('prefer_ffmpeg', True) + self._paths = self._determine_executables() @staticmethod def get_versions_and_features(downloader=None): @@ -88,87 +99,105 @@ class FFmpegPostProcessor(PostProcessor): def get_versions(downloader=None): return FFmpegPostProcessor.get_versions_and_features(downloader)[0] - _version_cache, _features_cache = {}, {} + _ffmpeg_to_avconv = {'ffmpeg': 'avconv', 'ffprobe': 'avprobe'} def _determine_executables(self): - programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] - - def get_ffmpeg_version(path, prog): - if path in self._version_cache: - self._versions[prog], self._features = self._version_cache[path], self._features_cache.get(path, {}) - return - out = _get_exe_version_output(path, ['-bsfs'], to_screen=self.write_debug) - ver = detect_exe_version(out) if out else False - if ver: - regexs = [ - r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1] - r'n([0-9.]+)$', # Arch Linux - # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/ - ] - for regex in regexs: - mobj = re.match(regex, ver) - if mobj: - ver = mobj.group(1) - self._versions[prog] = self._version_cache[path] = ver - if prog != 'ffmpeg' or not out: - return + programs = [*self._ffmpeg_to_avconv.keys(), *self._ffmpeg_to_avconv.values()] - mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. ]+)', out) - lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None - self._features = self._features_cache[path] = { - 'fdk': '--enable-libfdk-aac' in out, - 'setts': 'setts' in out.splitlines(), - 'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False), - } - - self.basename = None - self.probe_basename = None - self._paths = None - self._versions = None - self._features = {} - - prefer_ffmpeg = self.get_param('prefer_ffmpeg', True) - location = self.get_param('ffmpeg_location') + location = self.get_param('ffmpeg_location', self._ffmpeg_location.get()) if location is None: - self._paths = {p: p for p in programs} + return {p: p for p in programs} + + if not os.path.exists(location): + self.report_warning( + f'ffmpeg-location {location} does not exist! Continuing without ffmpeg', only_once=True) + return {} + elif os.path.isdir(location): + dirname, basename, filename = location, None, None else: - if not os.path.exists(location): - self.report_warning( - 'ffmpeg-location %s does not exist! ' - 'Continuing without ffmpeg.' % (location)) - self._versions = {} - return - elif os.path.isdir(location): - dirname, basename = location, None - else: - basename = os.path.splitext(os.path.basename(location))[0] - basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg') - dirname = os.path.dirname(os.path.abspath(location)) - if basename in ('ffmpeg', 'ffprobe'): - prefer_ffmpeg = True - - self._paths = dict( - (p, os.path.join(dirname, p)) for p in programs) - if basename: - self._paths[basename] = location - - self._versions = {} - executables = {'basename': ('ffmpeg', 'avconv'), 'probe_basename': ('ffprobe', 'avprobe')} - if prefer_ffmpeg is False: - executables = {k: v[::-1] for k, v in executables.items()} - for var, prefs in executables.items(): - for p in prefs: - get_ffmpeg_version(self._paths[p], p) - if self._versions[p]: - setattr(self, var, p) - break - - if self.basename == 'avconv': - self.deprecation_warning( - 'Support for avconv is deprecated and may be removed in a future version. Use ffmpeg instead') - if self.probe_basename == 'avprobe': - self.deprecation_warning( - 'Support for avprobe is deprecated and may be removed in a future version. Use ffprobe instead') + filename = os.path.basename(location) + basename = next((p for p in programs if p in filename), 'ffmpeg') + dirname = os.path.dirname(os.path.abspath(location)) + if basename in self._ffmpeg_to_avconv.keys(): + self._prefer_ffmpeg = True + + paths = {p: os.path.join(dirname, p) for p in programs} + if basename and basename in filename: + for p in programs: + path = os.path.join(dirname, filename.replace(basename, p)) + if os.path.exists(path): + paths[p] = path + if basename: + paths[basename] = location + return paths + + _version_cache, _features_cache = {None: None}, {} + + def _get_ffmpeg_version(self, prog): + path = self._paths.get(prog) + if path in self._version_cache: + return self._version_cache[path], self._features_cache.get(path, {}) + out = _get_exe_version_output(path, ['-bsfs']) + ver = detect_exe_version(out) if out else False + if ver: + regexs = [ + r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1] + r'n([0-9.]+)$', # Arch Linux + # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/ + ] + for regex in regexs: + mobj = re.match(regex, ver) + if mobj: + ver = mobj.group(1) + self._version_cache[path] = ver + if prog != 'ffmpeg' or not out: + return ver, {} + + mobj = re.search(r'(?m)^\s+libavformat\s+(?:[0-9. ]+)\s+/\s+(?P<runtime>[0-9. ]+)', out) + lavf_runtime_version = mobj.group('runtime').replace(' ', '') if mobj else None + self._features_cache[path] = features = { + 'fdk': '--enable-libfdk-aac' in out, + 'setts': 'setts' in out.splitlines(), + 'needs_adtstoasc': is_outdated_version(lavf_runtime_version, '57.56.100', False), + } + return ver, features + + @property + def _versions(self): + return filter_dict({self.basename: self._version, self.probe_basename: self._probe_version}) + + @functools.cached_property + def basename(self): + self._version # run property + return self.basename + + @functools.cached_property + def probe_basename(self): + self._probe_version # run property + return self.probe_basename + + def _get_version(self, kind): + executables = (kind, ) + if not self._prefer_ffmpeg: + executables = (kind, self._ffmpeg_to_avconv[kind]) + basename, version, features = next(filter( + lambda x: x[1], ((p, *self._get_ffmpeg_version(p)) for p in executables)), (None, None, {})) + if kind == 'ffmpeg': + self.basename, self._features = basename, features + else: + self.probe_basename = basename + if basename == self._ffmpeg_to_avconv[kind]: + self.deprecated_feature(f'Support for {self._ffmpeg_to_avconv[kind]} is deprecated and ' + f'may be removed in a future version. Use {kind} instead') + return version + + @functools.cached_property + def _version(self): + return self._get_version('ffmpeg') + + @functools.cached_property + def _probe_version(self): + return self._get_version('ffprobe') @property def available(self): @@ -176,7 +205,7 @@ class FFmpegPostProcessor(PostProcessor): @property def executable(self): - return self._paths[self.basename] + return self._paths.get(self.basename) @property def probe_available(self): @@ -184,7 +213,7 @@ class FFmpegPostProcessor(PostProcessor): @property def probe_executable(self): - return self._paths[self.probe_basename] + return self._paths.get(self.probe_basename) @staticmethod def stream_copy_opts(copy=True, *, ext=None): @@ -194,10 +223,18 @@ class FFmpegPostProcessor(PostProcessor): yield from ('-dn', '-ignore_unknown') if copy: yield from ('-c', 'copy') - # For some reason, '-c copy -map 0' is not enough to copy subtitles - if ext in ('mp4', 'mov'): + if ext in ('mp4', 'mov', 'm4a'): yield from ('-c:s', 'mov_text') + def check_version(self): + if not self.available: + raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location') + + required_version = '10-0' if self.basename == 'avconv' else '1.0' + if is_outdated_version(self._version, required_version): + self.report_warning(f'Your copy of {self.basename} is outdated, update {self.basename} ' + f'to version {required_version} or newer if you encounter any errors') + def get_audio_codec(self, path): if not self.probe_available and not self.available: raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location') @@ -211,15 +248,14 @@ class FFmpegPostProcessor(PostProcessor): encodeFilename(self.executable, True), encodeArgument('-i')] cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) - self.write_debug('%s command line: %s' % (self.basename, shell_quote(cmd))) - handle = Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout_data, stderr_data = handle.communicate_or_kill() - expected_ret = 0 if self.probe_available else 1 - if handle.wait() != expected_ret: + self.write_debug(f'{self.basename} command line: {shell_quote(cmd)}') + stdout, stderr, returncode = Popen.run( + cmd, text=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + if returncode != (0 if self.probe_available else 1): return None - except (IOError, OSError): + except OSError: return None - output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore') + output = stdout if self.probe_available else stderr if self.probe_available: audio_codec = None for line in output.split('\n'): @@ -253,11 +289,10 @@ class FFmpegPostProcessor(PostProcessor): ] cmd += opts - cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) - self.write_debug('ffprobe command line: %s' % shell_quote(cmd)) - p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - stdout, stderr = p.communicate() - return json.loads(stdout.decode('utf-8', 'replace')) + cmd.append(self._ffmpeg_filename_argument(path)) + self.write_debug(f'ffprobe command line: {shell_quote(cmd)}') + stdout, _, _ = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + return json.loads(stdout) def get_stream_number(self, path, keys, value): streams = self.get_metadata_object(path)['streams'] @@ -277,12 +312,12 @@ class FFmpegPostProcessor(PostProcessor): if fatal: raise PostProcessingError(f'Unable to determine video duration: {e.msg}') - def _duration_mismatch(self, d1, d2): + def _duration_mismatch(self, d1, d2, tolerance=2): if not d1 or not d2: return None # The duration is often only known to nearest second. So there can be <1sec disparity natually. # Further excuse an additional <1sec difference. - return abs(d1 - d2) > 2 + return abs(d1 - d2) > tolerance def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs): return self.real_run_ffmpeg( @@ -319,16 +354,15 @@ class FFmpegPostProcessor(PostProcessor): for i, (path, opts) in enumerate(path_opts) if path) self.write_debug('ffmpeg command line: %s' % shell_quote(cmd)) - p = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - stdout, stderr = p.communicate_or_kill() - if p.returncode not in variadic(expected_retcodes): - stderr = stderr.decode('utf-8', 'replace').strip() + _, stderr, returncode = Popen.run( + cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + if returncode not in variadic(expected_retcodes): self.write_debug(stderr) - raise FFmpegPostProcessorError(stderr.split('\n')[-1]) + raise FFmpegPostProcessorError(stderr.strip().splitlines()[-1]) for out_path, _ in output_path_opts: if out_path: self.try_utime(out_path, oldest_mtime, oldest_mtime) - return stderr.decode('utf-8', 'replace') + return stderr def run_ffmpeg(self, path, out_path, opts, **kwargs): return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs) @@ -381,7 +415,7 @@ class FFmpegPostProcessor(PostProcessor): self.real_run_ffmpeg( [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])], [(out_file, out_flags)]) - os.remove(concat_file) + self._delete_downloaded_files(concat_file) @classmethod def _concat_spec(cls, in_files, concat_opts=None): @@ -397,12 +431,13 @@ class FFmpegPostProcessor(PostProcessor): class FFmpegExtractAudioPP(FFmpegPostProcessor): - COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma') - SUPPORTED_EXTS = ('aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac') + COMMON_AUDIO_EXTS = MEDIA_EXTENSIONS.common_audio + ('wma', ) + SUPPORTED_EXTS = tuple(ACODECS.keys()) + FORMAT_RE = create_mapping_re(('best', *SUPPORTED_EXTS)) def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) - self._preferredcodec = preferredcodec or 'best' + self.mapping = preferredcodec or 'best' self._preferredquality = float_or_none(preferredquality) self._nopostoverwrites = nopostoverwrites @@ -437,71 +472,47 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): try: FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts) except FFmpegPostProcessorError as err: - raise AudioConversionError(err.msg) + raise PostProcessingError(f'audio conversion failed: {err.msg}') @PostProcessor._restrict_to(images=False) def run(self, information): orig_path = path = information['filepath'] - orig_ext = information['ext'] - - if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS: - self.to_screen('Skipping audio extraction since the file is already in a common audio format') + target_format, _skip_msg = resolve_mapping(information['ext'], self.mapping) + if target_format == 'best' and information['ext'] in self.COMMON_AUDIO_EXTS: + target_format, _skip_msg = None, 'the file is already in a common audio format' + if not target_format: + self.to_screen(f'Not converting audio {orig_path}; {_skip_msg}') return [], information filecodec = self.get_audio_codec(path) if filecodec is None: raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe') - more_opts = [] - if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'): - if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']: - # Lossless, but in another container - acodec = 'copy' - extension = 'm4a' - more_opts = ['-bsf:a', 'aac_adtstoasc'] - elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']: - # Lossless if possible - acodec = 'copy' - extension = filecodec - if filecodec == 'aac': - more_opts = ['-f', 'adts'] - if filecodec == 'vorbis': - extension = 'ogg' - elif filecodec == 'alac': - acodec = None - extension = 'm4a' - more_opts += ['-acodec', 'alac'] - else: - # MP3 otherwise. - acodec = 'libmp3lame' - extension = 'mp3' - more_opts = self._quality_args(acodec) + if filecodec == 'aac' and target_format in ('m4a', 'best'): + # Lossless, but in another container + extension, _, more_opts, acodec = *ACODECS['m4a'], 'copy' + elif target_format == 'best' or target_format == filecodec: + # Lossless if possible + try: + extension, _, more_opts, acodec = *ACODECS[filecodec], 'copy' + except KeyError: + extension, acodec, more_opts = ACODECS['mp3'] else: # We convert the audio (lossy if codec is lossy) - acodec = ACODECS[self._preferredcodec] + extension, acodec, more_opts = ACODECS[target_format] if acodec == 'aac' and self._features.get('fdk'): - acodec = 'libfdk_aac' - extension = self._preferredcodec + acodec, more_opts = 'libfdk_aac', [] + + more_opts = list(more_opts) + if acodec != 'copy': more_opts = self._quality_args(acodec) - if self._preferredcodec == 'aac': - more_opts += ['-f', 'adts'] - elif self._preferredcodec == 'm4a': - more_opts += ['-bsf:a', 'aac_adtstoasc'] - elif self._preferredcodec == 'vorbis': - extension = 'ogg' - elif self._preferredcodec == 'wav': - extension = 'wav' - more_opts += ['-f', 'wav'] - elif self._preferredcodec == 'alac': - extension = 'm4a' - more_opts += ['-acodec', 'alac'] - - prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups - temp_path = new_path = prefix + sep + extension + + # not os.path.splitext, since the latter does not work on unicode in all setups + temp_path = new_path = f'{path.rpartition(".")[0]}.{extension}' if new_path == path: if acodec == 'copy': - self.to_screen(f'File is already in target format {self._preferredcodec}, skipping') + self.to_screen(f'Not converting audio {orig_path}; file is already in target format {target_format}') return [], information orig_path = prepend_extension(path, 'orig') temp_path = prepend_extension(path, 'temp') @@ -510,14 +521,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): self.to_screen('Post-process file %s exists, skipping' % new_path) return [], information - try: - self.to_screen(f'Destination: {new_path}') - self.run_ffmpeg(path, temp_path, acodec, more_opts) - except AudioConversionError as e: - raise PostProcessingError( - 'audio conversion failed: ' + e.msg) - except Exception: - raise PostProcessingError('error running ' + self.basename) + self.to_screen(f'Destination: {new_path}') + self.run_ffmpeg(path, temp_path, acodec, more_opts) os.replace(path, orig_path) os.replace(temp_path, new_path) @@ -527,26 +532,19 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): # Try to update the date time for extracted audio file. if information.get('filetime') is not None: self.try_utime( - new_path, time.time(), information['filetime'], - errnote='Cannot update utime of audio file') + new_path, time.time(), information['filetime'], errnote='Cannot update utime of audio file') return [orig_path], information class FFmpegVideoConvertorPP(FFmpegPostProcessor): - SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mka', 'ogg', *FFmpegExtractAudioPP.SUPPORTED_EXTS) - FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS))) + SUPPORTED_EXTS = (*MEDIA_EXTENSIONS.common_video, *sorted(MEDIA_EXTENSIONS.common_audio + ('aac', 'vorbis'))) + FORMAT_RE = create_mapping_re(SUPPORTED_EXTS) _ACTION = 'converting' def __init__(self, downloader=None, preferedformat=None): - super(FFmpegVideoConvertorPP, self).__init__(downloader) - self._preferedformats = preferedformat.lower().split('/') - - def _target_ext(self, source_ext): - for pair in self._preferedformats: - kv = pair.split('>') - if len(kv) == 1 or kv[0].strip() == source_ext: - return kv[-1].strip() + super().__init__(downloader) + self.mapping = preferedformat @staticmethod def _options(target_ext): @@ -557,11 +555,7 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): filename, source_ext = info['filepath'], info['ext'].lower() - target_ext = self._target_ext(source_ext) - _skip_msg = ( - f'could not find a mapping for {source_ext}' if not target_ext - else f'already is in target format {source_ext}' if source_ext == target_ext - else None) + target_ext, _skip_msg = resolve_mapping(source_ext, self.mapping) if _skip_msg: self.to_screen(f'Not {self._ACTION} media file "{filename}"; {_skip_msg}') return [], info @@ -584,14 +578,16 @@ class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP): class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): + SUPPORTED_EXTS = ('mp4', 'mov', 'm4a', 'webm', 'mkv', 'mka') + def __init__(self, downloader=None, already_have_subtitle=False): - super(FFmpegEmbedSubtitlePP, self).__init__(downloader) + super().__init__(downloader) self._already_have_subtitle = already_have_subtitle @PostProcessor._restrict_to(images=False) def run(self, info): - if info['ext'] not in ('mp4', 'webm', 'mkv'): - self.to_screen('Subtitles can only be embedded in mp4, webm or mkv files') + if info['ext'] not in self.SUPPORTED_EXTS: + self.to_screen(f'Subtitles can only be embedded in {", ".join(self.SUPPORTED_EXTS)} files') return [], info subtitles = info.get('requested_subtitles') if not subtitles: @@ -600,7 +596,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): filename = info['filepath'] - # Disabled temporarily. There needs to be a way to overide this + # Disabled temporarily. There needs to be a way to override this # in case of duration actually mismatching in extractor # See: https://github.com/hypervideo/hypervideo/issues/1870, https://github.com/hypervideo/hypervideo/issues/1385 ''' @@ -706,14 +702,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor): self.run_ffmpeg_multiple_files( (filename, metadata_filename), temp_filename, itertools.chain(self._options(info['ext']), *options)) - for file in filter(None, files_to_delete): - os.remove(file) # Don't obey --keep-files + self._delete_downloaded_files(*files_to_delete) os.replace(temp_filename, filename) return [], info @staticmethod def _get_chapter_opts(chapters, metadata_filename): - with io.open(metadata_filename, 'wt', encoding='utf-8') as f: + with open(metadata_filename, 'wt', encoding='utf-8') as f: def ffmpeg_escape(text): return re.sub(r'([\\=;#\n])', r'\\\1', text) @@ -737,13 +732,13 @@ class FFmpegMetadataPP(FFmpegPostProcessor): str(info[key]) for key in [f'{meta_prefix}_'] + list(variadic(info_list or meta_list)) if info.get(key) is not None), None) if value not in ('', None): + value = value.replace('\0', '') # nul character cannot be passed in command line metadata['common'].update({meta_f: value for meta_f in variadic(meta_list)}) - # See [1-4] for some info on media metadata/metadata supported - # by ffmpeg. - # 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/ - # 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata - # 3. https://kodi.wiki/view/Video_file_tagging + # Info on media metadata/metadata supported by ffmpeg: + # https://wiki.multimedia.cx/index.php/FFmpeg_Metadata + # https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/ + # https://kodi.wiki/view/Video_file_tagging add('title', ('track', 'title')) add('date', 'upload_date') @@ -767,7 +762,10 @@ class FFmpegMetadataPP(FFmpegPostProcessor): for key, value in info.items(): mobj = re.fullmatch(meta_regex, key) if value is not None and mobj: - metadata[mobj.group('i') or 'common'][mobj.group('key')] = value + metadata[mobj.group('i') or 'common'][mobj.group('key')] = value.replace('\0', '') + + # Write id3v1 metadata also since Windows Explorer can't handle id3v2 tags + yield ('-write_id3v1', '1') for name, value in metadata['common'].items(): yield ('-metadata', f'{name}={value}') @@ -801,11 +799,16 @@ class FFmpegMetadataPP(FFmpegPostProcessor): yield ('-map', '-0:%d' % old_stream) new_stream -= 1 - yield ('-attach', infofn, - '-metadata:s:%d' % new_stream, 'mimetype=application/json') + yield ( + '-attach', infofn, + f'-metadata:s:{new_stream}', 'mimetype=application/json', + f'-metadata:s:{new_stream}', 'filename=info.json', + ) class FFmpegMergerPP(FFmpegPostProcessor): + SUPPORTED_EXTS = MEDIA_EXTENSIONS.common_video + @PostProcessor._restrict_to(images=False) def run(self, info): filename = info['filepath'] @@ -895,7 +898,7 @@ class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor): def __init__(self, downloader=None, trim=0.001): # "trim" should be used when the video contains unintended packets - super(FFmpegFixupTimestampPP, self).__init__(downloader) + super().__init__(downloader) assert isinstance(trim, (int, float)) self.trim = str(trim) @@ -930,10 +933,10 @@ class FFmpegFixupDuplicateMoovPP(FFmpegCopyStreamPP): class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): - SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc') + SUPPORTED_EXTS = MEDIA_EXTENSIONS.subtitles def __init__(self, downloader=None, format=None): - super(FFmpegSubtitlesConvertorPP, self).__init__(downloader) + super().__init__(downloader) self.format = format def run(self, info): @@ -975,7 +978,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): with open(dfxp_file, 'rb') as f: srt_data = dfxp2srt(f.read()) - with io.open(srt_file, 'wt', encoding='utf-8') as f: + with open(srt_file, 'wt', encoding='utf-8') as f: f.write(srt_data) old_file = srt_file @@ -992,7 +995,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): self.run_ffmpeg(old_file, new_file, ['-f', new_format]) - with io.open(new_file, 'rt', encoding='utf-8') as f: + with open(new_file, encoding='utf-8') as f: subs[lang] = { 'ext': new_ext, 'data': f.read(), @@ -1029,8 +1032,8 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor): self.to_screen('Chapter %03d; Destination: %s' % (number, destination)) return ( destination, - ['-ss', compat_str(chapter['start_time']), - '-t', compat_str(chapter['end_time'] - chapter['start_time'])]) + ['-ss', str(chapter['start_time']), + '-t', str(chapter['end_time'] - chapter['start_time'])]) @PostProcessor._restrict_to(images=False) def run(self, info): @@ -1047,29 +1050,28 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor): destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info) self.real_run_ffmpeg([(in_file, opts)], [(destination, self.stream_copy_opts())]) if in_file != info['filepath']: - os.remove(in_file) + self._delete_downloaded_files(in_file, msg=None) return [], info class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor): - SUPPORTED_EXTS = ('jpg', 'png', 'webp') + SUPPORTED_EXTS = MEDIA_EXTENSIONS.thumbnails + FORMAT_RE = create_mapping_re(SUPPORTED_EXTS) def __init__(self, downloader=None, format=None): - super(FFmpegThumbnailsConvertorPP, self).__init__(downloader) - self.format = format + super().__init__(downloader) + self.mapping = format - @staticmethod - def is_webp(path): - with open(encodeFilename(path), 'rb') as f: - b = f.read(12) - return b[0:4] == b'RIFF' and b[8:] == b'WEBP' + @classmethod + def is_webp(cls, path): + deprecation_warning(f'{cls.__module__}.{cls.__name__}.is_webp is deprecated') + return imghdr.what(path) == 'webp' def fixup_webp(self, info, idx=-1): thumbnail_filename = info['thumbnails'][idx]['filepath'] _, thumbnail_ext = os.path.splitext(thumbnail_filename) if thumbnail_ext: - thumbnail_ext = thumbnail_ext[1:].lower() - if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename): + if thumbnail_ext.lower() != '.webp' and imghdr.what(thumbnail_filename) == 'webp': self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename) webp_filename = replace_extension(thumbnail_filename, 'webp') os.replace(thumbnail_filename, webp_filename) @@ -1079,17 +1081,18 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor): @staticmethod def _options(target_ext): + yield from ('-update', '1') if target_ext == 'jpg': - return ['-bsf:v', 'mjpeg2jpeg'] - return [] + yield from ('-bsf:v', 'mjpeg2jpeg') def convert_thumbnail(self, thumbnail_filename, target_ext): thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext) - self.to_screen('Converting thumbnail "%s" to %s' % (thumbnail_filename, target_ext)) + self.to_screen(f'Converting thumbnail "{thumbnail_filename}" to {target_ext}') + _, source_ext = os.path.splitext(thumbnail_filename) self.real_run_ffmpeg( - [(thumbnail_filename, ['-f', 'image2', '-pattern_type', 'none'])], - [(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))]) + [(thumbnail_filename, [] if source_ext == '.gif' else ['-f', 'image2', '-pattern_type', 'none'])], + [(thumbnail_conv_filename, self._options(target_ext))]) return thumbnail_conv_filename def run(self, info): @@ -1102,18 +1105,18 @@ class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor): continue has_thumbnail = True self.fixup_webp(info, idx) - _, thumbnail_ext = os.path.splitext(original_thumbnail) - if thumbnail_ext: - thumbnail_ext = thumbnail_ext[1:].lower() + original_thumbnail = thumbnail_dict['filepath'] # Path can change during fixup + thumbnail_ext = os.path.splitext(original_thumbnail)[1][1:].lower() if thumbnail_ext == 'jpeg': thumbnail_ext = 'jpg' - if thumbnail_ext == self.format: - self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail) + target_ext, _skip_msg = resolve_mapping(thumbnail_ext, self.mapping) + if _skip_msg: + self.to_screen(f'Not converting thumbnail "{original_thumbnail}"; {_skip_msg}') continue - thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format) + thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, target_ext) files_to_delete.append(original_thumbnail) info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension( - info['__files_to_move'][original_thumbnail], self.format) + info['__files_to_move'][original_thumbnail], target_ext) if not has_thumbnail: self.to_screen('There aren\'t any thumbnails to convert') @@ -1153,16 +1156,16 @@ class FFmpegConcatPP(FFmpegPostProcessor): entries = info.get('entries') or [] if not any(entries) or (self._only_multi_video and info['_type'] != 'multi_video'): return [], info - elif traverse_obj(entries, (..., 'requested_downloads', lambda _, v: len(v) > 1)): + elif traverse_obj(entries, (..., lambda k, v: k == 'requested_downloads' and len(v) > 1)): raise PostProcessingError('Concatenation is not supported when downloading multiple separate formats') in_files = traverse_obj(entries, (..., 'requested_downloads', 0, 'filepath')) or [] if len(in_files) < len(entries): raise PostProcessingError('Aborting concatenation because some downloads failed') - ie_copy = self._downloader._playlist_infodict(info) exts = traverse_obj(entries, (..., 'requested_downloads', 0, 'ext'), (..., 'ext')) - ie_copy['ext'] = exts[0] if len(set(exts)) == 1 else 'mkv' + ie_copy = collections.ChainMap({'ext': exts[0] if len(set(exts)) == 1 else 'mkv'}, + info, self._downloader._playlist_infodict(info)) out_file = self._downloader.prepare_filename(ie_copy, 'pl_video') files_to_delete = self.concat_files(in_files, out_file) diff --git a/hypervideo_dl/postprocessor/metadatafromtitle.py b/hypervideo_dl/postprocessor/metadatafromtitle.py deleted file mode 100644 index f5c14d9..0000000 --- a/hypervideo_dl/postprocessor/metadatafromtitle.py +++ /dev/null @@ -1,48 +0,0 @@ -from __future__ import unicode_literals - -import re - -from .common import PostProcessor - - -class MetadataFromTitlePP(PostProcessor): - def __init__(self, downloader, titleformat): - super(MetadataFromTitlePP, self).__init__(downloader) - self._titleformat = titleformat - self._titleregex = (self.format_to_regex(titleformat) - if re.search(r'%\(\w+\)s', titleformat) - else titleformat) - - def format_to_regex(self, fmt): - r""" - Converts a string like - '%(title)s - %(artist)s' - to a regex like - '(?P<title>.+)\ \-\ (?P<artist>.+)' - """ - lastpos = 0 - regex = '' - # replace %(..)s with regex group and escape other string parts - for match in re.finditer(r'%\((\w+)\)s', fmt): - regex += re.escape(fmt[lastpos:match.start()]) - regex += r'(?P<' + match.group(1) + '>.+)' - lastpos = match.end() - if lastpos < len(fmt): - regex += re.escape(fmt[lastpos:]) - return regex - - def run(self, info): - title = info['title'] - match = re.match(self._titleregex, title) - if match is None: - self._downloader.to_screen( - '[fromtitle] Could not interpret title of video as "%s"' - % self._titleformat) - return [], info - for attribute, value in match.groupdict().items(): - info[attribute] = value - self._downloader.to_screen( - '[fromtitle] parsed %s: %s' - % (attribute, value if value is not None else 'NA')) - - return [], info diff --git a/hypervideo_dl/postprocessor/metadataparser.py b/hypervideo_dl/postprocessor/metadataparser.py index 01ee6c1..381182b 100644 --- a/hypervideo_dl/postprocessor/metadataparser.py +++ b/hypervideo_dl/postprocessor/metadataparser.py @@ -1,31 +1,27 @@ import re -from enum import Enum from .common import PostProcessor +from ..utils import Namespace, filter_dict class MetadataParserPP(PostProcessor): - class Actions(Enum): - INTERPRET = 'interpretter' - REPLACE = 'replacer' - def __init__(self, downloader, actions): - PostProcessor.__init__(self, downloader) + super().__init__(downloader) self._actions = [] for f in actions: - action = f[0] - assert isinstance(action, self.Actions) - self._actions.append(getattr(self, action.value)(*f[1:])) + action, *args = f + assert action in self.Actions + self._actions.append(action(self, *args)) @classmethod def validate_action(cls, action, *data): - ''' Each action can be: + """Each action can be: (Actions.INTERPRET, from, to) OR (Actions.REPLACE, field, search, replace) - ''' - if not isinstance(action, cls.Actions): + """ + if action not in cls.Actions: raise ValueError(f'{action!r} is not a valid action') - getattr(cls, action.value)(cls, *data) # So this can raise error to validate + action(cls, *data) # So this can raise error to validate @staticmethod def field_to_template(tmpl): @@ -72,9 +68,9 @@ class MetadataParserPP(PostProcessor): if match is None: self.to_screen(f'Could not interpret {inp!r} as {out!r}') return - for attribute, value in match.groupdict().items(): + for attribute, value in filter_dict(match.groupdict()).items(): info[attribute] = value - self.to_screen('Parsed %s from %r: %r' % (attribute, template, value if value is not None else 'NA')) + self.to_screen(f'Parsed {attribute} from {template!r}: {value!r}') template = self.field_to_template(inp) out_re = re.compile(self.format_to_regex(out)) @@ -99,6 +95,8 @@ class MetadataParserPP(PostProcessor): search_re = re.compile(search) return f + Actions = Namespace(INTERPRET=interpretter, REPLACE=replacer) + class MetadataFromFieldPP(MetadataParserPP): @classmethod diff --git a/hypervideo_dl/postprocessor/modify_chapters.py b/hypervideo_dl/postprocessor/modify_chapters.py index 22506bc..a745b45 100644 --- a/hypervideo_dl/postprocessor/modify_chapters.py +++ b/hypervideo_dl/postprocessor/modify_chapters.py @@ -3,17 +3,9 @@ import heapq import os from .common import PostProcessor -from .ffmpeg import ( - FFmpegPostProcessor, - FFmpegSubtitlesConvertorPP -) +from .ffmpeg import FFmpegPostProcessor, FFmpegSubtitlesConvertorPP from .sponsorblock import SponsorBlockPP -from ..utils import ( - orderedSet, - PostProcessingError, - prepend_extension, -) - +from ..utils import PostProcessingError, orderedSet, prepend_extension _TINY_CHAPTER_DURATION = 1 DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l' @@ -24,7 +16,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): *, sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False): FFmpegPostProcessor.__init__(self, downloader) self._remove_chapters_patterns = set(remove_chapters_patterns or []) - self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.POI_CATEGORIES.keys()) + self._remove_sponsor_segments = set(remove_sponsor_segments or []) - set(SponsorBlockPP.NON_SKIPPABLE_CATEGORIES.keys()) self._ranges_to_remove = set(remove_ranges or []) self._sponsorblock_chapter_title = sponsorblock_chapter_title self._force_keyframes = force_keyframes @@ -40,14 +32,18 @@ class ModifyChaptersPP(FFmpegPostProcessor): real_duration = self._get_real_video_duration(info['filepath']) if not chapters: - chapters = [{'start_time': 0, 'end_time': real_duration, 'title': info['title']}] + chapters = [{'start_time': 0, 'end_time': info.get('duration') or real_duration, 'title': info['title']}] info['chapters'], cuts = self._remove_marked_arrange_sponsors(chapters + sponsor_chapters) if not cuts: return [], info + elif not info['chapters']: + self.report_warning('You have requested to remove the entire video, which is not possible') + return [], info - if self._duration_mismatch(real_duration, info.get('duration')): - if not self._duration_mismatch(real_duration, info['chapters'][-1]['end_time']): + original_duration, info['duration'] = info.get('duration'), info['chapters'][-1]['end_time'] + if self._duration_mismatch(real_duration, original_duration, 1): + if not self._duration_mismatch(real_duration, info['duration']): self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut') return [], info if not info.get('__real_download'): @@ -106,7 +102,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): 'start_time': start, 'end_time': end, 'category': 'manually_removed', - '_categories': [('manually_removed', start, end)], + '_categories': [('manually_removed', start, end, 'Manually removed')], 'remove': True, } for start, end in self._ranges_to_remove) @@ -297,13 +293,12 @@ class ModifyChaptersPP(FFmpegPostProcessor): c.pop('_was_cut', None) cats = c.pop('_categories', None) if cats: - category = min(cats, key=lambda c: c[2] - c[1])[0] - cats = orderedSet(x[0] for x in cats) + category, _, _, category_name = min(cats, key=lambda c: c[2] - c[1]) c.update({ 'category': category, - 'categories': cats, - 'name': SponsorBlockPP.CATEGORIES[category], - 'category_names': [SponsorBlockPP.CATEGORIES[c] for c in cats] + 'categories': orderedSet(x[0] for x in cats), + 'name': category_name, + 'category_names': orderedSet(x[3] for x in cats), }) c['title'] = self._downloader.evaluate_outtmpl(self._sponsorblock_chapter_title, c.copy()) # Merge identically named sponsors. @@ -322,7 +317,7 @@ class ModifyChaptersPP(FFmpegPostProcessor): self.to_screen(f'Removing chapters from {filename}') self.concat_files([in_file] * len(concat_opts), out_file, concat_opts) if in_file != filename: - os.remove(in_file) + self._delete_downloaded_files(in_file, msg=None) return out_file @staticmethod diff --git a/hypervideo_dl/postprocessor/movefilesafterdownload.py b/hypervideo_dl/postprocessor/movefilesafterdownload.py index 1064a8c..23b0924 100644 --- a/hypervideo_dl/postprocessor/movefilesafterdownload.py +++ b/hypervideo_dl/postprocessor/movefilesafterdownload.py @@ -1,13 +1,12 @@ -from __future__ import unicode_literals import os -import shutil from .common import PostProcessor +from ..compat import shutil from ..utils import ( + PostProcessingError, decodeFilename, encodeFilename, make_dir, - PostProcessingError, ) @@ -47,7 +46,7 @@ class MoveFilesAfterDownloadPP(PostProcessor): % (oldfile, newfile)) continue make_dir(newfile, PostProcessingError) - self.to_screen('Moving file "%s" to "%s"' % (oldfile, newfile)) + self.to_screen(f'Moving file "{oldfile}" to "{newfile}"') shutil.move(oldfile, newfile) # os.rename cannot move between volumes info['filepath'] = finalpath diff --git a/hypervideo_dl/postprocessor/sponskrub.py b/hypervideo_dl/postprocessor/sponskrub.py index 400cbcc..4ba2520 100644 --- a/hypervideo_dl/postprocessor/sponskrub.py +++ b/hypervideo_dl/postprocessor/sponskrub.py @@ -1,19 +1,18 @@ -from __future__ import unicode_literals import os +import shlex import subprocess from .common import PostProcessor -from ..compat import compat_shlex_split from ..utils import ( + Popen, + PostProcessingError, check_executable, cli_option, encodeArgument, encodeFilename, + prepend_extension, shell_quote, str_or_none, - Popen, - PostProcessingError, - prepend_extension, ) @@ -79,23 +78,21 @@ class SponSkrubPP(PostProcessor): if not self.cutout: cmd += ['-chapter'] cmd += cli_option(self._downloader.params, '-proxy', 'proxy') - cmd += compat_shlex_split(self.args) # For backward compatibility + cmd += shlex.split(self.args) # For backward compatibility cmd += self._configuration_args(self._exe_name, use_compat=False) cmd += ['--', information['id'], filename, temp_filename] cmd = [encodeArgument(i) for i in cmd] self.write_debug('sponskrub command line: %s' % shell_quote(cmd)) - pipe = None if self.get_param('verbose') else subprocess.PIPE - p = Popen(cmd, stdout=pipe) - stdout = p.communicate_or_kill()[0] + stdout, _, returncode = Popen.run(cmd, text=True, stdout=None if self.get_param('verbose') else subprocess.PIPE) - if p.returncode == 0: + if not returncode: os.replace(temp_filename, filename) self.to_screen('Sponsor sections have been %s' % ('removed' if self.cutout else 'marked')) - elif p.returncode == 3: + elif returncode == 3: self.to_screen('No segments in the SponsorBlock database') else: - msg = stdout.decode('utf-8', 'replace').strip() if stdout else '' - msg = msg.split('\n')[0 if msg.lower().startswith('unrecognised') else -1] - raise PostProcessingError(msg if msg else 'sponskrub failed with error code %s' % p.returncode) + raise PostProcessingError( + stdout.strip().splitlines()[0 if stdout.strip().lower().startswith('unrecognised') else -1] + or f'sponskrub failed with error code {returncode}') return [], information diff --git a/hypervideo_dl/postprocessor/sponsorblock.py b/hypervideo_dl/postprocessor/sponsorblock.py index 7943014..6ba87cd 100644 --- a/hypervideo_dl/postprocessor/sponsorblock.py +++ b/hypervideo_dl/postprocessor/sponsorblock.py @@ -1,9 +1,9 @@ -from hashlib import sha256 +import hashlib import json import re +import urllib.parse from .ffmpeg import FFmpegPostProcessor -from ..compat import compat_urllib_parse_urlencode class SponsorBlockPP(FFmpegPostProcessor): @@ -14,6 +14,10 @@ class SponsorBlockPP(FFmpegPostProcessor): POI_CATEGORIES = { 'poi_highlight': 'Highlight', } + NON_SKIPPABLE_CATEGORIES = { + **POI_CATEGORIES, + 'chapter': 'Chapter', + } CATEGORIES = { 'sponsor': 'Sponsor', 'intro': 'Intermission/Intro Animation', @@ -23,7 +27,7 @@ class SponsorBlockPP(FFmpegPostProcessor): 'filler': 'Filler Tangent', 'interaction': 'Interaction Reminder', 'music_offtopic': 'Non-Music Section', - **POI_CATEGORIES, + **NON_SKIPPABLE_CATEGORIES } def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'): @@ -38,7 +42,7 @@ class SponsorBlockPP(FFmpegPostProcessor): return [], info self.to_screen('Fetching SponsorBlock segments') - info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info['duration']) + info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration')) return [], info def _get_sponsor_chapters(self, info, duration): @@ -60,7 +64,8 @@ class SponsorBlockPP(FFmpegPostProcessor): if duration and duration - start_end[1] <= 1: start_end[1] = duration # SponsorBlock duration may be absent or it may deviate from the real one. - return s['videoDuration'] == 0 or not duration or abs(duration - s['videoDuration']) <= 1 + diff = abs(duration - s['videoDuration']) if s['videoDuration'] else 0 + return diff < 1 or (diff < 5 and diff / (start_end[1] - start_end[0]) < 0.05) duration_match = [s for s in segments if duration_filter(s)] if len(duration_match) != len(segments): @@ -68,28 +73,30 @@ class SponsorBlockPP(FFmpegPostProcessor): def to_chapter(s): (start, end), cat = s['segment'], s['category'] + title = s['description'] if cat == 'chapter' else self.CATEGORIES[cat] return { 'start_time': start, 'end_time': end, 'category': cat, - 'title': self.CATEGORIES[cat], - '_categories': [(cat, start, end)] + 'title': title, + 'type': s['actionType'], + '_categories': [(cat, start, end, title)], } sponsor_chapters = [to_chapter(s) for s in duration_match] if not sponsor_chapters: - self.to_screen('No segments were found in the SponsorBlock database') + self.to_screen('No matching segments were found in the SponsorBlock database') else: self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database') return sponsor_chapters def _get_sponsor_segments(self, video_id, service): - hash = sha256(video_id.encode('ascii')).hexdigest() + hash = hashlib.sha256(video_id.encode('ascii')).hexdigest() # SponsorBlock API recommends using first 4 hash characters. - url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + compat_urllib_parse_urlencode({ + url = f'{self._API_URL}/api/skipSegments/{hash[:4]}?' + urllib.parse.urlencode({ 'service': service, 'categories': json.dumps(self._categories), - 'actionTypes': json.dumps(['skip', 'poi']) + 'actionTypes': json.dumps(['skip', 'poi', 'chapter']) }) for d in self._download_json(url) or []: if d['videoID'] == video_id: diff --git a/hypervideo_dl/postprocessor/xattrpp.py b/hypervideo_dl/postprocessor/xattrpp.py index 93acd6d..f822eff 100644 --- a/hypervideo_dl/postprocessor/xattrpp.py +++ b/hypervideo_dl/postprocessor/xattrpp.py @@ -1,78 +1,63 @@ -from __future__ import unicode_literals +import os from .common import PostProcessor from ..compat import compat_os_name from ..utils import ( - hyphenate_date, - write_xattr, PostProcessingError, XAttrMetadataError, XAttrUnavailableError, + hyphenate_date, + write_xattr, ) class XAttrMetadataPP(PostProcessor): - # - # More info about extended attributes for media: - # http://freedesktop.org/wiki/CommonExtendedAttributes/ - # http://www.freedesktop.org/wiki/PhreedomDraft/ - # http://dublincore.org/documents/usageguide/elements.shtml - # - # TODO: - # * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated) - # * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution' - # + """Set extended attributes on downloaded file (if xattr support is found) + + More info about extended attributes for media: + http://freedesktop.org/wiki/CommonExtendedAttributes/ + http://www.freedesktop.org/wiki/PhreedomDraft/ + http://dublincore.org/documents/usageguide/elements.shtml + + TODO: + * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated) + * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution' + """ + + XATTR_MAPPING = { + 'user.xdg.referrer.url': 'webpage_url', + # 'user.xdg.comment': 'description', + 'user.dublincore.title': 'title', + 'user.dublincore.date': 'upload_date', + 'user.dublincore.description': 'description', + 'user.dublincore.contributor': 'uploader', + 'user.dublincore.format': 'format', + } def run(self, info): - """ Set extended attributes on downloaded file (if xattr support is found). """ - - # Write the metadata to the file's xattrs + mtime = os.stat(info['filepath']).st_mtime self.to_screen('Writing metadata to file\'s xattrs') - - filename = info['filepath'] - try: - xattr_mapping = { - 'user.xdg.referrer.url': 'webpage_url', - # 'user.xdg.comment': 'description', - 'user.dublincore.title': 'title', - 'user.dublincore.date': 'upload_date', - 'user.dublincore.description': 'description', - 'user.dublincore.contributor': 'uploader', - 'user.dublincore.format': 'format', - } - - num_written = 0 - for xattrname, infoname in xattr_mapping.items(): - + for xattrname, infoname in self.XATTR_MAPPING.items(): value = info.get(infoname) - if value: if infoname == 'upload_date': value = hyphenate_date(value) - - byte_value = value.encode('utf-8') - write_xattr(filename, xattrname, byte_value) - num_written += 1 - - return [], info + write_xattr(info['filepath'], xattrname, value.encode()) except XAttrUnavailableError as e: raise PostProcessingError(str(e)) - except XAttrMetadataError as e: if e.reason == 'NO_SPACE': self.report_warning( 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. ' - + (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize()) + 'Some extended attributes are not written') elif e.reason == 'VALUE_TOO_LONG': - self.report_warning( - 'Unable to write extended attributes due to too long values.') + self.report_warning('Unable to write extended attributes due to too long values.') else: - msg = 'This filesystem doesn\'t support extended attributes. ' - if compat_os_name == 'nt': - msg += 'You need to use NTFS.' - else: - msg += '(You may have to enable them in your /etc/fstab)' - raise PostProcessingError(str(e)) - return [], info + tip = ('You need to use NTFS' if compat_os_name == 'nt' + else 'You may have to enable them in your "/etc/fstab"') + raise PostProcessingError(f'This filesystem doesn\'t support extended attributes. {tip}') + + self.try_utime(info['filepath'], mtime, mtime) + return [], info |