diff options
Diffstat (limited to 'hypervideo_dl/postprocessor/ffmpeg.py')
-rw-r--r-- | hypervideo_dl/postprocessor/ffmpeg.py | 732 |
1 files changed, 546 insertions, 186 deletions
diff --git a/hypervideo_dl/postprocessor/ffmpeg.py b/hypervideo_dl/postprocessor/ffmpeg.py index 3078329..a6d6d78 100644 --- a/hypervideo_dl/postprocessor/ffmpeg.py +++ b/hypervideo_dl/postprocessor/ffmpeg.py @@ -1,26 +1,32 @@ from __future__ import unicode_literals import io +import itertools import os import subprocess import time import re - +import json from .common import AudioConversionError, PostProcessor +from ..compat import compat_str from ..utils import ( + dfxp2srt, encodeArgument, encodeFilename, + float_or_none, get_exe_version, is_outdated_version, + ISO639Utils, + orderedSet, PostProcessingError, prepend_extension, - shell_quote, - subtitles_filename, - dfxp2srt, - ISO639Utils, + process_communicate_or_kill, replace_extension, + shell_quote, + traverse_obj, + variadic, ) @@ -58,15 +64,14 @@ class FFmpegPostProcessor(PostProcessor): def check_version(self): if not self.available: - raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.') + raise FFmpegPostProcessorError('ffmpeg not found. Please install or provide the path using --ffmpeg-location') required_version = '10-0' if self.basename == 'avconv' else '1.0' if is_outdated_version( self._versions[self.basename], required_version): warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( self.basename, self.basename, required_version) - if self._downloader: - self._downloader.report_warning(warning) + self.report_warning(warning) @staticmethod def get_versions(downloader=None): @@ -96,30 +101,28 @@ class FFmpegPostProcessor(PostProcessor): self._paths = None self._versions = None if self._downloader: - prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True) - location = self._downloader.params.get('ffmpeg_location') + prefer_ffmpeg = self.get_param('prefer_ffmpeg', True) + location = self.get_param('ffmpeg_location') if location is not None: if not os.path.exists(location): - self._downloader.report_warning( + self.report_warning( 'ffmpeg-location %s does not exist! ' - 'Continuing without avconv/ffmpeg.' % (location)) + 'Continuing without ffmpeg.' % (location)) self._versions = {} return - elif not os.path.isdir(location): + elif os.path.isdir(location): + dirname, basename = location, None + else: basename = os.path.splitext(os.path.basename(location))[0] - if basename not in programs: - self._downloader.report_warning( - 'Cannot identify executable %s, its basename should be one of %s. ' - 'Continuing without avconv/ffmpeg.' % - (location, ', '.join(programs))) - self._versions = {} - return None - location = os.path.dirname(os.path.abspath(location)) + basename = next((p for p in programs if basename.startswith(p)), 'ffmpeg') + dirname = os.path.dirname(os.path.abspath(location)) if basename in ('ffmpeg', 'ffprobe'): prefer_ffmpeg = True self._paths = dict( - (p, os.path.join(location, p)) for p in programs) + (p, os.path.join(dirname, p)) for p in programs) + if basename: + self._paths[basename] = location self._versions = dict( (p, get_ffmpeg_version(self._paths[p])) for p in programs) if self._versions is None: @@ -163,7 +166,7 @@ class FFmpegPostProcessor(PostProcessor): def get_audio_codec(self, path): if not self.probe_available and not self.available: - raise PostProcessingError('ffprobe/avprobe and ffmpeg/avconv not found. Please install one.') + raise PostProcessingError('ffprobe and ffmpeg not found. Please install or provide the path using --ffmpeg-location') try: if self.probe_available: cmd = [ @@ -174,13 +177,11 @@ class FFmpegPostProcessor(PostProcessor): encodeFilename(self.executable, True), encodeArgument('-i')] cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) - if self._downloader.params.get('verbose', False): - self._downloader.to_screen( - '[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) + self.write_debug('%s command line: %s' % (self.basename, shell_quote(cmd))) handle = subprocess.Popen( cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, stdin=subprocess.PIPE) - stdout_data, stderr_data = handle.communicate() + stdout_data, stderr_data = process_communicate_or_kill(handle) expected_ret = 0 if self.probe_available else 1 if handle.wait() != expected_ret: return None @@ -203,55 +204,174 @@ class FFmpegPostProcessor(PostProcessor): return mobj.group(1) return None - def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): + def get_metadata_object(self, path, opts=[]): + if self.probe_basename != 'ffprobe': + if self.probe_available: + self.report_warning('Only ffprobe is supported for metadata extraction') + raise PostProcessingError('ffprobe not found. Please install or provide the path using --ffmpeg-location') self.check_version() - oldest_mtime = min( - os.stat(encodeFilename(path)).st_mtime for path in input_paths) + cmd = [ + encodeFilename(self.probe_executable, True), + encodeArgument('-hide_banner'), + encodeArgument('-show_format'), + encodeArgument('-show_streams'), + encodeArgument('-print_format'), + encodeArgument('json'), + ] + + cmd += opts + cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True)) + self.write_debug('ffprobe command line: %s' % shell_quote(cmd)) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + stdout, stderr = p.communicate() + return json.loads(stdout.decode('utf-8', 'replace')) + + def get_stream_number(self, path, keys, value): + streams = self.get_metadata_object(path)['streams'] + num = next( + (i for i, stream in enumerate(streams) if traverse_obj(stream, keys, casesense=False) == value), + None) + return num, len(streams) - opts += self._configuration_args() + def _get_real_video_duration(self, info, fatal=True): + try: + if '_real_duration' not in info: + info['_real_duration'] = float_or_none( + traverse_obj(self.get_metadata_object(info['filepath']), ('format', 'duration'))) + if not info['_real_duration']: + raise PostProcessingError('ffprobe returned empty duration') + except PostProcessingError as e: + if fatal: + raise PostProcessingError(f'Unable to determine video duration; {e}') + return info.setdefault('_real_duration', None) + + def _duration_mismatch(self, d1, d2): + if not d1 or not d2: + return None + return abs(d1 - d2) > 1 + + def run_ffmpeg_multiple_files(self, input_paths, out_path, opts, **kwargs): + return self.real_run_ffmpeg( + [(path, []) for path in input_paths], + [(out_path, opts)], **kwargs) + + def real_run_ffmpeg(self, input_path_opts, output_path_opts, *, expected_retcodes=(0,)): + self.check_version() + + oldest_mtime = min( + os.stat(encodeFilename(path)).st_mtime for path, _ in input_path_opts if path) - files_cmd = [] - for path in input_paths: - files_cmd.extend([ - encodeArgument('-i'), - encodeFilename(self._ffmpeg_filename_argument(path), True) - ]) cmd = [encodeFilename(self.executable, True), encodeArgument('-y')] # avconv does not have repeat option if self.basename == 'ffmpeg': cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')] - cmd += (files_cmd - + [encodeArgument(o) for o in opts] - + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) - if self._downloader.params.get('verbose', False): - self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd)) + def make_args(file, args, name, number): + keys = ['_%s%d' % (name, number), '_%s' % name] + if name == 'o' and number == 1: + keys.append('') + args += self._configuration_args(self.basename, keys) + if name == 'i': + args.append('-i') + return ( + [encodeArgument(arg) for arg in args] + + [encodeFilename(self._ffmpeg_filename_argument(file), True)]) + + for arg_type, path_opts in (('i', input_path_opts), ('o', output_path_opts)): + cmd += itertools.chain.from_iterable( + make_args(path, list(opts), arg_type, i + 1) + for i, (path, opts) in enumerate(path_opts) if path) + + self.write_debug('ffmpeg command line: %s' % shell_quote(cmd)) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) - stdout, stderr = p.communicate() - if p.returncode != 0: - stderr = stderr.decode('utf-8', 'replace') - msg = stderr.strip().split('\n')[-1] - raise FFmpegPostProcessorError(msg) - self.try_utime(out_path, oldest_mtime, oldest_mtime) + stdout, stderr = process_communicate_or_kill(p) + if p.returncode not in variadic(expected_retcodes): + stderr = stderr.decode('utf-8', 'replace').strip() + self.write_debug(stderr) + raise FFmpegPostProcessorError(stderr.split('\n')[-1]) + for out_path, _ in output_path_opts: + if out_path: + self.try_utime(out_path, oldest_mtime, oldest_mtime) + return stderr.decode('utf-8', 'replace') + + def run_ffmpeg(self, path, out_path, opts, **kwargs): + return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs) - def run_ffmpeg(self, path, out_path, opts): - self.run_ffmpeg_multiple_files([path], out_path, opts) - - def _ffmpeg_filename_argument(self, fn): + @staticmethod + def _ffmpeg_filename_argument(fn): # Always use 'file:' because the filename may contain ':' (ffmpeg # interprets that as a protocol) or can start with '-' (-- is broken in # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details) # Also leave '-' intact in order not to break streaming to stdout. + if fn.startswith(('http://', 'https://')): + return fn return 'file:' + fn if fn != '-' else fn + @staticmethod + def _quote_for_ffmpeg(string): + # See https://ffmpeg.org/ffmpeg-utils.html#toc-Quoting-and-escaping + # A sequence of '' produces '\'''\''; + # final replace removes the empty '' between \' \'. + string = string.replace("'", r"'\''").replace("'''", "'") + # Handle potential ' at string boundaries. + string = string[1:] if string[0] == "'" else "'" + string + return string[:-1] if string[-1] == "'" else string + "'" + + def force_keyframes(self, filename, timestamps): + timestamps = orderedSet(timestamps) + if timestamps[0] == 0: + timestamps = timestamps[1:] + keyframe_file = prepend_extension(filename, 'keyframes.temp') + self.to_screen(f'Re-encoding "{filename}" with appropriate keyframes') + self.run_ffmpeg(filename, keyframe_file, ['-force_key_frames', ','.join( + f'{t:.6f}' for t in timestamps)]) + return keyframe_file + + def concat_files(self, in_files, out_file, concat_opts=None): + """ + Use concat demuxer to concatenate multiple files having identical streams. + + Only inpoint, outpoint, and duration concat options are supported. + See https://ffmpeg.org/ffmpeg-formats.html#concat-1 for details + """ + concat_file = f'{out_file}.concat' + self.write_debug(f'Writing concat spec to {concat_file}') + with open(concat_file, 'wt', encoding='utf-8') as f: + f.writelines(self._concat_spec(in_files, concat_opts)) + + out_flags = ['-c', 'copy'] + if out_file.rpartition('.')[-1] in ('mp4', 'mov'): + # For some reason, '-c copy' is not enough to copy subtitles + out_flags.extend(['-c:s', 'mov_text', '-movflags', '+faststart']) + + try: + self.real_run_ffmpeg( + [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])], + [(out_file, out_flags)]) + finally: + os.remove(concat_file) + + @classmethod + def _concat_spec(cls, in_files, concat_opts=None): + if concat_opts is None: + concat_opts = [{}] * len(in_files) + yield 'ffconcat version 1.0\n' + for file, opts in zip(in_files, concat_opts): + yield f'file {cls._quote_for_ffmpeg(cls._ffmpeg_filename_argument(file))}\n' + # Iterate explicitly to yield the following directives in order, ignoring the rest. + for directive in 'inpoint', 'outpoint', 'duration': + if directive in opts: + yield f'{directive} {opts[directive]}\n' + class FFmpegExtractAudioPP(FFmpegPostProcessor): + COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma') + SUPPORTED_EXTS = ('best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav') + def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): FFmpegPostProcessor.__init__(self, downloader) - if preferredcodec is None: - preferredcodec = 'best' - self._preferredcodec = preferredcodec + self._preferredcodec = preferredcodec or 'best' self._preferredquality = preferredquality self._nopostoverwrites = nopostoverwrites @@ -266,8 +386,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): except FFmpegPostProcessorError as err: raise AudioConversionError(err.msg) + @PostProcessor._restrict_to(images=False) def run(self, information): path = information['filepath'] + orig_ext = information['ext'] + + if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS: + self.to_screen('Skipping audio extraction since the file is already in a common audio format') + return [], information filecodec = self.get_audio_codec(path) if filecodec is None: @@ -328,11 +454,11 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly. if (new_path == path or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))): - self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path) + self.to_screen('Post-process file %s exists, skipping' % new_path) return [], information try: - self._downloader.to_screen('[ffmpeg] Destination: ' + new_path) + self.to_screen('Destination: ' + new_path) self.run_ffmpeg(path, new_path, acodec, more_opts) except AudioConversionError as e: raise PostProcessingError( @@ -350,54 +476,102 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): class FFmpegVideoConvertorPP(FFmpegPostProcessor): + SUPPORTED_EXTS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus') + FORMAT_RE = re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(SUPPORTED_EXTS))) + _ACTION = 'converting' + def __init__(self, downloader=None, preferedformat=None): super(FFmpegVideoConvertorPP, self).__init__(downloader) - self._preferedformat = preferedformat + self._preferedformats = preferedformat.lower().split('/') - def run(self, information): - path = information['filepath'] - if information['ext'] == self._preferedformat: - self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat)) - return [], information - options = [] - if self._preferedformat == 'avi': - options.extend(['-c:v', 'libxvid', '-vtag', 'XVID']) - prefix, sep, ext = path.rpartition('.') - outpath = prefix + sep + self._preferedformat - self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath) - self.run_ffmpeg(path, outpath, options) - information['filepath'] = outpath - information['format'] = self._preferedformat - information['ext'] = self._preferedformat - return [path], information + def _target_ext(self, source_ext): + for pair in self._preferedformats: + kv = pair.split('>') + if len(kv) == 1 or kv[0].strip() == source_ext: + return kv[-1].strip() + + @staticmethod + def _options(target_ext): + if target_ext == 'avi': + return ['-c:v', 'libxvid', '-vtag', 'XVID'] + return [] + + @PostProcessor._restrict_to(images=False) + def run(self, info): + filename, source_ext = info['filepath'], info['ext'].lower() + target_ext = self._target_ext(source_ext) + _skip_msg = ( + f'could not find a mapping for {source_ext}' if not target_ext + else f'already is in target format {source_ext}' if source_ext == target_ext + else None) + if _skip_msg: + self.to_screen(f'Not {self._ACTION} media file {filename!r}; {_skip_msg}') + return [], info + + outpath = replace_extension(filename, target_ext, source_ext) + self.to_screen(f'{self._ACTION.title()} video from {source_ext} to {target_ext}; Destination: {outpath}') + self.run_ffmpeg(filename, outpath, self._options(target_ext)) + + info['filepath'] = outpath + info['format'] = info['ext'] = target_ext + return [filename], info + + +class FFmpegVideoRemuxerPP(FFmpegVideoConvertorPP): + _ACTION = 'remuxing' + + @staticmethod + def _options(target_ext): + options = ['-c', 'copy', '-map', '0', '-dn'] + if target_ext in ['mp4', 'm4a', 'mov']: + options.extend(['-movflags', '+faststart']) + return options class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): + def __init__(self, downloader=None, already_have_subtitle=False): + super(FFmpegEmbedSubtitlePP, self).__init__(downloader) + self._already_have_subtitle = already_have_subtitle + + @PostProcessor._restrict_to(images=False) def run(self, information): if information['ext'] not in ('mp4', 'webm', 'mkv'): - self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files') + self.to_screen('Subtitles can only be embedded in mp4, webm or mkv files') return [], information subtitles = information.get('requested_subtitles') if not subtitles: - self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed') + self.to_screen('There aren\'t any subtitles to embed') return [], information filename = information['filepath'] + if information.get('duration') and self._duration_mismatch( + self._get_real_video_duration(information, False), information['duration']): + self.to_screen(f'Skipping {self.pp_key()} since the real and expected durations mismatch') + return [], information ext = information['ext'] - sub_langs = [] - sub_filenames = [] + sub_langs, sub_names, sub_filenames = [], [], [] webm_vtt_warn = False + mp4_ass_warn = False for lang, sub_info in subtitles.items(): + if not os.path.exists(sub_info.get('filepath', '')): + self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing') + continue sub_ext = sub_info['ext'] - if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': + if sub_ext == 'json': + self.report_warning('JSON subtitles cannot be embedded') + elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': sub_langs.append(lang) - sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext)) + sub_names.append(sub_info.get('name')) + sub_filenames.append(sub_info['filepath']) else: if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': webm_vtt_warn = True - self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files') + self.report_warning('Only WebVTT subtitles can be embedded in webm files') + if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass': + mp4_ass_warn = True + self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues') if not sub_langs: return [], information @@ -405,8 +579,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): input_files = [filename] + sub_filenames opts = [ - '-map', '0', - '-c', 'copy', + '-c', 'copy', '-map', '0', '-dn', # Don't copy the existing subtitles, we may be running the # postprocessor a second time '-map', '-0:s', @@ -416,48 +589,100 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): ] if information['ext'] == 'mp4': opts += ['-c:s', 'mov_text'] - for (i, lang) in enumerate(sub_langs): + for i, (lang, name) in enumerate(zip(sub_langs, sub_names)): opts.extend(['-map', '%d:0' % (i + 1)]) lang_code = ISO639Utils.short2long(lang) or lang opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) + if name: + opts.extend(['-metadata:s:s:%d' % i, 'handler_name=%s' % name, + '-metadata:s:s:%d' % i, 'title=%s' % name]) temp_filename = prepend_extension(filename, 'temp') - self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename) + self.to_screen('Embedding subtitles in "%s"' % filename) self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) - os.remove(encodeFilename(filename)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + os.replace(temp_filename, filename) - return sub_filenames, information + files_to_delete = [] if self._already_have_subtitle else sub_filenames + return files_to_delete, information class FFmpegMetadataPP(FFmpegPostProcessor): + + def __init__(self, downloader, add_metadata=True, add_chapters=True): + FFmpegPostProcessor.__init__(self, downloader) + self._add_metadata = add_metadata + self._add_chapters = add_chapters + + @staticmethod + def _options(target_ext): + yield from ('-map', '0', '-dn') + if target_ext == 'm4a': + yield from ('-vn', '-acodec', 'copy') + else: + yield from ('-c', 'copy') + + @PostProcessor._restrict_to(images=False) def run(self, info): + filename, metadata_filename = info['filepath'], None + options = [] + if self._add_chapters and info.get('chapters'): + metadata_filename = replace_extension(filename, 'meta') + options.extend(self._get_chapter_opts(info['chapters'], metadata_filename)) + if self._add_metadata: + options.extend(self._get_metadata_opts(info)) + + if not options: + self.to_screen('There isn\'t any metadata to add') + return [], info + + temp_filename = prepend_extension(filename, 'temp') + self.to_screen('Adding metadata to "%s"' % filename) + self.run_ffmpeg_multiple_files( + (filename, metadata_filename), temp_filename, + itertools.chain(self._options(info['ext']), *options)) + if metadata_filename: + os.remove(metadata_filename) + os.replace(temp_filename, filename) + return [], info + + @staticmethod + def _get_chapter_opts(chapters, metadata_filename): + with io.open(metadata_filename, 'wt', encoding='utf-8') as f: + def ffmpeg_escape(text): + return re.sub(r'([\\=;#\n])', r'\\\1', text) + + metadata_file_content = ';FFMETADATA1\n' + for chapter in chapters: + metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n' + metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000) + metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000) + chapter_title = chapter.get('title') + if chapter_title: + metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title) + f.write(metadata_file_content) + yield ('-map_metadata', '1') + + def _get_metadata_opts(self, info): metadata = {} + meta_prefix = 'meta_' def add(meta_list, info_list=None): - if not info_list: - info_list = meta_list - if not isinstance(meta_list, (list, tuple)): - meta_list = (meta_list,) - if not isinstance(info_list, (list, tuple)): - info_list = (info_list,) - for info_f in info_list: - if info.get(info_f) is not None: - for meta_f in meta_list: - metadata[meta_f] = info[info_f] - break + value = next(( + str(info[key]) for key in [meta_prefix] + list(variadic(info_list or meta_list)) + if info.get(key) is not None), None) + if value not in ('', None): + metadata.update({meta_f: value for meta_f in variadic(meta_list)}) # See [1-4] for some info on media metadata/metadata supported # by ffmpeg. # 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/ # 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata # 3. https://kodi.wiki/view/Video_file_tagging - # 4. http://atomicparsley.sourceforge.net/mpeg-4files.html add('title', ('track', 'title')) add('date', 'upload_date') - add(('description', 'comment'), 'description') - add('purl', 'webpage_url') + add(('description', 'synopsis'), 'description') + add(('purl', 'comment'), 'webpage_url') add('track', 'track_number') add('artist', ('artist', 'creator', 'uploader', 'uploader_id')) add('genre') @@ -469,57 +694,50 @@ class FFmpegMetadataPP(FFmpegPostProcessor): add('episode_id', ('episode', 'episode_id')) add('episode_sort', 'episode_number') - if not metadata: - self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add') - return [], info + for key, value in info.items(): + if value is not None and key != meta_prefix and key.startswith(meta_prefix): + metadata[key[len(meta_prefix):]] = value - filename = info['filepath'] - temp_filename = prepend_extension(filename, 'temp') - in_filenames = [filename] - options = [] + for name, value in metadata.items(): + yield ('-metadata', f'{name}={value}') - if info['ext'] == 'm4a': - options.extend(['-vn', '-acodec', 'copy']) - else: - options.extend(['-c', 'copy']) + stream_idx = 0 + for fmt in info.get('requested_formats') or []: + stream_count = 2 if 'none' not in (fmt.get('vcodec'), fmt.get('acodec')) else 1 + if fmt.get('language'): + lang = ISO639Utils.short2long(fmt['language']) or fmt['language'] + for i in range(stream_count): + yield ('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang) + stream_idx += stream_count - for (name, value) in metadata.items(): - options.extend(['-metadata', '%s=%s' % (name, value)]) + if ('no-attach-info-json' not in self.get_param('compat_opts', []) + and '__infojson_filename' in info and info['ext'] in ('mkv', 'mka')): + old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') + if old_stream is not None: + yield ('-map', '-0:%d' % old_stream) + new_stream -= 1 - chapters = info.get('chapters', []) - if chapters: - metadata_filename = replace_extension(filename, 'meta') - with io.open(metadata_filename, 'wt', encoding='utf-8') as f: - def ffmpeg_escape(text): - return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text) - - metadata_file_content = ';FFMETADATA1\n' - for chapter in chapters: - metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n' - metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000) - metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000) - chapter_title = chapter.get('title') - if chapter_title: - metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title) - f.write(metadata_file_content) - in_filenames.append(metadata_filename) - options.extend(['-map_metadata', '1']) - - self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename) - self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options) - if chapters: - os.remove(metadata_filename) - os.remove(encodeFilename(filename)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return [], info + yield ('-attach', info['__infojson_filename'], + '-metadata:s:%d' % new_stream, 'mimetype=application/json') class FFmpegMergerPP(FFmpegPostProcessor): + @PostProcessor._restrict_to(images=False) def run(self, info): filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') - args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] - self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) + args = ['-c', 'copy'] + audio_streams = 0 + for (i, fmt) in enumerate(info['requested_formats']): + if fmt.get('acodec') != 'none': + args.extend(['-map', f'{i}:a:0']) + aac_fixup = fmt['protocol'].startswith('m3u8') and self.get_audio_codec(fmt['filepath']) == 'aac' + if aac_fixup: + args.extend([f'-bsf:a:{audio_streams}', 'aac_adtstoasc']) + audio_streams += 1 + if fmt.get('vcodec') != 'none': + args.extend(['-map', '%u:v:0' % (i)]) + self.to_screen('Merging formats into "%s"' % filename) self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) return info['__files_to_merge'], info @@ -536,98 +754,120 @@ class FFmpegMergerPP(FFmpegPostProcessor): 'hypervideo will download single file media. ' 'Update %s to version %s or newer to fix this.') % ( self.basename, self.basename, required_version) - if self._downloader: - self._downloader.report_warning(warning) + self.report_warning(warning) return False return True -class FFmpegFixupStretchedPP(FFmpegPostProcessor): - def run(self, info): - stretched_ratio = info.get('stretched_ratio') - if stretched_ratio is None or stretched_ratio == 1: - return [], info - - filename = info['filepath'] +class FFmpegFixupPostProcessor(FFmpegPostProcessor): + def _fixup(self, msg, filename, options): temp_filename = prepend_extension(filename, 'temp') - options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio] - self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename) + self.to_screen(f'{msg} of "{filename}"') self.run_ffmpeg(filename, temp_filename, options) - os.remove(encodeFilename(filename)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + os.replace(temp_filename, filename) + +class FFmpegFixupStretchedPP(FFmpegFixupPostProcessor): + @PostProcessor._restrict_to(images=False, audio=False) + def run(self, info): + stretched_ratio = info.get('stretched_ratio') + if stretched_ratio not in (None, 1): + self._fixup('Fixing aspect ratio', info['filepath'], [ + '-c', 'copy', '-map', '0', '-dn', '-aspect', '%f' % stretched_ratio]) return [], info -class FFmpegFixupM4aPP(FFmpegPostProcessor): +class FFmpegFixupM4aPP(FFmpegFixupPostProcessor): + @PostProcessor._restrict_to(images=False, video=False) def run(self, info): - if info.get('container') != 'm4a_dash': - return [], info + if info.get('container') == 'm4a_dash': + self._fixup('Correcting container', info['filepath'], [ + '-c', 'copy', '-map', '0', '-dn', '-f', 'mp4']) + return [], info - filename = info['filepath'] - temp_filename = prepend_extension(filename, 'temp') - options = ['-c', 'copy', '-f', 'mp4'] - self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename) - self.run_ffmpeg(filename, temp_filename, options) +class FFmpegFixupM3u8PP(FFmpegFixupPostProcessor): + @PostProcessor._restrict_to(images=False) + def run(self, info): + if self.get_audio_codec(info['filepath']) == 'aac': + self._fixup('Fixing malformed AAC bitstream', info['filepath'], [ + '-c', 'copy', '-map', '0', '-dn', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']) + return [], info - os.remove(encodeFilename(filename)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return [], info +class FFmpegFixupTimestampPP(FFmpegFixupPostProcessor): + def __init__(self, downloader=None, trim=0.001): + # "trim" should be used when the video contains unintended packets + super(FFmpegFixupTimestampPP, self).__init__(downloader) + assert isinstance(trim, (int, float)) + self.trim = str(trim) -class FFmpegFixupM3u8PP(FFmpegPostProcessor): + @PostProcessor._restrict_to(images=False) def run(self, info): - filename = info['filepath'] - if self.get_audio_codec(filename) == 'aac': - temp_filename = prepend_extension(filename, 'temp') + required_version = '4.4' + if is_outdated_version(self._versions[self.basename], required_version): + self.report_warning( + 'A re-encode is needed to fix timestamps in older versions of ffmpeg. ' + f'Please install ffmpeg {required_version} or later to fixup without re-encoding') + opts = ['-vf', 'setpts=PTS-STARTPTS'] + else: + opts = ['-c', 'copy', '-bsf', 'setts=ts=TS-STARTPTS'] + self._fixup('Fixing frame timestamp', info['filepath'], opts + ['-map', '0', '-dn', '-ss', self.trim]) + return [], info - options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] - self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename) - self.run_ffmpeg(filename, temp_filename, options) - os.remove(encodeFilename(filename)) - os.rename(encodeFilename(temp_filename), encodeFilename(filename)) +class FFmpegFixupDurationPP(FFmpegFixupPostProcessor): + @PostProcessor._restrict_to(images=False) + def run(self, info): + self._fixup('Fixing video duration', info['filepath'], ['-c', 'copy', '-map', '0', '-dn']) return [], info class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): + SUPPORTED_EXTS = ('srt', 'vtt', 'ass', 'lrc') + def __init__(self, downloader=None, format=None): super(FFmpegSubtitlesConvertorPP, self).__init__(downloader) self.format = format def run(self, info): subs = info.get('requested_subtitles') - filename = info['filepath'] new_ext = self.format new_format = new_ext if new_format == 'vtt': new_format = 'webvtt' if subs is None: - self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert') + self.to_screen('There aren\'t any subtitles to convert') return [], info - self._downloader.to_screen('[ffmpeg] Converting subtitles') + self.to_screen('Converting subtitles') sub_filenames = [] for lang, sub in subs.items(): + if not os.path.exists(sub.get('filepath', '')): + self.report_warning(f'Skipping embedding {lang} subtitle because the file is missing') + continue ext = sub['ext'] if ext == new_ext: - self._downloader.to_screen( - '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) + self.to_screen('Subtitle file for %s is already in the requested format' % new_ext) + continue + elif ext == 'json': + self.to_screen( + 'You have requested to convert json subtitles into another format, ' + 'which is currently not possible') continue - old_file = subtitles_filename(filename, lang, ext, info.get('ext')) + old_file = sub['filepath'] sub_filenames.append(old_file) - new_file = subtitles_filename(filename, lang, new_ext, info.get('ext')) + new_file = replace_extension(old_file, new_ext) if ext in ('dfxp', 'ttml', 'tt'): - self._downloader.report_warning( + self.report_warning( 'You have requested to convert dfxp (TTML) subtitles into another format, ' 'which results in style information loss') dfxp_file = old_file - srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext')) + srt_file = replace_extension(old_file, 'srt') with open(dfxp_file, 'rb') as f: srt_data = dfxp2srt(f.read()) @@ -638,7 +878,8 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): subs[lang] = { 'ext': 'srt', - 'data': srt_data + 'data': srt_data, + 'filepath': srt_file, } if new_ext == 'srt': @@ -652,6 +893,125 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): subs[lang] = { 'ext': new_ext, 'data': f.read(), + 'filepath': new_file, } + info['__files_to_move'][new_file] = replace_extension( + info['__files_to_move'][sub['filepath']], new_ext) + return sub_filenames, info + + +class FFmpegSplitChaptersPP(FFmpegPostProcessor): + def __init__(self, downloader, force_keyframes=False): + FFmpegPostProcessor.__init__(self, downloader) + self._force_keyframes = force_keyframes + + def _prepare_filename(self, number, chapter, info): + info = info.copy() + info.update({ + 'section_number': number, + 'section_title': chapter.get('title'), + 'section_start': chapter.get('start_time'), + 'section_end': chapter.get('end_time'), + }) + return self._downloader.prepare_filename(info, 'chapter') + + def _ffmpeg_args_for_chapter(self, number, chapter, info): + destination = self._prepare_filename(number, chapter, info) + if not self._downloader._ensure_dir_exists(encodeFilename(destination)): + return + + chapter['filepath'] = destination + self.to_screen('Chapter %03d; Destination: %s' % (number, destination)) + return ( + destination, + ['-ss', compat_str(chapter['start_time']), + '-t', compat_str(chapter['end_time'] - chapter['start_time'])]) + + @PostProcessor._restrict_to(images=False) + def run(self, info): + chapters = info.get('chapters') or [] + if not chapters: + self.to_screen('Chapter information is unavailable') + return [], info + + in_file = info['filepath'] + if self._force_keyframes and len(chapters) > 1: + in_file = self.force_keyframes(in_file, (c['start_time'] for c in chapters)) + self.to_screen('Splitting video by chapters; %d chapters found' % len(chapters)) + for idx, chapter in enumerate(chapters): + destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info) + self.real_run_ffmpeg([(in_file, opts)], [(destination, ['-c', 'copy'])]) + if in_file != info['filepath']: + os.remove(in_file) + return [], info + + +class FFmpegThumbnailsConvertorPP(FFmpegPostProcessor): + SUPPORTED_EXTS = ('jpg', 'png') + + def __init__(self, downloader=None, format=None): + super(FFmpegThumbnailsConvertorPP, self).__init__(downloader) + self.format = format + + @staticmethod + def is_webp(path): + with open(encodeFilename(path), 'rb') as f: + b = f.read(12) + return b[0:4] == b'RIFF' and b[8:] == b'WEBP' + + def fixup_webp(self, info, idx=-1): + thumbnail_filename = info['thumbnails'][idx]['filepath'] + _, thumbnail_ext = os.path.splitext(thumbnail_filename) + if thumbnail_ext: + thumbnail_ext = thumbnail_ext[1:].lower() + if thumbnail_ext != 'webp' and self.is_webp(thumbnail_filename): + self.to_screen('Correcting thumbnail "%s" extension to webp' % thumbnail_filename) + webp_filename = replace_extension(thumbnail_filename, 'webp') + os.replace(thumbnail_filename, webp_filename) + info['thumbnails'][idx]['filepath'] = webp_filename + info['__files_to_move'][webp_filename] = replace_extension( + info['__files_to_move'].pop(thumbnail_filename), 'webp') + + @staticmethod + def _options(target_ext): + if target_ext == 'jpg': + return ['-bsf:v', 'mjpeg2jpeg'] + return [] + + def convert_thumbnail(self, thumbnail_filename, target_ext): + thumbnail_conv_filename = replace_extension(thumbnail_filename, target_ext) + + self.to_screen('Converting thumbnail "%s" to %s' % (thumbnail_filename, target_ext)) + self.real_run_ffmpeg( + [(thumbnail_filename, ['-f', 'image2', '-pattern_type', 'none'])], + [(thumbnail_conv_filename.replace('%', '%%'), self._options(target_ext))]) + return thumbnail_conv_filename + + def run(self, info): + files_to_delete = [] + has_thumbnail = False + + for idx, thumbnail_dict in enumerate(info['thumbnails']): + if 'filepath' not in thumbnail_dict: + continue + has_thumbnail = True + self.fixup_webp(info, idx) + original_thumbnail = thumbnail_dict['filepath'] + _, thumbnail_ext = os.path.splitext(original_thumbnail) + if thumbnail_ext: + thumbnail_ext = thumbnail_ext[1:].lower() + if thumbnail_ext == 'jpeg': + thumbnail_ext = 'jpg' + if thumbnail_ext == self.format: + self.to_screen('Thumbnail "%s" is already in the requested format' % original_thumbnail) + continue + thumbnail_dict['filepath'] = self.convert_thumbnail(original_thumbnail, self.format) + files_to_delete.append(original_thumbnail) + info['__files_to_move'][thumbnail_dict['filepath']] = replace_extension( + info['__files_to_move'][original_thumbnail], self.format) + + if not has_thumbnail: + self.to_screen('There aren\'t any thumbnails to convert') + return files_to_delete, info |