diff options
Diffstat (limited to 'youtube_dl/postprocessor/ffmpeg.py')
-rw-r--r-- | youtube_dl/postprocessor/ffmpeg.py | 613 |
1 files changed, 613 insertions, 0 deletions
# Reconstructed from the flattened diff that adds
# youtube_dl/postprocessor/ffmpeg.py (new file, mode 100644, blob 757b496).
from __future__ import unicode_literals

import io
import os
import subprocess
import time
import re


from .common import AudioConversionError, PostProcessor

from ..compat import (
    compat_subprocess_get_DEVNULL,
)
from ..utils import (
    encodeArgument,
    encodeFilename,
    get_exe_version,
    is_outdated_version,
    PostProcessingError,
    prepend_extension,
    shell_quote,
    subtitles_filename,
    dfxp2srt,
    ISO639Utils,
    replace_extension,
)


# File extension -> ffmpeg output format name, for extensions where the two
# differ (or must be spelled out explicitly).
EXT_TO_OUT_FORMATS = {
    'aac': 'adts',
    'flac': 'flac',
    'm4a': 'ipod',
    'mka': 'matroska',
    'mkv': 'matroska',
    'mpg': 'mpeg',
    'ogv': 'ogg',
    'ts': 'mpegts',
    'wma': 'asf',
    'wmv': 'asf',
}
# Requested audio codec -> value passed to ffmpeg's -acodec option.
# None means that no -acodec option is passed at all.
ACODECS = {
    'mp3': 'libmp3lame',
    'aac': 'aac',
    'flac': 'flac',
    'm4a': 'aac',
    'opus': 'libopus',
    'vorbis': 'libvorbis',
    'wav': None,
}


class FFmpegPostProcessorError(PostProcessingError):
    """Raised when ffmpeg/avconv is missing or exits with a non-zero status."""
    pass


class FFmpegPostProcessor(PostProcessor):
    """Base class for post-processors that shell out to ffmpeg/avconv.

    On construction it detects which of ffmpeg, avconv, ffprobe and avprobe
    are usable and records their paths and versions.
    """

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        self._determine_executables()

    def check_version(self):
        """Ensure a converter is available and warn if it looks outdated.

        Raises FFmpegPostProcessorError when neither ffmpeg nor avconv was
        detected. An outdated version only produces a warning (and only when
        a downloader is attached).
        """
        if not self.available:
            raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')

        required_version = '10-0' if self.basename == 'avconv' else '1.0'
        if is_outdated_version(
                self._versions[self.basename], required_version):
            warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
                self.basename, self.basename, required_version)
            if self._downloader:
                self._downloader.report_warning(warning)

    @staticmethod
    def get_versions(downloader=None):
        """Return the program-name -> version mapping detected for *downloader*."""
        return FFmpegPostProcessor(downloader)._versions

    def _determine_executables(self):
        """Populate basename, probe_basename, _paths and _versions.

        Honours the downloader params 'prefer_ffmpeg' and 'ffmpeg_location'.
        When 'ffmpeg_location' is unusable a warning is reported, _versions is
        set to an empty dict and no executable is selected.
        """
        programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
        prefer_ffmpeg = True

        self.basename = None
        self.probe_basename = None

        self._paths = None
        self._versions = None
        if self._downloader:
            prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
            location = self._downloader.params.get('ffmpeg_location')
            if location is not None:
                if not os.path.exists(location):
                    self._downloader.report_warning(
                        'ffmpeg-location %s does not exist! '
                        'Continuing without avconv/ffmpeg.' % (location))
                    self._versions = {}
                    return
                elif not os.path.isdir(location):
                    # A path to a single executable: use its directory for
                    # all programs, after checking that the name is known.
                    basename = os.path.splitext(os.path.basename(location))[0]
                    if basename not in programs:
                        self._downloader.report_warning(
                            'Cannot identify executable %s, its basename should be one of %s. '
                            'Continuing without avconv/ffmpeg.' %
                            (location, ', '.join(programs)))
                        self._versions = {}
                        return
                    location = os.path.dirname(os.path.abspath(location))
                    if basename in ('ffmpeg', 'ffprobe'):
                        prefer_ffmpeg = True

                self._paths = dict(
                    (p, os.path.join(location, p)) for p in programs)
                self._versions = dict(
                    (p, get_exe_version(self._paths[p], args=['-version']))
                    for p in programs)
        if self._versions is None:
            # No explicit location: rely on bare program names.
            self._versions = dict(
                (p, get_exe_version(p, args=['-version'])) for p in programs)
            self._paths = dict((p, p) for p in programs)

        # Only an explicit False flips the preference; any other value
        # (including None) keeps ffmpeg/ffprobe first.
        if prefer_ffmpeg is False:
            prefs = ('avconv', 'ffmpeg')
        else:
            prefs = ('ffmpeg', 'avconv')
        for p in prefs:
            if self._versions[p]:
                self.basename = p
                break

        if prefer_ffmpeg is False:
            prefs = ('avprobe', 'ffprobe')
        else:
            prefs = ('ffprobe', 'avprobe')
        for p in prefs:
            if self._versions[p]:
                self.probe_basename = p
                break

    @property
    def available(self):
        """True when a converter (ffmpeg or avconv) was detected."""
        return self.basename is not None

    @property
    def executable(self):
        """Path of the selected converter executable."""
        return self._paths[self.basename]

    @property
    def probe_available(self):
        """True when a probe tool (ffprobe or avprobe) was detected."""
        return self.probe_basename is not None

    @property
    def probe_executable(self):
        """Path of the selected probe executable."""
        return self._paths[self.probe_basename]

    def get_audio_codec(self, path):
        """Return the audio codec name of *path*, or None if it is unknown.

        Runs the probe tool with -show_streams and scans its output. None is
        returned when the probe cannot be started, exits with a non-zero
        status, or reports no audio stream.

        Raises PostProcessingError when no probe tool is available.
        """
        if not self.probe_available:
            raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
        try:
            cmd = [
                encodeFilename(self.probe_executable, True),
                encodeArgument('-show_streams'),
                encodeFilename(self._ffmpeg_filename_argument(path), True)]
            if self._downloader and self._downloader.params.get('verbose', False):
                # Label the line with the probe tool, since that is the
                # program actually being run here.
                self._downloader.to_screen('[debug] %s command line: %s' % (self.probe_basename, shell_quote(cmd)))
            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
            output = handle.communicate()[0]
            if handle.wait() != 0:
                return None
        except (IOError, OSError):
            return None
        audio_codec = None
        # A codec_name= line is remembered until the codec_type= line says
        # whether that stream is the audio one.
        for line in output.decode('ascii', 'ignore').split('\n'):
            if line.startswith('codec_name='):
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
        """Run the converter on *input_paths*, writing to *out_path*.

        *opts* are placed between the inputs and the output, followed by the
        configured extra arguments; the caller's sequence is not modified.
        After a successful run the output's access and modification times are
        set to the oldest mtime among the inputs.

        Raises FFmpegPostProcessorError carrying the last line of stderr when
        the converter exits with a non-zero status.
        """
        self.check_version()

        oldest_mtime = min(
            os.stat(encodeFilename(path)).st_mtime for path in input_paths)

        # Work on a copy so the caller's list is left untouched.
        opts = list(opts)
        opts.extend(self._configuration_args())

        files_cmd = []
        for path in input_paths:
            files_cmd.extend([
                encodeArgument('-i'),
                encodeFilename(self._ffmpeg_filename_argument(path), True)
            ])
        cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
               files_cmd +
               [encodeArgument(o) for o in opts] +
               [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])

        if self._downloader and self._downloader.params.get('verbose', False):
            self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            stderr = stderr.decode('utf-8', 'replace')
            msg = stderr.strip().split('\n')[-1]
            raise FFmpegPostProcessorError(msg)
        self.try_utime(out_path, oldest_mtime, oldest_mtime)

    def run_ffmpeg(self, path, out_path, opts):
        """Single-input convenience wrapper around run_ffmpeg_multiple_files."""
        self.run_ffmpeg_multiple_files([path], out_path, opts)

    def _ffmpeg_filename_argument(self, fn):
        """Return *fn* in the form it should be passed to ffmpeg."""
        # Always use 'file:' because the filename may contain ':' (ffmpeg
        # interprets that as a protocol) or can start with '-' (-- is broken in
        # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
        # Also leave '-' intact in order not to break streaming to stdout.
        return fn if fn == '-' else 'file:' + fn


class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Extract the audio track of a file, copying or re-encoding it."""

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Run an audio-only conversion.

        Note that the signature differs from the base class: *codec* is the
        -acodec value (None to omit the option) and *more_opts* are appended
        after it. Converter failures are re-raised as AudioConversionError.
        """
        if codec is None:
            acodec_opts = []
        else:
            acodec_opts = ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg)

    def run(self, information):
        """Extract audio from information['filepath'].

        Updates 'filepath' and 'ext' in *information* and returns a tuple of
        (files that may be deleted, information).

        Raises PostProcessingError when the source codec cannot be determined
        or the conversion fails.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = ['-bsf:a', 'aac_adtstoasc']
            elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = []
                if self._preferredquality is not None:
                    # Values below 10 are passed as a -q:a quality level,
                    # anything else as a bitrate in kbit/s.
                    if int(self._preferredquality) < 10:
                        more_opts += ['-q:a', self._preferredquality]
                    else:
                        more_opts += ['-b:a', self._preferredquality + 'k']
        else:
            # We convert the audio (lossy if codec is lossy)
            acodec = ACODECS[self._preferredcodec]
            extension = self._preferredcodec
            more_opts = []
            if self._preferredquality is not None:
                # The opus codec doesn't support the -aq option
                if int(self._preferredquality) < 10 and extension != 'opus':
                    more_opts += ['-q:a', self._preferredquality]
                else:
                    more_opts += ['-b:a', self._preferredquality + 'k']
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'm4a':
                more_opts += ['-bsf:a', 'aac_adtstoasc']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'
            if self._preferredcodec == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        prefix, sep, _ = path.rpartition('.')  # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        information['filepath'] = new_path
        information['ext'] = extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        if (new_path == path or
                (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
            self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
            return [], information

        try:
            self._downloader.to_screen('[ffmpeg] Destination: ' + new_path)
            self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as e:
            raise PostProcessingError(
                'audio conversion failed: ' + e.msg)
        except Exception:
            raise PostProcessingError('error running ' + self.basename)

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            self.try_utime(
                new_path, time.time(), information['filetime'],
                errnote='Cannot update utime of audio file')

        return [path], information


class FFmpegVideoConvertorPP(FFmpegPostProcessor):
    """Convert a video file to another container/format."""

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertorPP, self).__init__(downloader)
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert information['filepath'] to the preferred format.

        Nothing is done when the file already has the target extension.
        Otherwise 'filepath', 'format' and 'ext' are updated and the original
        file is returned as deletable.
        """
        path = information['filepath']
        if information['ext'] == self._preferedformat:
            self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
            return [], information
        options = []
        if self._preferedformat == 'avi':
            options.extend(['-c:v', 'libxvid', '-vtag', 'XVID'])
        prefix, sep, _ = path.rpartition('.')
        outpath = prefix + sep + self._preferedformat
        self._downloader.to_screen(
            '[ffmpeg] Converting video from %s to %s, Destination: %s'
            % (information['ext'], self._preferedformat, outpath))
        self.run_ffmpeg(path, outpath, options)
        information['filepath'] = outpath
        information['format'] = self._preferedformat
        information['ext'] = self._preferedformat
        return [path], information


class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Embed downloaded subtitle files into an mp4, webm or mkv file."""

    def run(self, information):
        """Mux the requested subtitles into information['filepath'].

        Returns (embedded subtitle files, information); the list is empty
        when nothing was embedded.
        """
        if information['ext'] not in ('mp4', 'webm', 'mkv'):
            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files')
            return [], information
        subtitles = information.get('requested_subtitles')
        if not subtitles:
            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
            return [], information

        filename = information['filepath']

        ext = information['ext']
        sub_langs = []
        sub_filenames = []
        webm_vtt_warn = False

        for lang, sub_info in subtitles.items():
            sub_ext = sub_info['ext']
            # webm accepts only WebVTT subtitles; other containers take any.
            if ext != 'webm' or sub_ext == 'vtt':
                sub_langs.append(lang)
                sub_filenames.append(subtitles_filename(filename, lang, sub_ext))
            elif not webm_vtt_warn:
                # Warn once, no matter how many subtitles are skipped.
                webm_vtt_warn = True
                self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')

        if not sub_langs:
            return [], information

        input_files = [filename] + sub_filenames

        opts = [
            '-map', '0',
            '-c', 'copy',
            # Don't copy the existing subtitles, we may be running the
            # postprocessor a second time
            '-map', '-0:s',
        ]
        if information['ext'] == 'mp4':
            opts += ['-c:s', 'mov_text']
        for (i, lang) in enumerate(sub_langs):
            # Input 0 is the media file, so subtitle inputs start at 1.
            opts.extend(['-map', '%d:0' % (i + 1)])
            lang_code = ISO639Utils.short2long(lang)
            if lang_code is not None:
                opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])

        temp_filename = prepend_extension(filename, 'temp')
        self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return sub_filenames, information


class FFmpegMetadataPP(FFmpegPostProcessor):
    """Write metadata tags (and chapters, if any) into the media file."""

    def run(self, info):
        """Add metadata taken from *info* to info['filepath'] in place.

        Returns ([], info). Nothing is done when *info* yields no metadata.
        """
        metadata = {}

        def add(meta_list, info_list=None):
            # Store the first non-None info field from info_list under every
            # name in meta_list; info_list defaults to meta_list.
            if not info_list:
                info_list = meta_list
            if not isinstance(meta_list, (list, tuple)):
                meta_list = (meta_list,)
            if not isinstance(info_list, (list, tuple)):
                info_list = (info_list,)
            for info_f in info_list:
                if info.get(info_f) is not None:
                    for meta_f in meta_list:
                        metadata[meta_f] = info[info_f]
                    break

        add('title', ('track', 'title'))
        add('date', 'upload_date')
        add(('description', 'comment'), 'description')
        add('purl', 'webpage_url')
        add('track', 'track_number')
        add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
        add('genre')
        add('album')
        add('album_artist')
        add('disc', 'disc_number')

        if not metadata:
            self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
            return [], info

        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')
        in_filenames = [filename]
        options = []

        if info['ext'] == 'm4a':
            options.extend(['-vn', '-acodec', 'copy'])
        else:
            options.extend(['-c', 'copy'])

        for (name, value) in metadata.items():
            options.extend(['-metadata', '%s=%s' % (name, value)])

        chapters = info.get('chapters', [])
        if chapters:
            def ffmpeg_escape(text):
                # Backslash-escape '=', ';', '#', '\' and newlines.
                return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)

            # Chapter times are multiplied by 1000 to match TIMEBASE=1/1000.
            metadata_file_content = ';FFMETADATA1\n'
            for chapter in chapters:
                metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
                metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
                metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
                chapter_title = chapter.get('title')
                if chapter_title:
                    metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)

            metadata_filename = replace_extension(filename, 'meta')
            with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
                f.write(metadata_file_content)
            in_filenames.append(metadata_filename)
            # The chapters file is the second input (index 1).
            options.extend(['-map_metadata', '1'])

        self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
        try:
            self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
        finally:
            # Remove the temporary chapters file even when ffmpeg fails.
            if chapters:
                os.remove(metadata_filename)
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return [], info


class FFmpegMergerPP(FFmpegPostProcessor):
    """Merge separately downloaded video and audio files into one file."""

    def run(self, info):
        """Mux info['__files_to_merge'] into info['filepath'].

        The first video stream of the first input and the first audio stream
        of the second input are copied. The inputs are returned as deletable.
        """
        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')
        args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
        self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return info['__files_to_merge'], info

    def can_merge(self):
        """Return False (after warning) only for an outdated avconv."""
        # TODO: figure out merge-capable ffmpeg version
        if self.basename != 'avconv':
            return True

        required_version = '10-0'
        if is_outdated_version(
                self._versions[self.basename], required_version):
            warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
                       'youtube-dl will download single file media. '
                       'Update %s to version %s or newer to fix this.') % (
                           self.basename, self.basename, required_version)
            if self._downloader:
                self._downloader.report_warning(warning)
            return False
        return True


class FFmpegFixupStretchedPP(FFmpegPostProcessor):
    """Rewrite the file with the aspect ratio given by 'stretched_ratio'."""

    def run(self, info):
        """Apply info['stretched_ratio'] in place; no-op when it is None or 1."""
        stretched_ratio = info.get('stretched_ratio')
        if stretched_ratio is None or stretched_ratio == 1:
            return [], info

        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')

        options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
        self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
        self.run_ffmpeg(filename, temp_filename, options)

        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return [], info


class FFmpegFixupM4aPP(FFmpegPostProcessor):
    """Remux files whose 'container' is 'm4a_dash' into an mp4 container."""

    def run(self, info):
        """Remux info['filepath'] in place; no-op for other containers."""
        if info.get('container') != 'm4a_dash':
            return [], info

        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')

        options = ['-c', 'copy', '-f', 'mp4']
        self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
        self.run_ffmpeg(filename, temp_filename, options)

        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return [], info


class FFmpegFixupM3u8PP(FFmpegPostProcessor):
    """Remux files with AAC audio to mp4 using the aac_adtstoasc filter."""

    def run(self, info):
        """Remux info['filepath'] in place when its audio codec is 'aac'."""
        filename = info['filepath']
        if self.get_audio_codec(filename) == 'aac':
            temp_filename = prepend_extension(filename, 'temp')

            options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
            self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
            self.run_ffmpeg(filename, temp_filename, options)

            os.remove(encodeFilename(filename))
            os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return [], info


class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
    """Convert downloaded subtitle files to another subtitle format."""

    def __init__(self, downloader=None, format=None):
        super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
        self.format = format

    def run(self, info):
        """Convert every requested subtitle to self.format.

        Each converted entry of info['requested_subtitles'] is replaced with
        its new 'ext' and 'data'. Returns (superseded subtitle files, info).
        """
        subs = info.get('requested_subtitles')
        filename = info['filepath']
        new_ext = self.format
        # ffmpeg names the WebVTT format 'webvtt', not 'vtt'.
        new_format = new_ext
        if new_format == 'vtt':
            new_format = 'webvtt'
        if subs is None:
            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
            return [], info
        self._downloader.to_screen('[ffmpeg] Converting subtitles')
        sub_filenames = []
        for lang, sub in subs.items():
            ext = sub['ext']
            if ext == new_ext:
                self._downloader.to_screen(
                    '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
                continue
            old_file = subtitles_filename(filename, lang, ext)
            sub_filenames.append(old_file)
            new_file = subtitles_filename(filename, lang, new_ext)

            if ext in ('dfxp', 'ttml', 'tt'):
                self._downloader.report_warning(
                    'You have requested to convert dfxp (TTML) subtitles into another format, '
                    'which results in style information loss')

                # These formats are first converted to srt in Python; ffmpeg
                # then converts the srt file if another format is wanted.
                dfxp_file = old_file
                srt_file = subtitles_filename(filename, lang, 'srt')

                with open(dfxp_file, 'rb') as f:
                    srt_data = dfxp2srt(f.read())

                with io.open(srt_file, 'wt', encoding='utf-8') as f:
                    f.write(srt_data)
                old_file = srt_file

                subs[lang] = {
                    'ext': 'srt',
                    'data': srt_data
                }

                if new_ext == 'srt':
                    continue
                else:
                    # The intermediate srt file is superseded as well.
                    sub_filenames.append(srt_file)

            self.run_ffmpeg(old_file, new_file, ['-f', new_format])

            with io.open(new_file, 'rt', encoding='utf-8') as f:
                subs[lang] = {
                    'ext': new_ext,
                    'data': f.read(),
                }

        return sub_filenames, info