diff options
Diffstat (limited to 'youtube_dl/postprocessor')
-rw-r--r-- | youtube_dl/postprocessor/__init__.py | 40 | ||||
-rw-r--r-- | youtube_dl/postprocessor/common.py | 69 | ||||
-rw-r--r-- | youtube_dl/postprocessor/embedthumbnail.py | 93 | ||||
-rw-r--r-- | youtube_dl/postprocessor/execafterdownload.py | 31 | ||||
-rw-r--r-- | youtube_dl/postprocessor/ffmpeg.py | 613 | ||||
-rw-r--r-- | youtube_dl/postprocessor/metadatafromtitle.py | 48 | ||||
-rw-r--r-- | youtube_dl/postprocessor/xattrpp.py | 79 |
7 files changed, 973 insertions, 0 deletions
diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dl/postprocessor/__init__.py new file mode 100644 index 0000000..3ea5183 --- /dev/null +++ b/youtube_dl/postprocessor/__init__.py @@ -0,0 +1,40 @@ +from __future__ import unicode_literals + +from .embedthumbnail import EmbedThumbnailPP +from .ffmpeg import ( + FFmpegPostProcessor, + FFmpegEmbedSubtitlePP, + FFmpegExtractAudioPP, + FFmpegFixupStretchedPP, + FFmpegFixupM3u8PP, + FFmpegFixupM4aPP, + FFmpegMergerPP, + FFmpegMetadataPP, + FFmpegVideoConvertorPP, + FFmpegSubtitlesConvertorPP, +) +from .xattrpp import XAttrMetadataPP +from .execafterdownload import ExecAfterDownloadPP +from .metadatafromtitle import MetadataFromTitlePP + + +def get_postprocessor(key): + return globals()[key + 'PP'] + + +__all__ = [ + 'EmbedThumbnailPP', + 'ExecAfterDownloadPP', + 'FFmpegEmbedSubtitlePP', + 'FFmpegExtractAudioPP', + 'FFmpegFixupM3u8PP', + 'FFmpegFixupM4aPP', + 'FFmpegFixupStretchedPP', + 'FFmpegMergerPP', + 'FFmpegMetadataPP', + 'FFmpegPostProcessor', + 'FFmpegSubtitlesConvertorPP', + 'FFmpegVideoConvertorPP', + 'MetadataFromTitlePP', + 'XAttrMetadataPP', +] diff --git a/youtube_dl/postprocessor/common.py b/youtube_dl/postprocessor/common.py new file mode 100644 index 0000000..599dd1d --- /dev/null +++ b/youtube_dl/postprocessor/common.py @@ -0,0 +1,69 @@ +from __future__ import unicode_literals + +import os + +from ..utils import ( + PostProcessingError, + cli_configuration_args, + encodeFilename, +) + + +class PostProcessor(object): + """Post Processor class. + + PostProcessor objects can be added to downloaders with their + add_post_processor() method. When the downloader has finished a + successful download, it will take its internal chain of PostProcessors + and start calling the run() method on each one of them, first with + an initial argument and then with the returned value of the previous + PostProcessor. + + The chain will be stopped if one of them ever returns None or the end + of the chain is reached. + + PostProcessor objects follow a "mutual registration" process similar + to InfoExtractor objects. + + Optionally PostProcessor can use a list of additional command-line arguments + with self._configuration_args. + """ + + _downloader = None + + def __init__(self, downloader=None): + self._downloader = downloader + + def set_downloader(self, downloader): + """Sets the downloader for this PP.""" + self._downloader = downloader + + def run(self, information): + """Run the PostProcessor. + + The "information" argument is a dictionary like the ones + composed by InfoExtractors. The only difference is that this + one has an extra field called "filepath" that points to the + downloaded file. + + This method returns a tuple, the first element is a list of the files + that can be deleted, and the second of which is the updated + information. + + In addition, this method may raise a PostProcessingError + exception if post processing fails. + """ + return [], information # by default, keep file and do nothing + + def try_utime(self, path, atime, mtime, errnote='Cannot update utime of file'): + try: + os.utime(encodeFilename(path), (atime, mtime)) + except Exception: + self._downloader.report_warning(errnote) + + def _configuration_args(self, default=[]): + return cli_configuration_args(self._downloader.params, 'postprocessor_args', default) + + +class AudioConversionError(PostProcessingError): + pass diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py new file mode 100644 index 0000000..56be914 --- /dev/null +++ b/youtube_dl/postprocessor/embedthumbnail.py @@ -0,0 +1,93 @@ +# coding: utf-8 +from __future__ import unicode_literals + + +import os +import subprocess + +from .ffmpeg import FFmpegPostProcessor + +from ..utils import ( + check_executable, + encodeArgument, + encodeFilename, + PostProcessingError, + prepend_extension, + shell_quote +) + + +class EmbedThumbnailPPError(PostProcessingError): + pass + + +class EmbedThumbnailPP(FFmpegPostProcessor): + def __init__(self, downloader=None, already_have_thumbnail=False): + super(EmbedThumbnailPP, self).__init__(downloader) + self._already_have_thumbnail = already_have_thumbnail + + def run(self, info): + filename = info['filepath'] + temp_filename = prepend_extension(filename, 'temp') + + if not info.get('thumbnails'): + self._downloader.to_screen('[embedthumbnail] There aren\'t any thumbnails to embed') + return [], info + + thumbnail_filename = info['thumbnails'][-1]['filename'] + + if not os.path.exists(encodeFilename(thumbnail_filename)): + self._downloader.report_warning( + 'Skipping embedding the thumbnail because the file is missing.') + return [], info + + if info['ext'] == 'mp3': + options = [ + '-c', 'copy', '-map', '0', '-map', '1', + '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"'] + + self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename) + + self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options) + + if not self._already_have_thumbnail: + os.remove(encodeFilename(thumbnail_filename)) + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + + elif info['ext'] in ['m4a', 'mp4']: + if not check_executable('AtomicParsley', ['-v']): + raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.') + + cmd = [encodeFilename('AtomicParsley', True), + encodeFilename(filename, True), + encodeArgument('--artwork'), + encodeFilename(thumbnail_filename, True), + encodeArgument('-o'), + encodeFilename(temp_filename, True)] + + self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename) + + if self._downloader.params.get('verbose', False): + self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd)) + + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + + if p.returncode != 0: + msg = stderr.decode('utf-8', 'replace').strip() + raise EmbedThumbnailPPError(msg) + + if not self._already_have_thumbnail: + os.remove(encodeFilename(thumbnail_filename)) + # for formats that don't support thumbnails (like 3gp) AtomicParsley + # won't create to the temporary file + if b'No changes' in stdout: + self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail') + else: + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + else: + raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.') + + return [], info diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dl/postprocessor/execafterdownload.py new file mode 100644 index 0000000..64dabe7 --- /dev/null +++ b/youtube_dl/postprocessor/execafterdownload.py @@ -0,0 +1,31 @@ +from __future__ import unicode_literals + +import subprocess + +from .common import PostProcessor +from ..compat import compat_shlex_quote +from ..utils import ( + encodeArgument, + PostProcessingError, +) + + +class ExecAfterDownloadPP(PostProcessor): + def __init__(self, downloader, exec_cmd): + super(ExecAfterDownloadPP, self).__init__(downloader) + self.exec_cmd = exec_cmd + + def run(self, information): + cmd = self.exec_cmd + if '{}' not in cmd: + cmd += ' {}' + + cmd = cmd.replace('{}', compat_shlex_quote(information['filepath'])) + + self._downloader.to_screen('[exec] Executing command: %s' % cmd) + retCode = subprocess.call(encodeArgument(cmd), shell=True) + if retCode != 0: + raise PostProcessingError( + 'Command returned error code %d' % retCode) + + return [], information diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py new file mode 100644 index 0000000..757b496 --- /dev/null +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -0,0 +1,613 @@ +from __future__ import unicode_literals + +import io +import os +import subprocess +import time +import re + + +from .common import AudioConversionError, PostProcessor + +from ..compat import ( + compat_subprocess_get_DEVNULL, +) +from ..utils import ( + encodeArgument, + encodeFilename, + get_exe_version, + is_outdated_version, + PostProcessingError, + prepend_extension, + shell_quote, + subtitles_filename, + dfxp2srt, + ISO639Utils, + replace_extension, +) + + +EXT_TO_OUT_FORMATS = { + 'aac': 'adts', + 'flac': 'flac', + 'm4a': 'ipod', + 'mka': 'matroska', + 'mkv': 'matroska', + 'mpg': 'mpeg', + 'ogv': 'ogg', + 'ts': 'mpegts', + 'wma': 'asf', + 'wmv': 'asf', +} +ACODECS = { + 'mp3': 'libmp3lame', + 'aac': 'aac', + 'flac': 'flac', + 'm4a': 'aac', + 'opus': 'libopus', + 'vorbis': 'libvorbis', + 'wav': None, +} + + +class FFmpegPostProcessorError(PostProcessingError): + pass + + +class FFmpegPostProcessor(PostProcessor): + def __init__(self, downloader=None): + PostProcessor.__init__(self, downloader) + self._determine_executables() + + def check_version(self): + if not self.available: + raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.') + + required_version = '10-0' if self.basename == 'avconv' else '1.0' + if is_outdated_version( + self._versions[self.basename], required_version): + warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( + self.basename, self.basename, required_version) + if self._downloader: + self._downloader.report_warning(warning) + + @staticmethod + def get_versions(downloader=None): + return FFmpegPostProcessor(downloader)._versions + + def _determine_executables(self): + programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] + prefer_ffmpeg = True + + self.basename = None + self.probe_basename = None + + self._paths = None + self._versions = None + if self._downloader: + prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True) + location = self._downloader.params.get('ffmpeg_location') + if location is not None: + if not os.path.exists(location): + self._downloader.report_warning( + 'ffmpeg-location %s does not exist! ' + 'Continuing without avconv/ffmpeg.' % (location)) + self._versions = {} + return + elif not os.path.isdir(location): + basename = os.path.splitext(os.path.basename(location))[0] + if basename not in programs: + self._downloader.report_warning( + 'Cannot identify executable %s, its basename should be one of %s. ' + 'Continuing without avconv/ffmpeg.' % + (location, ', '.join(programs))) + self._versions = {} + return None + location = os.path.dirname(os.path.abspath(location)) + if basename in ('ffmpeg', 'ffprobe'): + prefer_ffmpeg = True + + self._paths = dict( + (p, os.path.join(location, p)) for p in programs) + self._versions = dict( + (p, get_exe_version(self._paths[p], args=['-version'])) + for p in programs) + if self._versions is None: + self._versions = dict( + (p, get_exe_version(p, args=['-version'])) for p in programs) + self._paths = dict((p, p) for p in programs) + + if prefer_ffmpeg is False: + prefs = ('avconv', 'ffmpeg') + else: + prefs = ('ffmpeg', 'avconv') + for p in prefs: + if self._versions[p]: + self.basename = p + break + + if prefer_ffmpeg is False: + prefs = ('avprobe', 'ffprobe') + else: + prefs = ('ffprobe', 'avprobe') + for p in prefs: + if self._versions[p]: + self.probe_basename = p + break + + @property + def available(self): + return self.basename is not None + + @property + def executable(self): + return self._paths[self.basename] + + @property + def probe_available(self): + return self.probe_basename is not None + + @property + def probe_executable(self): + return self._paths[self.probe_basename] + + def get_audio_codec(self, path): + if not self.probe_available: + raise PostProcessingError('ffprobe or avprobe not found. Please install one.') + try: + cmd = [ + encodeFilename(self.probe_executable, True), + encodeArgument('-show_streams'), + encodeFilename(self._ffmpeg_filename_argument(path), True)] + if self._downloader.params.get('verbose', False): + self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) + handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) + output = handle.communicate()[0] + if handle.wait() != 0: + return None + except (IOError, OSError): + return None + audio_codec = None + for line in output.decode('ascii', 'ignore').split('\n'): + if line.startswith('codec_name='): + audio_codec = line.split('=')[1].strip() + elif line.strip() == 'codec_type=audio' and audio_codec is not None: + return audio_codec + return None + + def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): + self.check_version() + + oldest_mtime = min( + os.stat(encodeFilename(path)).st_mtime for path in input_paths) + + opts += self._configuration_args() + + files_cmd = [] + for path in input_paths: + files_cmd.extend([ + encodeArgument('-i'), + encodeFilename(self._ffmpeg_filename_argument(path), True) + ]) + cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] + + files_cmd + + [encodeArgument(o) for o in opts] + + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) + + if self._downloader.params.get('verbose', False): + self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd)) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) + stdout, stderr = p.communicate() + if p.returncode != 0: + stderr = stderr.decode('utf-8', 'replace') + msg = stderr.strip().split('\n')[-1] + raise FFmpegPostProcessorError(msg) + self.try_utime(out_path, oldest_mtime, oldest_mtime) + + def run_ffmpeg(self, path, out_path, opts): + self.run_ffmpeg_multiple_files([path], out_path, opts) + + def _ffmpeg_filename_argument(self, fn): + # Always use 'file:' because the filename may contain ':' (ffmpeg + # interprets that as a protocol) or can start with '-' (-- is broken in + # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details) + # Also leave '-' intact in order not to break streaming to stdout. + return 'file:' + fn if fn != '-' else fn + + +class FFmpegExtractAudioPP(FFmpegPostProcessor): + def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): + FFmpegPostProcessor.__init__(self, downloader) + if preferredcodec is None: + preferredcodec = 'best' + self._preferredcodec = preferredcodec + self._preferredquality = preferredquality + self._nopostoverwrites = nopostoverwrites + + def run_ffmpeg(self, path, out_path, codec, more_opts): + if codec is None: + acodec_opts = [] + else: + acodec_opts = ['-acodec', codec] + opts = ['-vn'] + acodec_opts + more_opts + try: + FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts) + except FFmpegPostProcessorError as err: + raise AudioConversionError(err.msg) + + def run(self, information): + path = information['filepath'] + + filecodec = self.get_audio_codec(path) + if filecodec is None: + raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe') + + more_opts = [] + if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'): + if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']: + # Lossless, but in another container + acodec = 'copy' + extension = 'm4a' + more_opts = ['-bsf:a', 'aac_adtstoasc'] + elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']: + # Lossless if possible + acodec = 'copy' + extension = filecodec + if filecodec == 'aac': + more_opts = ['-f', 'adts'] + if filecodec == 'vorbis': + extension = 'ogg' + else: + # MP3 otherwise. + acodec = 'libmp3lame' + extension = 'mp3' + more_opts = [] + if self._preferredquality is not None: + if int(self._preferredquality) < 10: + more_opts += ['-q:a', self._preferredquality] + else: + more_opts += ['-b:a', self._preferredquality + 'k'] + else: + # We convert the audio (lossy if codec is lossy) + acodec = ACODECS[self._preferredcodec] + extension = self._preferredcodec + more_opts = [] + if self._preferredquality is not None: + # The opus codec doesn't support the -aq option + if int(self._preferredquality) < 10 and extension != 'opus': + more_opts += ['-q:a', self._preferredquality] + else: + more_opts += ['-b:a', self._preferredquality + 'k'] + if self._preferredcodec == 'aac': + more_opts += ['-f', 'adts'] + if self._preferredcodec == 'm4a': + more_opts += ['-bsf:a', 'aac_adtstoasc'] + if self._preferredcodec == 'vorbis': + extension = 'ogg' + if self._preferredcodec == 'wav': + extension = 'wav' + more_opts += ['-f', 'wav'] + + prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups + new_path = prefix + sep + extension + + information['filepath'] = new_path + information['ext'] = extension + + # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly. + if (new_path == path or + (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))): + self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path) + return [], information + + try: + self._downloader.to_screen('[ffmpeg] Destination: ' + new_path) + self.run_ffmpeg(path, new_path, acodec, more_opts) + except AudioConversionError as e: + raise PostProcessingError( + 'audio conversion failed: ' + e.msg) + except Exception: + raise PostProcessingError('error running ' + self.basename) + + # Try to update the date time for extracted audio file. + if information.get('filetime') is not None: + self.try_utime( + new_path, time.time(), information['filetime'], + errnote='Cannot update utime of audio file') + + return [path], information + + +class FFmpegVideoConvertorPP(FFmpegPostProcessor): + def __init__(self, downloader=None, preferedformat=None): + super(FFmpegVideoConvertorPP, self).__init__(downloader) + self._preferedformat = preferedformat + + def run(self, information): + path = information['filepath'] + if information['ext'] == self._preferedformat: + self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat)) + return [], information + options = [] + if self._preferedformat == 'avi': + options.extend(['-c:v', 'libxvid', '-vtag', 'XVID']) + prefix, sep, ext = path.rpartition('.') + outpath = prefix + sep + self._preferedformat + self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath) + self.run_ffmpeg(path, outpath, options) + information['filepath'] = outpath + information['format'] = self._preferedformat + information['ext'] = self._preferedformat + return [path], information + + +class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): + def run(self, information): + if information['ext'] not in ('mp4', 'webm', 'mkv'): + self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files') + return [], information + subtitles = information.get('requested_subtitles') + if not subtitles: + self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed') + return [], information + + filename = information['filepath'] + + ext = information['ext'] + sub_langs = [] + sub_filenames = [] + webm_vtt_warn = False + + for lang, sub_info in subtitles.items(): + sub_ext = sub_info['ext'] + if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': + sub_langs.append(lang) + sub_filenames.append(subtitles_filename(filename, lang, sub_ext)) + else: + if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': + webm_vtt_warn = True + self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files') + + if not sub_langs: + return [], information + + input_files = [filename] + sub_filenames + + opts = [ + '-map', '0', + '-c', 'copy', + # Don't copy the existing subtitles, we may be running the + # postprocessor a second time + '-map', '-0:s', + ] + if information['ext'] == 'mp4': + opts += ['-c:s', 'mov_text'] + for (i, lang) in enumerate(sub_langs): + opts.extend(['-map', '%d:0' % (i + 1)]) + lang_code = ISO639Utils.short2long(lang) + if lang_code is not None: + opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) + + temp_filename = prepend_extension(filename, 'temp') + self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename) + self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + + return sub_filenames, information + + +class FFmpegMetadataPP(FFmpegPostProcessor): + def run(self, info): + metadata = {} + + def add(meta_list, info_list=None): + if not info_list: + info_list = meta_list + if not isinstance(meta_list, (list, tuple)): + meta_list = (meta_list,) + if not isinstance(info_list, (list, tuple)): + info_list = (info_list,) + for info_f in info_list: + if info.get(info_f) is not None: + for meta_f in meta_list: + metadata[meta_f] = info[info_f] + break + + add('title', ('track', 'title')) + add('date', 'upload_date') + add(('description', 'comment'), 'description') + add('purl', 'webpage_url') + add('track', 'track_number') + add('artist', ('artist', 'creator', 'uploader', 'uploader_id')) + add('genre') + add('album') + add('album_artist') + add('disc', 'disc_number') + + if not metadata: + self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add') + return [], info + + filename = info['filepath'] + temp_filename = prepend_extension(filename, 'temp') + in_filenames = [filename] + options = [] + + if info['ext'] == 'm4a': + options.extend(['-vn', '-acodec', 'copy']) + else: + options.extend(['-c', 'copy']) + + for (name, value) in metadata.items(): + options.extend(['-metadata', '%s=%s' % (name, value)]) + + chapters = info.get('chapters', []) + if chapters: + metadata_filename = replace_extension(filename, 'meta') + with io.open(metadata_filename, 'wt', encoding='utf-8') as f: + def ffmpeg_escape(text): + return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text) + + metadata_file_content = ';FFMETADATA1\n' + for chapter in chapters: + metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n' + metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000) + metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000) + chapter_title = chapter.get('title') + if chapter_title: + metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title) + f.write(metadata_file_content) + in_filenames.append(metadata_filename) + options.extend(['-map_metadata', '1']) + + self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename) + self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options) + if chapters: + os.remove(metadata_filename) + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + return [], info + + +class FFmpegMergerPP(FFmpegPostProcessor): + def run(self, info): + filename = info['filepath'] + temp_filename = prepend_extension(filename, 'temp') + args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] + self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) + self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + return info['__files_to_merge'], info + + def can_merge(self): + # TODO: figure out merge-capable ffmpeg version + if self.basename != 'avconv': + return True + + required_version = '10-0' + if is_outdated_version( + self._versions[self.basename], required_version): + warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, ' + 'youtube-dl will download single file media. ' + 'Update %s to version %s or newer to fix this.') % ( + self.basename, self.basename, required_version) + if self._downloader: + self._downloader.report_warning(warning) + return False + return True + + +class FFmpegFixupStretchedPP(FFmpegPostProcessor): + def run(self, info): + stretched_ratio = info.get('stretched_ratio') + if stretched_ratio is None or stretched_ratio == 1: + return [], info + + filename = info['filepath'] + temp_filename = prepend_extension(filename, 'temp') + + options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio] + self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename) + self.run_ffmpeg(filename, temp_filename, options) + + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + + return [], info + + +class FFmpegFixupM4aPP(FFmpegPostProcessor): + def run(self, info): + if info.get('container') != 'm4a_dash': + return [], info + + filename = info['filepath'] + temp_filename = prepend_extension(filename, 'temp') + + options = ['-c', 'copy', '-f', 'mp4'] + self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename) + self.run_ffmpeg(filename, temp_filename, options) + + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + + return [], info + + +class FFmpegFixupM3u8PP(FFmpegPostProcessor): + def run(self, info): + filename = info['filepath'] + if self.get_audio_codec(filename) == 'aac': + temp_filename = prepend_extension(filename, 'temp') + + options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] + self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename) + self.run_ffmpeg(filename, temp_filename, options) + + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + return [], info + + +class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): + def __init__(self, downloader=None, format=None): + super(FFmpegSubtitlesConvertorPP, self).__init__(downloader) + self.format = format + + def run(self, info): + subs = info.get('requested_subtitles') + filename = info['filepath'] + new_ext = self.format + new_format = new_ext + if new_format == 'vtt': + new_format = 'webvtt' + if subs is None: + self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert') + return [], info + self._downloader.to_screen('[ffmpeg] Converting subtitles') + sub_filenames = [] + for lang, sub in subs.items(): + ext = sub['ext'] + if ext == new_ext: + self._downloader.to_screen( + '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext) + continue + old_file = subtitles_filename(filename, lang, ext) + sub_filenames.append(old_file) + new_file = subtitles_filename(filename, lang, new_ext) + + if ext in ('dfxp', 'ttml', 'tt'): + self._downloader.report_warning( + 'You have requested to convert dfxp (TTML) subtitles into another format, ' + 'which results in style information loss') + + dfxp_file = old_file + srt_file = subtitles_filename(filename, lang, 'srt') + + with open(dfxp_file, 'rb') as f: + srt_data = dfxp2srt(f.read()) + + with io.open(srt_file, 'wt', encoding='utf-8') as f: + f.write(srt_data) + old_file = srt_file + + subs[lang] = { + 'ext': 'srt', + 'data': srt_data + } + + if new_ext == 'srt': + continue + else: + sub_filenames.append(srt_file) + + self.run_ffmpeg(old_file, new_file, ['-f', new_format]) + + with io.open(new_file, 'rt', encoding='utf-8') as f: + subs[lang] = { + 'ext': new_ext, + 'data': f.read(), + } + + return sub_filenames, info diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dl/postprocessor/metadatafromtitle.py new file mode 100644 index 0000000..f5c14d9 --- /dev/null +++ b/youtube_dl/postprocessor/metadatafromtitle.py @@ -0,0 +1,48 @@ +from __future__ import unicode_literals + +import re + +from .common import PostProcessor + + +class MetadataFromTitlePP(PostProcessor): + def __init__(self, downloader, titleformat): + super(MetadataFromTitlePP, self).__init__(downloader) + self._titleformat = titleformat + self._titleregex = (self.format_to_regex(titleformat) + if re.search(r'%\(\w+\)s', titleformat) + else titleformat) + + def format_to_regex(self, fmt): + r""" + Converts a string like + '%(title)s - %(artist)s' + to a regex like + '(?P<title>.+)\ \-\ (?P<artist>.+)' + """ + lastpos = 0 + regex = '' + # replace %(..)s with regex group and escape other string parts + for match in re.finditer(r'%\((\w+)\)s', fmt): + regex += re.escape(fmt[lastpos:match.start()]) + regex += r'(?P<' + match.group(1) + '>.+)' + lastpos = match.end() + if lastpos < len(fmt): + regex += re.escape(fmt[lastpos:]) + return regex + + def run(self, info): + title = info['title'] + match = re.match(self._titleregex, title) + if match is None: + self._downloader.to_screen( + '[fromtitle] Could not interpret title of video as "%s"' + % self._titleformat) + return [], info + for attribute, value in match.groupdict().items(): + info[attribute] = value + self._downloader.to_screen( + '[fromtitle] parsed %s: %s' + % (attribute, value if value is not None else 'NA')) + + return [], info diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dl/postprocessor/xattrpp.py new file mode 100644 index 0000000..b0aed9c --- /dev/null +++ b/youtube_dl/postprocessor/xattrpp.py @@ -0,0 +1,79 @@ +from __future__ import unicode_literals + +from .common import PostProcessor +from ..compat import compat_os_name +from ..utils import ( + hyphenate_date, + write_xattr, + XAttrMetadataError, + XAttrUnavailableError, +) + + +class XAttrMetadataPP(PostProcessor): + + # + # More info about extended attributes for media: + # http://freedesktop.org/wiki/CommonExtendedAttributes/ + # http://www.freedesktop.org/wiki/PhreedomDraft/ + # http://dublincore.org/documents/usageguide/elements.shtml + # + # TODO: + # * capture youtube keywords and put them in 'user.dublincore.subject' (comma-separated) + # * figure out which xattrs can be used for 'duration', 'thumbnail', 'resolution' + # + + def run(self, info): + """ Set extended attributes on downloaded file (if xattr support is found). """ + + # Write the metadata to the file's xattrs + self._downloader.to_screen('[metadata] Writing metadata to file\'s xattrs') + + filename = info['filepath'] + + try: + xattr_mapping = { + 'user.xdg.referrer.url': 'webpage_url', + # 'user.xdg.comment': 'description', + 'user.dublincore.title': 'title', + 'user.dublincore.date': 'upload_date', + 'user.dublincore.description': 'description', + 'user.dublincore.contributor': 'uploader', + 'user.dublincore.format': 'format', + } + + num_written = 0 + for xattrname, infoname in xattr_mapping.items(): + + value = info.get(infoname) + + if value: + if infoname == 'upload_date': + value = hyphenate_date(value) + + byte_value = value.encode('utf-8') + write_xattr(filename, xattrname, byte_value) + num_written += 1 + + return [], info + + except XAttrUnavailableError as e: + self._downloader.report_error(str(e)) + return [], info + + except XAttrMetadataError as e: + if e.reason == 'NO_SPACE': + self._downloader.report_warning( + 'There\'s no disk space left, disk quota exceeded or filesystem xattr limit exceeded. ' + + (('Some ' if num_written else '') + 'extended attributes are not written.').capitalize()) + elif e.reason == 'VALUE_TOO_LONG': + self._downloader.report_warning( + 'Unable to write extended attributes due to too long values.') + else: + msg = 'This filesystem doesn\'t support extended attributes. ' + if compat_os_name == 'nt': + msg += 'You need to use NTFS.' + else: + msg += '(You may have to enable them in your /etc/fstab)' + self._downloader.report_error(msg) + return [], info |