diff options
Diffstat (limited to 'youtube_dl/downloader')
-rw-r--r-- | youtube_dl/downloader/__init__.py | 61 | ||||
-rw-r--r-- | youtube_dl/downloader/common.py | 389 | ||||
-rw-r--r-- | youtube_dl/downloader/dash.py | 80 | ||||
-rw-r--r-- | youtube_dl/downloader/external.py | 354 | ||||
-rw-r--r-- | youtube_dl/downloader/f4m.py | 438 | ||||
-rw-r--r-- | youtube_dl/downloader/fragment.py | 268 | ||||
-rw-r--r-- | youtube_dl/downloader/hls.py | 204 | ||||
-rw-r--r-- | youtube_dl/downloader/http.py | 354 | ||||
-rw-r--r-- | youtube_dl/downloader/ism.py | 259 | ||||
-rw-r--r-- | youtube_dl/downloader/rtmp.py | 214 | ||||
-rw-r--r-- | youtube_dl/downloader/rtsp.py | 47 |
11 files changed, 2668 insertions, 0 deletions
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py new file mode 100644 index 0000000..2e485df --- /dev/null +++ b/youtube_dl/downloader/__init__.py @@ -0,0 +1,61 @@ +from __future__ import unicode_literals + +from .common import FileDownloader +from .f4m import F4mFD +from .hls import HlsFD +from .http import HttpFD +from .rtmp import RtmpFD +from .dash import DashSegmentsFD +from .rtsp import RtspFD +from .ism import IsmFD +from .external import ( + get_external_downloader, + FFmpegFD, +) + +from ..utils import ( + determine_protocol, +) + +PROTOCOL_MAP = { + 'rtmp': RtmpFD, + 'm3u8_native': HlsFD, + 'm3u8': FFmpegFD, + 'mms': RtspFD, + 'rtsp': RtspFD, + 'f4m': F4mFD, + 'http_dash_segments': DashSegmentsFD, + 'ism': IsmFD, +} + + +def get_suitable_downloader(info_dict, params={}): + """Get the downloader class that can handle the info dict.""" + protocol = determine_protocol(info_dict) + info_dict['protocol'] = protocol + + # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict): + # return FFmpegFD + + external_downloader = params.get('external_downloader') + if external_downloader is not None: + ed = get_external_downloader(external_downloader) + if ed.can_download(info_dict): + return ed + + if protocol.startswith('m3u8') and info_dict.get('is_live'): + return FFmpegFD + + if protocol == 'm3u8' and params.get('hls_prefer_native') is True: + return HlsFD + + if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False: + return FFmpegFD + + return PROTOCOL_MAP.get(protocol, HttpFD) + + +__all__ = [ + 'get_suitable_downloader', + 'FileDownloader', +] diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py new file mode 100644 index 0000000..5979833 --- /dev/null +++ b/youtube_dl/downloader/common.py @@ -0,0 +1,389 @@ +from __future__ import division, unicode_literals + +import os +import re +import sys +import time +import random + +from ..compat import compat_os_name +from ..utils import ( + decodeArgument, + encodeFilename, + error_to_compat_str, + format_bytes, + shell_quote, + timeconvert, +) + + +class FileDownloader(object): + """File Downloader class. + + File downloader objects are the ones responsible of downloading the + actual video file and writing it to disk. + + File downloaders accept a lot of parameters. In order not to saturate + the object constructor with arguments, it receives a dictionary of + options instead. + + Available options: + + verbose: Print additional info to stdout. + quiet: Do not print messages to stdout. + ratelimit: Download speed limit, in bytes/sec. + retries: Number of times to retry for HTTP error 5xx + buffersize: Size of download buffer in bytes. + noresizebuffer: Do not automatically resize the download buffer. + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. + test: Download only first bytes to test the downloader. + min_filesize: Skip files smaller than this size + max_filesize: Skip files larger than this size + xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. + external_downloader_args: A list of additional command-line arguments for the + external downloader. + hls_use_mpegts: Use the mpegts container for HLS videos. + http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be + useful for bypassing bandwidth throttling imposed by + a webserver (experimental) + + Subclasses of this one must re-define the real_download method. + """ + + _TEST_FILE_SIZE = 10241 + params = None + + def __init__(self, ydl, params): + """Create a FileDownloader object with the given options.""" + self.ydl = ydl + self._progress_hooks = [] + self.params = params + self.add_progress_hook(self.report_progress) + + @staticmethod + def format_seconds(seconds): + (mins, secs) = divmod(seconds, 60) + (hours, mins) = divmod(mins, 60) + if hours > 99: + return '--:--:--' + if hours == 0: + return '%02d:%02d' % (mins, secs) + else: + return '%02d:%02d:%02d' % (hours, mins, secs) + + @staticmethod + def calc_percent(byte_counter, data_len): + if data_len is None: + return None + return float(byte_counter) / float(data_len) * 100.0 + + @staticmethod + def format_percent(percent): + if percent is None: + return '---.-%' + return '%6s' % ('%3.1f%%' % percent) + + @staticmethod + def calc_eta(start, now, total, current): + if total is None: + return None + if now is None: + now = time.time() + dif = now - start + if current == 0 or dif < 0.001: # One millisecond + return None + rate = float(current) / dif + return int((float(total) - float(current)) / rate) + + @staticmethod + def format_eta(eta): + if eta is None: + return '--:--' + return FileDownloader.format_seconds(eta) + + @staticmethod + def calc_speed(start, now, bytes): + dif = now - start + if bytes == 0 or dif < 0.001: # One millisecond + return None + return float(bytes) / dif + + @staticmethod + def format_speed(speed): + if speed is None: + return '%10s' % '---b/s' + return '%10s' % ('%s/s' % format_bytes(speed)) + + @staticmethod + def format_retries(retries): + return 'inf' if retries == float('inf') else '%.0f' % retries + + @staticmethod + def best_block_size(elapsed_time, bytes): + new_min = max(bytes / 2.0, 1.0) + new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB + if elapsed_time < 0.001: + return int(new_max) + rate = bytes / elapsed_time + if rate > new_max: + return int(new_max) + if rate < new_min: + return int(new_min) + return int(rate) + + @staticmethod + def parse_bytes(bytestr): + """Parse a string indicating a byte quantity into an integer.""" + matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) + if matchobj is None: + return None + number = float(matchobj.group(1)) + multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) + return int(round(number * multiplier)) + + def to_screen(self, *args, **kargs): + self.ydl.to_screen(*args, **kargs) + + def to_stderr(self, message): + self.ydl.to_screen(message) + + def to_console_title(self, message): + self.ydl.to_console_title(message) + + def trouble(self, *args, **kargs): + self.ydl.trouble(*args, **kargs) + + def report_warning(self, *args, **kargs): + self.ydl.report_warning(*args, **kargs) + + def report_error(self, *args, **kargs): + self.ydl.report_error(*args, **kargs) + + def slow_down(self, start_time, now, byte_counter): + """Sleep if the download speed is over the rate limit.""" + rate_limit = self.params.get('ratelimit') + if rate_limit is None or byte_counter == 0: + return + if now is None: + now = time.time() + elapsed = now - start_time + if elapsed <= 0.0: + return + speed = float(byte_counter) / elapsed + if speed > rate_limit: + time.sleep(max((byte_counter // rate_limit) - elapsed, 0)) + + def temp_name(self, filename): + """Returns a temporary filename for the given filename.""" + if self.params.get('nopart', False) or filename == '-' or \ + (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): + return filename + return filename + '.part' + + def undo_temp_name(self, filename): + if filename.endswith('.part'): + return filename[:-len('.part')] + return filename + + def ytdl_filename(self, filename): + return filename + '.ytdl' + + def try_rename(self, old_filename, new_filename): + try: + if old_filename == new_filename: + return + os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) + except (IOError, OSError) as err: + self.report_error('unable to rename file: %s' % error_to_compat_str(err)) + + def try_utime(self, filename, last_modified_hdr): + """Try to set the last-modified time of the given file.""" + if last_modified_hdr is None: + return + if not os.path.isfile(encodeFilename(filename)): + return + timestr = last_modified_hdr + if timestr is None: + return + filetime = timeconvert(timestr) + if filetime is None: + return filetime + # Ignore obviously invalid dates + if filetime == 0: + return + try: + os.utime(filename, (time.time(), filetime)) + except Exception: + pass + return filetime + + def report_destination(self, filename): + """Report destination filename.""" + self.to_screen('[download] Destination: ' + filename) + + def _report_progress_status(self, msg, is_last_line=False): + fullmsg = '[download] ' + msg + if self.params.get('progress_with_newline', False): + self.to_screen(fullmsg) + else: + if compat_os_name == 'nt': + prev_len = getattr(self, '_report_progress_prev_line_length', + 0) + if prev_len > len(fullmsg): + fullmsg += ' ' * (prev_len - len(fullmsg)) + self._report_progress_prev_line_length = len(fullmsg) + clear_line = '\r' + else: + clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r') + self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) + self.to_console_title('youtube-dl ' + msg) + + def report_progress(self, s): + if s['status'] == 'finished': + if self.params.get('noprogress', False): + self.to_screen('[download] Download completed') + else: + msg_template = '100%%' + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template += ' of %(_total_bytes_str)s' + if s.get('elapsed') is not None: + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template += ' in %(_elapsed_str)s' + self._report_progress_status( + msg_template % s, is_last_line=True) + + if self.params.get('noprogress'): + return + + if s['status'] != 'downloading': + return + + if s.get('eta') is not None: + s['_eta_str'] = self.format_eta(s['eta']) + else: + s['_eta_str'] = 'Unknown ETA' + + if s.get('total_bytes') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) + elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: + s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) + else: + if s.get('downloaded_bytes') == 0: + s['_percent_str'] = self.format_percent(0) + else: + s['_percent_str'] = 'Unknown %' + + if s.get('speed') is not None: + s['_speed_str'] = self.format_speed(s['speed']) + else: + s['_speed_str'] = 'Unknown speed' + + if s.get('total_bytes') is not None: + s['_total_bytes_str'] = format_bytes(s['total_bytes']) + msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' + elif s.get('total_bytes_estimate') is not None: + s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) + msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' + else: + if s.get('downloaded_bytes') is not None: + s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) + if s.get('elapsed'): + s['_elapsed_str'] = self.format_seconds(s['elapsed']) + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' + else: + msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' + else: + msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' + + self._report_progress_status(msg_template % s) + + def report_resuming_byte(self, resume_len): + """Report attempt to resume at given byte.""" + self.to_screen('[download] Resuming download at byte %s' % resume_len) + + def report_retry(self, err, count, retries): + """Report retry in case of HTTP error 5xx""" + self.to_screen( + '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...' + % (error_to_compat_str(err), count, self.format_retries(retries))) + + def report_file_already_downloaded(self, file_name): + """Report file has already been fully downloaded.""" + try: + self.to_screen('[download] %s has already been downloaded' % file_name) + except UnicodeEncodeError: + self.to_screen('[download] The file has already been downloaded') + + def report_unable_to_resume(self): + """Report it was impossible to resume download.""" + self.to_screen('[download] Unable to resume') + + def download(self, filename, info_dict): + """Download to a filename using the info from info_dict + Return True on success and False otherwise + """ + + nooverwrites_and_exists = ( + self.params.get('nooverwrites', False) and + os.path.exists(encodeFilename(filename)) + ) + + if not hasattr(filename, 'write'): + continuedl_and_exists = ( + self.params.get('continuedl', True) and + os.path.isfile(encodeFilename(filename)) and + not self.params.get('nopart', False) + ) + + # Check file already present + if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists): + self.report_file_already_downloaded(filename) + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + 'total_bytes': os.path.getsize(encodeFilename(filename)), + }) + return True + + min_sleep_interval = self.params.get('sleep_interval') + if min_sleep_interval: + max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) + sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) + self.to_screen( + '[download] Sleeping %s seconds...' % ( + int(sleep_interval) if sleep_interval.is_integer() + else '%.2f' % sleep_interval)) + time.sleep(sleep_interval) + + return self.real_download(filename, info_dict) + + def real_download(self, filename, info_dict): + """Real download process. Redefine in subclasses.""" + raise NotImplementedError('This method must be implemented by subclasses') + + def _hook_progress(self, status): + for ph in self._progress_hooks: + ph(status) + + def add_progress_hook(self, ph): + # See YoutubeDl.py (search for progress_hooks) for a description of + # this interface + self._progress_hooks.append(ph) + + def _debug_cmd(self, args, exe=None): + if not self.params.get('verbose', False): + return + + str_args = [decodeArgument(a) for a in args] + + if exe is None: + exe = os.path.basename(str_args[0]) + + self.to_screen('[debug] %s command line: %s' % ( + exe, shell_quote(str_args))) diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py new file mode 100644 index 0000000..eaa7adf --- /dev/null +++ b/youtube_dl/downloader/dash.py @@ -0,0 +1,80 @@ +from __future__ import unicode_literals + +from .fragment import FragmentFD +from ..compat import compat_urllib_error +from ..utils import ( + DownloadError, + urljoin, +) + + +class DashSegmentsFD(FragmentFD): + """ + Download segments in a DASH manifest + """ + + FD_NAME = 'dashsegments' + + def real_download(self, filename, info_dict): + fragment_base_url = info_dict.get('fragment_base_url') + fragments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] + + ctx = { + 'filename': filename, + 'total_frags': len(fragments), + } + + self._prepare_and_start_frag_download(ctx) + + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + + frag_index = 0 + for i, fragment in enumerate(fragments): + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue + # In DASH, the first segment contains necessary headers to + # generate a valid MP4 file, so always abort for the first segment + fatal = i == 0 or not skip_unavailable_fragments + count = 0 + while count <= fragment_retries: + try: + fragment_url = fragment.get('url') + if not fragment_url: + assert fragment_base_url + fragment_url = urljoin(fragment_base_url, fragment['path']) + success, frag_content = self._download_fragment(ctx, fragment_url, info_dict) + if not success: + return False + self._append_fragment(ctx, frag_content) + break + except compat_urllib_error.HTTPError as err: + # YouTube may often return 404 HTTP error for a fragment causing the + # whole download to fail. However if the same fragment is immediately + # retried with the same request data this usually succeeds (1-2 attemps + # is usually enough) thus allowing to download the whole file successfully. + # To be future-proof we will retry all fragments that fail with any + # HTTP error. + count += 1 + if count <= fragment_retries: + self.report_retry_fragment(err, frag_index, count, fragment_retries) + except DownloadError: + # Don't retry fragment if error occurred during HTTP downloading + # itself since it has own retry settings + if not fatal: + self.report_skip_fragment(frag_index) + break + raise + + if count > fragment_retries: + if not fatal: + self.report_skip_fragment(frag_index) + continue + self.report_error('giving up after %s fragment retries' % fragment_retries) + return False + + self._finish_frag_download(ctx) + + return True diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py new file mode 100644 index 0000000..958d00a --- /dev/null +++ b/youtube_dl/downloader/external.py @@ -0,0 +1,354 @@ +from __future__ import unicode_literals + +import os.path +import re +import subprocess +import sys +import time + +from .common import FileDownloader +from ..compat import ( + compat_setenv, + compat_str, +) +from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS +from ..utils import ( + cli_option, + cli_valueless_option, + cli_bool_option, + cli_configuration_args, + encodeFilename, + encodeArgument, + handle_youtubedl_headers, + check_executable, + is_outdated_version, +) + + +class ExternalFD(FileDownloader): + def real_download(self, filename, info_dict): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + try: + started = time.time() + retval = self._call_downloader(tmpfilename, info_dict) + except KeyboardInterrupt: + if not info_dict.get('is_live'): + raise + # Live stream downloading cancellation should be considered as + # correct and expected termination thus all postprocessing + # should take place + retval = 0 + self.to_screen('[%s] Interrupted by user' % self.get_basename()) + + if retval == 0: + status = { + 'filename': filename, + 'status': 'finished', + 'elapsed': time.time() - started, + } + if filename != '-': + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) + self.try_rename(tmpfilename, filename) + status.update({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + }) + self._hook_progress(status) + return True + else: + self.to_stderr('\n') + self.report_error('%s exited with code %d' % ( + self.get_basename(), retval)) + return False + + @classmethod + def get_basename(cls): + return cls.__name__[:-2].lower() + + @property + def exe(self): + return self.params.get('external_downloader') + + @classmethod + def available(cls): + return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT]) + + @classmethod + def supports(cls, info_dict): + return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') + + @classmethod + def can_download(cls, info_dict): + return cls.available() and cls.supports(info_dict) + + def _option(self, command_option, param): + return cli_option(self.params, command_option, param) + + def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None): + return cli_bool_option(self.params, command_option, param, true_value, false_value, separator) + + def _valueless_option(self, command_option, param, expected_value=True): + return cli_valueless_option(self.params, command_option, param, expected_value) + + def _configuration_args(self, default=[]): + return cli_configuration_args(self.params, 'external_downloader_args', default) + + def _call_downloader(self, tmpfilename, info_dict): + """ Either overwrite this or implement _make_cmd """ + cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] + + self._debug_cmd(cmd) + + p = subprocess.Popen( + cmd, stderr=subprocess.PIPE) + _, stderr = p.communicate() + if p.returncode != 0: + self.to_stderr(stderr.decode('utf-8', 'replace')) + return p.returncode + + +class CurlFD(ExternalFD): + AVAILABLE_OPT = '-V' + + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '--location', '-o', tmpfilename] + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') + cmd += self._valueless_option('--silent', 'noprogress') + cmd += self._valueless_option('--verbose', 'verbose') + cmd += self._option('--limit-rate', 'ratelimit') + cmd += self._option('--retry', 'retries') + cmd += self._option('--max-filesize', 'max_filesize') + cmd += self._option('--interface', 'source_address') + cmd += self._option('--proxy', 'proxy') + cmd += self._valueless_option('--insecure', 'nocheckcertificate') + cmd += self._configuration_args() + cmd += ['--', info_dict['url']] + return cmd + + def _call_downloader(self, tmpfilename, info_dict): + cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] + + self._debug_cmd(cmd) + + # curl writes the progress to stderr so don't capture it. + p = subprocess.Popen(cmd) + p.communicate() + return p.returncode + + +class AxelFD(ExternalFD): + AVAILABLE_OPT = '-V' + + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '-o', tmpfilename] + for key, val in info_dict['http_headers'].items(): + cmd += ['-H', '%s: %s' % (key, val)] + cmd += self._configuration_args() + cmd += ['--', info_dict['url']] + return cmd + + +class WgetFD(ExternalFD): + AVAILABLE_OPT = '--version' + + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._option('--bind-address', 'source_address') + cmd += self._option('--proxy', 'proxy') + cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') + cmd += self._configuration_args() + cmd += ['--', info_dict['url']] + return cmd + + +class Aria2cFD(ExternalFD): + AVAILABLE_OPT = '-v' + + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '-c'] + cmd += self._configuration_args([ + '--min-split-size', '1M', '--max-connection-per-server', '4']) + dn = os.path.dirname(tmpfilename) + if dn: + cmd += ['--dir', dn] + cmd += ['--out', os.path.basename(tmpfilename)] + for key, val in info_dict['http_headers'].items(): + cmd += ['--header', '%s: %s' % (key, val)] + cmd += self._option('--interface', 'source_address') + cmd += self._option('--all-proxy', 'proxy') + cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') + cmd += ['--', info_dict['url']] + return cmd + + +class HttpieFD(ExternalFD): + @classmethod + def available(cls): + return check_executable('http', ['--version']) + + def _make_cmd(self, tmpfilename, info_dict): + cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] + for key, val in info_dict['http_headers'].items(): + cmd += ['%s:%s' % (key, val)] + return cmd + + +class FFmpegFD(ExternalFD): + @classmethod + def supports(cls, info_dict): + return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms') + + @classmethod + def available(cls): + return FFmpegPostProcessor().available + + def _call_downloader(self, tmpfilename, info_dict): + url = info_dict['url'] + ffpp = FFmpegPostProcessor(downloader=self) + if not ffpp.available: + self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') + return False + ffpp.check_version() + + args = [ffpp.executable, '-y'] + + for log_level in ('quiet', 'verbose'): + if self.params.get(log_level, False): + args += ['-loglevel', log_level] + break + + seekable = info_dict.get('_seekable') + if seekable is not None: + # setting -seekable prevents ffmpeg from guessing if the server + # supports seeking(by adding the header `Range: bytes=0-`), which + # can cause problems in some cases + # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127 + # http://trac.ffmpeg.org/ticket/6125#comment:10 + args += ['-seekable', '1' if seekable else '0'] + + args += self._configuration_args() + + # start_time = info_dict.get('start_time') or 0 + # if start_time: + # args += ['-ss', compat_str(start_time)] + # end_time = info_dict.get('end_time') + # if end_time: + # args += ['-t', compat_str(end_time - start_time)] + + if info_dict['http_headers'] and re.match(r'^https?://', url): + # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: + # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. + headers = handle_youtubedl_headers(info_dict['http_headers']) + args += [ + '-headers', + ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] + + env = None + proxy = self.params.get('proxy') + if proxy: + if not re.match(r'^[\da-zA-Z]+://', proxy): + proxy = 'http://%s' % proxy + + if proxy.startswith('socks'): + self.report_warning( + '%s does not support SOCKS proxies. Downloading is likely to fail. ' + 'Consider adding --hls-prefer-native to your command.' % self.get_basename()) + + # Since December 2015 ffmpeg supports -http_proxy option (see + # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) + # We could switch to the following code if we are able to detect version properly + # args += ['-http_proxy', proxy] + env = os.environ.copy() + compat_setenv('HTTP_PROXY', proxy, env=env) + compat_setenv('http_proxy', proxy, env=env) + + protocol = info_dict.get('protocol') + + if protocol == 'rtmp': + player_url = info_dict.get('player_url') + page_url = info_dict.get('page_url') + app = info_dict.get('app') + play_path = info_dict.get('play_path') + tc_url = info_dict.get('tc_url') + flash_version = info_dict.get('flash_version') + live = info_dict.get('rtmp_live', False) + if player_url is not None: + args += ['-rtmp_swfverify', player_url] + if page_url is not None: + args += ['-rtmp_pageurl', page_url] + if app is not None: + args += ['-rtmp_app', app] + if play_path is not None: + args += ['-rtmp_playpath', play_path] + if tc_url is not None: + args += ['-rtmp_tcurl', tc_url] + if flash_version is not None: + args += ['-rtmp_flashver', flash_version] + if live: + args += ['-rtmp_live', 'live'] + + args += ['-i', url, '-c', 'copy'] + + if self.params.get('test', False): + args += ['-fs', compat_str(self._TEST_FILE_SIZE)] + + if protocol in ('m3u8', 'm3u8_native'): + if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': + args += ['-f', 'mpegts'] + else: + args += ['-f', 'mp4'] + if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): + args += ['-bsf:a', 'aac_adtstoasc'] + elif protocol == 'rtmp': + args += ['-f', 'flv'] + else: + args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])] + + args = [encodeArgument(opt) for opt in args] + args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) + + self._debug_cmd(args) + + proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) + try: + retval = proc.wait() + except KeyboardInterrupt: + # subprocces.run would send the SIGKILL signal to ffmpeg and the + # mp4 file couldn't be played, but if we ask ffmpeg to quit it + # produces a file that is playable (this is mostly useful for live + # streams). Note that Windows is not affected and produces playable + # files (see https://github.com/rg3/youtube-dl/issues/8300). + if sys.platform != 'win32': + proc.communicate(b'q') + raise + return retval + + +class AVconvFD(FFmpegFD): + pass + + +_BY_NAME = dict( + (klass.get_basename(), klass) + for name, klass in globals().items() + if name.endswith('FD') and name != 'ExternalFD' +) + + +def list_external_downloaders(): + return sorted(_BY_NAME.keys()) + + +def get_external_downloader(external_downloader): + """ Given the name of the executable, see whether we support the given + downloader . """ + # Drop .exe extension on Windows + bn = os.path.splitext(os.path.basename(external_downloader))[0] + return _BY_NAME[bn] diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py new file mode 100644 index 0000000..15e71be --- /dev/null +++ b/youtube_dl/downloader/f4m.py @@ -0,0 +1,438 @@ +from __future__ import division, unicode_literals + +import io +import itertools +import time + +from .fragment import FragmentFD +from ..compat import ( + compat_b64decode, + compat_etree_fromstring, + compat_urlparse, + compat_urllib_error, + compat_urllib_parse_urlparse, + compat_struct_pack, + compat_struct_unpack, +) +from ..utils import ( + fix_xml_ampersands, + xpath_text, +) + + +class DataTruncatedError(Exception): + pass + + +class FlvReader(io.BytesIO): + """ + Reader for Flv files + The file format is documented in https://www.adobe.com/devnet/f4v.html + """ + + def read_bytes(self, n): + data = self.read(n) + if len(data) < n: + raise DataTruncatedError( + 'FlvReader error: need %d bytes while only %d bytes got' % ( + n, len(data))) + return data + + # Utility functions for reading numbers and strings + def read_unsigned_long_long(self): + return compat_struct_unpack('!Q', self.read_bytes(8))[0] + + def read_unsigned_int(self): + return compat_struct_unpack('!I', self.read_bytes(4))[0] + + def read_unsigned_char(self): + return compat_struct_unpack('!B', self.read_bytes(1))[0] + + def read_string(self): + res = b'' + while True: + char = self.read_bytes(1) + if char == b'\x00': + break + res += char + return res + + def read_box_info(self): + """ + Read a box and return the info as a tuple: (box_size, box_type, box_data) + """ + real_size = size = self.read_unsigned_int() + box_type = self.read_bytes(4) + header_end = 8 + if size == 1: + real_size = self.read_unsigned_long_long() + header_end = 16 + return real_size, box_type, self.read_bytes(real_size - header_end) + + def read_asrt(self): + # version + self.read_unsigned_char() + # flags + self.read_bytes(3) + quality_entry_count = self.read_unsigned_char() + # QualityEntryCount + for i in range(quality_entry_count): + self.read_string() + + segment_run_count = self.read_unsigned_int() + segments = [] + for i in range(segment_run_count): + first_segment = self.read_unsigned_int() + fragments_per_segment = self.read_unsigned_int() + segments.append((first_segment, fragments_per_segment)) + + return { + 'segment_run': segments, + } + + def read_afrt(self): + # version + self.read_unsigned_char() + # flags + self.read_bytes(3) + # time scale + self.read_unsigned_int() + + quality_entry_count = self.read_unsigned_char() + # QualitySegmentUrlModifiers + for i in range(quality_entry_count): + self.read_string() + + fragments_count = self.read_unsigned_int() + fragments = [] + for i in range(fragments_count): + first = self.read_unsigned_int() + first_ts = self.read_unsigned_long_long() + duration = self.read_unsigned_int() + if duration == 0: + discontinuity_indicator = self.read_unsigned_char() + else: + discontinuity_indicator = None + fragments.append({ + 'first': first, + 'ts': first_ts, + 'duration': duration, + 'discontinuity_indicator': discontinuity_indicator, + }) + + return { + 'fragments': fragments, + } + + def read_abst(self): + # version + self.read_unsigned_char() + # flags + self.read_bytes(3) + + self.read_unsigned_int() # BootstrapinfoVersion + # Profile,Live,Update,Reserved + flags = self.read_unsigned_char() + live = flags & 0x20 != 0 + # time scale + self.read_unsigned_int() + # CurrentMediaTime + self.read_unsigned_long_long() + # SmpteTimeCodeOffset + self.read_unsigned_long_long() + + self.read_string() # MovieIdentifier + server_count = self.read_unsigned_char() + # ServerEntryTable + for i in range(server_count): + self.read_string() + quality_count = self.read_unsigned_char() + # QualityEntryTable + for i in range(quality_count): + self.read_string() + # DrmData + self.read_string() + # MetaData + self.read_string() + + segments_count = self.read_unsigned_char() + segments = [] + for i in range(segments_count): + box_size, box_type, box_data = self.read_box_info() + assert box_type == b'asrt' + segment = FlvReader(box_data).read_asrt() + segments.append(segment) + fragments_run_count = self.read_unsigned_char() + fragments = [] + for i in range(fragments_run_count): + box_size, box_type, box_data = self.read_box_info() + assert box_type == b'afrt' + fragments.append(FlvReader(box_data).read_afrt()) + + return { + 'segments': segments, + 'fragments': fragments, + 'live': live, + } + + def read_bootstrap_info(self): + total_size, box_type, box_data = self.read_box_info() + assert box_type == b'abst' + return FlvReader(box_data).read_abst() + + +def read_bootstrap_info(bootstrap_bytes): + return FlvReader(bootstrap_bytes).read_bootstrap_info() + + +def build_fragments_list(boot_info): + """ Return a list of (segment, fragment) for each fragment in the video """ + res = [] + segment_run_table = boot_info['segments'][0] + fragment_run_entry_table = boot_info['fragments'][0]['fragments'] + first_frag_number = fragment_run_entry_table[0]['first'] + fragments_counter = itertools.count(first_frag_number) + for segment, fragments_count in segment_run_table['segment_run']: + # In some live HDS streams (for example Rai), `fragments_count` is + # abnormal and causing out-of-memory errors. It's OK to change the + # number of fragments for live streams as they are updated periodically + if fragments_count == 4294967295 and boot_info['live']: + fragments_count = 2 + for _ in range(fragments_count): + res.append((segment, next(fragments_counter))) + + if boot_info['live']: + res = res[-2:] + + return res + + +def write_unsigned_int(stream, val): + stream.write(compat_struct_pack('!I', val)) + + +def write_unsigned_int_24(stream, val): + stream.write(compat_struct_pack('!I', val)[1:]) + + +def write_flv_header(stream): + """Writes the FLV header to stream""" + # FLV header + stream.write(b'FLV\x01') + stream.write(b'\x05') + stream.write(b'\x00\x00\x00\x09') + stream.write(b'\x00\x00\x00\x00') + + +def write_metadata_tag(stream, metadata): + """Writes optional metadata tag to stream""" + SCRIPT_TAG = b'\x12' + FLV_TAG_HEADER_LEN = 11 + + if metadata: + stream.write(SCRIPT_TAG) + write_unsigned_int_24(stream, len(metadata)) + stream.write(b'\x00\x00\x00\x00\x00\x00\x00') + stream.write(metadata) + write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata)) + + +def remove_encrypted_media(media): + return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and + 'drmAdditionalHeaderSetId' not in e.attrib, + media)) + + +def _add_ns(prop, ver=1): + return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) + + +def get_base_url(manifest): + base_url = xpath_text( + manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)], + 'base URL', default=None) + if base_url: + base_url = base_url.strip() + return base_url + + +class F4mFD(FragmentFD): + """ + A downloader for f4m manifests or AdobeHDS. + """ + + FD_NAME = 'f4m' + + def _get_unencrypted_media(self, doc): + media = doc.findall(_add_ns('media')) + if not media: + self.report_error('No media found') + for e in (doc.findall(_add_ns('drmAdditionalHeader')) + + doc.findall(_add_ns('drmAdditionalHeaderSet'))): + # If id attribute is missing it's valid for all media nodes + # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute + if 'id' not in e.attrib: + self.report_error('Missing ID in f4m DRM') + media = remove_encrypted_media(media) + if not media: + self.report_error('Unsupported DRM') + return media + + def _get_bootstrap_from_url(self, bootstrap_url): + bootstrap = self.ydl.urlopen(bootstrap_url).read() + return read_bootstrap_info(bootstrap) + + def _update_live_fragments(self, bootstrap_url, latest_fragment): + fragments_list = [] + retries = 30 + while (not fragments_list) and (retries > 0): + boot_info = self._get_bootstrap_from_url(bootstrap_url) + fragments_list = build_fragments_list(boot_info) + fragments_list = [f for f in fragments_list if f[1] > latest_fragment] + if not fragments_list: + # Retry after a while + time.sleep(5.0) + retries -= 1 + + if not fragments_list: + self.report_error('Failed to update fragments') + + return fragments_list + + def _parse_bootstrap_node(self, node, base_url): + # Sometimes non empty inline bootstrap info can be specified along + # with bootstrap url attribute (e.g. dummy inline bootstrap info + # contains whitespace characters in [1]). We will prefer bootstrap + # url over inline bootstrap info when present. + # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m + bootstrap_url = node.get('url') + if bootstrap_url: + bootstrap_url = compat_urlparse.urljoin( + base_url, bootstrap_url) + boot_info = self._get_bootstrap_from_url(bootstrap_url) + else: + bootstrap_url = None + bootstrap = compat_b64decode(node.text) + boot_info = read_bootstrap_info(bootstrap) + return boot_info, bootstrap_url + + def real_download(self, filename, info_dict): + man_url = info_dict['url'] + requested_bitrate = info_dict.get('tbr') + self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) + + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) + man_url = urlh.geturl() + # Some manifests may be malformed, e.g. prosiebensat1 generated manifests + # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244 + # and https://github.com/rg3/youtube-dl/issues/7823) + manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip() + + doc = compat_etree_fromstring(manifest) + formats = [(int(f.attrib.get('bitrate', -1)), f) + for f in self._get_unencrypted_media(doc)] + if requested_bitrate is None or len(formats) == 1: + # get the best format + formats = sorted(formats, key=lambda f: f[0]) + rate, media = formats[-1] + else: + rate, media = list(filter( + lambda f: int(f[0]) == requested_bitrate, formats))[0] + + # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. + man_base_url = get_base_url(doc) or man_url + + base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url']) + bootstrap_node = doc.find(_add_ns('bootstrapInfo')) + boot_info, bootstrap_url = self._parse_bootstrap_node( + bootstrap_node, man_base_url) + live = boot_info['live'] + metadata_node = media.find(_add_ns('metadata')) + if metadata_node is not None: + metadata = compat_b64decode(metadata_node.text) + else: + metadata = None + + fragments_list = build_fragments_list(boot_info) + test = self.params.get('test', False) + if test: + # We only download the first fragment + fragments_list = fragments_list[:1] + total_frags = len(fragments_list) + # For some akamai manifests we'll need to add a query to the fragment url + akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) + + ctx = { + 'filename': filename, + 'total_frags': total_frags, + 'live': live, + } + + self._prepare_frag_download(ctx) + + dest_stream = ctx['dest_stream'] + + if ctx['complete_frags_downloaded_bytes'] == 0: + write_flv_header(dest_stream) + if not live: + write_metadata_tag(dest_stream, metadata) + + base_url_parsed = compat_urllib_parse_urlparse(base_url) + + self._start_frag_download(ctx) + + frag_index = 0 + while fragments_list: + seg_i, frag_i = fragments_list.pop(0) + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue + name = 'Seg%d-Frag%d' % (seg_i, frag_i) + query = [] + if base_url_parsed.query: + query.append(base_url_parsed.query) + if akamai_pv: + query.append(akamai_pv.strip(';')) + if info_dict.get('extra_param_to_segment_url'): + query.append(info_dict['extra_param_to_segment_url']) + url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) + try: + success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict) + if not success: + return False + reader = FlvReader(down_data) + while True: + try: + _, box_type, box_data = reader.read_box_info() + except DataTruncatedError: + if test: + # In tests, segments may be truncated, and thus + # FlvReader may not be able to parse the whole + # chunk. If so, write the segment as is + # See https://github.com/rg3/youtube-dl/issues/9214 + dest_stream.write(down_data) + break + raise + if box_type == b'mdat': + self._append_fragment(ctx, box_data) + break + except (compat_urllib_error.HTTPError, ) as err: + if live and (err.code == 404 or err.code == 410): + # We didn't keep up with the live window. Continue + # with the next available fragment. + msg = 'Fragment %d unavailable' % frag_i + self.report_warning(msg) + fragments_list = [] + else: + raise + + if not fragments_list and not test and live and bootstrap_url: + fragments_list = self._update_live_fragments(bootstrap_url, frag_i) + total_frags += len(fragments_list) + if fragments_list and (fragments_list[0][1] > frag_i + 1): + msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) + self.report_warning(msg) + + self._finish_frag_download(ctx) + + return True diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py new file mode 100644 index 0000000..917f6dc --- /dev/null +++ b/youtube_dl/downloader/fragment.py @@ -0,0 +1,268 @@ +from __future__ import division, unicode_literals + +import os +import time +import json + +from .common import FileDownloader +from .http import HttpFD +from ..utils import ( + error_to_compat_str, + encodeFilename, + sanitize_open, + sanitized_Request, +) + + +class HttpQuietDownloader(HttpFD): + def to_screen(self, *args, **kargs): + pass + + +class FragmentFD(FileDownloader): + """ + A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). + + Available options: + + fragment_retries: Number of times to retry a fragment for HTTP error (DASH + and hlsnative only) + skip_unavailable_fragments: + Skip unavailable fragments (DASH and hlsnative only) + keep_fragments: Keep downloaded fragments on disk after downloading is + finished + + For each incomplete fragment download youtube-dl keeps on disk a special + bookkeeping file with download state and metadata (in future such files will + be used for any incomplete download handled by youtube-dl). This file is + used to properly handle resuming, check download file consistency and detect + potential errors. The file has a .ytdl extension and represents a standard + JSON file of the following format: + + extractor: + Dictionary of extractor related data. TBD. + + downloader: + Dictionary of downloader related data. May contain following data: + current_fragment: + Dictionary with current (being downloaded) fragment data: + index: 0-based index of current fragment among all fragments + fragment_count: + Total count of fragments + + This feature is experimental and file format may change in future. + """ + + def report_retry_fragment(self, err, frag_index, count, retries): + self.to_screen( + '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...' + % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) + + def report_skip_fragment(self, frag_index): + self.to_screen('[download] Skipping fragment %d...' % frag_index) + + def _prepare_url(self, info_dict, url): + headers = info_dict.get('http_headers') + return sanitized_Request(url, None, headers) if headers else url + + def _prepare_and_start_frag_download(self, ctx): + self._prepare_frag_download(ctx) + self._start_frag_download(ctx) + + @staticmethod + def __do_ytdl_file(ctx): + return not ctx['live'] and not ctx['tmpfilename'] == '-' + + def _read_ytdl_file(self, ctx): + assert 'ytdl_corrupt' not in ctx + stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r') + try: + ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index'] + except Exception: + ctx['ytdl_corrupt'] = True + finally: + stream.close() + + def _write_ytdl_file(self, ctx): + frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w') + downloader = { + 'current_fragment': { + 'index': ctx['fragment_index'], + }, + } + if ctx.get('fragment_count') is not None: + downloader['fragment_count'] = ctx['fragment_count'] + frag_index_stream.write(json.dumps({'downloader': downloader})) + frag_index_stream.close() + + def _download_fragment(self, ctx, frag_url, info_dict, headers=None): + fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) + success = ctx['dl'].download(fragment_filename, { + 'url': frag_url, + 'http_headers': headers or info_dict.get('http_headers'), + }) + if not success: + return False, None + down, frag_sanitized = sanitize_open(fragment_filename, 'rb') + ctx['fragment_filename_sanitized'] = frag_sanitized + frag_content = down.read() + down.close() + return True, frag_content + + def _append_fragment(self, ctx, frag_content): + try: + ctx['dest_stream'].write(frag_content) + ctx['dest_stream'].flush() + finally: + if self.__do_ytdl_file(ctx): + self._write_ytdl_file(ctx) + if not self.params.get('keep_fragments', False): + os.remove(encodeFilename(ctx['fragment_filename_sanitized'])) + del ctx['fragment_filename_sanitized'] + + def _prepare_frag_download(self, ctx): + if 'live' not in ctx: + ctx['live'] = False + if not ctx['live']: + total_frags_str = '%d' % ctx['total_frags'] + ad_frags = ctx.get('ad_frags', 0) + if ad_frags: + total_frags_str += ' (not including %d ad)' % ad_frags + else: + total_frags_str = 'unknown (live)' + self.to_screen( + '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str)) + self.report_destination(ctx['filename']) + dl = HttpQuietDownloader( + self.ydl, + { + 'continuedl': True, + 'quiet': True, + 'noprogress': True, + 'ratelimit': self.params.get('ratelimit'), + 'retries': self.params.get('retries', 0), + 'nopart': self.params.get('nopart', False), + 'test': self.params.get('test', False), + } + ) + tmpfilename = self.temp_name(ctx['filename']) + open_mode = 'wb' + resume_len = 0 + + # Establish possible resume length + if os.path.isfile(encodeFilename(tmpfilename)): + open_mode = 'ab' + resume_len = os.path.getsize(encodeFilename(tmpfilename)) + + # Should be initialized before ytdl file check + ctx.update({ + 'tmpfilename': tmpfilename, + 'fragment_index': 0, + }) + + if self.__do_ytdl_file(ctx): + if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): + self._read_ytdl_file(ctx) + is_corrupt = ctx.get('ytdl_corrupt') is True + is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0 + if is_corrupt or is_inconsistent: + message = ( + '.ytdl file is corrupt' if is_corrupt else + 'Inconsistent state of incomplete fragment download') + self.report_warning( + '%s. Restarting from the beginning...' % message) + ctx['fragment_index'] = resume_len = 0 + if 'ytdl_corrupt' in ctx: + del ctx['ytdl_corrupt'] + self._write_ytdl_file(ctx) + else: + self._write_ytdl_file(ctx) + assert ctx['fragment_index'] == 0 + + dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode) + + ctx.update({ + 'dl': dl, + 'dest_stream': dest_stream, + 'tmpfilename': tmpfilename, + # Total complete fragments downloaded so far in bytes + 'complete_frags_downloaded_bytes': resume_len, + }) + + def _start_frag_download(self, ctx): + total_frags = ctx['total_frags'] + # This dict stores the download progress, it's updated by the progress + # hook + state = { + 'status': 'downloading', + 'downloaded_bytes': ctx['complete_frags_downloaded_bytes'], + 'fragment_index': ctx['fragment_index'], + 'fragment_count': total_frags, + 'filename': ctx['filename'], + 'tmpfilename': ctx['tmpfilename'], + } + + start = time.time() + ctx.update({ + 'started': start, + # Amount of fragment's bytes downloaded by the time of the previous + # frag progress hook invocation + 'prev_frag_downloaded_bytes': 0, + }) + + def frag_progress_hook(s): + if s['status'] not in ('downloading', 'finished'): + return + + time_now = time.time() + state['elapsed'] = time_now - start + frag_total_bytes = s.get('total_bytes') or 0 + if not ctx['live']: + estimated_size = ( + (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / + (state['fragment_index'] + 1) * total_frags) + state['total_bytes_estimate'] = estimated_size + + if s['status'] == 'finished': + state['fragment_index'] += 1 + ctx['fragment_index'] = state['fragment_index'] + state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] + ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] + ctx['prev_frag_downloaded_bytes'] = 0 + else: + frag_downloaded_bytes = s['downloaded_bytes'] + state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes'] + if not ctx['live']: + state['eta'] = self.calc_eta( + start, time_now, estimated_size, + state['downloaded_bytes']) + state['speed'] = s.get('speed') or ctx.get('speed') + ctx['speed'] = state['speed'] + ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes + self._hook_progress(state) + + ctx['dl'].add_progress_hook(frag_progress_hook) + + return start + + def _finish_frag_download(self, ctx): + ctx['dest_stream'].close() + if self.__do_ytdl_file(ctx): + ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) + if os.path.isfile(ytdl_filename): + os.remove(ytdl_filename) + elapsed = time.time() - ctx['started'] + + if ctx['tmpfilename'] == '-': + downloaded_bytes = ctx['complete_frags_downloaded_bytes'] + else: + self.try_rename(ctx['tmpfilename'], ctx['filename']) + downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) + + self._hook_progress({ + 'downloaded_bytes': downloaded_bytes, + 'total_bytes': downloaded_bytes, + 'filename': ctx['filename'], + 'status': 'finished', + 'elapsed': elapsed, + }) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py new file mode 100644 index 0000000..fd30452 --- /dev/null +++ b/youtube_dl/downloader/hls.py @@ -0,0 +1,204 @@ +from __future__ import unicode_literals + +import re +import binascii +try: + from Crypto.Cipher import AES + can_decrypt_frag = True +except ImportError: + can_decrypt_frag = False + +from .fragment import FragmentFD +from .external import FFmpegFD + +from ..compat import ( + compat_urllib_error, + compat_urlparse, + compat_struct_pack, +) +from ..utils import ( + parse_m3u8_attributes, + update_url_query, +) + + +class HlsFD(FragmentFD): + """ A limited implementation that does not require ffmpeg """ + + FD_NAME = 'hlsnative' + + @staticmethod + def can_download(manifest, info_dict): + UNSUPPORTED_FEATURES = ( + r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1] + # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] + + # Live streams heuristic does not always work (e.g. geo restricted to Germany + # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) + # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + + # This heuristic also is not correct since segments may not be appended as well. + # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite + # no segments will definitely be appended to the end of the playlist. + # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of + # # event media playlists [4] + + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 + # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 + # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 + # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 + ) + check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] + is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest + check_results.append(can_decrypt_frag or not is_aes128_enc) + check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest)) + check_results.append(not info_dict.get('is_live')) + return all(check_results) + + def real_download(self, filename, info_dict): + man_url = info_dict['url'] + self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) + + urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) + man_url = urlh.geturl() + s = urlh.read().decode('utf-8', 'ignore') + + if not self.can_download(s, info_dict): + if info_dict.get('extra_param_to_segment_url'): + self.report_error('pycrypto not found. Please install it.') + return False + self.report_warning( + 'hlsnative has detected features it does not support, ' + 'extraction will be delegated to ffmpeg') + fd = FFmpegFD(self.ydl, self.params) + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + return fd.real_download(filename, info_dict) + + def is_ad_fragment(s): + return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or + s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) + + media_frags = 0 + ad_frags = 0 + ad_frag_next = False + for line in s.splitlines(): + line = line.strip() + if not line: + continue + if line.startswith('#'): + if is_ad_fragment(line): + ad_frags += 1 + ad_frag_next = True + continue + if ad_frag_next: + ad_frag_next = False + continue + media_frags += 1 + + ctx = { + 'filename': filename, + 'total_frags': media_frags, + 'ad_frags': ad_frags, + } + + self._prepare_and_start_frag_download(ctx) + + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + test = self.params.get('test', False) + + extra_query = None + extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') + if extra_param_to_segment_url: + extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url) + i = 0 + media_sequence = 0 + decrypt_info = {'METHOD': 'NONE'} + byte_range = {} + frag_index = 0 + ad_frag_next = False + for line in s.splitlines(): + line = line.strip() + if line: + if not line.startswith('#'): + if ad_frag_next: + ad_frag_next = False + continue + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue + frag_url = ( + line + if re.match(r'^https?://', line) + else compat_urlparse.urljoin(man_url, line)) + if extra_query: + frag_url = update_url_query(frag_url, extra_query) + count = 0 + headers = info_dict.get('http_headers', {}) + if byte_range: + headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end']) + while count <= fragment_retries: + try: + success, frag_content = self._download_fragment( + ctx, frag_url, info_dict, headers) + if not success: + return False + break + except compat_urllib_error.HTTPError as err: + # Unavailable (possibly temporary) fragments may be served. + # First we try to retry then either skip or abort. + # See https://github.com/rg3/youtube-dl/issues/10165, + # https://github.com/rg3/youtube-dl/issues/10448). + count += 1 + if count <= fragment_retries: + self.report_retry_fragment(err, frag_index, count, fragment_retries) + if count > fragment_retries: + if skip_unavailable_fragments: + i += 1 + media_sequence += 1 + self.report_skip_fragment(frag_index) + continue + self.report_error( + 'giving up after %s fragment retries' % fragment_retries) + return False + if decrypt_info['METHOD'] == 'AES-128': + iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) + decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen( + self._prepare_url(info_dict, decrypt_info['URI'])).read() + frag_content = AES.new( + decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) + self._append_fragment(ctx, frag_content) + # We only download the first fragment during the test + if test: + break + i += 1 + media_sequence += 1 + elif line.startswith('#EXT-X-KEY'): + decrypt_url = decrypt_info.get('URI') + decrypt_info = parse_m3u8_attributes(line[11:]) + if decrypt_info['METHOD'] == 'AES-128': + if 'IV' in decrypt_info: + decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) + if not re.match(r'^https?://', decrypt_info['URI']): + decrypt_info['URI'] = compat_urlparse.urljoin( + man_url, decrypt_info['URI']) + if extra_query: + decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) + if decrypt_url != decrypt_info['URI']: + decrypt_info['KEY'] = None + elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): + media_sequence = int(line[22:]) + elif line.startswith('#EXT-X-BYTERANGE'): + splitted_byte_range = line[17:].split('@') + sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] + byte_range = { + 'start': sub_range_start, + 'end': sub_range_start + int(splitted_byte_range[0]), + } + elif is_ad_fragment(line): + ad_frag_next = True + + self._finish_frag_download(ctx) + + return True diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py new file mode 100644 index 0000000..5b1e960 --- /dev/null +++ b/youtube_dl/downloader/http.py @@ -0,0 +1,354 @@ +from __future__ import unicode_literals + +import errno +import os +import socket +import time +import random +import re + +from .common import FileDownloader +from ..compat import ( + compat_str, + compat_urllib_error, +) +from ..utils import ( + ContentTooShortError, + encodeFilename, + int_or_none, + sanitize_open, + sanitized_Request, + write_xattr, + XAttrMetadataError, + XAttrUnavailableError, +) + + +class HttpFD(FileDownloader): + def real_download(self, filename, info_dict): + url = info_dict['url'] + + class DownloadContext(dict): + __getattr__ = dict.get + __setattr__ = dict.__setitem__ + __delattr__ = dict.__delitem__ + + ctx = DownloadContext() + ctx.filename = filename + ctx.tmpfilename = self.temp_name(filename) + ctx.stream = None + + # Do not include the Accept-Encoding header + headers = {'Youtubedl-no-compression': 'True'} + add_headers = info_dict.get('http_headers') + if add_headers: + headers.update(add_headers) + + is_test = self.params.get('test', False) + chunk_size = self._TEST_FILE_SIZE if is_test else ( + info_dict.get('downloader_options', {}).get('http_chunk_size') or + self.params.get('http_chunk_size') or 0) + + ctx.open_mode = 'wb' + ctx.resume_len = 0 + ctx.data_len = None + ctx.block_size = self.params.get('buffersize', 1024) + ctx.start_time = time.time() + ctx.chunk_size = None + + if self.params.get('continuedl', True): + # Establish possible resume length + if os.path.isfile(encodeFilename(ctx.tmpfilename)): + ctx.resume_len = os.path.getsize( + encodeFilename(ctx.tmpfilename)) + + ctx.is_resume = ctx.resume_len > 0 + + count = 0 + retries = self.params.get('retries', 0) + + class SucceedDownload(Exception): + pass + + class RetryDownload(Exception): + def __init__(self, source_error): + self.source_error = source_error + + class NextFragment(Exception): + pass + + def set_range(req, start, end): + range_header = 'bytes=%d-' % start + if end: + range_header += compat_str(end) + req.add_header('Range', range_header) + + def establish_connection(): + ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) + if not is_test and chunk_size else chunk_size) + if ctx.resume_len > 0: + range_start = ctx.resume_len + if ctx.is_resume: + self.report_resuming_byte(ctx.resume_len) + ctx.open_mode = 'ab' + elif ctx.chunk_size > 0: + range_start = 0 + else: + range_start = None + ctx.is_resume = False + range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None + if range_end and ctx.data_len is not None and range_end >= ctx.data_len: + range_end = ctx.data_len - 1 + has_range = range_start is not None + ctx.has_range = has_range + request = sanitized_Request(url, None, headers) + if has_range: + set_range(request, range_start, range_end) + # Establish connection + try: + ctx.data = self.ydl.urlopen(request) + # When trying to resume, Content-Range HTTP header of response has to be checked + # to match the value of requested Range HTTP header. This is due to a webservers + # that don't support resuming and serve a whole file with no Content-Range + # set in response despite of requested Range (see + # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799) + if has_range: + content_range = ctx.data.headers.get('Content-Range') + if content_range: + content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range) + # Content-Range is present and matches requested Range, resume is possible + if content_range_m: + if range_start == int(content_range_m.group(1)): + content_range_end = int_or_none(content_range_m.group(2)) + content_len = int_or_none(content_range_m.group(3)) + accept_content_len = ( + # Non-chunked download + not ctx.chunk_size or + # Chunked download and requested piece or + # its part is promised to be served + content_range_end == range_end or + content_len < range_end) + if accept_content_len: + ctx.data_len = content_len + return + # Content-Range is either not present or invalid. Assuming remote webserver is + # trying to send the whole file, resume is not possible, so wiping the local file + # and performing entire redownload + self.report_unable_to_resume() + ctx.resume_len = 0 + ctx.open_mode = 'wb' + ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None)) + return + except (compat_urllib_error.HTTPError, ) as err: + if err.code == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + ctx.data = self.ydl.urlopen( + sanitized_Request(url, None, headers)) + content_length = ctx.data.info()['Content-Length'] + except (compat_urllib_error.HTTPError, ) as err: + if err.code < 500 or err.code >= 600: + raise + else: + # Examine the reported length + if (content_length is not None and + (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): + # The file had already been fully downloaded. + # Explanation to the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # I decided to implement a suggested change and consider the file + # completely downloaded if the file size differs less than 100 bytes from + # the one in the hard drive. + self.report_file_already_downloaded(ctx.filename) + self.try_rename(ctx.tmpfilename, ctx.filename) + self._hook_progress({ + 'filename': ctx.filename, + 'status': 'finished', + 'downloaded_bytes': ctx.resume_len, + 'total_bytes': ctx.resume_len, + }) + raise SucceedDownload() + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + ctx.resume_len = 0 + ctx.open_mode = 'wb' + return + elif err.code < 500 or err.code >= 600: + # Unexpected HTTP error + raise + raise RetryDownload(err) + except socket.error as err: + if err.errno != errno.ECONNRESET: + # Connection reset is no problem, just retry + raise + raise RetryDownload(err) + + def download(): + data_len = ctx.data.info().get('Content-length', None) + + # Range HTTP header may be ignored/unsupported by a webserver + # (e.g. extractor/scivee.py, extractor/bambuser.py). + # However, for a test we still would like to download just a piece of a file. + # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control + # block size when downloading a file. + if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): + data_len = self._TEST_FILE_SIZE + + if data_len is not None: + data_len = int(data_len) + ctx.resume_len + min_data_len = self.params.get('min_filesize') + max_data_len = self.params.get('max_filesize') + if min_data_len is not None and data_len < min_data_len: + self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) + return False + if max_data_len is not None and data_len > max_data_len: + self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) + return False + + byte_counter = 0 + ctx.resume_len + block_size = ctx.block_size + start = time.time() + + # measure time over whole while-loop, so slow_down() and best_block_size() work together properly + now = None # needed for slow_down() in the first loop run + before = start # start measuring + + def retry(e): + to_stdout = ctx.tmpfilename == '-' + if not to_stdout: + ctx.stream.close() + ctx.stream = None + ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename)) + raise RetryDownload(e) + + while True: + try: + # Download and write + data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) + # socket.timeout is a subclass of socket.error but may not have + # errno set + except socket.timeout as e: + retry(e) + except socket.error as e: + if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT): + raise + retry(e) + + byte_counter += len(data_block) + + # exit loop when download is finished + if len(data_block) == 0: + break + + # Open destination file just in time + if ctx.stream is None: + try: + ctx.stream, ctx.tmpfilename = sanitize_open( + ctx.tmpfilename, ctx.open_mode) + assert ctx.stream is not None + ctx.filename = self.undo_temp_name(ctx.tmpfilename) + self.report_destination(ctx.filename) + except (OSError, IOError) as err: + self.report_error('unable to open for writing: %s' % str(err)) + return False + + if self.params.get('xattr_set_filesize', False) and data_len is not None: + try: + write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8')) + except (XAttrUnavailableError, XAttrMetadataError) as err: + self.report_error('unable to set filesize xattr: %s' % str(err)) + + try: + ctx.stream.write(data_block) + except (IOError, OSError) as err: + self.to_stderr('\n') + self.report_error('unable to write data: %s' % str(err)) + return False + + # Apply rate limit + self.slow_down(start, now, byte_counter - ctx.resume_len) + + # end measuring of one loop run + now = time.time() + after = now + + # Adjust block size + if not self.params.get('noresizebuffer', False): + block_size = self.best_block_size(after - before, len(data_block)) + + before = after + + # Progress message + speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) + if ctx.data_len is None: + eta = None + else: + eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len) + + self._hook_progress({ + 'status': 'downloading', + 'downloaded_bytes': byte_counter, + 'total_bytes': ctx.data_len, + 'tmpfilename': ctx.tmpfilename, + 'filename': ctx.filename, + 'eta': eta, + 'speed': speed, + 'elapsed': now - ctx.start_time, + }) + + if is_test and byte_counter == data_len: + break + + if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len: + ctx.resume_len = byte_counter + # ctx.block_size = block_size + raise NextFragment() + + if ctx.stream is None: + self.to_stderr('\n') + self.report_error('Did not get any data blocks') + return False + if ctx.tmpfilename != '-': + ctx.stream.close() + + if data_len is not None and byte_counter != data_len: + err = ContentTooShortError(byte_counter, int(data_len)) + if count <= retries: + retry(err) + raise err + + self.try_rename(ctx.tmpfilename, ctx.filename) + + # Update file modification time + if self.params.get('updatetime', True): + info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None)) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': byte_counter, + 'filename': ctx.filename, + 'status': 'finished', + 'elapsed': time.time() - ctx.start_time, + }) + + return True + + while count <= retries: + try: + establish_connection() + return download() + except RetryDownload as e: + count += 1 + if count <= retries: + self.report_retry(e.source_error, count, retries) + continue + except NextFragment: + continue + except SucceedDownload: + return True + + self.report_error('giving up after %s retries' % retries) + return False diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py new file mode 100644 index 0000000..063fcf4 --- /dev/null +++ b/youtube_dl/downloader/ism.py @@ -0,0 +1,259 @@ +from __future__ import unicode_literals + +import time +import binascii +import io + +from .fragment import FragmentFD +from ..compat import ( + compat_Struct, + compat_urllib_error, +) + + +u8 = compat_Struct('>B') +u88 = compat_Struct('>Bx') +u16 = compat_Struct('>H') +u1616 = compat_Struct('>Hxx') +u32 = compat_Struct('>I') +u64 = compat_Struct('>Q') + +s88 = compat_Struct('>bx') +s16 = compat_Struct('>h') +s1616 = compat_Struct('>hxx') +s32 = compat_Struct('>i') + +unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) + +TRACK_ENABLED = 0x1 +TRACK_IN_MOVIE = 0x2 +TRACK_IN_PREVIEW = 0x4 + +SELF_CONTAINED = 0x1 + + +def box(box_type, payload): + return u32.pack(8 + len(payload)) + box_type + payload + + +def full_box(box_type, version, flags, payload): + return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload) + + +def write_piff_header(stream, params): + track_id = params['track_id'] + fourcc = params['fourcc'] + duration = params['duration'] + timescale = params.get('timescale', 10000000) + language = params.get('language', 'und') + height = params.get('height', 0) + width = params.get('width', 0) + is_audio = width == 0 and height == 0 + creation_time = modification_time = int(time.time()) + + ftyp_payload = b'isml' # major brand + ftyp_payload += u32.pack(1) # minor version + ftyp_payload += b'piff' + b'iso2' # compatible brands + stream.write(box(b'ftyp', ftyp_payload)) # File Type Box + + mvhd_payload = u64.pack(creation_time) + mvhd_payload += u64.pack(modification_time) + mvhd_payload += u32.pack(timescale) + mvhd_payload += u64.pack(duration) + mvhd_payload += s1616.pack(1) # rate + mvhd_payload += s88.pack(1) # volume + mvhd_payload += u16.pack(0) # reserved + mvhd_payload += u32.pack(0) * 2 # reserved + mvhd_payload += unity_matrix + mvhd_payload += u32.pack(0) * 6 # pre defined + mvhd_payload += u32.pack(0xffffffff) # next track id + moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box + + tkhd_payload = u64.pack(creation_time) + tkhd_payload += u64.pack(modification_time) + tkhd_payload += u32.pack(track_id) # track id + tkhd_payload += u32.pack(0) # reserved + tkhd_payload += u64.pack(duration) + tkhd_payload += u32.pack(0) * 2 # reserved + tkhd_payload += s16.pack(0) # layer + tkhd_payload += s16.pack(0) # alternate group + tkhd_payload += s88.pack(1 if is_audio else 0) # volume + tkhd_payload += u16.pack(0) # reserved + tkhd_payload += unity_matrix + tkhd_payload += u1616.pack(width) + tkhd_payload += u1616.pack(height) + trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box + + mdhd_payload = u64.pack(creation_time) + mdhd_payload += u64.pack(modification_time) + mdhd_payload += u32.pack(timescale) + mdhd_payload += u64.pack(duration) + mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60)) + mdhd_payload += u16.pack(0) # pre defined + mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box + + hdlr_payload = u32.pack(0) # pre defined + hdlr_payload += b'soun' if is_audio else b'vide' # handler type + hdlr_payload += u32.pack(0) * 3 # reserved + hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name + mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box + + if is_audio: + smhd_payload = s88.pack(0) # balance + smhd_payload += u16.pack(0) # reserved + media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header + else: + vmhd_payload = u16.pack(0) # graphics mode + vmhd_payload += u16.pack(0) * 3 # opcolor + media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header + minf_payload = media_header_box + + dref_payload = u32.pack(1) # entry count + dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box + dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box + minf_payload += box(b'dinf', dinf_payload) # Data Information Box + + stsd_payload = u32.pack(1) # entry count + + sample_entry_payload = u8.pack(0) * 6 # reserved + sample_entry_payload += u16.pack(1) # data reference index + if is_audio: + sample_entry_payload += u32.pack(0) * 2 # reserved + sample_entry_payload += u16.pack(params.get('channels', 2)) + sample_entry_payload += u16.pack(params.get('bits_per_sample', 16)) + sample_entry_payload += u16.pack(0) # pre defined + sample_entry_payload += u16.pack(0) # reserved + sample_entry_payload += u1616.pack(params['sampling_rate']) + + if fourcc == 'AACL': + sample_entry_box = box(b'mp4a', sample_entry_payload) + else: + sample_entry_payload += u16.pack(0) # pre defined + sample_entry_payload += u16.pack(0) # reserved + sample_entry_payload += u32.pack(0) * 3 # pre defined + sample_entry_payload += u16.pack(width) + sample_entry_payload += u16.pack(height) + sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi + sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi + sample_entry_payload += u32.pack(0) # reserved + sample_entry_payload += u16.pack(1) # frame count + sample_entry_payload += u8.pack(0) * 32 # compressor name + sample_entry_payload += u16.pack(0x18) # depth + sample_entry_payload += s16.pack(-1) # pre defined + + codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8')) + if fourcc in ('H264', 'AVC1'): + sps, pps = codec_private_data.split(u32.pack(1))[1:] + avcc_payload = u8.pack(1) # configuration version + avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication + avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one + avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001) + avcc_payload += u16.pack(len(sps)) + avcc_payload += sps + avcc_payload += u8.pack(1) # number of pps + avcc_payload += u16.pack(len(pps)) + avcc_payload += pps + sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record + sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry + stsd_payload += sample_entry_box + + stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box + + stts_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box + + stsc_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box + + stco_payload = u32.pack(0) # entry count + stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box + + minf_payload += box(b'stbl', stbl_payload) # Sample Table Box + + mdia_payload += box(b'minf', minf_payload) # Media Information Box + + trak_payload += box(b'mdia', mdia_payload) # Media Box + + moov_payload += box(b'trak', trak_payload) # Track Box + + mehd_payload = u64.pack(duration) + mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box + + trex_payload = u32.pack(track_id) # track id + trex_payload += u32.pack(1) # default sample description index + trex_payload += u32.pack(0) # default sample duration + trex_payload += u32.pack(0) # default sample size + trex_payload += u32.pack(0) # default sample flags + mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box + + moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box + stream.write(box(b'moov', moov_payload)) # Movie Box + + +def extract_box_data(data, box_sequence): + data_reader = io.BytesIO(data) + while True: + box_size = u32.unpack(data_reader.read(4))[0] + box_type = data_reader.read(4) + if box_type == box_sequence[0]: + box_data = data_reader.read(box_size - 8) + if len(box_sequence) == 1: + return box_data + return extract_box_data(box_data, box_sequence[1:]) + data_reader.seek(box_size - 8, 1) + + +class IsmFD(FragmentFD): + """ + Download segments in a ISM manifest + """ + + FD_NAME = 'ism' + + def real_download(self, filename, info_dict): + segments = info_dict['fragments'][:1] if self.params.get( + 'test', False) else info_dict['fragments'] + + ctx = { + 'filename': filename, + 'total_frags': len(segments), + } + + self._prepare_and_start_frag_download(ctx) + + fragment_retries = self.params.get('fragment_retries', 0) + skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) + + track_written = False + frag_index = 0 + for i, segment in enumerate(segments): + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue + count = 0 + while count <= fragment_retries: + try: + success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) + if not success: + return False + if not track_written: + tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd']) + info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] + write_piff_header(ctx['dest_stream'], info_dict['_download_params']) + track_written = True + self._append_fragment(ctx, frag_content) + break + except compat_urllib_error.HTTPError as err: + count += 1 + if count <= fragment_retries: + self.report_retry_fragment(err, frag_index, count, fragment_retries) + if count > fragment_retries: + if skip_unavailable_fragments: + self.report_skip_fragment(frag_index) + continue + self.report_error('giving up after %s fragment retries' % fragment_retries) + return False + + self._finish_frag_download(ctx) + + return True diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py new file mode 100644 index 0000000..fbb7f51 --- /dev/null +++ b/youtube_dl/downloader/rtmp.py @@ -0,0 +1,214 @@ +from __future__ import unicode_literals + +import os +import re +import subprocess +import time + +from .common import FileDownloader +from ..compat import compat_str +from ..utils import ( + check_executable, + encodeFilename, + encodeArgument, + get_exe_version, +) + + +def rtmpdump_version(): + return get_exe_version( + 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)') + + +class RtmpFD(FileDownloader): + def real_download(self, filename, info_dict): + def run_rtmpdump(args): + start = time.time() + resume_percent = None + resume_downloaded_data_len = None + proc = subprocess.Popen(args, stderr=subprocess.PIPE) + cursor_in_new_line = True + proc_stderr_closed = False + try: + while not proc_stderr_closed: + # read line from stderr + line = '' + while True: + char = proc.stderr.read(1) + if not char: + proc_stderr_closed = True + break + if char in [b'\r', b'\n']: + break + line += char.decode('ascii', 'replace') + if not line: + # proc_stderr_closed is True + continue + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1)) * 1024) + percent = float(mobj.group(2)) + if not resume_percent: + resume_percent = percent + resume_downloaded_data_len = downloaded_data_len + time_now = time.time() + eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) + speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) + data_len = None + if percent > 0: + data_len = int(downloaded_data_len * 100 / percent) + self._hook_progress({ + 'status': 'downloading', + 'downloaded_bytes': downloaded_data_len, + 'total_bytes_estimate': data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'eta': eta, + 'elapsed': time_now - start, + 'speed': speed, + }) + cursor_in_new_line = False + else: + # no percent for live streams + mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) + if mobj: + downloaded_data_len = int(float(mobj.group(1)) * 1024) + time_now = time.time() + speed = self.calc_speed(start, time_now, downloaded_data_len) + self._hook_progress({ + 'downloaded_bytes': downloaded_data_len, + 'tmpfilename': tmpfilename, + 'filename': filename, + 'status': 'downloading', + 'elapsed': time_now - start, + 'speed': speed, + }) + cursor_in_new_line = False + elif self.params.get('verbose', False): + if not cursor_in_new_line: + self.to_screen('') + cursor_in_new_line = True + self.to_screen('[rtmpdump] ' + line) + finally: + proc.wait() + if not cursor_in_new_line: + self.to_screen('') + return proc.returncode + + url = info_dict['url'] + player_url = info_dict.get('player_url') + page_url = info_dict.get('page_url') + app = info_dict.get('app') + play_path = info_dict.get('play_path') + tc_url = info_dict.get('tc_url') + flash_version = info_dict.get('flash_version') + live = info_dict.get('rtmp_live', False) + conn = info_dict.get('rtmp_conn') + protocol = info_dict.get('rtmp_protocol') + real_time = info_dict.get('rtmp_real_time', False) + no_resume = info_dict.get('no_resume', False) + continue_dl = self.params.get('continuedl', True) + + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + test = self.params.get('test', False) + + # Check for rtmpdump first + if not check_executable('rtmpdump', ['-h']): + self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.') + return False + + # Download using rtmpdump. rtmpdump returns exit code 2 when + # the connection was interrupted and resuming appears to be + # possible. This is part of rtmpdump's normal usage, AFAIK. + basic_args = [ + 'rtmpdump', '--verbose', '-r', url, + '-o', tmpfilename] + if player_url is not None: + basic_args += ['--swfVfy', player_url] + if page_url is not None: + basic_args += ['--pageUrl', page_url] + if app is not None: + basic_args += ['--app', app] + if play_path is not None: + basic_args += ['--playpath', play_path] + if tc_url is not None: + basic_args += ['--tcUrl', tc_url] + if test: + basic_args += ['--stop', '1'] + if flash_version is not None: + basic_args += ['--flashVer', flash_version] + if live: + basic_args += ['--live'] + if isinstance(conn, list): + for entry in conn: + basic_args += ['--conn', entry] + elif isinstance(conn, compat_str): + basic_args += ['--conn', conn] + if protocol is not None: + basic_args += ['--protocol', protocol] + if real_time: + basic_args += ['--realtime'] + + args = basic_args + if not no_resume and continue_dl and not live: + args += ['--resume'] + if not live and continue_dl: + args += ['--skip', '1'] + + args = [encodeArgument(a) for a in args] + + self._debug_cmd(args, exe='rtmpdump') + + RD_SUCCESS = 0 + RD_FAILED = 1 + RD_INCOMPLETE = 2 + RD_NO_CONNECT = 3 + + started = time.time() + + try: + retval = run_rtmpdump(args) + except KeyboardInterrupt: + if not info_dict.get('is_live'): + raise + retval = RD_SUCCESS + self.to_screen('\n[rtmpdump] Interrupted by user') + + if retval == RD_NO_CONNECT: + self.report_error('[rtmpdump] Could not connect to RTMP server.') + return False + + while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: + prevsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize) + time.sleep(5.0) # This seems to be needed + args = basic_args + ['--resume'] + if retval == RD_FAILED: + args += ['--skip', '1'] + args = [encodeArgument(a) for a in args] + retval = run_rtmpdump(args) + cursize = os.path.getsize(encodeFilename(tmpfilename)) + if prevsize == cursize and retval == RD_FAILED: + break + # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those + if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024: + self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.') + retval = RD_SUCCESS + break + if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + 'elapsed': time.time() - started, + }) + return True + else: + self.to_stderr('\n') + self.report_error('rtmpdump exited with code %d' % retval) + return False diff --git a/youtube_dl/downloader/rtsp.py b/youtube_dl/downloader/rtsp.py new file mode 100644 index 0000000..939358b --- /dev/null +++ b/youtube_dl/downloader/rtsp.py @@ -0,0 +1,47 @@ +from __future__ import unicode_literals + +import os +import subprocess + +from .common import FileDownloader +from ..utils import ( + check_executable, + encodeFilename, +) + + +class RtspFD(FileDownloader): + def real_download(self, filename, info_dict): + url = info_dict['url'] + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + + if check_executable('mplayer', ['-h']): + args = [ + 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', + '-dumpstream', '-dumpfile', tmpfilename, url] + elif check_executable('mpv', ['-h']): + args = [ + 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] + else: + self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.') + return False + + self._debug_cmd(args) + + retval = subprocess.call(args) + if retval == 0: + fsize = os.path.getsize(encodeFilename(tmpfilename)) + self.to_screen('\r[%s] %s bytes' % (args[0], fsize)) + self.try_rename(tmpfilename, filename) + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': filename, + 'status': 'finished', + }) + return True + else: + self.to_stderr('\n') + self.report_error('%s exited with code %d' % (args[0], retval)) + return False |