aboutsummaryrefslogtreecommitdiffstats
path: root/youtube_dl/downloader
diff options
context:
space:
mode:
authorJames Taylor <user234683@users.noreply.github.com>2019-12-19 21:33:54 -0800
committerJames Taylor <user234683@users.noreply.github.com>2019-12-19 21:33:54 -0800
commitb4406df9cf33c53b6e942e6a5c72d955f57c4b5f (patch)
tree4de0082ac9eb26a05188dd424835ea50b1483113 /youtube_dl/downloader
parentb614fcdb8579ba29fccfa47eab1e2965cfb0beaa (diff)
parent6b7a1212e30b713453aa7d2b3a7122e97689dad0 (diff)
downloadyt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.tar.lz
yt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.tar.xz
yt-local-b4406df9cf33c53b6e942e6a5c72d955f57c4b5f.zip
Merge branch 'modular-data-extract'
Commits in this branch are prefixed with "Extraction:". This branch refactors data extraction. All such functionality has been moved to the yt_data_extract module. Responses from requests are given to the module, and it parses them into a consistent, more useful format. The dependency on youtube-dl has also been dropped, and this functionality has been built from scratch for these reasons: (1) I've noticed youtube-dl breaks more often than Invidious (which uses watch page extraction built from scratch) in response to changes from YouTube, so I'm hoping what I wrote will also be less brittle. (2) Such breakage is inconvenient because I have to manually merge the fixes, since I had to make changes to youtube-dl to make it do things such as extracting related videos. (3) I have no control over error handling and request pooling with youtube-dl, since it does all the requests (these would require intrusive changes I don't want to maintain). (4) I will now finally be able to display the number of comments and whether comments are disabled without making additional requests.
Diffstat (limited to 'youtube_dl/downloader')
-rw-r--r--youtube_dl/downloader/__init__.py61
-rw-r--r--youtube_dl/downloader/common.py389
-rw-r--r--youtube_dl/downloader/dash.py80
-rw-r--r--youtube_dl/downloader/external.py354
-rw-r--r--youtube_dl/downloader/f4m.py438
-rw-r--r--youtube_dl/downloader/fragment.py268
-rw-r--r--youtube_dl/downloader/hls.py204
-rw-r--r--youtube_dl/downloader/http.py354
-rw-r--r--youtube_dl/downloader/ism.py259
-rw-r--r--youtube_dl/downloader/rtmp.py214
-rw-r--r--youtube_dl/downloader/rtsp.py47
11 files changed, 0 insertions, 2668 deletions
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
deleted file mode 100644
index 2e485df..0000000
--- a/youtube_dl/downloader/__init__.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import FileDownloader
-from .f4m import F4mFD
-from .hls import HlsFD
-from .http import HttpFD
-from .rtmp import RtmpFD
-from .dash import DashSegmentsFD
-from .rtsp import RtspFD
-from .ism import IsmFD
-from .external import (
- get_external_downloader,
- FFmpegFD,
-)
-
-from ..utils import (
- determine_protocol,
-)
-
-PROTOCOL_MAP = {
- 'rtmp': RtmpFD,
- 'm3u8_native': HlsFD,
- 'm3u8': FFmpegFD,
- 'mms': RtspFD,
- 'rtsp': RtspFD,
- 'f4m': F4mFD,
- 'http_dash_segments': DashSegmentsFD,
- 'ism': IsmFD,
-}
-
-
-def get_suitable_downloader(info_dict, params={}):
- """Get the downloader class that can handle the info dict."""
- protocol = determine_protocol(info_dict)
- info_dict['protocol'] = protocol
-
- # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
- # return FFmpegFD
-
- external_downloader = params.get('external_downloader')
- if external_downloader is not None:
- ed = get_external_downloader(external_downloader)
- if ed.can_download(info_dict):
- return ed
-
- if protocol.startswith('m3u8') and info_dict.get('is_live'):
- return FFmpegFD
-
- if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
- return HlsFD
-
- if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
- return FFmpegFD
-
- return PROTOCOL_MAP.get(protocol, HttpFD)
-
-
-__all__ = [
- 'get_suitable_downloader',
- 'FileDownloader',
-]
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
deleted file mode 100644
index 5979833..0000000
--- a/youtube_dl/downloader/common.py
+++ /dev/null
@@ -1,389 +0,0 @@
-from __future__ import division, unicode_literals
-
-import os
-import re
-import sys
-import time
-import random
-
-from ..compat import compat_os_name
-from ..utils import (
- decodeArgument,
- encodeFilename,
- error_to_compat_str,
- format_bytes,
- shell_quote,
- timeconvert,
-)
-
-
-class FileDownloader(object):
- """File Downloader class.
-
- File downloader objects are the ones responsible of downloading the
- actual video file and writing it to disk.
-
- File downloaders accept a lot of parameters. In order not to saturate
- the object constructor with arguments, it receives a dictionary of
- options instead.
-
- Available options:
-
- verbose: Print additional info to stdout.
- quiet: Do not print messages to stdout.
- ratelimit: Download speed limit, in bytes/sec.
- retries: Number of times to retry for HTTP error 5xx
- buffersize: Size of download buffer in bytes.
- noresizebuffer: Do not automatically resize the download buffer.
- continuedl: Try to continue downloads if possible.
- noprogress: Do not print the progress bar.
- logtostderr: Log messages to stderr instead of stdout.
- consoletitle: Display progress in console window's titlebar.
- nopart: Do not use temporary .part files.
- updatetime: Use the Last-modified header to set output file timestamps.
- test: Download only first bytes to test the downloader.
- min_filesize: Skip files smaller than this size
- max_filesize: Skip files larger than this size
- xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
- external_downloader_args: A list of additional command-line arguments for the
- external downloader.
- hls_use_mpegts: Use the mpegts container for HLS videos.
- http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
- useful for bypassing bandwidth throttling imposed by
- a webserver (experimental)
-
- Subclasses of this one must re-define the real_download method.
- """
-
- _TEST_FILE_SIZE = 10241
- params = None
-
- def __init__(self, ydl, params):
- """Create a FileDownloader object with the given options."""
- self.ydl = ydl
- self._progress_hooks = []
- self.params = params
- self.add_progress_hook(self.report_progress)
-
- @staticmethod
- def format_seconds(seconds):
- (mins, secs) = divmod(seconds, 60)
- (hours, mins) = divmod(mins, 60)
- if hours > 99:
- return '--:--:--'
- if hours == 0:
- return '%02d:%02d' % (mins, secs)
- else:
- return '%02d:%02d:%02d' % (hours, mins, secs)
-
- @staticmethod
- def calc_percent(byte_counter, data_len):
- if data_len is None:
- return None
- return float(byte_counter) / float(data_len) * 100.0
-
- @staticmethod
- def format_percent(percent):
- if percent is None:
- return '---.-%'
- return '%6s' % ('%3.1f%%' % percent)
-
- @staticmethod
- def calc_eta(start, now, total, current):
- if total is None:
- return None
- if now is None:
- now = time.time()
- dif = now - start
- if current == 0 or dif < 0.001: # One millisecond
- return None
- rate = float(current) / dif
- return int((float(total) - float(current)) / rate)
-
- @staticmethod
- def format_eta(eta):
- if eta is None:
- return '--:--'
- return FileDownloader.format_seconds(eta)
-
- @staticmethod
- def calc_speed(start, now, bytes):
- dif = now - start
- if bytes == 0 or dif < 0.001: # One millisecond
- return None
- return float(bytes) / dif
-
- @staticmethod
- def format_speed(speed):
- if speed is None:
- return '%10s' % '---b/s'
- return '%10s' % ('%s/s' % format_bytes(speed))
-
- @staticmethod
- def format_retries(retries):
- return 'inf' if retries == float('inf') else '%.0f' % retries
-
- @staticmethod
- def best_block_size(elapsed_time, bytes):
- new_min = max(bytes / 2.0, 1.0)
- new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
- if elapsed_time < 0.001:
- return int(new_max)
- rate = bytes / elapsed_time
- if rate > new_max:
- return int(new_max)
- if rate < new_min:
- return int(new_min)
- return int(rate)
-
- @staticmethod
- def parse_bytes(bytestr):
- """Parse a string indicating a byte quantity into an integer."""
- matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
- if matchobj is None:
- return None
- number = float(matchobj.group(1))
- multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
- return int(round(number * multiplier))
-
- def to_screen(self, *args, **kargs):
- self.ydl.to_screen(*args, **kargs)
-
- def to_stderr(self, message):
- self.ydl.to_screen(message)
-
- def to_console_title(self, message):
- self.ydl.to_console_title(message)
-
- def trouble(self, *args, **kargs):
- self.ydl.trouble(*args, **kargs)
-
- def report_warning(self, *args, **kargs):
- self.ydl.report_warning(*args, **kargs)
-
- def report_error(self, *args, **kargs):
- self.ydl.report_error(*args, **kargs)
-
- def slow_down(self, start_time, now, byte_counter):
- """Sleep if the download speed is over the rate limit."""
- rate_limit = self.params.get('ratelimit')
- if rate_limit is None or byte_counter == 0:
- return
- if now is None:
- now = time.time()
- elapsed = now - start_time
- if elapsed <= 0.0:
- return
- speed = float(byte_counter) / elapsed
- if speed > rate_limit:
- time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
-
- def temp_name(self, filename):
- """Returns a temporary filename for the given filename."""
- if self.params.get('nopart', False) or filename == '-' or \
- (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
- return filename
- return filename + '.part'
-
- def undo_temp_name(self, filename):
- if filename.endswith('.part'):
- return filename[:-len('.part')]
- return filename
-
- def ytdl_filename(self, filename):
- return filename + '.ytdl'
-
- def try_rename(self, old_filename, new_filename):
- try:
- if old_filename == new_filename:
- return
- os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
- except (IOError, OSError) as err:
- self.report_error('unable to rename file: %s' % error_to_compat_str(err))
-
- def try_utime(self, filename, last_modified_hdr):
- """Try to set the last-modified time of the given file."""
- if last_modified_hdr is None:
- return
- if not os.path.isfile(encodeFilename(filename)):
- return
- timestr = last_modified_hdr
- if timestr is None:
- return
- filetime = timeconvert(timestr)
- if filetime is None:
- return filetime
- # Ignore obviously invalid dates
- if filetime == 0:
- return
- try:
- os.utime(filename, (time.time(), filetime))
- except Exception:
- pass
- return filetime
-
- def report_destination(self, filename):
- """Report destination filename."""
- self.to_screen('[download] Destination: ' + filename)
-
- def _report_progress_status(self, msg, is_last_line=False):
- fullmsg = '[download] ' + msg
- if self.params.get('progress_with_newline', False):
- self.to_screen(fullmsg)
- else:
- if compat_os_name == 'nt':
- prev_len = getattr(self, '_report_progress_prev_line_length',
- 0)
- if prev_len > len(fullmsg):
- fullmsg += ' ' * (prev_len - len(fullmsg))
- self._report_progress_prev_line_length = len(fullmsg)
- clear_line = '\r'
- else:
- clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
- self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
- self.to_console_title('youtube-dl ' + msg)
-
- def report_progress(self, s):
- if s['status'] == 'finished':
- if self.params.get('noprogress', False):
- self.to_screen('[download] Download completed')
- else:
- msg_template = '100%%'
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template += ' of %(_total_bytes_str)s'
- if s.get('elapsed') is not None:
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template += ' in %(_elapsed_str)s'
- self._report_progress_status(
- msg_template % s, is_last_line=True)
-
- if self.params.get('noprogress'):
- return
-
- if s['status'] != 'downloading':
- return
-
- if s.get('eta') is not None:
- s['_eta_str'] = self.format_eta(s['eta'])
- else:
- s['_eta_str'] = 'Unknown ETA'
-
- if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
- s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
- elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
- s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
- else:
- if s.get('downloaded_bytes') == 0:
- s['_percent_str'] = self.format_percent(0)
- else:
- s['_percent_str'] = 'Unknown %'
-
- if s.get('speed') is not None:
- s['_speed_str'] = self.format_speed(s['speed'])
- else:
- s['_speed_str'] = 'Unknown speed'
-
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
- elif s.get('total_bytes_estimate') is not None:
- s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
- msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
- else:
- if s.get('downloaded_bytes') is not None:
- s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
- if s.get('elapsed'):
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
- else:
- msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
- else:
- msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
-
- self._report_progress_status(msg_template % s)
-
- def report_resuming_byte(self, resume_len):
- """Report attempt to resume at given byte."""
- self.to_screen('[download] Resuming download at byte %s' % resume_len)
-
- def report_retry(self, err, count, retries):
- """Report retry in case of HTTP error 5xx"""
- self.to_screen(
- '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
- % (error_to_compat_str(err), count, self.format_retries(retries)))
-
- def report_file_already_downloaded(self, file_name):
- """Report file has already been fully downloaded."""
- try:
- self.to_screen('[download] %s has already been downloaded' % file_name)
- except UnicodeEncodeError:
- self.to_screen('[download] The file has already been downloaded')
-
- def report_unable_to_resume(self):
- """Report it was impossible to resume download."""
- self.to_screen('[download] Unable to resume')
-
- def download(self, filename, info_dict):
- """Download to a filename using the info from info_dict
- Return True on success and False otherwise
- """
-
- nooverwrites_and_exists = (
- self.params.get('nooverwrites', False) and
- os.path.exists(encodeFilename(filename))
- )
-
- if not hasattr(filename, 'write'):
- continuedl_and_exists = (
- self.params.get('continuedl', True) and
- os.path.isfile(encodeFilename(filename)) and
- not self.params.get('nopart', False)
- )
-
- # Check file already present
- if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
- self.report_file_already_downloaded(filename)
- self._hook_progress({
- 'filename': filename,
- 'status': 'finished',
- 'total_bytes': os.path.getsize(encodeFilename(filename)),
- })
- return True
-
- min_sleep_interval = self.params.get('sleep_interval')
- if min_sleep_interval:
- max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
- sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
- self.to_screen(
- '[download] Sleeping %s seconds...' % (
- int(sleep_interval) if sleep_interval.is_integer()
- else '%.2f' % sleep_interval))
- time.sleep(sleep_interval)
-
- return self.real_download(filename, info_dict)
-
- def real_download(self, filename, info_dict):
- """Real download process. Redefine in subclasses."""
- raise NotImplementedError('This method must be implemented by subclasses')
-
- def _hook_progress(self, status):
- for ph in self._progress_hooks:
- ph(status)
-
- def add_progress_hook(self, ph):
- # See YoutubeDl.py (search for progress_hooks) for a description of
- # this interface
- self._progress_hooks.append(ph)
-
- def _debug_cmd(self, args, exe=None):
- if not self.params.get('verbose', False):
- return
-
- str_args = [decodeArgument(a) for a in args]
-
- if exe is None:
- exe = os.path.basename(str_args[0])
-
- self.to_screen('[debug] %s command line: %s' % (
- exe, shell_quote(str_args)))
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
deleted file mode 100644
index eaa7adf..0000000
--- a/youtube_dl/downloader/dash.py
+++ /dev/null
@@ -1,80 +0,0 @@
-from __future__ import unicode_literals
-
-from .fragment import FragmentFD
-from ..compat import compat_urllib_error
-from ..utils import (
- DownloadError,
- urljoin,
-)
-
-
-class DashSegmentsFD(FragmentFD):
- """
- Download segments in a DASH manifest
- """
-
- FD_NAME = 'dashsegments'
-
- def real_download(self, filename, info_dict):
- fragment_base_url = info_dict.get('fragment_base_url')
- fragments = info_dict['fragments'][:1] if self.params.get(
- 'test', False) else info_dict['fragments']
-
- ctx = {
- 'filename': filename,
- 'total_frags': len(fragments),
- }
-
- self._prepare_and_start_frag_download(ctx)
-
- fragment_retries = self.params.get('fragment_retries', 0)
- skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
-
- frag_index = 0
- for i, fragment in enumerate(fragments):
- frag_index += 1
- if frag_index <= ctx['fragment_index']:
- continue
- # In DASH, the first segment contains necessary headers to
- # generate a valid MP4 file, so always abort for the first segment
- fatal = i == 0 or not skip_unavailable_fragments
- count = 0
- while count <= fragment_retries:
- try:
- fragment_url = fragment.get('url')
- if not fragment_url:
- assert fragment_base_url
- fragment_url = urljoin(fragment_base_url, fragment['path'])
- success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
- if not success:
- return False
- self._append_fragment(ctx, frag_content)
- break
- except compat_urllib_error.HTTPError as err:
- # YouTube may often return 404 HTTP error for a fragment causing the
- # whole download to fail. However if the same fragment is immediately
- # retried with the same request data this usually succeeds (1-2 attemps
- # is usually enough) thus allowing to download the whole file successfully.
- # To be future-proof we will retry all fragments that fail with any
- # HTTP error.
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- except DownloadError:
- # Don't retry fragment if error occurred during HTTP downloading
- # itself since it has own retry settings
- if not fatal:
- self.report_skip_fragment(frag_index)
- break
- raise
-
- if count > fragment_retries:
- if not fatal:
- self.report_skip_fragment(frag_index)
- continue
- self.report_error('giving up after %s fragment retries' % fragment_retries)
- return False
-
- self._finish_frag_download(ctx)
-
- return True
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
deleted file mode 100644
index 958d00a..0000000
--- a/youtube_dl/downloader/external.py
+++ /dev/null
@@ -1,354 +0,0 @@
-from __future__ import unicode_literals
-
-import os.path
-import re
-import subprocess
-import sys
-import time
-
-from .common import FileDownloader
-from ..compat import (
- compat_setenv,
- compat_str,
-)
-from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
-from ..utils import (
- cli_option,
- cli_valueless_option,
- cli_bool_option,
- cli_configuration_args,
- encodeFilename,
- encodeArgument,
- handle_youtubedl_headers,
- check_executable,
- is_outdated_version,
-)
-
-
-class ExternalFD(FileDownloader):
- def real_download(self, filename, info_dict):
- self.report_destination(filename)
- tmpfilename = self.temp_name(filename)
-
- try:
- started = time.time()
- retval = self._call_downloader(tmpfilename, info_dict)
- except KeyboardInterrupt:
- if not info_dict.get('is_live'):
- raise
- # Live stream downloading cancellation should be considered as
- # correct and expected termination thus all postprocessing
- # should take place
- retval = 0
- self.to_screen('[%s] Interrupted by user' % self.get_basename())
-
- if retval == 0:
- status = {
- 'filename': filename,
- 'status': 'finished',
- 'elapsed': time.time() - started,
- }
- if filename != '-':
- fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
- self.try_rename(tmpfilename, filename)
- status.update({
- 'downloaded_bytes': fsize,
- 'total_bytes': fsize,
- })
- self._hook_progress(status)
- return True
- else:
- self.to_stderr('\n')
- self.report_error('%s exited with code %d' % (
- self.get_basename(), retval))
- return False
-
- @classmethod
- def get_basename(cls):
- return cls.__name__[:-2].lower()
-
- @property
- def exe(self):
- return self.params.get('external_downloader')
-
- @classmethod
- def available(cls):
- return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
-
- @classmethod
- def supports(cls, info_dict):
- return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
-
- @classmethod
- def can_download(cls, info_dict):
- return cls.available() and cls.supports(info_dict)
-
- def _option(self, command_option, param):
- return cli_option(self.params, command_option, param)
-
- def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
- return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
-
- def _valueless_option(self, command_option, param, expected_value=True):
- return cli_valueless_option(self.params, command_option, param, expected_value)
-
- def _configuration_args(self, default=[]):
- return cli_configuration_args(self.params, 'external_downloader_args', default)
-
- def _call_downloader(self, tmpfilename, info_dict):
- """ Either overwrite this or implement _make_cmd """
- cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
-
- self._debug_cmd(cmd)
-
- p = subprocess.Popen(
- cmd, stderr=subprocess.PIPE)
- _, stderr = p.communicate()
- if p.returncode != 0:
- self.to_stderr(stderr.decode('utf-8', 'replace'))
- return p.returncode
-
-
-class CurlFD(ExternalFD):
- AVAILABLE_OPT = '-V'
-
- def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '--location', '-o', tmpfilename]
- for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
- cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
- cmd += self._valueless_option('--silent', 'noprogress')
- cmd += self._valueless_option('--verbose', 'verbose')
- cmd += self._option('--limit-rate', 'ratelimit')
- cmd += self._option('--retry', 'retries')
- cmd += self._option('--max-filesize', 'max_filesize')
- cmd += self._option('--interface', 'source_address')
- cmd += self._option('--proxy', 'proxy')
- cmd += self._valueless_option('--insecure', 'nocheckcertificate')
- cmd += self._configuration_args()
- cmd += ['--', info_dict['url']]
- return cmd
-
- def _call_downloader(self, tmpfilename, info_dict):
- cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
-
- self._debug_cmd(cmd)
-
- # curl writes the progress to stderr so don't capture it.
- p = subprocess.Popen(cmd)
- p.communicate()
- return p.returncode
-
-
-class AxelFD(ExternalFD):
- AVAILABLE_OPT = '-V'
-
- def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-o', tmpfilename]
- for key, val in info_dict['http_headers'].items():
- cmd += ['-H', '%s: %s' % (key, val)]
- cmd += self._configuration_args()
- cmd += ['--', info_dict['url']]
- return cmd
-
-
-class WgetFD(ExternalFD):
- AVAILABLE_OPT = '--version'
-
- def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
- for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
- cmd += self._option('--bind-address', 'source_address')
- cmd += self._option('--proxy', 'proxy')
- cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
- cmd += self._configuration_args()
- cmd += ['--', info_dict['url']]
- return cmd
-
-
-class Aria2cFD(ExternalFD):
- AVAILABLE_OPT = '-v'
-
- def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-c']
- cmd += self._configuration_args([
- '--min-split-size', '1M', '--max-connection-per-server', '4'])
- dn = os.path.dirname(tmpfilename)
- if dn:
- cmd += ['--dir', dn]
- cmd += ['--out', os.path.basename(tmpfilename)]
- for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
- cmd += self._option('--interface', 'source_address')
- cmd += self._option('--all-proxy', 'proxy')
- cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
- cmd += ['--', info_dict['url']]
- return cmd
-
-
-class HttpieFD(ExternalFD):
- @classmethod
- def available(cls):
- return check_executable('http', ['--version'])
-
- def _make_cmd(self, tmpfilename, info_dict):
- cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
- for key, val in info_dict['http_headers'].items():
- cmd += ['%s:%s' % (key, val)]
- return cmd
-
-
-class FFmpegFD(ExternalFD):
- @classmethod
- def supports(cls, info_dict):
- return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
-
- @classmethod
- def available(cls):
- return FFmpegPostProcessor().available
-
- def _call_downloader(self, tmpfilename, info_dict):
- url = info_dict['url']
- ffpp = FFmpegPostProcessor(downloader=self)
- if not ffpp.available:
- self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
- return False
- ffpp.check_version()
-
- args = [ffpp.executable, '-y']
-
- for log_level in ('quiet', 'verbose'):
- if self.params.get(log_level, False):
- args += ['-loglevel', log_level]
- break
-
- seekable = info_dict.get('_seekable')
- if seekable is not None:
- # setting -seekable prevents ffmpeg from guessing if the server
- # supports seeking(by adding the header `Range: bytes=0-`), which
- # can cause problems in some cases
- # https://github.com/rg3/youtube-dl/issues/11800#issuecomment-275037127
- # http://trac.ffmpeg.org/ticket/6125#comment:10
- args += ['-seekable', '1' if seekable else '0']
-
- args += self._configuration_args()
-
- # start_time = info_dict.get('start_time') or 0
- # if start_time:
- # args += ['-ss', compat_str(start_time)]
- # end_time = info_dict.get('end_time')
- # if end_time:
- # args += ['-t', compat_str(end_time - start_time)]
-
- if info_dict['http_headers'] and re.match(r'^https?://', url):
- # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
- # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
- headers = handle_youtubedl_headers(info_dict['http_headers'])
- args += [
- '-headers',
- ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
-
- env = None
- proxy = self.params.get('proxy')
- if proxy:
- if not re.match(r'^[\da-zA-Z]+://', proxy):
- proxy = 'http://%s' % proxy
-
- if proxy.startswith('socks'):
- self.report_warning(
- '%s does not support SOCKS proxies. Downloading is likely to fail. '
- 'Consider adding --hls-prefer-native to your command.' % self.get_basename())
-
- # Since December 2015 ffmpeg supports -http_proxy option (see
- # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
- # We could switch to the following code if we are able to detect version properly
- # args += ['-http_proxy', proxy]
- env = os.environ.copy()
- compat_setenv('HTTP_PROXY', proxy, env=env)
- compat_setenv('http_proxy', proxy, env=env)
-
- protocol = info_dict.get('protocol')
-
- if protocol == 'rtmp':
- player_url = info_dict.get('player_url')
- page_url = info_dict.get('page_url')
- app = info_dict.get('app')
- play_path = info_dict.get('play_path')
- tc_url = info_dict.get('tc_url')
- flash_version = info_dict.get('flash_version')
- live = info_dict.get('rtmp_live', False)
- if player_url is not None:
- args += ['-rtmp_swfverify', player_url]
- if page_url is not None:
- args += ['-rtmp_pageurl', page_url]
- if app is not None:
- args += ['-rtmp_app', app]
- if play_path is not None:
- args += ['-rtmp_playpath', play_path]
- if tc_url is not None:
- args += ['-rtmp_tcurl', tc_url]
- if flash_version is not None:
- args += ['-rtmp_flashver', flash_version]
- if live:
- args += ['-rtmp_live', 'live']
-
- args += ['-i', url, '-c', 'copy']
-
- if self.params.get('test', False):
- args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
-
- if protocol in ('m3u8', 'm3u8_native'):
- if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
- args += ['-f', 'mpegts']
- else:
- args += ['-f', 'mp4']
- if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
- args += ['-bsf:a', 'aac_adtstoasc']
- elif protocol == 'rtmp':
- args += ['-f', 'flv']
- else:
- args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
-
- args = [encodeArgument(opt) for opt in args]
- args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
-
- self._debug_cmd(args)
-
- proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
- try:
- retval = proc.wait()
- except KeyboardInterrupt:
- # subprocces.run would send the SIGKILL signal to ffmpeg and the
- # mp4 file couldn't be played, but if we ask ffmpeg to quit it
- # produces a file that is playable (this is mostly useful for live
- # streams). Note that Windows is not affected and produces playable
- # files (see https://github.com/rg3/youtube-dl/issues/8300).
- if sys.platform != 'win32':
- proc.communicate(b'q')
- raise
- return retval
-
-
-class AVconvFD(FFmpegFD):
- pass
-
-
-_BY_NAME = dict(
- (klass.get_basename(), klass)
- for name, klass in globals().items()
- if name.endswith('FD') and name != 'ExternalFD'
-)
-
-
-def list_external_downloaders():
- return sorted(_BY_NAME.keys())
-
-
-def get_external_downloader(external_downloader):
- """ Given the name of the executable, see whether we support the given
- downloader . """
- # Drop .exe extension on Windows
- bn = os.path.splitext(os.path.basename(external_downloader))[0]
- return _BY_NAME[bn]
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
deleted file mode 100644
index 15e71be..0000000
--- a/youtube_dl/downloader/f4m.py
+++ /dev/null
@@ -1,438 +0,0 @@
-from __future__ import division, unicode_literals
-
-import io
-import itertools
-import time
-
-from .fragment import FragmentFD
-from ..compat import (
- compat_b64decode,
- compat_etree_fromstring,
- compat_urlparse,
- compat_urllib_error,
- compat_urllib_parse_urlparse,
- compat_struct_pack,
- compat_struct_unpack,
-)
-from ..utils import (
- fix_xml_ampersands,
- xpath_text,
-)
-
-
class DataTruncatedError(Exception):
    """Signals that fewer bytes were available than a read required."""
-
-
class FlvReader(io.BytesIO):
    """
    In-memory reader for the box structures used by Flv/F4V data.
    The file format is documented in https://www.adobe.com/devnet/f4v.html
    """

    def read_bytes(self, n):
        """Read exactly n bytes; raise DataTruncatedError if fewer remain."""
        chunk = self.read(n)
        got = len(chunk)
        if got < n:
            raise DataTruncatedError(
                'FlvReader error: need %d bytes while only %d bytes got' % (
                    n, got))
        return chunk

    # Utility functions for reading numbers and strings
    def read_unsigned_long_long(self):
        """Read 8 bytes and decode them with the '!Q' struct format."""
        (value,) = compat_struct_unpack('!Q', self.read_bytes(8))
        return value

    def read_unsigned_int(self):
        """Read 4 bytes and decode them with the '!I' struct format."""
        (value,) = compat_struct_unpack('!I', self.read_bytes(4))
        return value

    def read_unsigned_char(self):
        """Read 1 byte and decode it with the '!B' struct format."""
        (value,) = compat_struct_unpack('!B', self.read_bytes(1))
        return value

    def read_string(self):
        """Read bytes up to a NUL byte; the NUL is consumed but not returned.

        Raises DataTruncatedError (via read_bytes) if the data ends before a
        NUL byte is found.
        """
        # iter() keeps calling read_bytes(1) until it yields the NUL sentinel.
        return b''.join(iter(lambda: self.read_bytes(1), b'\x00'))

    def read_box_info(self):
        """
        Read a box and return the info as a tuple: (box_size, box_type, box_data)

        A 32-bit size field equal to 1 means the real size follows the box
        type as a 64-bit value, making the header 16 bytes long instead of 8.
        """
        declared_size = self.read_unsigned_int()
        box_type = self.read_bytes(4)
        if declared_size == 1:
            box_size = self.read_unsigned_long_long()
            header_len = 16
        else:
            box_size = declared_size
            header_len = 8
        payload = self.read_bytes(box_size - header_len)
        return box_size, box_type, payload

    def read_asrt(self):
        """Parse an 'asrt' payload.

        Returns {'segment_run': [(first_segment, fragments_per_segment), ...]}.
        """
        self.read_unsigned_char()  # version
        self.read_bytes(3)  # flags
        # QualityEntryCount: the entries are read only to skip over them
        for _ in range(self.read_unsigned_char()):
            self.read_string()

        run_count = self.read_unsigned_int()
        runs = []
        for _ in range(run_count):
            first_segment = self.read_unsigned_int()
            fragments_per_segment = self.read_unsigned_int()
            runs.append((first_segment, fragments_per_segment))
        return {'segment_run': runs}

    def read_afrt(self):
        """Parse an 'afrt' payload.

        Returns {'fragments': [...]} where each entry is a dict with the keys
        'first', 'ts', 'duration' and 'discontinuity_indicator'.
        """
        self.read_unsigned_char()  # version
        self.read_bytes(3)  # flags
        self.read_unsigned_int()  # time scale

        # QualitySegmentUrlModifiers: read only to skip over them
        for _ in range(self.read_unsigned_char()):
            self.read_string()

        entry_count = self.read_unsigned_int()
        entries = []
        for _ in range(entry_count):
            first = self.read_unsigned_int()
            first_ts = self.read_unsigned_long_long()
            duration = self.read_unsigned_int()
            # The discontinuity byte is only present for zero-duration entries
            discontinuity = (
                self.read_unsigned_char() if duration == 0 else None)
            entries.append({
                'first': first,
                'ts': first_ts,
                'duration': duration,
                'discontinuity_indicator': discontinuity,
            })
        return {'fragments': entries}

    def read_abst(self):
        """Parse an 'abst' payload.

        Returns a dict with 'segments' (one read_asrt() result per asrt box),
        'fragments' (one read_afrt() result per afrt box) and 'live' (bool).
        """
        self.read_unsigned_char()  # version
        self.read_bytes(3)  # flags

        self.read_unsigned_int()  # BootstrapinfoVersion
        # Profile,Live,Update,Reserved
        flag_byte = self.read_unsigned_char()
        is_live = bool(flag_byte & 0x20)
        self.read_unsigned_int()  # time scale
        self.read_unsigned_long_long()  # CurrentMediaTime
        self.read_unsigned_long_long()  # SmpteTimeCodeOffset

        self.read_string()  # MovieIdentifier
        # ServerEntryTable
        for _ in range(self.read_unsigned_char()):
            self.read_string()
        # QualityEntryTable
        for _ in range(self.read_unsigned_char()):
            self.read_string()
        self.read_string()  # DrmData
        self.read_string()  # MetaData

        segment_tables = []
        for _ in range(self.read_unsigned_char()):
            _size, box_type, box_data = self.read_box_info()
            assert box_type == b'asrt'
            segment_tables.append(FlvReader(box_data).read_asrt())

        fragment_tables = []
        for _ in range(self.read_unsigned_char()):
            _size, box_type, box_data = self.read_box_info()
            assert box_type == b'afrt'
            fragment_tables.append(FlvReader(box_data).read_afrt())

        return {
            'segments': segment_tables,
            'fragments': fragment_tables,
            'live': is_live,
        }

    def read_bootstrap_info(self):
        """Read one box, require it to be 'abst', and return its parsed form."""
        _total_size, box_type, box_data = self.read_box_info()
        assert box_type == b'abst'
        return FlvReader(box_data).read_abst()
-
-
def read_bootstrap_info(bootstrap_bytes):
    """Wrap bootstrap_bytes in a FlvReader and return its read_bootstrap_info() result."""
    reader = FlvReader(bootstrap_bytes)
    return reader.read_bootstrap_info()
-
-
def build_fragments_list(boot_info):
    """ Return a list of (segment, fragment) for each fragment in the video

    Only the first segment run table and the first fragment run table of
    boot_info are used.  Fragment numbers count up from the 'first' value of
    the first fragment run entry.  For live streams only the last two pairs
    are returned.
    """
    run_table = boot_info['segments'][0]
    run_entries = boot_info['fragments'][0]['fragments']
    frag_numbers = itertools.count(run_entries[0]['first'])

    pairs = []
    for segment, frag_total in run_table['segment_run']:
        # In some live HDS streams (for example Rai), the fragment count is
        # abnormal and causing out-of-memory errors. It's OK to change the
        # number of fragments for live streams as they are updated periodically
        if frag_total == 4294967295 and boot_info['live']:
            frag_total = 2
        pairs.extend((segment, next(frag_numbers)) for _ in range(frag_total))

    return pairs[-2:] if boot_info['live'] else pairs
-
-
def write_unsigned_int(stream, val):
    """Write val to stream packed with the '!I' format (4 bytes)."""
    packed = compat_struct_pack('!I', val)
    stream.write(packed)
-
-
def write_unsigned_int_24(stream, val):
    """Write val to stream as a 3-byte value.

    val is packed with the '!I' format and the first packed byte is dropped.
    """
    four_bytes = compat_struct_pack('!I', val)
    stream.write(four_bytes[1:])
-
-
def write_flv_header(stream):
    """Writes the FLV header to stream"""
    # Four separate writes, 13 bytes in total.
    # NOTE(review): field meanings below are presumed from the FLV layout;
    # confirm against the spec before relying on them.
    header_chunks = (
        b'FLV\x01',            # signature and version
        b'\x05',               # flags byte
        b'\x00\x00\x00\x09',   # 9: presumably the length of the header itself
        b'\x00\x00\x00\x00',   # presumably the size of the (absent) previous tag
    )
    for chunk in header_chunks:
        stream.write(chunk)
-
-
def write_metadata_tag(stream, metadata):
    """Writes optional metadata tag to stream

    Nothing is written when metadata is empty or None.
    """
    if not metadata:
        return

    SCRIPT_TAG = b'\x12'
    # Bytes written before the payload: 1 (tag) + 3 (length) + 7 (zeros)
    FLV_TAG_HEADER_LEN = 11
    payload_len = len(metadata)

    stream.write(SCRIPT_TAG)
    write_unsigned_int_24(stream, payload_len)
    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
    stream.write(metadata)
    # Trailing 4-byte total of header plus payload
    write_unsigned_int(stream, FLV_TAG_HEADER_LEN + payload_len)
-
-
def remove_encrypted_media(media):
    """Return a list of the elements of media whose attrib has neither
    'drmAdditionalHeaderId' nor 'drmAdditionalHeaderSetId'."""
    drm_attributes = ('drmAdditionalHeaderId', 'drmAdditionalHeaderSetId')
    return [
        element for element in media
        if not any(name in element.attrib for name in drm_attributes)]
-
-
-def _add_ns(prop, ver=1):
- return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
-
-
def get_base_url(manifest):
    """Look up the manifest's baseURL (f4m namespace version 1 or 2).

    Returns the text with surrounding whitespace stripped, or the falsy
    value xpath_text() produced when there is no non-empty baseURL.
    """
    candidates = [_add_ns('baseURL'), _add_ns('baseURL', 2)]
    base_url = xpath_text(manifest, candidates, 'base URL', default=None)
    return base_url.strip() if base_url else base_url
-
-
class F4mFD(FragmentFD):
    """
    A downloader for f4m manifests or AdobeHDS.
    """

    FD_NAME = 'f4m'

    def _get_unencrypted_media(self, doc):
        """Return the manifest's media nodes that reference no DRM header.

        Problems (no media at all, a DRM header element without an id, or
        nothing left after filtering) are reported via self.report_error().
        """
        media = doc.findall(_add_ns('media'))
        if not media:
            self.report_error('No media found')
        for e in (doc.findall(_add_ns('drmAdditionalHeader')) +
                  doc.findall(_add_ns('drmAdditionalHeaderSet'))):
            # If id attribute is missing it's valid for all media nodes
            # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute
            if 'id' not in e.attrib:
                self.report_error('Missing ID in f4m DRM')
        media = remove_encrypted_media(media)
        if not media:
            self.report_error('Unsupported DRM')
        return media

    def _get_bootstrap_from_url(self, bootstrap_url):
        """Fetch bootstrap_url and return the parsed bootstrap info."""
        bootstrap = self.ydl.urlopen(bootstrap_url).read()
        return read_bootstrap_info(bootstrap)

    def _update_live_fragments(self, bootstrap_url, latest_fragment):
        """Poll the bootstrap until it lists fragments newer than latest_fragment.

        Makes up to 30 attempts, sleeping 5 seconds after each attempt that
        yields nothing new.  Returns the list of new (segment, fragment)
        pairs; if it is still empty, an error is reported first.
        """
        fragments_list = []
        retries = 30
        while (not fragments_list) and (retries > 0):
            boot_info = self._get_bootstrap_from_url(bootstrap_url)
            fragments_list = build_fragments_list(boot_info)
            fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
            if not fragments_list:
                # Retry after a while
                time.sleep(5.0)
                retries -= 1

        if not fragments_list:
            self.report_error('Failed to update fragments')

        return fragments_list

    def _parse_bootstrap_node(self, node, base_url):
        """Return (boot_info, bootstrap_url) for a bootstrapInfo node.

        bootstrap_url is None when the bootstrap data was taken from the
        node's inline base64 text instead of being downloaded.
        """
        # Sometimes non empty inline bootstrap info can be specified along
        # with bootstrap url attribute (e.g. dummy inline bootstrap info
        # contains whitespace characters in [1]). We will prefer bootstrap
        # url over inline bootstrap info when present.
        # 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
        bootstrap_url = node.get('url')
        if bootstrap_url:
            bootstrap_url = compat_urlparse.urljoin(
                base_url, bootstrap_url)
            boot_info = self._get_bootstrap_from_url(bootstrap_url)
        else:
            bootstrap_url = None
            bootstrap = compat_b64decode(node.text)
            boot_info = read_bootstrap_info(bootstrap)
        return boot_info, bootstrap_url

    def _select_media(self, formats, requested_bitrate):
        """Choose the media node to download from (bitrate, node) pairs.

        The first node whose bitrate equals requested_bitrate wins.  When no
        bitrate was requested, only one format exists, or nothing matches,
        the highest-bitrate node is used; a mismatch is reported as a warning
        instead of failing with an IndexError.
        """
        if requested_bitrate is not None and len(formats) != 1:
            for bitrate, node in formats:
                if bitrate == requested_bitrate:
                    return node
            self.report_warning(
                'Requested bitrate %s not found in f4m manifest, '
                'falling back to the best available format' % requested_bitrate)
        # get the best format
        return sorted(formats, key=lambda f: f[0])[-1][1]

    def real_download(self, filename, info_dict):
        """Download the stream described by the f4m manifest at info_dict['url'].

        Fragments are fetched one by one and their 'mdat' payloads appended
        to the destination; for live streams the fragment list is refreshed
        from the bootstrap URL whenever it runs out.  Returns True on
        success and False if a fragment download reports failure.
        """
        man_url = info_dict['url']
        requested_bitrate = info_dict.get('tbr')
        self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        man_url = urlh.geturl()
        # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
        # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244
        # and https://github.com/rg3/youtube-dl/issues/7823)
        manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip()

        doc = compat_etree_fromstring(manifest)
        formats = [(int(f.attrib.get('bitrate', -1)), f)
                   for f in self._get_unencrypted_media(doc)]
        media = self._select_media(formats, requested_bitrate)

        # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
        man_base_url = get_base_url(doc) or man_url

        base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
        bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
        boot_info, bootstrap_url = self._parse_bootstrap_node(
            bootstrap_node, man_base_url)
        live = boot_info['live']
        metadata_node = media.find(_add_ns('metadata'))
        if metadata_node is not None:
            metadata = compat_b64decode(metadata_node.text)
        else:
            metadata = None

        fragments_list = build_fragments_list(boot_info)
        test = self.params.get('test', False)
        if test:
            # We only download the first fragment
            fragments_list = fragments_list[:1]
        total_frags = len(fragments_list)
        # For some akamai manifests we'll need to add a query to the fragment url
        akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))

        ctx = {
            'filename': filename,
            'total_frags': total_frags,
            'live': live,
        }

        self._prepare_frag_download(ctx)

        dest_stream = ctx['dest_stream']

        # Only a fresh (non-resumed) download needs the FLV preamble
        if ctx['complete_frags_downloaded_bytes'] == 0:
            write_flv_header(dest_stream)
            if not live:
                write_metadata_tag(dest_stream, metadata)

        base_url_parsed = compat_urllib_parse_urlparse(base_url)

        self._start_frag_download(ctx)

        frag_index = 0
        while fragments_list:
            seg_i, frag_i = fragments_list.pop(0)
            frag_index += 1
            # Skip fragments already accounted for by a resumed download
            if frag_index <= ctx['fragment_index']:
                continue
            name = 'Seg%d-Frag%d' % (seg_i, frag_i)
            query = []
            if base_url_parsed.query:
                query.append(base_url_parsed.query)
            if akamai_pv:
                query.append(akamai_pv.strip(';'))
            if info_dict.get('extra_param_to_segment_url'):
                query.append(info_dict['extra_param_to_segment_url'])
            url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
            try:
                success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
                if not success:
                    return False
                reader = FlvReader(down_data)
                # Walk the fragment's boxes until the 'mdat' payload is found
                while True:
                    try:
                        _, box_type, box_data = reader.read_box_info()
                    except DataTruncatedError:
                        if test:
                            # In tests, segments may be truncated, and thus
                            # FlvReader may not be able to parse the whole
                            # chunk. If so, write the segment as is
                            # See https://github.com/rg3/youtube-dl/issues/9214
                            dest_stream.write(down_data)
                            break
                        raise
                    if box_type == b'mdat':
                        self._append_fragment(ctx, box_data)
                        break
            except (compat_urllib_error.HTTPError, ) as err:
                if live and (err.code == 404 or err.code == 410):
                    # We didn't keep up with the live window. Continue
                    # with the next available fragment.
                    msg = 'Fragment %d unavailable' % frag_i
                    self.report_warning(msg)
                    fragments_list = []
                else:
                    raise

            if not fragments_list and not test and live and bootstrap_url:
                fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
                total_frags += len(fragments_list)
                if fragments_list and (fragments_list[0][1] > frag_i + 1):
                    msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
                    self.report_warning(msg)

        self._finish_frag_download(ctx)

        return True
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
deleted file mode 100644
index 917f6dc..0000000
--- a/youtube_dl/downloader/fragment.py
+++ /dev/null
@@ -1,268 +0,0 @@
-from __future__ import division, unicode_literals
-
-import os
-import time
-import json
-
-from .common import FileDownloader
-from .http import HttpFD
-from ..utils import (
- error_to_compat_str,
- encodeFilename,
- sanitize_open,
- sanitized_Request,
-)
-
-
class HttpQuietDownloader(HttpFD):
    """HttpFD variant whose to_screen() discards every message."""

    def to_screen(self, *args, **kargs):
        # Deliberately a no-op: accepts any arguments and prints nothing.
        return None
-
-
class FragmentFD(FileDownloader):
    """
    A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).

    Available options:

    fragment_retries:   Number of times to retry a fragment for HTTP error (DASH
                        and hlsnative only)
    skip_unavailable_fragments:
                        Skip unavailable fragments (DASH and hlsnative only)
    keep_fragments:     Keep downloaded fragments on disk after downloading is
                        finished

    For each incomplete fragment download youtube-dl keeps on disk a special
    bookkeeping file with download state and metadata (in future such files will
    be used for any incomplete download handled by youtube-dl). This file is
    used to properly handle resuming, check download file consistency and detect
    potential errors. The file has a .ytdl extension and represents a standard
    JSON file of the following format:

    extractor:
        Dictionary of extractor related data. TBD.

    downloader:
        Dictionary of downloader related data. May contain following data:
            current_fragment:
                Dictionary with current (being downloaded) fragment data:
                index:  0-based index of current fragment among all fragments
            fragment_count:
                Total count of fragments

    This feature is experimental and file format may change in future.
    """

    def report_retry_fragment(self, err, frag_index, count, retries):
        """Print a message announcing retry number count for a fragment."""
        self.to_screen(
            '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
            % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))

    def report_skip_fragment(self, frag_index):
        """Print a message announcing that a fragment is being skipped."""
        self.to_screen('[download] Skipping fragment %d...' % frag_index)

    def _prepare_url(self, info_dict, url):
        """Return url wrapped in a request carrying info_dict's http_headers,
        or url unchanged when there are no headers."""
        headers = info_dict.get('http_headers')
        return sanitized_Request(url, None, headers) if headers else url

    def _prepare_and_start_frag_download(self, ctx):
        """Run _prepare_frag_download() followed by _start_frag_download()."""
        self._prepare_frag_download(ctx)
        self._start_frag_download(ctx)

    @staticmethod
    def __do_ytdl_file(ctx):
        """Tell whether a .ytdl bookkeeping file is used for this download:
        not for live streams and not when the temporary name is '-'."""
        return not ctx['live'] and not ctx['tmpfilename'] == '-'

    def _read_ytdl_file(self, ctx):
        """Load the saved fragment index from the .ytdl file into ctx.

        Any failure to read or parse the file sets ctx['ytdl_corrupt'].
        """
        assert 'ytdl_corrupt' not in ctx
        stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
        try:
            ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
        except Exception:
            ctx['ytdl_corrupt'] = True
        finally:
            stream.close()

    def _write_ytdl_file(self, ctx):
        """Persist ctx's current fragment index (and fragment count, if any)
        to the .ytdl file."""
        frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
        # Close the handle even if building or writing the state fails
        try:
            downloader = {
                'current_fragment': {
                    'index': ctx['fragment_index'],
                },
            }
            if ctx.get('fragment_count') is not None:
                downloader['fragment_count'] = ctx['fragment_count']
            frag_index_stream.write(json.dumps({'downloader': downloader}))
        finally:
            frag_index_stream.close()

    def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
        """Download one fragment to its own file and return (success, content).

        headers, when non-empty, replaces info_dict's http_headers for this
        request.  On failure (False, None) is returned.  On success the
        sanitized fragment file name is stored in
        ctx['fragment_filename_sanitized'] for _append_fragment().
        """
        fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
        success = ctx['dl'].download(fragment_filename, {
            'url': frag_url,
            'http_headers': headers or info_dict.get('http_headers'),
        })
        if not success:
            return False, None
        down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
        ctx['fragment_filename_sanitized'] = frag_sanitized
        # Close the handle even if reading fails
        try:
            frag_content = down.read()
        finally:
            down.close()
        return True, frag_content

    def _append_fragment(self, ctx, frag_content):
        """Append frag_content to the destination stream.

        Regardless of whether the write succeeds, the .ytdl file is updated
        (when in use) and the fragment file is removed unless the
        'keep_fragments' option is set.
        """
        try:
            ctx['dest_stream'].write(frag_content)
            ctx['dest_stream'].flush()
        finally:
            if self.__do_ytdl_file(ctx):
                self._write_ytdl_file(ctx)
            if not self.params.get('keep_fragments', False):
                os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
            del ctx['fragment_filename_sanitized']

    def _prepare_frag_download(self, ctx):
        """Set up ctx for a fragmented download.

        Announces the fragment count, creates the quiet per-fragment
        downloader, works out the resume state from the temporary file and
        the .ytdl file, opens the destination stream, and stores 'dl',
        'dest_stream', 'tmpfilename', 'fragment_index' and
        'complete_frags_downloaded_bytes' in ctx.
        """
        if 'live' not in ctx:
            ctx['live'] = False
        if not ctx['live']:
            total_frags_str = '%d' % ctx['total_frags']
            ad_frags = ctx.get('ad_frags', 0)
            if ad_frags:
                total_frags_str += ' (not including %d ad)' % ad_frags
        else:
            total_frags_str = 'unknown (live)'
        self.to_screen(
            '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
        self.report_destination(ctx['filename'])
        dl = HttpQuietDownloader(
            self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'ratelimit': self.params.get('ratelimit'),
                'retries': self.params.get('retries', 0),
                'nopart': self.params.get('nopart', False),
                'test': self.params.get('test', False),
            }
        )
        tmpfilename = self.temp_name(ctx['filename'])
        open_mode = 'wb'
        resume_len = 0

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            open_mode = 'ab'
            resume_len = os.path.getsize(encodeFilename(tmpfilename))

        # Should be initialized before ytdl file check
        ctx.update({
            'tmpfilename': tmpfilename,
            'fragment_index': 0,
        })

        if self.__do_ytdl_file(ctx):
            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
                self._read_ytdl_file(ctx)
                is_corrupt = ctx.get('ytdl_corrupt') is True
                is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
                if is_corrupt or is_inconsistent:
                    message = (
                        '.ytdl file is corrupt' if is_corrupt else
                        'Inconsistent state of incomplete fragment download')
                    self.report_warning(
                        '%s. Restarting from the beginning...' % message)
                    ctx['fragment_index'] = resume_len = 0
                    # A restart must truncate: appending would put the new
                    # download after whatever stale partial data exists.
                    open_mode = 'wb'
                    if 'ytdl_corrupt' in ctx:
                        del ctx['ytdl_corrupt']
                    self._write_ytdl_file(ctx)
            else:
                self._write_ytdl_file(ctx)
                assert ctx['fragment_index'] == 0

        dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)

        ctx.update({
            'dl': dl,
            'dest_stream': dest_stream,
            'tmpfilename': tmpfilename,
            # Total complete fragments downloaded so far in bytes
            'complete_frags_downloaded_bytes': resume_len,
        })

    def _start_frag_download(self, ctx):
        """Install the progress hook that aggregates per-fragment progress
        into whole-download progress, and return the start timestamp."""
        total_frags = ctx['total_frags']
        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'status': 'downloading',
            'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
            'fragment_index': ctx['fragment_index'],
            'fragment_count': total_frags,
            'filename': ctx['filename'],
            'tmpfilename': ctx['tmpfilename'],
        }

        start = time.time()
        ctx.update({
            'started': start,
            # Amount of fragment's bytes downloaded by the time of the previous
            # frag progress hook invocation
            'prev_frag_downloaded_bytes': 0,
        })

        def frag_progress_hook(s):
            if s['status'] not in ('downloading', 'finished'):
                return

            time_now = time.time()
            state['elapsed'] = time_now - start
            frag_total_bytes = s.get('total_bytes') or 0
            if not ctx['live']:
                # Extrapolate the total size from the average fragment size so far
                estimated_size = (
                    (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) /
                    (state['fragment_index'] + 1) * total_frags)
                state['total_bytes_estimate'] = estimated_size

            if s['status'] == 'finished':
                state['fragment_index'] += 1
                ctx['fragment_index'] = state['fragment_index']
                state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
                ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
                ctx['prev_frag_downloaded_bytes'] = 0
            else:
                frag_downloaded_bytes = s['downloaded_bytes']
                state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
                if not ctx['live']:
                    state['eta'] = self.calc_eta(
                        start, time_now, estimated_size,
                        state['downloaded_bytes'])
                state['speed'] = s.get('speed') or ctx.get('speed')
                ctx['speed'] = state['speed']
                ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
            self._hook_progress(state)

        ctx['dl'].add_progress_hook(frag_progress_hook)

        return start

    def _finish_frag_download(self, ctx):
        """Close the destination, remove the .ytdl file, rename the temporary
        file to its final name and report the 'finished' status."""
        ctx['dest_stream'].close()
        if self.__do_ytdl_file(ctx):
            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
            if os.path.isfile(ytdl_filename):
                os.remove(ytdl_filename)
        elapsed = time.time() - ctx['started']

        if ctx['tmpfilename'] == '-':
            downloaded_bytes = ctx['complete_frags_downloaded_bytes']
        else:
            self.try_rename(ctx['tmpfilename'], ctx['filename'])
            downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))

        self._hook_progress({
            'downloaded_bytes': downloaded_bytes,
            'total_bytes': downloaded_bytes,
            'filename': ctx['filename'],
            'status': 'finished',
            'elapsed': elapsed,
        })
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
deleted file mode 100644
index fd30452..0000000
--- a/youtube_dl/downloader/hls.py
+++ /dev/null
@@ -1,204 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import binascii
-try:
- from Crypto.Cipher import AES
- can_decrypt_frag = True
-except ImportError:
- can_decrypt_frag = False
-
-from .fragment import FragmentFD
-from .external import FFmpegFD
-
-from ..compat import (
- compat_urllib_error,
- compat_urlparse,
- compat_struct_pack,
-)
-from ..utils import (
- parse_m3u8_attributes,
- update_url_query,
-)
-
-
class HlsFD(FragmentFD):
    """ A limited implementation that does not require ffmpeg """

    FD_NAME = 'hlsnative'

    @staticmethod
    def can_download(manifest, info_dict):
        """Tell whether this downloader can handle the given m3u8 manifest.

        Returns False for encryption methods other than NONE/AES-128, for
        AES-128 when no decryption library could be imported, for AES-128
        combined with byte ranges, and for streams flagged 'is_live'.
        """
        UNSUPPORTED_FEATURES = (
            r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1]
            # r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2]

            # Live streams heuristic does not always work (e.g. geo restricted to Germany
            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3]

            # This heuristic also is not correct since segments may not be appended as well.
            # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite
            # no segments will definitely be appended to the end of the playlist.
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
            #                                 # event media playlists [4]

            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
        )
        check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
        is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
        check_results.append(can_decrypt_frag or not is_aes128_enc)
        check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
        check_results.append(not info_dict.get('is_live'))
        return all(check_results)

    def real_download(self, filename, info_dict):
        """Download the stream described by the m3u8 playlist at info_dict['url'].

        Playlists that can_download() rejects are handed to FFmpegFD (or
        reported as an error when 'extra_param_to_segment_url' is set).
        Otherwise every non-ad media line is fetched, decrypted if an
        AES-128 key is active, and appended to the destination.  Returns
        True on success and False on failure.
        """
        man_url = info_dict['url']
        self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)

        urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
        man_url = urlh.geturl()
        s = urlh.read().decode('utf-8', 'ignore')

        if not self.can_download(s, info_dict):
            if info_dict.get('extra_param_to_segment_url'):
                self.report_error('pycrypto not found. Please install it.')
                return False
            self.report_warning(
                'hlsnative has detected features it does not support, '
                'extraction will be delegated to ffmpeg')
            fd = FFmpegFD(self.ydl, self.params)
            for ph in self._progress_hooks:
                fd.add_progress_hook(ph)
            return fd.real_download(filename, info_dict)

        def is_ad_fragment(s):
            return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or
                    s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))

        # First pass: count media and ad fragments for progress reporting
        media_frags = 0
        ad_frags = 0
        ad_frag_next = False
        for line in s.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith('#'):
                if is_ad_fragment(line):
                    ad_frags += 1
                    ad_frag_next = True
                continue
            if ad_frag_next:
                ad_frag_next = False
                continue
            media_frags += 1

        ctx = {
            'filename': filename,
            'total_frags': media_frags,
            'ad_frags': ad_frags,
        }

        self._prepare_and_start_frag_download(ctx)

        fragment_retries = self.params.get('fragment_retries', 0)
        skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
        test = self.params.get('test', False)

        extra_query = None
        extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
        if extra_param_to_segment_url:
            extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
        media_sequence = 0
        decrypt_info = {'METHOD': 'NONE'}
        # 'start' is the first byte of the sub-range, 'end' is one past its last byte
        byte_range = {}
        frag_index = 0
        ad_frag_next = False
        # Second pass: download the fragments, tracking tag state as we go
        for line in s.splitlines():
            line = line.strip()
            if line:
                if not line.startswith('#'):
                    if ad_frag_next:
                        ad_frag_next = False
                        continue
                    frag_index += 1
                    # Skip fragments already written by a resumed download
                    if frag_index <= ctx['fragment_index']:
                        continue
                    frag_url = (
                        line
                        if re.match(r'^https?://', line)
                        else compat_urlparse.urljoin(man_url, line))
                    if extra_query:
                        frag_url = update_url_query(frag_url, extra_query)
                    count = 0
                    # Work on a copy so the per-fragment Range header never
                    # leaks into info_dict['http_headers'] and later requests
                    headers = dict(info_dict.get('http_headers') or {})
                    if byte_range:
                        # HTTP byte ranges are inclusive at both ends, while
                        # byte_range['end'] is exclusive
                        headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
                    while count <= fragment_retries:
                        try:
                            success, frag_content = self._download_fragment(
                                ctx, frag_url, info_dict, headers)
                            if not success:
                                return False
                            break
                        except compat_urllib_error.HTTPError as err:
                            # Unavailable (possibly temporary) fragments may be served.
                            # First we try to retry then either skip or abort.
                            # See https://github.com/rg3/youtube-dl/issues/10165,
                            # https://github.com/rg3/youtube-dl/issues/10448).
                            count += 1
                            if count <= fragment_retries:
                                self.report_retry_fragment(err, frag_index, count, fragment_retries)
                    if count > fragment_retries:
                        if skip_unavailable_fragments:
                            media_sequence += 1
                            self.report_skip_fragment(frag_index)
                            continue
                        self.report_error(
                            'giving up after %s fragment retries' % fragment_retries)
                        return False
                    if decrypt_info['METHOD'] == 'AES-128':
                        # Without an explicit IV the media sequence number is used
                        iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
                        # The key is fetched once and cached until the key URI changes
                        decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
                            self._prepare_url(info_dict, decrypt_info['URI'])).read()
                        frag_content = AES.new(
                            decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
                    self._append_fragment(ctx, frag_content)
                    # We only download the first fragment during the test
                    if test:
                        break
                    media_sequence += 1
                elif line.startswith('#EXT-X-KEY'):
                    decrypt_url = decrypt_info.get('URI')
                    decrypt_info = parse_m3u8_attributes(line[11:])
                    if decrypt_info['METHOD'] == 'AES-128':
                        if 'IV' in decrypt_info:
                            decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
                        if not re.match(r'^https?://', decrypt_info['URI']):
                            decrypt_info['URI'] = compat_urlparse.urljoin(
                                man_url, decrypt_info['URI'])
                        if extra_query:
                            decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
                        if decrypt_url != decrypt_info['URI']:
                            decrypt_info['KEY'] = None
                elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                    media_sequence = int(line[22:])
                elif line.startswith('#EXT-X-BYTERANGE'):
                    # Format is <length>[@<offset>]; without an offset the
                    # sub-range starts where the previous one ended
                    splitted_byte_range = line[17:].split('@')
                    sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
                    byte_range = {
                        'start': sub_range_start,
                        'end': sub_range_start + int(splitted_byte_range[0]),
                    }
                elif is_ad_fragment(line):
                    ad_frag_next = True

        self._finish_frag_download(ctx)

        return True
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
deleted file mode 100644
index 5b1e960..0000000
--- a/youtube_dl/downloader/http.py
+++ /dev/null
@@ -1,354 +0,0 @@
-from __future__ import unicode_literals
-
-import errno
-import os
-import socket
-import time
-import random
-import re
-
-from .common import FileDownloader
-from ..compat import (
- compat_str,
- compat_urllib_error,
-)
-from ..utils import (
- ContentTooShortError,
- encodeFilename,
- int_or_none,
- sanitize_open,
- sanitized_Request,
- write_xattr,
- XAttrMetadataError,
- XAttrUnavailableError,
-)
-
-
-class HttpFD(FileDownloader):
- def real_download(self, filename, info_dict):
- url = info_dict['url']
-
- class DownloadContext(dict):
- __getattr__ = dict.get
- __setattr__ = dict.__setitem__
- __delattr__ = dict.__delitem__
-
- ctx = DownloadContext()
- ctx.filename = filename
- ctx.tmpfilename = self.temp_name(filename)
- ctx.stream = None
-
- # Do not include the Accept-Encoding header
- headers = {'Youtubedl-no-compression': 'True'}
- add_headers = info_dict.get('http_headers')
- if add_headers:
- headers.update(add_headers)
-
- is_test = self.params.get('test', False)
- chunk_size = self._TEST_FILE_SIZE if is_test else (
- info_dict.get('downloader_options', {}).get('http_chunk_size') or
- self.params.get('http_chunk_size') or 0)
-
- ctx.open_mode = 'wb'
- ctx.resume_len = 0
- ctx.data_len = None
- ctx.block_size = self.params.get('buffersize', 1024)
- ctx.start_time = time.time()
- ctx.chunk_size = None
-
- if self.params.get('continuedl', True):
- # Establish possible resume length
- if os.path.isfile(encodeFilename(ctx.tmpfilename)):
- ctx.resume_len = os.path.getsize(
- encodeFilename(ctx.tmpfilename))
-
- ctx.is_resume = ctx.resume_len > 0
-
- count = 0
- retries = self.params.get('retries', 0)
-
- class SucceedDownload(Exception):
- pass
-
- class RetryDownload(Exception):
- def __init__(self, source_error):
- self.source_error = source_error
-
- class NextFragment(Exception):
- pass
-
- def set_range(req, start, end):
- range_header = 'bytes=%d-' % start
- if end:
- range_header += compat_str(end)
- req.add_header('Range', range_header)
-
- def establish_connection():
- ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
- if not is_test and chunk_size else chunk_size)
- if ctx.resume_len > 0:
- range_start = ctx.resume_len
- if ctx.is_resume:
- self.report_resuming_byte(ctx.resume_len)
- ctx.open_mode = 'ab'
- elif ctx.chunk_size > 0:
- range_start = 0
- else:
- range_start = None
- ctx.is_resume = False
- range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
- if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
- range_end = ctx.data_len - 1
- has_range = range_start is not None
- ctx.has_range = has_range
- request = sanitized_Request(url, None, headers)
- if has_range:
- set_range(request, range_start, range_end)
- # Establish connection
- try:
- ctx.data = self.ydl.urlopen(request)
- # When trying to resume, Content-Range HTTP header of response has to be checked
- # to match the value of requested Range HTTP header. This is due to a webservers
- # that don't support resuming and serve a whole file with no Content-Range
- # set in response despite of requested Range (see
- # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
- if has_range:
- content_range = ctx.data.headers.get('Content-Range')
- if content_range:
- content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
- # Content-Range is present and matches requested Range, resume is possible
- if content_range_m:
- if range_start == int(content_range_m.group(1)):
- content_range_end = int_or_none(content_range_m.group(2))
- content_len = int_or_none(content_range_m.group(3))
- accept_content_len = (
- # Non-chunked download
- not ctx.chunk_size or
- # Chunked download and requested piece or
- # its part is promised to be served
- content_range_end == range_end or
- content_len < range_end)
- if accept_content_len:
- ctx.data_len = content_len
- return
- # Content-Range is either not present or invalid. Assuming remote webserver is
- # trying to send the whole file, resume is not possible, so wiping the local file
- # and performing entire redownload
- self.report_unable_to_resume()
- ctx.resume_len = 0
- ctx.open_mode = 'wb'
- ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
- return
- except (compat_urllib_error.HTTPError, ) as err:
- if err.code == 416:
- # Unable to resume (requested range not satisfiable)
- try:
- # Open the connection again without the range header
- ctx.data = self.ydl.urlopen(
- sanitized_Request(url, None, headers))
- content_length = ctx.data.info()['Content-Length']
- except (compat_urllib_error.HTTPError, ) as err:
- if err.code < 500 or err.code >= 600:
- raise
- else:
- # Examine the reported length
- if (content_length is not None and
- (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
- # The file had already been fully downloaded.
- # Explanation to the above condition: in issue #175 it was revealed that
- # YouTube sometimes adds or removes a few bytes from the end of the file,
- # changing the file size slightly and causing problems for some users. So
- # I decided to implement a suggested change and consider the file
- # completely downloaded if the file size differs less than 100 bytes from
- # the one in the hard drive.
- self.report_file_already_downloaded(ctx.filename)
- self.try_rename(ctx.tmpfilename, ctx.filename)
- self._hook_progress({
- 'filename': ctx.filename,
- 'status': 'finished',
- 'downloaded_bytes': ctx.resume_len,
- 'total_bytes': ctx.resume_len,
- })
- raise SucceedDownload()
- else:
- # The length does not match, we start the download over
- self.report_unable_to_resume()
- ctx.resume_len = 0
- ctx.open_mode = 'wb'
- return
- elif err.code < 500 or err.code >= 600:
- # Unexpected HTTP error
- raise
- raise RetryDownload(err)
- except socket.error as err:
- if err.errno != errno.ECONNRESET:
- # Connection reset is no problem, just retry
- raise
- raise RetryDownload(err)
-
- def download():
- data_len = ctx.data.info().get('Content-length', None)
-
- # Range HTTP header may be ignored/unsupported by a webserver
- # (e.g. extractor/scivee.py, extractor/bambuser.py).
- # However, for a test we still would like to download just a piece of a file.
- # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
- # block size when downloading a file.
- if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
- data_len = self._TEST_FILE_SIZE
-
- if data_len is not None:
- data_len = int(data_len) + ctx.resume_len
- min_data_len = self.params.get('min_filesize')
- max_data_len = self.params.get('max_filesize')
- if min_data_len is not None and data_len < min_data_len:
- self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
- return False
- if max_data_len is not None and data_len > max_data_len:
- self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
- return False
-
- byte_counter = 0 + ctx.resume_len
- block_size = ctx.block_size
- start = time.time()
-
- # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
- now = None # needed for slow_down() in the first loop run
- before = start # start measuring
-
- def retry(e):
- to_stdout = ctx.tmpfilename == '-'
- if not to_stdout:
- ctx.stream.close()
- ctx.stream = None
- ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
- raise RetryDownload(e)
-
- while True:
- try:
- # Download and write
- data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
- # socket.timeout is a subclass of socket.error but may not have
- # errno set
- except socket.timeout as e:
- retry(e)
- except socket.error as e:
- if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
- raise
- retry(e)
-
- byte_counter += len(data_block)
-
- # exit loop when download is finished
- if len(data_block) == 0:
- break
-
- # Open destination file just in time
- if ctx.stream is None:
- try:
- ctx.stream, ctx.tmpfilename = sanitize_open(
- ctx.tmpfilename, ctx.open_mode)
- assert ctx.stream is not None
- ctx.filename = self.undo_temp_name(ctx.tmpfilename)
- self.report_destination(ctx.filename)
- except (OSError, IOError) as err:
- self.report_error('unable to open for writing: %s' % str(err))
- return False
-
- if self.params.get('xattr_set_filesize', False) and data_len is not None:
- try:
- write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
- except (XAttrUnavailableError, XAttrMetadataError) as err:
- self.report_error('unable to set filesize xattr: %s' % str(err))
-
- try:
- ctx.stream.write(data_block)
- except (IOError, OSError) as err:
- self.to_stderr('\n')
- self.report_error('unable to write data: %s' % str(err))
- return False
-
- # Apply rate limit
- self.slow_down(start, now, byte_counter - ctx.resume_len)
-
- # end measuring of one loop run
- now = time.time()
- after = now
-
- # Adjust block size
- if not self.params.get('noresizebuffer', False):
- block_size = self.best_block_size(after - before, len(data_block))
-
- before = after
-
- # Progress message
- speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
- if ctx.data_len is None:
- eta = None
- else:
- eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
-
- self._hook_progress({
- 'status': 'downloading',
- 'downloaded_bytes': byte_counter,
- 'total_bytes': ctx.data_len,
- 'tmpfilename': ctx.tmpfilename,
- 'filename': ctx.filename,
- 'eta': eta,
- 'speed': speed,
- 'elapsed': now - ctx.start_time,
- })
-
- if is_test and byte_counter == data_len:
- break
-
- if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
- ctx.resume_len = byte_counter
- # ctx.block_size = block_size
- raise NextFragment()
-
- if ctx.stream is None:
- self.to_stderr('\n')
- self.report_error('Did not get any data blocks')
- return False
- if ctx.tmpfilename != '-':
- ctx.stream.close()
-
- if data_len is not None and byte_counter != data_len:
- err = ContentTooShortError(byte_counter, int(data_len))
- if count <= retries:
- retry(err)
- raise err
-
- self.try_rename(ctx.tmpfilename, ctx.filename)
-
- # Update file modification time
- if self.params.get('updatetime', True):
- info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
-
- self._hook_progress({
- 'downloaded_bytes': byte_counter,
- 'total_bytes': byte_counter,
- 'filename': ctx.filename,
- 'status': 'finished',
- 'elapsed': time.time() - ctx.start_time,
- })
-
- return True
-
- while count <= retries:
- try:
- establish_connection()
- return download()
- except RetryDownload as e:
- count += 1
- if count <= retries:
- self.report_retry(e.source_error, count, retries)
- continue
- except NextFragment:
- continue
- except SucceedDownload:
- return True
-
- self.report_error('giving up after %s retries' % retries)
- return False
diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py
deleted file mode 100644
index 063fcf4..0000000
--- a/youtube_dl/downloader/ism.py
+++ /dev/null
@@ -1,259 +0,0 @@
-from __future__ import unicode_literals
-
-import time
-import binascii
-import io
-
-from .fragment import FragmentFD
-from ..compat import (
- compat_Struct,
- compat_urllib_error,
-)
-
-
-u8 = compat_Struct('>B')
-u88 = compat_Struct('>Bx')
-u16 = compat_Struct('>H')
-u1616 = compat_Struct('>Hxx')
-u32 = compat_Struct('>I')
-u64 = compat_Struct('>Q')
-
-s88 = compat_Struct('>bx')
-s16 = compat_Struct('>h')
-s1616 = compat_Struct('>hxx')
-s32 = compat_Struct('>i')
-
-unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
-
-TRACK_ENABLED = 0x1
-TRACK_IN_MOVIE = 0x2
-TRACK_IN_PREVIEW = 0x4
-
-SELF_CONTAINED = 0x1
-
-
-def box(box_type, payload):
- return u32.pack(8 + len(payload)) + box_type + payload
-
-
-def full_box(box_type, version, flags, payload):
- return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload)
-
-
-def write_piff_header(stream, params):
- track_id = params['track_id']
- fourcc = params['fourcc']
- duration = params['duration']
- timescale = params.get('timescale', 10000000)
- language = params.get('language', 'und')
- height = params.get('height', 0)
- width = params.get('width', 0)
- is_audio = width == 0 and height == 0
- creation_time = modification_time = int(time.time())
-
- ftyp_payload = b'isml' # major brand
- ftyp_payload += u32.pack(1) # minor version
- ftyp_payload += b'piff' + b'iso2' # compatible brands
- stream.write(box(b'ftyp', ftyp_payload)) # File Type Box
-
- mvhd_payload = u64.pack(creation_time)
- mvhd_payload += u64.pack(modification_time)
- mvhd_payload += u32.pack(timescale)
- mvhd_payload += u64.pack(duration)
- mvhd_payload += s1616.pack(1) # rate
- mvhd_payload += s88.pack(1) # volume
- mvhd_payload += u16.pack(0) # reserved
- mvhd_payload += u32.pack(0) * 2 # reserved
- mvhd_payload += unity_matrix
- mvhd_payload += u32.pack(0) * 6 # pre defined
- mvhd_payload += u32.pack(0xffffffff) # next track id
- moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box
-
- tkhd_payload = u64.pack(creation_time)
- tkhd_payload += u64.pack(modification_time)
- tkhd_payload += u32.pack(track_id) # track id
- tkhd_payload += u32.pack(0) # reserved
- tkhd_payload += u64.pack(duration)
- tkhd_payload += u32.pack(0) * 2 # reserved
- tkhd_payload += s16.pack(0) # layer
- tkhd_payload += s16.pack(0) # alternate group
- tkhd_payload += s88.pack(1 if is_audio else 0) # volume
- tkhd_payload += u16.pack(0) # reserved
- tkhd_payload += unity_matrix
- tkhd_payload += u1616.pack(width)
- tkhd_payload += u1616.pack(height)
- trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box
-
- mdhd_payload = u64.pack(creation_time)
- mdhd_payload += u64.pack(modification_time)
- mdhd_payload += u32.pack(timescale)
- mdhd_payload += u64.pack(duration)
- mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60))
- mdhd_payload += u16.pack(0) # pre defined
- mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media Header Box
-
- hdlr_payload = u32.pack(0) # pre defined
- hdlr_payload += b'soun' if is_audio else b'vide' # handler type
- hdlr_payload += u32.pack(0) * 3 # reserved
- hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0' # name
- mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box
-
- if is_audio:
- smhd_payload = s88.pack(0) # balance
- smhd_payload += u16.pack(0) # reserved
- media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header
- else:
- vmhd_payload = u16.pack(0) # graphics mode
- vmhd_payload += u16.pack(0) * 3 # opcolor
- media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header
- minf_payload = media_header_box
-
- dref_payload = u32.pack(1) # entry count
- dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box
- dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box
- minf_payload += box(b'dinf', dinf_payload) # Data Information Box
-
- stsd_payload = u32.pack(1) # entry count
-
- sample_entry_payload = u8.pack(0) * 6 # reserved
- sample_entry_payload += u16.pack(1) # data reference index
- if is_audio:
- sample_entry_payload += u32.pack(0) * 2 # reserved
- sample_entry_payload += u16.pack(params.get('channels', 2))
- sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
- sample_entry_payload += u16.pack(0) # pre defined
- sample_entry_payload += u16.pack(0) # reserved
- sample_entry_payload += u1616.pack(params['sampling_rate'])
-
- if fourcc == 'AACL':
- sample_entry_box = box(b'mp4a', sample_entry_payload)
- else:
- sample_entry_payload += u16.pack(0) # pre defined
- sample_entry_payload += u16.pack(0) # reserved
- sample_entry_payload += u32.pack(0) * 3 # pre defined
- sample_entry_payload += u16.pack(width)
- sample_entry_payload += u16.pack(height)
- sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi
- sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi
- sample_entry_payload += u32.pack(0) # reserved
- sample_entry_payload += u16.pack(1) # frame count
- sample_entry_payload += u8.pack(0) * 32 # compressor name
- sample_entry_payload += u16.pack(0x18) # depth
- sample_entry_payload += s16.pack(-1) # pre defined
-
- codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
- if fourcc in ('H264', 'AVC1'):
- sps, pps = codec_private_data.split(u32.pack(1))[1:]
- avcc_payload = u8.pack(1) # configuration version
- avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication
- avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete represenation (1) + reserved (11111) + length size minus one
- avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001)
- avcc_payload += u16.pack(len(sps))
- avcc_payload += sps
- avcc_payload += u8.pack(1) # number of pps
- avcc_payload += u16.pack(len(pps))
- avcc_payload += pps
- sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record
- sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry
- stsd_payload += sample_entry_box
-
- stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box
-
- stts_payload = u32.pack(0) # entry count
- stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box
-
- stsc_payload = u32.pack(0) # entry count
- stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box
-
- stco_payload = u32.pack(0) # entry count
- stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box
-
- minf_payload += box(b'stbl', stbl_payload) # Sample Table Box
-
- mdia_payload += box(b'minf', minf_payload) # Media Information Box
-
- trak_payload += box(b'mdia', mdia_payload) # Media Box
-
- moov_payload += box(b'trak', trak_payload) # Track Box
-
- mehd_payload = u64.pack(duration)
- mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box
-
- trex_payload = u32.pack(track_id) # track id
- trex_payload += u32.pack(1) # default sample description index
- trex_payload += u32.pack(0) # default sample duration
- trex_payload += u32.pack(0) # default sample size
- trex_payload += u32.pack(0) # default sample flags
- mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box
-
- moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box
- stream.write(box(b'moov', moov_payload)) # Movie Box
-
-
-def extract_box_data(data, box_sequence):
- data_reader = io.BytesIO(data)
- while True:
- box_size = u32.unpack(data_reader.read(4))[0]
- box_type = data_reader.read(4)
- if box_type == box_sequence[0]:
- box_data = data_reader.read(box_size - 8)
- if len(box_sequence) == 1:
- return box_data
- return extract_box_data(box_data, box_sequence[1:])
- data_reader.seek(box_size - 8, 1)
-
-
-class IsmFD(FragmentFD):
- """
- Download segments in a ISM manifest
- """
-
- FD_NAME = 'ism'
-
- def real_download(self, filename, info_dict):
- segments = info_dict['fragments'][:1] if self.params.get(
- 'test', False) else info_dict['fragments']
-
- ctx = {
- 'filename': filename,
- 'total_frags': len(segments),
- }
-
- self._prepare_and_start_frag_download(ctx)
-
- fragment_retries = self.params.get('fragment_retries', 0)
- skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
-
- track_written = False
- frag_index = 0
- for i, segment in enumerate(segments):
- frag_index += 1
- if frag_index <= ctx['fragment_index']:
- continue
- count = 0
- while count <= fragment_retries:
- try:
- success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
- if not success:
- return False
- if not track_written:
- tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
- info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
- write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
- track_written = True
- self._append_fragment(ctx, frag_content)
- break
- except compat_urllib_error.HTTPError as err:
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- if count > fragment_retries:
- if skip_unavailable_fragments:
- self.report_skip_fragment(frag_index)
- continue
- self.report_error('giving up after %s fragment retries' % fragment_retries)
- return False
-
- self._finish_frag_download(ctx)
-
- return True
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
deleted file mode 100644
index fbb7f51..0000000
--- a/youtube_dl/downloader/rtmp.py
+++ /dev/null
@@ -1,214 +0,0 @@
-from __future__ import unicode_literals
-
-import os
-import re
-import subprocess
-import time
-
-from .common import FileDownloader
-from ..compat import compat_str
-from ..utils import (
- check_executable,
- encodeFilename,
- encodeArgument,
- get_exe_version,
-)
-
-
-def rtmpdump_version():
- return get_exe_version(
- 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)')
-
-
-class RtmpFD(FileDownloader):
- def real_download(self, filename, info_dict):
- def run_rtmpdump(args):
- start = time.time()
- resume_percent = None
- resume_downloaded_data_len = None
- proc = subprocess.Popen(args, stderr=subprocess.PIPE)
- cursor_in_new_line = True
- proc_stderr_closed = False
- try:
- while not proc_stderr_closed:
- # read line from stderr
- line = ''
- while True:
- char = proc.stderr.read(1)
- if not char:
- proc_stderr_closed = True
- break
- if char in [b'\r', b'\n']:
- break
- line += char.decode('ascii', 'replace')
- if not line:
- # proc_stderr_closed is True
- continue
- mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
- if mobj:
- downloaded_data_len = int(float(mobj.group(1)) * 1024)
- percent = float(mobj.group(2))
- if not resume_percent:
- resume_percent = percent
- resume_downloaded_data_len = downloaded_data_len
- time_now = time.time()
- eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
- speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
- data_len = None
- if percent > 0:
- data_len = int(downloaded_data_len * 100 / percent)
- self._hook_progress({
- 'status': 'downloading',
- 'downloaded_bytes': downloaded_data_len,
- 'total_bytes_estimate': data_len,
- 'tmpfilename': tmpfilename,
- 'filename': filename,
- 'eta': eta,
- 'elapsed': time_now - start,
- 'speed': speed,
- })
- cursor_in_new_line = False
- else:
- # no percent for live streams
- mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
- if mobj:
- downloaded_data_len = int(float(mobj.group(1)) * 1024)
- time_now = time.time()
- speed = self.calc_speed(start, time_now, downloaded_data_len)
- self._hook_progress({
- 'downloaded_bytes': downloaded_data_len,
- 'tmpfilename': tmpfilename,
- 'filename': filename,
- 'status': 'downloading',
- 'elapsed': time_now - start,
- 'speed': speed,
- })
- cursor_in_new_line = False
- elif self.params.get('verbose', False):
- if not cursor_in_new_line:
- self.to_screen('')
- cursor_in_new_line = True
- self.to_screen('[rtmpdump] ' + line)
- finally:
- proc.wait()
- if not cursor_in_new_line:
- self.to_screen('')
- return proc.returncode
-
- url = info_dict['url']
- player_url = info_dict.get('player_url')
- page_url = info_dict.get('page_url')
- app = info_dict.get('app')
- play_path = info_dict.get('play_path')
- tc_url = info_dict.get('tc_url')
- flash_version = info_dict.get('flash_version')
- live = info_dict.get('rtmp_live', False)
- conn = info_dict.get('rtmp_conn')
- protocol = info_dict.get('rtmp_protocol')
- real_time = info_dict.get('rtmp_real_time', False)
- no_resume = info_dict.get('no_resume', False)
- continue_dl = self.params.get('continuedl', True)
-
- self.report_destination(filename)
- tmpfilename = self.temp_name(filename)
- test = self.params.get('test', False)
-
- # Check for rtmpdump first
- if not check_executable('rtmpdump', ['-h']):
- self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it.')
- return False
-
- # Download using rtmpdump. rtmpdump returns exit code 2 when
- # the connection was interrupted and resuming appears to be
- # possible. This is part of rtmpdump's normal usage, AFAIK.
- basic_args = [
- 'rtmpdump', '--verbose', '-r', url,
- '-o', tmpfilename]
- if player_url is not None:
- basic_args += ['--swfVfy', player_url]
- if page_url is not None:
- basic_args += ['--pageUrl', page_url]
- if app is not None:
- basic_args += ['--app', app]
- if play_path is not None:
- basic_args += ['--playpath', play_path]
- if tc_url is not None:
- basic_args += ['--tcUrl', tc_url]
- if test:
- basic_args += ['--stop', '1']
- if flash_version is not None:
- basic_args += ['--flashVer', flash_version]
- if live:
- basic_args += ['--live']
- if isinstance(conn, list):
- for entry in conn:
- basic_args += ['--conn', entry]
- elif isinstance(conn, compat_str):
- basic_args += ['--conn', conn]
- if protocol is not None:
- basic_args += ['--protocol', protocol]
- if real_time:
- basic_args += ['--realtime']
-
- args = basic_args
- if not no_resume and continue_dl and not live:
- args += ['--resume']
- if not live and continue_dl:
- args += ['--skip', '1']
-
- args = [encodeArgument(a) for a in args]
-
- self._debug_cmd(args, exe='rtmpdump')
-
- RD_SUCCESS = 0
- RD_FAILED = 1
- RD_INCOMPLETE = 2
- RD_NO_CONNECT = 3
-
- started = time.time()
-
- try:
- retval = run_rtmpdump(args)
- except KeyboardInterrupt:
- if not info_dict.get('is_live'):
- raise
- retval = RD_SUCCESS
- self.to_screen('\n[rtmpdump] Interrupted by user')
-
- if retval == RD_NO_CONNECT:
- self.report_error('[rtmpdump] Could not connect to RTMP server.')
- return False
-
- while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live:
- prevsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('[rtmpdump] Downloaded %s bytes' % prevsize)
- time.sleep(5.0) # This seems to be needed
- args = basic_args + ['--resume']
- if retval == RD_FAILED:
- args += ['--skip', '1']
- args = [encodeArgument(a) for a in args]
- retval = run_rtmpdump(args)
- cursize = os.path.getsize(encodeFilename(tmpfilename))
- if prevsize == cursize and retval == RD_FAILED:
- break
- # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
- if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024:
- self.to_screen('[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
- retval = RD_SUCCESS
- break
- if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE):
- fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('[rtmpdump] Downloaded %s bytes' % fsize)
- self.try_rename(tmpfilename, filename)
- self._hook_progress({
- 'downloaded_bytes': fsize,
- 'total_bytes': fsize,
- 'filename': filename,
- 'status': 'finished',
- 'elapsed': time.time() - started,
- })
- return True
- else:
- self.to_stderr('\n')
- self.report_error('rtmpdump exited with code %d' % retval)
- return False
diff --git a/youtube_dl/downloader/rtsp.py b/youtube_dl/downloader/rtsp.py
deleted file mode 100644
index 939358b..0000000
--- a/youtube_dl/downloader/rtsp.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from __future__ import unicode_literals
-
-import os
-import subprocess
-
-from .common import FileDownloader
-from ..utils import (
- check_executable,
- encodeFilename,
-)
-
-
-class RtspFD(FileDownloader):
- def real_download(self, filename, info_dict):
- url = info_dict['url']
- self.report_destination(filename)
- tmpfilename = self.temp_name(filename)
-
- if check_executable('mplayer', ['-h']):
- args = [
- 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
- '-dumpstream', '-dumpfile', tmpfilename, url]
- elif check_executable('mpv', ['-h']):
- args = [
- 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
- else:
- self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
- return False
-
- self._debug_cmd(args)
-
- retval = subprocess.call(args)
- if retval == 0:
- fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
- self.try_rename(tmpfilename, filename)
- self._hook_progress({
- 'downloaded_bytes': fsize,
- 'total_bytes': fsize,
- 'filename': filename,
- 'status': 'finished',
- })
- return True
- else:
- self.to_stderr('\n')
- self.report_error('%s exited with code %d' % (args[0], retval))
- return False