aboutsummaryrefslogtreecommitdiffstats
path: root/hypervideo_dl/downloader
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/downloader')
-rw-r--r--hypervideo_dl/downloader/__init__.py31
-rw-r--r--hypervideo_dl/downloader/common.py333
-rw-r--r--hypervideo_dl/downloader/dash.py23
-rw-r--r--hypervideo_dl/downloader/external.py219
-rw-r--r--hypervideo_dl/downloader/f4m.py54
-rw-r--r--hypervideo_dl/downloader/fc2.py11
-rw-r--r--hypervideo_dl/downloader/fragment.py191
-rw-r--r--hypervideo_dl/downloader/hls.py66
-rw-r--r--hypervideo_dl/downloader/http.py98
-rw-r--r--hypervideo_dl/downloader/ism.py64
-rw-r--r--hypervideo_dl/downloader/mhtml.py30
-rw-r--r--hypervideo_dl/downloader/niconico.py13
-rw-r--r--hypervideo_dl/downloader/rtmp.py12
-rw-r--r--hypervideo_dl/downloader/rtsp.py9
-rw-r--r--hypervideo_dl/downloader/websocket.py19
-rw-r--r--hypervideo_dl/downloader/youtube_live_chat.py53
16 files changed, 566 insertions, 660 deletions
diff --git a/hypervideo_dl/downloader/__init__.py b/hypervideo_dl/downloader/__init__.py
index 96d484d..c34dbce 100644
--- a/hypervideo_dl/downloader/__init__.py
+++ b/hypervideo_dl/downloader/__init__.py
@@ -1,10 +1,4 @@
-from __future__ import unicode_literals
-
-from ..compat import compat_str
-from ..utils import (
- determine_protocol,
- NO_DEFAULT
-)
+from ..utils import NO_DEFAULT, determine_protocol
def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False):
@@ -29,21 +23,18 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
# Some of these require get_suitable_downloader
from .common import FileDownloader
from .dash import DashSegmentsFD
+from .external import FFmpegFD, get_external_downloader
from .f4m import F4mFD
from .fc2 import FC2LiveFD
from .hls import HlsFD
from .http import HttpFD
-from .rtmp import RtmpFD
-from .rtsp import RtspFD
from .ism import IsmFD
from .mhtml import MhtmlFD
from .niconico import NiconicoDmcFD
+from .rtmp import RtmpFD
+from .rtsp import RtspFD
from .websocket import WebSocketFragmentFD
from .youtube_live_chat import YoutubeLiveChatFD
-from .external import (
- get_external_downloader,
- FFmpegFD,
-)
PROTOCOL_MAP = {
'rtmp': RtmpFD,
@@ -68,10 +59,11 @@ PROTOCOL_MAP = {
def shorten_protocol_name(proto, simplify=False):
short_protocol_names = {
- 'm3u8_native': 'm3u8_n',
- 'rtmp_ffmpeg': 'rtmp_f',
+ 'm3u8_native': 'm3u8',
+ 'm3u8': 'm3u8F',
+ 'rtmp_ffmpeg': 'rtmpF',
'http_dash_segments': 'dash',
- 'http_dash_segments_generator': 'dash_g',
+ 'http_dash_segments_generator': 'dashG',
'niconico_dmc': 'dmc',
'websocket_frag': 'WSfrag',
}
@@ -79,6 +71,7 @@ def shorten_protocol_name(proto, simplify=False):
short_protocol_names.update({
'https': 'http',
'ftps': 'ftp',
+ 'm3u8': 'm3u8', # Reverse above m3u8 mapping
'm3u8_native': 'm3u8',
'http_dash_segments_generator': 'dash',
'rtmp_ffmpeg': 'rtmp',
@@ -93,13 +86,13 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
if default is NO_DEFAULT:
default = HttpFD
- # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
- # return FFmpegFD
+ if (info_dict.get('section_start') or info_dict.get('section_end')) and FFmpegFD.can_download(info_dict):
+ return FFmpegFD
info_dict['protocol'] = protocol
downloaders = params.get('external_downloader')
external_downloader = (
- downloaders if isinstance(downloaders, compat_str) or downloaders is None
+ downloaders if isinstance(downloaders, str) or downloaders is None
else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default')))
if external_downloader is None:
diff --git a/hypervideo_dl/downloader/common.py b/hypervideo_dl/downloader/common.py
index 7cef3e8..72d4822 100644
--- a/hypervideo_dl/downloader/common.py
+++ b/hypervideo_dl/downloader/common.py
@@ -1,30 +1,39 @@
-from __future__ import division, unicode_literals
-
+import contextlib
+import errno
+import functools
import os
+import random
import re
import time
-import random
-import errno
+from ..minicurses import (
+ BreaklineStatusPrinter,
+ MultilineLogger,
+ MultilinePrinter,
+ QuietMultilinePrinter,
+)
from ..utils import (
+ IDENTITY,
+ NO_DEFAULT,
+ LockingUnsupportedError,
+ Namespace,
+ RetryManager,
+ classproperty,
decodeArgument,
encodeFilename,
- error_to_compat_str,
format_bytes,
+ join_nonempty,
+ parse_bytes,
+ remove_start,
sanitize_open,
shell_quote,
timeconvert,
timetuple_from_msec,
-)
-from ..minicurses import (
- MultilineLogger,
- MultilinePrinter,
- QuietMultilinePrinter,
- BreaklineStatusPrinter
+ try_call,
)
-class FileDownloader(object):
+class FileDownloader:
"""File Downloader class.
File downloader objects are the ones responsible of downloading the
@@ -39,6 +48,7 @@ class FileDownloader(object):
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
ratelimit: Download speed limit, in bytes/sec.
+ continuedl: Attempt to continue downloads if possible
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for HTTP error 5xx
file_access_retries: Number of times to retry on file access error
@@ -62,6 +72,7 @@ class FileDownloader(object):
useful for bypassing bandwidth throttling imposed by
a webserver (experimental)
progress_template: See YoutubeDL.py
+ retry_sleep_functions: See YoutubeDL.py
Subclasses of this one must re-define the real_download method.
"""
@@ -71,21 +82,51 @@ class FileDownloader(object):
def __init__(self, ydl, params):
"""Create a FileDownloader object with the given options."""
- self.ydl = ydl
+ self._set_ydl(ydl)
self._progress_hooks = []
self.params = params
self._prepare_multiline_status()
self.add_progress_hook(self.report_progress)
+ def _set_ydl(self, ydl):
+ self.ydl = ydl
+
+ for func in (
+ 'deprecation_warning',
+ 'deprecated_feature',
+ 'report_error',
+ 'report_file_already_downloaded',
+ 'report_warning',
+ 'to_console_title',
+ 'to_stderr',
+ 'trouble',
+ 'write_debug',
+ ):
+ if not hasattr(self, func):
+ setattr(self, func, getattr(ydl, func))
+
+ def to_screen(self, *args, **kargs):
+ self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
+
+ __to_screen = to_screen
+
+ @classproperty
+ def FD_NAME(cls):
+ return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower()
+
@staticmethod
def format_seconds(seconds):
+ if seconds is None:
+ return ' Unknown'
time = timetuple_from_msec(seconds * 1000)
if time.hours > 99:
return '--:--:--'
- if not time.hours:
- return '%02d:%02d' % time[1:-1]
return '%02d:%02d:%02d' % time[:-1]
+ @classmethod
+ def format_eta(cls, seconds):
+ return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}'
+
@staticmethod
def calc_percent(byte_counter, data_len):
if data_len is None:
@@ -94,11 +135,7 @@ class FileDownloader(object):
@staticmethod
def format_percent(percent):
- if percent is None:
- return '---.-%'
- elif percent == 100:
- return '100%'
- return '%6s' % ('%3.1f%%' % percent)
+ return ' N/A%' if percent is None else f'{percent:>5.1f}%'
@staticmethod
def calc_eta(start, now, total, current):
@@ -113,12 +150,6 @@ class FileDownloader(object):
return int((float(total) - float(current)) / rate)
@staticmethod
- def format_eta(eta):
- if eta is None:
- return '--:--'
- return FileDownloader.format_seconds(eta)
-
- @staticmethod
def calc_speed(start, now, bytes):
dif = now - start
if bytes == 0 or dif < 0.001: # One millisecond
@@ -127,13 +158,11 @@ class FileDownloader(object):
@staticmethod
def format_speed(speed):
- if speed is None:
- return '%10s' % '---b/s'
- return '%10s' % ('%s/s' % format_bytes(speed))
+ return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
@staticmethod
def format_retries(retries):
- return 'inf' if retries == float('inf') else '%.0f' % retries
+ return 'inf' if retries == float('inf') else int(retries)
@staticmethod
def best_block_size(elapsed_time, bytes):
@@ -151,33 +180,7 @@ class FileDownloader(object):
@staticmethod
def parse_bytes(bytestr):
"""Parse a string indicating a byte quantity into an integer."""
- matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
- if matchobj is None:
- return None
- number = float(matchobj.group(1))
- multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
- return int(round(number * multiplier))
-
- def to_screen(self, *args, **kargs):
- self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
-
- def to_stderr(self, message):
- self.ydl.to_stderr(message)
-
- def to_console_title(self, message):
- self.ydl.to_console_title(message)
-
- def trouble(self, *args, **kargs):
- self.ydl.trouble(*args, **kargs)
-
- def report_warning(self, *args, **kargs):
- self.ydl.report_warning(*args, **kargs)
-
- def report_error(self, *args, **kargs):
- self.ydl.report_error(*args, **kargs)
-
- def write_debug(self, *args, **kargs):
- self.ydl.write_debug(*args, **kargs)
+ parse_bytes(bytestr)
def slow_down(self, start_time, now, byte_counter):
"""Sleep if the download speed is over the rate limit."""
@@ -211,30 +214,31 @@ class FileDownloader(object):
return filename + '.ytdl'
def wrap_file_access(action, *, fatal=False):
- def outer(func):
- def inner(self, *args, **kwargs):
- file_access_retries = self.params.get('file_access_retries', 0)
- retry = 0
- while True:
- try:
- return func(self, *args, **kwargs)
- except (IOError, OSError) as err:
- retry = retry + 1
- if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL):
- if not fatal:
- self.report_error(f'unable to {action} file: {err}')
- return
- raise
- self.to_screen(
- f'[download] Unable to {action} file due to file access error. '
- f'Retrying (attempt {retry} of {self.format_retries(file_access_retries)}) ...')
- time.sleep(0.01)
- return inner
- return outer
+ def error_callback(err, count, retries, *, fd):
+ return RetryManager.report_retry(
+ err, count, retries, info=fd.__to_screen,
+ warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
+ error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
+ sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))
+
+ def wrapper(self, func, *args, **kwargs):
+ for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
+ try:
+ return func(self, *args, **kwargs)
+ except OSError as err:
+ if err.errno in (errno.EACCES, errno.EINVAL):
+ retry.error = err
+ continue
+ retry.error_callback(err, 1, 0)
+
+ return functools.partial(functools.partialmethod, wrapper)
@wrap_file_access('open', fatal=True)
def sanitize_open(self, filename, open_mode):
- return sanitize_open(filename, open_mode)
+ f, filename = sanitize_open(filename, open_mode)
+ if not getattr(f, 'locked', None):
+ self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
+ return f, filename
@wrap_file_access('remove')
def try_remove(self, filename):
@@ -261,10 +265,8 @@ class FileDownloader(object):
# Ignore obviously invalid dates
if filetime == 0:
return
- try:
+ with contextlib.suppress(Exception):
os.utime(filename, (time.time(), filetime))
- except Exception:
- pass
return filetime
def report_destination(self, filename):
@@ -277,26 +279,26 @@ class FileDownloader(object):
elif self.ydl.params.get('logger'):
self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
elif self.params.get('progress_with_newline'):
- self._multiline = BreaklineStatusPrinter(self.ydl._out_files['screen'], lines)
+ self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
else:
- self._multiline = MultilinePrinter(self.ydl._out_files['screen'], lines, not self.params.get('quiet'))
+ self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
def _finish_multiline_status(self):
self._multiline.end()
- _progress_styles = {
- 'downloaded_bytes': 'light blue',
- 'percent': 'light blue',
- 'eta': 'yellow',
- 'speed': 'green',
- 'elapsed': 'bold white',
- 'total_bytes': '',
- 'total_bytes_estimate': '',
- }
+ ProgressStyles = Namespace(
+ downloaded_bytes='light blue',
+ percent='light blue',
+ eta='yellow',
+ speed='green',
+ elapsed='bold white',
+ total_bytes='',
+ total_bytes_estimate='',
+ )
def _report_progress_status(self, s, default_template):
- for name, style in self._progress_styles.items():
+ for name, style in self.ProgressStyles.items_:
name = f'_{name}_str'
if name not in s:
continue
@@ -320,78 +322,73 @@ class FileDownloader(object):
self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
def report_progress(self, s):
+ def with_fields(*tups, default=''):
+ for *fields, tmpl in tups:
+ if all(s.get(f) is not None for f in fields):
+ return tmpl
+ return default
+
+ _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'
+
if s['status'] == 'finished':
if self.params.get('noprogress'):
self.to_screen('[download] Download completed')
- msg_template = '100%%'
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template += ' of %(_total_bytes_str)s'
- if s.get('elapsed') is not None:
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template += ' in %(_elapsed_str)s'
- s['_percent_str'] = self.format_percent(100)
- self._report_progress_status(s, msg_template)
- return
+ speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
+ s.update({
+ 'speed': speed,
+ '_speed_str': self.format_speed(speed).strip(),
+ '_total_bytes_str': _format_bytes('total_bytes'),
+ '_elapsed_str': self.format_seconds(s.get('elapsed')),
+ '_percent_str': self.format_percent(100),
+ })
+ self._report_progress_status(s, join_nonempty(
+ '100%%',
+ with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
+ with_fields(('elapsed', 'in %(_elapsed_str)s')),
+ with_fields(('speed', 'at %(_speed_str)s')),
+ delim=' '))
if s['status'] != 'downloading':
return
- if s.get('eta') is not None:
- s['_eta_str'] = self.format_eta(s['eta'])
- else:
- s['_eta_str'] = 'Unknown'
-
- if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
- s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
- elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
- s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
- else:
- if s.get('downloaded_bytes') == 0:
- s['_percent_str'] = self.format_percent(0)
- else:
- s['_percent_str'] = 'Unknown %'
-
- if s.get('speed') is not None:
- s['_speed_str'] = self.format_speed(s['speed'])
- else:
- s['_speed_str'] = 'Unknown speed'
-
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
- elif s.get('total_bytes_estimate') is not None:
- s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
- msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
- else:
- if s.get('downloaded_bytes') is not None:
- s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
- if s.get('elapsed'):
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
- else:
- msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
- else:
- msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
- if s.get('fragment_index') and s.get('fragment_count'):
- msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)'
- elif s.get('fragment_index'):
- msg_template += ' (frag %(fragment_index)s)'
+ s.update({
+ '_eta_str': self.format_eta(s.get('eta')).strip(),
+ '_speed_str': self.format_speed(s.get('speed')),
+ '_percent_str': self.format_percent(try_call(
+ lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
+ lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
+ lambda: s['downloaded_bytes'] == 0 and 0)),
+ '_total_bytes_str': _format_bytes('total_bytes'),
+ '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
+ '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
+ '_elapsed_str': self.format_seconds(s.get('elapsed')),
+ })
+
+ msg_template = with_fields(
+ ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
+ ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
+ ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
+ ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
+ default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')
+
+ msg_template += with_fields(
+ ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
+ ('fragment_index', ' (frag %(fragment_index)s)'))
self._report_progress_status(s, msg_template)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
self.to_screen('[download] Resuming download at byte %s' % resume_len)
- def report_retry(self, err, count, retries):
- """Report retry in case of HTTP error 5xx"""
- self.to_screen(
- '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
- % (error_to_compat_str(err), count, self.format_retries(retries)))
-
- def report_file_already_downloaded(self, *args, **kwargs):
- """Report file has already been fully downloaded."""
- return self.ydl.report_file_already_downloaded(*args, **kwargs)
+ def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
+ """Report retry"""
+ is_frag = False if frag_index is NO_DEFAULT else 'fragment'
+ RetryManager.report_retry(
+ err, count, retries, info=self.__to_screen,
+ warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'),
+ error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'),
+ sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'),
+ suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None)
def report_unable_to_resume(self):
"""Report it was impossible to resume download."""
@@ -431,25 +428,16 @@ class FileDownloader(object):
self._finish_multiline_status()
return True, False
- if subtitle is False:
- min_sleep_interval = self.params.get('sleep_interval')
- if min_sleep_interval:
- max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
- sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
- self.to_screen(
- '[download] Sleeping %s seconds ...' % (
- int(sleep_interval) if sleep_interval.is_integer()
- else '%.2f' % sleep_interval))
- time.sleep(sleep_interval)
+ if subtitle:
+ sleep_interval = self.params.get('sleep_interval_subtitles') or 0
else:
- sleep_interval_sub = 0
- if type(self.params.get('sleep_interval_subtitles')) is int:
- sleep_interval_sub = self.params.get('sleep_interval_subtitles')
- if sleep_interval_sub > 0:
- self.to_screen(
- '[download] Sleeping %s seconds ...' % (
- sleep_interval_sub))
- time.sleep(sleep_interval_sub)
+ min_sleep_interval = self.params.get('sleep_interval') or 0
+ sleep_interval = random.uniform(
+ min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
+ if sleep_interval > 0:
+ self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
+ time.sleep(sleep_interval)
+
ret = self.real_download(filename, info_dict)
self._finish_multiline_status()
return ret, True
@@ -459,8 +447,7 @@ class FileDownloader(object):
raise NotImplementedError('This method must be implemented by subclasses')
def _hook_progress(self, status, info_dict):
- if not self._progress_hooks:
- return
+ # Ideally we want to make a copy of the dict, but that is too slow
status['info_dict'] = info_dict
# youtube-dl passes the same status object to all the hooks.
# Some third party scripts seems to be relying on this.
@@ -482,4 +469,4 @@ class FileDownloader(object):
if exe is None:
exe = os.path.basename(str_args[0])
- self.write_debug('%s command line: %s' % (exe, shell_quote(str_args)))
+ self.write_debug(f'{exe} command line: {shell_quote(str_args)}')
diff --git a/hypervideo_dl/downloader/dash.py b/hypervideo_dl/downloader/dash.py
index a845ee7..4328d73 100644
--- a/hypervideo_dl/downloader/dash.py
+++ b/hypervideo_dl/downloader/dash.py
@@ -1,10 +1,9 @@
-from __future__ import unicode_literals
import time
+import urllib.parse
-from ..downloader import get_suitable_downloader
+from . import get_suitable_downloader
from .fragment import FragmentFD
-
-from ..utils import urljoin
+from ..utils import update_url_query, urljoin
class DashSegmentsFD(FragmentFD):
@@ -42,24 +41,29 @@ class DashSegmentsFD(FragmentFD):
self._prepare_and_start_frag_download(ctx, fmt)
ctx['start'] = real_start
- fragments_to_download = self._get_fragments(fmt, ctx)
+ extra_query = None
+ extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
+ if extra_param_to_segment_url:
+ extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
+
+ fragments_to_download = self._get_fragments(fmt, ctx, extra_query)
if real_downloader:
self.to_screen(
- '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
+ f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
info_dict['fragments'] = list(fragments_to_download)
fd = real_downloader(self.ydl, self.params)
return fd.real_download(filename, info_dict)
args.append([ctx, fragments_to_download, fmt])
- return self.download_and_append_fragments_multiple(*args)
+ return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0)
def _resolve_fragments(self, fragments, ctx):
fragments = fragments(ctx) if callable(fragments) else fragments
return [next(iter(fragments))] if self.params.get('test') else fragments
- def _get_fragments(self, fmt, ctx):
+ def _get_fragments(self, fmt, ctx, extra_query):
fragment_base_url = fmt.get('fragment_base_url')
fragments = self._resolve_fragments(fmt['fragments'], ctx)
@@ -72,9 +76,12 @@ class DashSegmentsFD(FragmentFD):
if not fragment_url:
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
+ if extra_query:
+ fragment_url = update_url_query(fragment_url, extra_query)
yield {
'frag_index': frag_index,
+ 'fragment_count': fragment.get('fragment_count'),
'index': i,
'url': fragment_url,
}
diff --git a/hypervideo_dl/downloader/external.py b/hypervideo_dl/downloader/external.py
index b99dc37..75257a7 100644
--- a/hypervideo_dl/downloader/external.py
+++ b/hypervideo_dl/downloader/external.py
@@ -1,5 +1,4 @@
-from __future__ import unicode_literals
-
+import enum
import os.path
import re
import subprocess
@@ -7,30 +6,35 @@ import sys
import time
from .fragment import FragmentFD
-from ..compat import (
- compat_setenv,
- compat_str,
-)
-from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
+from ..compat import functools
+from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor
from ..utils import (
+ Popen,
+ RetryManager,
+ _configuration_args,
+ check_executable,
classproperty,
+ cli_bool_option,
cli_option,
cli_valueless_option,
- cli_bool_option,
- _configuration_args,
determine_ext,
- encodeFilename,
encodeArgument,
+ encodeFilename,
handle_youtubedl_headers,
- check_executable,
- Popen,
remove_end,
+ traverse_obj,
)
+class Features(enum.Enum):
+ TO_STDOUT = enum.auto()
+ MULTIPLE_FORMATS = enum.auto()
+
+
class ExternalFD(FragmentFD):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps')
- can_download_to_stdout = False
+ SUPPORTED_FEATURES = ()
+ _CAPTURE_STDERR = True
def real_download(self, filename, info_dict):
self.report_destination(filename)
@@ -56,7 +60,7 @@ class ExternalFD(FragmentFD):
}
if filename != '-':
fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
+ self.to_screen(f'\r[{self.get_basename()}] Downloaded {fsize} bytes')
self.try_rename(tmpfilename, filename)
status.update({
'downloaded_bytes': fsize,
@@ -78,7 +82,7 @@ class ExternalFD(FragmentFD):
def EXE_NAME(cls):
return cls.get_basename()
- @property
+ @functools.cached_property
def exe(self):
return self.EXE_NAME
@@ -94,9 +98,11 @@ class ExternalFD(FragmentFD):
@classmethod
def supports(cls, info_dict):
- return (
- (cls.can_download_to_stdout or not info_dict.get('to_stdout'))
- and info_dict['protocol'] in cls.SUPPORTED_PROTOCOLS)
+ return all((
+ not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES,
+ '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES,
+ all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')),
+ ))
@classmethod
def can_download(cls, info_dict, path=None):
@@ -123,33 +129,28 @@ class ExternalFD(FragmentFD):
self._debug_cmd(cmd)
if 'fragments' not in info_dict:
- p = Popen(cmd, stderr=subprocess.PIPE)
- _, stderr = p.communicate_or_kill()
- if p.returncode != 0:
- self.to_stderr(stderr.decode('utf-8', 'replace'))
- return p.returncode
+ _, stderr, returncode = Popen.run(
+ cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None)
+ if returncode and stderr:
+ self.to_stderr(stderr)
+ return returncode
- fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
- count = 0
- while count <= fragment_retries:
- p = Popen(cmd, stderr=subprocess.PIPE)
- _, stderr = p.communicate_or_kill()
- if p.returncode == 0:
+ retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
+ frag_index=None, fatal=not skip_unavailable_fragments)
+ for retry in retry_manager:
+ _, stderr, returncode = Popen.run(cmd, text=True, stderr=subprocess.PIPE)
+ if not returncode:
break
# TODO: Decide whether to retry based on error code
# https://aria2.github.io/manual/en/html/aria2c.html#exit-status
- self.to_stderr(stderr.decode('utf-8', 'replace'))
- count += 1
- if count <= fragment_retries:
- self.to_screen(
- '[%s] Got error. Retrying fragments (attempt %d of %s)...'
- % (self.get_basename(), count, self.format_retries(fragment_retries)))
- if count > fragment_retries:
- if not skip_unavailable_fragments:
- self.report_error('Giving up after %s fragment retries' % fragment_retries)
- return -1
+ if stderr:
+ self.to_stderr(stderr)
+ retry.error = Exception()
+ continue
+ if not skip_unavailable_fragments and retry_manager.error:
+ return -1
decrypt_fragment = self.decrypter(info_dict)
dest, _ = self.sanitize_open(tmpfilename, 'wb')
@@ -157,7 +158,7 @@ class ExternalFD(FragmentFD):
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
try:
src, _ = self.sanitize_open(fragment_filename, 'rb')
- except IOError as err:
+ except OSError as err:
if skip_unavailable_fragments and frag_index > 1:
self.report_skip_fragment(frag_index, err)
continue
@@ -174,12 +175,13 @@ class ExternalFD(FragmentFD):
class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V'
+ _CAPTURE_STDERR = False # curl writes the progress to stderr
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += ['--header', f'{key}: {val}']
cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
cmd += self._valueless_option('--silent', 'noprogress')
@@ -198,16 +200,6 @@ class CurlFD(ExternalFD):
cmd += ['--', info_dict['url']]
return cmd
- def _call_downloader(self, tmpfilename, info_dict):
- cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
-
- self._debug_cmd(cmd)
-
- # curl writes the progress to stderr so don't capture it.
- p = Popen(cmd)
- p.communicate_or_kill()
- return p.returncode
-
class AxelFD(ExternalFD):
AVAILABLE_OPT = '-V'
@@ -216,7 +208,7 @@ class AxelFD(ExternalFD):
cmd = [self.exe, '-o', tmpfilename]
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
- cmd += ['-H', '%s: %s' % (key, val)]
+ cmd += ['-H', f'{key}: {val}']
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@@ -229,7 +221,7 @@ class WgetFD(ExternalFD):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += ['--header', f'{key}: {val}']
cmd += self._option('--limit-rate', 'ratelimit')
retry = self._option('--tries', 'retries')
if len(retry) == 2:
@@ -240,7 +232,7 @@ class WgetFD(ExternalFD):
proxy = self.params.get('proxy')
if proxy:
for var in ('http_proxy', 'https_proxy'):
- cmd += ['--execute', '%s=%s' % (var, proxy)]
+ cmd += ['--execute', f'{var}={proxy}']
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
@@ -260,6 +252,10 @@ class Aria2cFD(ExternalFD):
check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES)
return all(check_results)
+ @staticmethod
+ def _aria2c_filename(fn):
+ return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}'
+
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
@@ -271,7 +267,7 @@ class Aria2cFD(ExternalFD):
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
- cmd += ['--header', '%s: %s' % (key, val)]
+ cmd += ['--header', f'{key}: {val}']
cmd += self._option('--max-overall-download-limit', 'ratelimit')
cmd += self._option('--interface', 'source_address')
cmd += self._option('--all-proxy', 'proxy')
@@ -288,11 +284,9 @@ class Aria2cFD(ExternalFD):
# https://github.com/aria2/aria2/issues/1373
dn = os.path.dirname(tmpfilename)
if dn:
- if not os.path.isabs(dn):
- dn = '.%s%s' % (os.path.sep, dn)
- cmd += ['--dir', dn + os.path.sep]
+ cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep]
if 'fragments' not in info_dict:
- cmd += ['--out', '.%s%s' % (os.path.sep, os.path.basename(tmpfilename))]
+ cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))]
cmd += ['--auto-file-renaming=false']
if 'fragments' in info_dict:
@@ -301,11 +295,11 @@ class Aria2cFD(ExternalFD):
url_list = []
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
- url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
+ url_list.append('%s\n\tout=%s' % (fragment['url'], self._aria2c_filename(fragment_filename)))
stream, _ = self.sanitize_open(url_list_file, 'wb')
- stream.write('\n'.join(url_list).encode('utf-8'))
+ stream.write('\n'.join(url_list).encode())
stream.close()
- cmd += ['-i', url_list_file]
+ cmd += ['-i', self._aria2c_filename(url_list_file)]
else:
cmd += ['--', info_dict['url']]
return cmd
@@ -320,13 +314,13 @@ class HttpieFD(ExternalFD):
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
- cmd += ['%s:%s' % (key, val)]
+ cmd += [f'{key}:{val}']
return cmd
class FFmpegFD(ExternalFD):
SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments')
- can_download_to_stdout = True
+ SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS)
@classmethod
def available(cls, path=None):
@@ -334,10 +328,6 @@ class FFmpegFD(ExternalFD):
# Fixme: This may be wrong when --ffmpeg-location is used
return FFmpegPostProcessor().available
- @classmethod
- def supports(cls, info_dict):
- return all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+'))
-
def on_process_started(self, proc, stdin):
""" Override this in subclasses """
pass
@@ -368,9 +358,11 @@ class FFmpegFD(ExternalFD):
if not self.params.get('verbose'):
args += ['-hide_banner']
- args += info_dict.get('_ffmpeg_args', [])
+ args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args'), default=[])
- # This option exists only for compatibility. Extractors should use `_ffmpeg_args` instead
+ # These exists only for compatibility. Extractors should use
+ # info_dict['downloader_options']['ffmpeg_args'] instead
+ args += info_dict.get('_ffmpeg_args') or []
seekable = info_dict.get('_seekable')
if seekable is not None:
# setting -seekable prevents ffmpeg from guessing if the server
@@ -380,20 +372,15 @@ class FFmpegFD(ExternalFD):
# http://trac.ffmpeg.org/ticket/6125#comment:10
args += ['-seekable', '1' if seekable else '0']
- # start_time = info_dict.get('start_time') or 0
- # if start_time:
- # args += ['-ss', compat_str(start_time)]
- # end_time = info_dict.get('end_time')
- # if end_time:
- # args += ['-t', compat_str(end_time - start_time)]
-
- if info_dict.get('http_headers') is not None and re.match(r'^https?://', urls[0]):
- # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
- # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
- headers = handle_youtubedl_headers(info_dict['http_headers'])
- args += [
+ http_headers = None
+ if info_dict.get('http_headers'):
+ youtubedl_headers = handle_youtubedl_headers(info_dict['http_headers'])
+ http_headers = [
+ # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
+ # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
'-headers',
- ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
+ ''.join(f'{key}: {val}\r\n' for key, val in youtubedl_headers.items())
+ ]
env = None
proxy = self.params.get('proxy')
@@ -411,8 +398,8 @@ class FFmpegFD(ExternalFD):
# We could switch to the following code if we are able to detect version properly
# args += ['-http_proxy', proxy]
env = os.environ.copy()
- compat_setenv('HTTP_PROXY', proxy, env=env)
- compat_setenv('http_proxy', proxy, env=env)
+ env['HTTP_PROXY'] = proxy
+ env['http_proxy'] = proxy
protocol = info_dict.get('protocol')
@@ -442,20 +429,31 @@ class FFmpegFD(ExternalFD):
if isinstance(conn, list):
for entry in conn:
args += ['-rtmp_conn', entry]
- elif isinstance(conn, compat_str):
+ elif isinstance(conn, str):
args += ['-rtmp_conn', conn]
+ start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end')
+
for i, url in enumerate(urls):
+ if http_headers is not None and re.match(r'^https?://', url):
+ args += http_headers
+ if start_time:
+ args += ['-ss', str(start_time)]
+ if end_time:
+ args += ['-t', str(end_time - start_time)]
+
args += self._configuration_args((f'_i{i + 1}', '_i')) + ['-i', url]
- args += ['-c', 'copy']
+ if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'):
+ args += ['-c', 'copy']
+
if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
stream_number = fmt.get('manifest_stream_number', 0)
args.extend(['-map', f'{i}:{stream_number}'])
if self.params.get('test', False):
- args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
+ args += ['-fs', str(self._TEST_FILE_SIZE)]
ext = info_dict['ext']
if protocol in ('m3u8', 'm3u8_native'):
@@ -490,24 +488,23 @@ class FFmpegFD(ExternalFD):
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
self._debug_cmd(args)
- proc = Popen(args, stdin=subprocess.PIPE, env=env)
- if url in ('-', 'pipe:'):
- self.on_process_started(proc, proc.stdin)
- try:
- retval = proc.wait()
- except BaseException as e:
- # subprocces.run would send the SIGKILL signal to ffmpeg and the
- # mp4 file couldn't be played, but if we ask ffmpeg to quit it
- # produces a file that is playable (this is mostly useful for live
- # streams). Note that Windows is not affected and produces playable
- # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
- if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
- proc.communicate_or_kill(b'q')
- else:
- proc.kill()
- proc.wait()
- raise
- return retval
+ with Popen(args, stdin=subprocess.PIPE, env=env) as proc:
+ if url in ('-', 'pipe:'):
+ self.on_process_started(proc, proc.stdin)
+ try:
+ retval = proc.wait()
+ except BaseException as e:
+ # subprocces.run would send the SIGKILL signal to ffmpeg and the
+ # mp4 file couldn't be played, but if we ask ffmpeg to quit it
+ # produces a file that is playable (this is mostly useful for live
+ # streams). Note that Windows is not affected and produces playable
+ # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
+ if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
+ proc.communicate_or_kill(b'q')
+ else:
+ proc.kill(timeout=None)
+ raise
+ return retval
class AVconvFD(FFmpegFD):
@@ -520,16 +517,14 @@ _BY_NAME = {
if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
}
-_BY_EXE = {klass.EXE_NAME: klass for klass in _BY_NAME.values()}
-
def list_external_downloaders():
return sorted(_BY_NAME.keys())
def get_external_downloader(external_downloader):
- """ Given the name of the executable, see whether we support the given
- downloader . """
- # Drop .exe extension on Windows
+ """ Given the name of the executable, see whether we support the given downloader """
bn = os.path.splitext(os.path.basename(external_downloader))[0]
- return _BY_NAME.get(bn, _BY_EXE.get(bn))
+ return _BY_NAME.get(bn) or next((
+ klass for klass in _BY_NAME.values() if klass.EXE_NAME in bn
+ ), None)
diff --git a/hypervideo_dl/downloader/f4m.py b/hypervideo_dl/downloader/f4m.py
index 0008b7c..306f921 100644
--- a/hypervideo_dl/downloader/f4m.py
+++ b/hypervideo_dl/downloader/f4m.py
@@ -1,23 +1,14 @@
-from __future__ import division, unicode_literals
-
+import base64
import io
import itertools
+import struct
import time
+import urllib.error
+import urllib.parse
from .fragment import FragmentFD
-from ..compat import (
- compat_b64decode,
- compat_etree_fromstring,
- compat_urlparse,
- compat_urllib_error,
- compat_urllib_parse_urlparse,
- compat_struct_pack,
- compat_struct_unpack,
-)
-from ..utils import (
- fix_xml_ampersands,
- xpath_text,
-)
+from ..compat import compat_etree_fromstring
+from ..utils import fix_xml_ampersands, xpath_text
class DataTruncatedError(Exception):
@@ -40,13 +31,13 @@ class FlvReader(io.BytesIO):
# Utility functions for reading numbers and strings
def read_unsigned_long_long(self):
- return compat_struct_unpack('!Q', self.read_bytes(8))[0]
+ return struct.unpack('!Q', self.read_bytes(8))[0]
def read_unsigned_int(self):
- return compat_struct_unpack('!I', self.read_bytes(4))[0]
+ return struct.unpack('!I', self.read_bytes(4))[0]
def read_unsigned_char(self):
- return compat_struct_unpack('!B', self.read_bytes(1))[0]
+ return struct.unpack('!B', self.read_bytes(1))[0]
def read_string(self):
res = b''
@@ -193,7 +184,7 @@ def build_fragments_list(boot_info):
first_frag_number = fragment_run_entry_table[0]['first']
fragments_counter = itertools.count(first_frag_number)
for segment, fragments_count in segment_run_table['segment_run']:
- # In some live HDS streams (for example Rai), `fragments_count` is
+ # In some live HDS streams (e.g. Rai), `fragments_count` is
# abnormal and causing out-of-memory errors. It's OK to change the
# number of fragments for live streams as they are updated periodically
if fragments_count == 4294967295 and boot_info['live']:
@@ -208,11 +199,11 @@ def build_fragments_list(boot_info):
def write_unsigned_int(stream, val):
- stream.write(compat_struct_pack('!I', val))
+ stream.write(struct.pack('!I', val))
def write_unsigned_int_24(stream, val):
- stream.write(compat_struct_pack('!I', val)[1:])
+ stream.write(struct.pack('!I', val)[1:])
def write_flv_header(stream):
@@ -261,8 +252,6 @@ class F4mFD(FragmentFD):
A downloader for f4m manifests or AdobeHDS.
"""
- FD_NAME = 'f4m'
-
def _get_unencrypted_media(self, doc):
media = doc.findall(_add_ns('media'))
if not media:
@@ -308,12 +297,12 @@ class F4mFD(FragmentFD):
# 1. http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m
bootstrap_url = node.get('url')
if bootstrap_url:
- bootstrap_url = compat_urlparse.urljoin(
+ bootstrap_url = urllib.parse.urljoin(
base_url, bootstrap_url)
boot_info = self._get_bootstrap_from_url(bootstrap_url)
else:
bootstrap_url = None
- bootstrap = compat_b64decode(node.text)
+ bootstrap = base64.b64decode(node.text)
boot_info = read_bootstrap_info(bootstrap)
return boot_info, bootstrap_url
@@ -343,14 +332,14 @@ class F4mFD(FragmentFD):
# Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
man_base_url = get_base_url(doc) or man_url
- base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
+ base_url = urllib.parse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
boot_info, bootstrap_url = self._parse_bootstrap_node(
bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
- metadata = compat_b64decode(metadata_node.text)
+ metadata = base64.b64decode(metadata_node.text)
else:
metadata = None
@@ -378,7 +367,7 @@ class F4mFD(FragmentFD):
if not live:
write_metadata_tag(dest_stream, metadata)
- base_url_parsed = compat_urllib_parse_urlparse(base_url)
+ base_url_parsed = urllib.parse.urlparse(base_url)
self._start_frag_download(ctx, info_dict)
@@ -398,9 +387,10 @@ class F4mFD(FragmentFD):
query.append(info_dict['extra_param_to_segment_url'])
url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
try:
- success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
+ success = self._download_fragment(ctx, url_parsed.geturl(), info_dict)
if not success:
return False
+ down_data = self._read_fragment(ctx)
reader = FlvReader(down_data)
while True:
try:
@@ -417,7 +407,7 @@ class F4mFD(FragmentFD):
if box_type == b'mdat':
self._append_fragment(ctx, box_data)
break
- except (compat_urllib_error.HTTPError, ) as err:
+ except urllib.error.HTTPError as err:
if live and (err.code == 404 or err.code == 410):
# We didn't keep up with the live window. Continue
# with the next available fragment.
@@ -434,6 +424,4 @@ class F4mFD(FragmentFD):
msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
self.report_warning(msg)
- self._finish_frag_download(ctx, info_dict)
-
- return True
+ return self._finish_frag_download(ctx, info_dict)
diff --git a/hypervideo_dl/downloader/fc2.py b/hypervideo_dl/downloader/fc2.py
index 157bcf2..f9763de 100644
--- a/hypervideo_dl/downloader/fc2.py
+++ b/hypervideo_dl/downloader/fc2.py
@@ -1,5 +1,3 @@
-from __future__ import division, unicode_literals
-
import threading
from .common import FileDownloader
@@ -20,6 +18,9 @@ class FC2LiveFD(FileDownloader):
heartbeat_state = [None, 1]
def heartbeat():
+ if heartbeat_state[1] < 0:
+ return
+
try:
heartbeat_state[1] += 1
ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1])
@@ -38,4 +39,8 @@ class FC2LiveFD(FileDownloader):
'ws': None,
'protocol': 'live_ffmpeg',
})
- return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
+ try:
+ return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
+ finally:
+ # stop heartbeating
+ heartbeat_state[1] = -1
diff --git a/hypervideo_dl/downloader/fragment.py b/hypervideo_dl/downloader/fragment.py
index a991c6d..e61bd0e 100644
--- a/hypervideo_dl/downloader/fragment.py
+++ b/hypervideo_dl/downloader/fragment.py
@@ -1,28 +1,20 @@
-from __future__ import division, unicode_literals
-
+import concurrent.futures
+import contextlib
import http.client
import json
import math
import os
+import struct
import time
-
-try:
- import concurrent.futures
- can_threaded_download = True
-except ImportError:
- can_threaded_download = False
+import urllib.error
from .common import FileDownloader
from .http import HttpFD
from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
-from ..compat import (
- compat_os_name,
- compat_urllib_error,
- compat_struct_pack,
-)
+from ..compat import compat_os_name
from ..utils import (
DownloadError,
- error_to_compat_str,
+ RetryManager,
encodeFilename,
sanitized_Request,
traverse_obj,
@@ -33,9 +25,7 @@ class HttpQuietDownloader(HttpFD):
def to_screen(self, *args, **kargs):
pass
- def report_retry(self, err, count, retries):
- super().to_screen(
- f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...')
+ to_console_title = to_screen
class FragmentFD(FileDownloader):
@@ -75,9 +65,9 @@ class FragmentFD(FileDownloader):
"""
def report_retry_fragment(self, err, frag_index, count, retries):
- self.to_screen(
- '\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
- % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
+ self.deprecation_warning('hypervideo_dl.downloader.FragmentFD.report_retry_fragment is deprecated. '
+ 'Use hypervideo_dl.downloader.FileDownloader.report_retry instead')
+ return self.report_retry(err, count, retries, frag_index)
def report_skip_fragment(self, frag_index, err=None):
err = f' {err};' if err else ''
@@ -131,7 +121,7 @@ class FragmentFD(FileDownloader):
'request_data': request_data,
'ctx_id': ctx.get('ctx_id'),
}
- success = ctx['dl'].download(fragment_filename, fragment_info_dict)
+ success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
return False
if fragment_info_dict.get('filetime'):
@@ -140,6 +130,8 @@ class FragmentFD(FileDownloader):
return True
def _read_fragment(self, ctx):
+ if not ctx.get('fragment_filename_sanitized'):
+ return None
try:
down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
except FileNotFoundError:
@@ -172,21 +164,13 @@ class FragmentFD(FileDownloader):
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
- self.to_screen(
- '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
+ self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
self.report_destination(ctx['filename'])
- dl = HttpQuietDownloader(
- self.ydl,
- {
- 'continuedl': self.params.get('continuedl', True),
- 'quiet': self.params.get('quiet'),
- 'noprogress': True,
- 'ratelimit': self.params.get('ratelimit'),
- 'retries': self.params.get('retries', 0),
- 'nopart': self.params.get('nopart', False),
- 'test': self.params.get('test', False),
- }
- )
+ dl = HttpQuietDownloader(self.ydl, {
+ **self.params,
+ 'noprogress': True,
+ 'test': False,
+ })
tmpfilename = self.temp_name(ctx['filename'])
open_mode = 'wb'
resume_len = 0
@@ -259,6 +243,9 @@ class FragmentFD(FileDownloader):
if s['status'] not in ('downloading', 'finished'):
return
+ if not total_frags and ctx.get('fragment_count'):
+ state['fragment_count'] = ctx['fragment_count']
+
if ctx_id is not None and s.get('ctx_id') != ctx_id:
return
@@ -308,18 +295,23 @@ class FragmentFD(FileDownloader):
self.try_remove(ytdl_filename)
elapsed = time.time() - ctx['started']
- if ctx['tmpfilename'] == '-':
- downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+ to_file = ctx['tmpfilename'] != '-'
+ if to_file:
+ downloaded_bytes = os.path.getsize(encodeFilename(ctx['tmpfilename']))
else:
+ downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+
+ if not downloaded_bytes:
+ if to_file:
+ self.try_remove(ctx['tmpfilename'])
+ self.report_error('The downloaded file is empty')
+ return False
+ elif to_file:
self.try_rename(ctx['tmpfilename'], ctx['filename'])
- if self.params.get('updatetime', True):
- filetime = ctx.get('fragment_filetime')
- if filetime:
- try:
- os.utime(ctx['filename'], (time.time(), filetime))
- except Exception:
- pass
- downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+ filetime = ctx.get('fragment_filetime')
+ if self.params.get('updatetime', True) and filetime:
+ with contextlib.suppress(Exception):
+ os.utime(ctx['filename'], (time.time(), filetime))
self._hook_progress({
'downloaded_bytes': downloaded_bytes,
@@ -331,6 +323,7 @@ class FragmentFD(FileDownloader):
'max_progress': ctx.get('max_progress'),
'progress_idx': ctx.get('progress_idx'),
}, info_dict)
+ return True
def _prepare_external_frag_download(self, ctx):
if 'live' not in ctx:
@@ -342,8 +335,7 @@ class FragmentFD(FileDownloader):
total_frags_str += ' (not including %d ad)' % ad_frags
else:
total_frags_str = 'unknown (live)'
- self.to_screen(
- '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
+ self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}')
tmpfilename = self.temp_name(ctx['filename'])
@@ -362,10 +354,12 @@ class FragmentFD(FileDownloader):
return _key_cache[url]
def decrypt_fragment(fragment, frag_content):
+ if frag_content is None:
+ return
decrypt_info = fragment.get('decrypt_info')
if not decrypt_info or decrypt_info['METHOD'] != 'AES-128':
return frag_content
- iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', fragment['media_sequence'])
+ iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence'])
decrypt_info['KEY'] = decrypt_info.get('KEY') or _get_key(info_dict.get('_decryption_key_url') or decrypt_info['URI'])
# Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded,
@@ -376,7 +370,7 @@ class FragmentFD(FileDownloader):
return decrypt_fragment
- def download_and_append_fragments_multiple(self, *args, pack_func=None, finish_func=None):
+ def download_and_append_fragments_multiple(self, *args, **kwargs):
'''
@params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
all args must be either tuple or list
@@ -384,7 +378,7 @@ class FragmentFD(FileDownloader):
interrupt_trigger = [True]
max_progress = len(args)
if max_progress == 1:
- return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func)
+ return self.download_and_append_fragments(*args[0], **kwargs)
max_workers = self.params.get('concurrent_fragment_downloads', 1)
if max_progress > 1:
self._prepare_multiline_status(max_progress)
@@ -394,8 +388,7 @@ class FragmentFD(FileDownloader):
ctx['max_progress'] = max_progress
ctx['progress_idx'] = idx
return self.download_and_append_fragments(
- ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func,
- tpe=tpe, interrupt_trigger=interrupt_trigger)
+ ctx, fragments, info_dict, **kwargs, tpe=tpe, interrupt_trigger=interrupt_trigger)
class FTPE(concurrent.futures.ThreadPoolExecutor):
# has to stop this or it's going to wait on the worker thread itself
@@ -442,18 +435,12 @@ class FragmentFD(FileDownloader):
return result
def download_and_append_fragments(
- self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None,
- tpe=None, interrupt_trigger=None):
- if not interrupt_trigger:
- interrupt_trigger = (True, )
-
- fragment_retries = self.params.get('fragment_retries', 0)
- is_fatal = (
- ((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0))
- if self.params.get('skip_unavailable_fragments', True) else (lambda _: True))
+ self, ctx, fragments, info_dict, *, is_fatal=(lambda idx: False),
+ pack_func=(lambda content, idx: content), finish_func=None,
+ tpe=None, interrupt_trigger=(True, )):
- if not pack_func:
- pack_func = lambda frag_content, _: frag_content
+ if not self.params.get('skip_unavailable_fragments', True):
+ is_fatal = lambda _: True
def download_fragment(fragment, ctx):
if not interrupt_trigger[0]:
@@ -467,31 +454,25 @@ class FragmentFD(FileDownloader):
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
# Never skip the first fragment
- fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0
- while count <= fragment_retries:
- try:
- if self._download_fragment(ctx, fragment['url'], info_dict, headers):
- break
- return
- except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
- # Unavailable (possibly temporary) fragments may be served.
- # First we try to retry then either skip or abort.
- # See https://github.com/ytdl-org/youtube-dl/issues/10165,
- # https://github.com/ytdl-org/youtube-dl/issues/10448).
- count += 1
- ctx['last_error'] = err
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- except DownloadError:
- # Don't retry fragment if error occurred during HTTP downloading
- # itself since it has own retry settings
- if not fatal:
- break
- raise
+ fatal = is_fatal(fragment.get('index') or (frag_index - 1))
- if count > fragment_retries and fatal:
- ctx['dest_stream'].close()
- self.report_error('Giving up after %s fragment retries' % fragment_retries)
+ def error_callback(err, count, retries):
+ if fatal and count > retries:
+ ctx['dest_stream'].close()
+ self.report_retry(err, count, retries, frag_index, fatal)
+ ctx['last_error'] = err
+
+ for retry in RetryManager(self.params.get('fragment_retries'), error_callback):
+ try:
+ ctx['fragment_count'] = fragment.get('fragment_count')
+ if not self._download_fragment(ctx, fragment['url'], info_dict, headers):
+ return
+ except (urllib.error.HTTPError, http.client.IncompleteRead) as err:
+ retry.error = err
+ continue
+ except DownloadError: # has own retry settings
+ if fatal:
+ raise
def append_fragment(frag_content, frag_index, ctx):
if frag_content:
@@ -508,8 +489,7 @@ class FragmentFD(FileDownloader):
max_workers = math.ceil(
self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1))
- if can_threaded_download and max_workers > 1:
-
+ if max_workers > 1:
def _download_fragment(fragment):
ctx_copy = ctx.copy()
download_fragment(fragment, ctx_copy)
@@ -517,23 +497,36 @@ class FragmentFD(FileDownloader):
self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
- for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
- ctx['fragment_filename_sanitized'] = frag_filename
- ctx['fragment_index'] = frag_index
- result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx)
- if not result:
- return False
+ try:
+ for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
+ ctx.update({
+ 'fragment_filename_sanitized': frag_filename,
+ 'fragment_index': frag_index,
+ })
+ if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx):
+ return False
+ except KeyboardInterrupt:
+ self._finish_multiline_status()
+ self.report_error(
+ 'Interrupted by user. Waiting for all threads to shutdown...', is_error=False, tb=False)
+ pool.shutdown(wait=False)
+ raise
else:
for fragment in fragments:
if not interrupt_trigger[0]:
break
- download_fragment(fragment, ctx)
- result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
+ try:
+ download_fragment(fragment, ctx)
+ result = append_fragment(
+ decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
+ except KeyboardInterrupt:
+ if info_dict.get('is_live'):
+ break
+ raise
if not result:
return False
if finish_func is not None:
ctx['dest_stream'].write(finish_func())
ctx['dest_stream'].flush()
- self._finish_frag_download(ctx, info_dict)
- return True
+ return self._finish_frag_download(ctx, info_dict)
diff --git a/hypervideo_dl/downloader/hls.py b/hypervideo_dl/downloader/hls.py
index f3f32b5..4520edc 100644
--- a/hypervideo_dl/downloader/hls.py
+++ b/hypervideo_dl/downloader/hls.py
@@ -1,23 +1,14 @@
-from __future__ import unicode_literals
-
-import re
-import io
import binascii
+import io
+import re
+import urllib.parse
-from ..downloader import get_suitable_downloader
-from .fragment import FragmentFD
+from . import get_suitable_downloader
from .external import FFmpegFD
-
-from ..compat import (
- compat_pycrypto_AES,
- compat_urlparse,
-)
-from ..utils import (
- parse_m3u8_attributes,
- update_url_query,
- bug_reports_message,
-)
+from .fragment import FragmentFD
from .. import webvtt
+from ..dependencies import Cryptodome_AES
+from ..utils import bug_reports_message, parse_m3u8_attributes, update_url_query
class HlsFD(FragmentFD):
@@ -70,12 +61,18 @@ class HlsFD(FragmentFD):
s = urlh.read().decode('utf-8', 'ignore')
can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None
- if can_download and not compat_pycrypto_AES and '#EXT-X-KEY:METHOD=AES-128' in s:
- if FFmpegFD.available():
+ if can_download:
+ has_ffmpeg = FFmpegFD.available()
+ no_crypto = not Cryptodome_AES and '#EXT-X-KEY:METHOD=AES-128' in s
+ if no_crypto and has_ffmpeg:
can_download, message = False, 'The stream has AES-128 encryption and pycryptodome is not available'
- else:
+ elif no_crypto:
message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodome are available; '
'Decryption will be performed natively, but will be extremely slow')
+ elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
+ install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
+ message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
+ f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
if not can_download:
has_drm = re.search('|'.join([
r'#EXT-X-FAXS-CM:', # Adobe Flash Access
@@ -102,8 +99,7 @@ class HlsFD(FragmentFD):
if real_downloader and not real_downloader.supports_manifest(s):
real_downloader = None
if real_downloader:
- self.to_screen(
- '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
+ self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}')
def is_ad_fragment_start(s):
return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
@@ -150,7 +146,7 @@ class HlsFD(FragmentFD):
extra_query = None
extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
if extra_param_to_segment_url:
- extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
+ extra_query = urllib.parse.parse_qs(extra_param_to_segment_url)
i = 0
media_sequence = 0
decrypt_info = {'METHOD': 'NONE'}
@@ -172,7 +168,7 @@ class HlsFD(FragmentFD):
frag_url = (
line
if re.match(r'^https?://', line)
- else compat_urlparse.urljoin(man_url, line))
+ else urllib.parse.urljoin(man_url, line))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
@@ -197,10 +193,18 @@ class HlsFD(FragmentFD):
frag_url = (
map_info.get('URI')
if re.match(r'^https?://', map_info.get('URI'))
- else compat_urlparse.urljoin(man_url, map_info.get('URI')))
+ else urllib.parse.urljoin(man_url, map_info.get('URI')))
if extra_query:
frag_url = update_url_query(frag_url, extra_query)
+ if map_info.get('BYTERANGE'):
+ splitted_byte_range = map_info.get('BYTERANGE').split('@')
+ sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
+ byte_range = {
+ 'start': sub_range_start,
+ 'end': sub_range_start + int(splitted_byte_range[0]),
+ }
+
fragments.append({
'frag_index': frag_index,
'url': frag_url,
@@ -210,14 +214,6 @@ class HlsFD(FragmentFD):
})
media_sequence += 1
- if map_info.get('BYTERANGE'):
- splitted_byte_range = map_info.get('BYTERANGE').split('@')
- sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
- byte_range = {
- 'start': sub_range_start,
- 'end': sub_range_start + int(splitted_byte_range[0]),
- }
-
elif line.startswith('#EXT-X-KEY'):
decrypt_url = decrypt_info.get('URI')
decrypt_info = parse_m3u8_attributes(line[11:])
@@ -225,7 +221,7 @@ class HlsFD(FragmentFD):
if 'IV' in decrypt_info:
decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
if not re.match(r'^https?://', decrypt_info['URI']):
- decrypt_info['URI'] = compat_urlparse.urljoin(
+ decrypt_info['URI'] = urllib.parse.urljoin(
man_url, decrypt_info['URI'])
if extra_query:
decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
@@ -339,7 +335,7 @@ class HlsFD(FragmentFD):
continue
block.write_into(output)
- return output.getvalue().encode('utf-8')
+ return output.getvalue().encode()
def fin_fragments():
dedup_window = extra_state.get('webvtt_dedup_window')
@@ -350,7 +346,7 @@ class HlsFD(FragmentFD):
for cue in dedup_window:
webvtt.CueBlock.from_json(cue).write_into(output)
- return output.getvalue().encode('utf-8')
+ return output.getvalue().encode()
self.download_and_append_fragments(
ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
diff --git a/hypervideo_dl/downloader/http.py b/hypervideo_dl/downloader/http.py
index 591a9b0..95c870e 100644
--- a/hypervideo_dl/downloader/http.py
+++ b/hypervideo_dl/downloader/http.py
@@ -1,29 +1,33 @@
-from __future__ import unicode_literals
-
+import http.client
import os
+import random
+import socket
import ssl
import time
-import random
+import urllib.error
from .common import FileDownloader
-from ..compat import (
- compat_urllib_error,
- compat_http_client
-)
from ..utils import (
ContentTooShortError,
+ RetryManager,
+ ThrottledDownload,
+ XAttrMetadataError,
+ XAttrUnavailableError,
encodeFilename,
int_or_none,
parse_http_range,
sanitized_Request,
- ThrottledDownload,
try_call,
write_xattr,
- XAttrMetadataError,
- XAttrUnavailableError,
)
-RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, compat_http_client.HTTPException)
+RESPONSE_READ_EXCEPTIONS = (
+ TimeoutError,
+ socket.timeout, # compat: py < 3.10
+ ConnectionError,
+ ssl.SSLError,
+ http.client.HTTPException
+)
class HttpFD(FileDownloader):
@@ -69,9 +73,6 @@ class HttpFD(FileDownloader):
ctx.is_resume = ctx.resume_len > 0
- count = 0
- retries = self.params.get('retries', 0)
-
class SucceedDownload(Exception):
pass
@@ -134,19 +135,18 @@ class HttpFD(FileDownloader):
if has_range:
content_range = ctx.data.headers.get('Content-Range')
content_range_start, content_range_end, content_len = parse_http_range(content_range)
- if content_range_start is not None and range_start == content_range_start:
- # Content-Range is present and matches requested Range, resume is possible
- accept_content_len = (
+ # Content-Range is present and matches requested Range, resume is possible
+ if range_start == content_range_start and (
# Non-chunked download
not ctx.chunk_size
# Chunked download and requested piece or
# its part is promised to be served
or content_range_end == range_end
- or content_len < range_end)
- if accept_content_len:
- ctx.content_len = content_len
- ctx.data_len = min(content_len, req_end or content_len) - (req_start or 0)
- return
+ or content_len < range_end):
+ ctx.content_len = content_len
+ if content_len or req_end:
+ ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0)
+ return
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
@@ -154,7 +154,7 @@ class HttpFD(FileDownloader):
ctx.resume_len = 0
ctx.open_mode = 'wb'
ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
- except (compat_urllib_error.HTTPError, ) as err:
+ except urllib.error.HTTPError as err:
if err.code == 416:
# Unable to resume (requested range not satisfiable)
try:
@@ -162,7 +162,7 @@ class HttpFD(FileDownloader):
ctx.data = self.ydl.urlopen(
sanitized_Request(url, request_data, headers))
content_length = ctx.data.info()['Content-Length']
- except (compat_urllib_error.HTTPError, ) as err:
+ except urllib.error.HTTPError as err:
if err.code < 500 or err.code >= 600:
raise
else:
@@ -195,7 +195,7 @@ class HttpFD(FileDownloader):
# Unexpected HTTP error
raise
raise RetryDownload(err)
- except compat_urllib_error.URLError as err:
+ except urllib.error.URLError as err:
if isinstance(err.reason, ssl.CertificateError):
raise
raise RetryDownload(err)
@@ -204,6 +204,12 @@ class HttpFD(FileDownloader):
except RESPONSE_READ_EXCEPTIONS as err:
raise RetryDownload(err)
+ def close_stream():
+ if ctx.stream is not None:
+ if not ctx.tmpfilename == '-':
+ ctx.stream.close()
+ ctx.stream = None
+
def download():
data_len = ctx.data.info().get('Content-length', None)
@@ -220,10 +226,12 @@ class HttpFD(FileDownloader):
min_data_len = self.params.get('min_filesize')
max_data_len = self.params.get('max_filesize')
if min_data_len is not None and data_len < min_data_len:
- self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+ self.to_screen(
+ f'\r[download] File is smaller than min-filesize ({data_len} bytes < {min_data_len} bytes). Aborting.')
return False
if max_data_len is not None and data_len > max_data_len:
- self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+ self.to_screen(
+ f'\r[download] File is larger than max-filesize ({data_len} bytes > {max_data_len} bytes). Aborting.')
return False
byte_counter = 0 + ctx.resume_len
@@ -235,12 +243,9 @@ class HttpFD(FileDownloader):
before = start # start measuring
def retry(e):
- to_stdout = ctx.tmpfilename == '-'
- if ctx.stream is not None:
- if not to_stdout:
- ctx.stream.close()
- ctx.stream = None
- ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
+ close_stream()
+ ctx.resume_len = (byte_counter if ctx.tmpfilename == '-'
+ else os.path.getsize(encodeFilename(ctx.tmpfilename)))
raise RetryDownload(e)
while True:
@@ -264,19 +269,19 @@ class HttpFD(FileDownloader):
assert ctx.stream is not None
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
self.report_destination(ctx.filename)
- except (OSError, IOError) as err:
+ except OSError as err:
self.report_error('unable to open for writing: %s' % str(err))
return False
if self.params.get('xattr_set_filesize', False) and data_len is not None:
try:
- write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+ write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode())
except (XAttrUnavailableError, XAttrMetadataError) as err:
self.report_error('unable to set filesize xattr: %s' % str(err))
try:
ctx.stream.write(data_block)
- except (IOError, OSError) as err:
+ except OSError as err:
self.to_stderr('\n')
self.report_error('unable to write data: %s' % str(err))
return False
@@ -342,9 +347,7 @@ class HttpFD(FileDownloader):
if data_len is not None and byte_counter != data_len:
err = ContentTooShortError(byte_counter, int(data_len))
- if count <= retries:
- retry(err)
- raise err
+ retry(err)
self.try_rename(ctx.tmpfilename, ctx.filename)
@@ -363,21 +366,20 @@ class HttpFD(FileDownloader):
return True
- while count <= retries:
+ for retry in RetryManager(self.params.get('retries'), self.report_retry):
try:
establish_connection()
return download()
- except RetryDownload as e:
- count += 1
- if count <= retries:
- self.report_retry(e.source_error, count, retries)
- else:
- self.to_screen(f'[download] Got server HTTP error: {e.source_error}')
+ except RetryDownload as err:
+ retry.error = err.source_error
continue
except NextFragment:
+ retry.error = None
+ retry.attempt -= 1
continue
except SucceedDownload:
return True
-
- self.report_error('giving up after %s retries' % retries)
+ except: # noqa: E722
+ close_stream()
+ raise
return False
diff --git a/hypervideo_dl/downloader/ism.py b/hypervideo_dl/downloader/ism.py
index 4d5618c..a157a8a 100644
--- a/hypervideo_dl/downloader/ism.py
+++ b/hypervideo_dl/downloader/ism.py
@@ -1,27 +1,23 @@
-from __future__ import unicode_literals
-
-import time
import binascii
import io
+import struct
+import time
+import urllib.error
from .fragment import FragmentFD
-from ..compat import (
- compat_Struct,
- compat_urllib_error,
-)
+from ..utils import RetryManager
+u8 = struct.Struct('>B')
+u88 = struct.Struct('>Bx')
+u16 = struct.Struct('>H')
+u1616 = struct.Struct('>Hxx')
+u32 = struct.Struct('>I')
+u64 = struct.Struct('>Q')
-u8 = compat_Struct('>B')
-u88 = compat_Struct('>Bx')
-u16 = compat_Struct('>H')
-u1616 = compat_Struct('>Hxx')
-u32 = compat_Struct('>I')
-u64 = compat_Struct('>Q')
-
-s88 = compat_Struct('>bx')
-s16 = compat_Struct('>h')
-s1616 = compat_Struct('>hxx')
-s32 = compat_Struct('>i')
+s88 = struct.Struct('>bx')
+s16 = struct.Struct('>h')
+s1616 = struct.Struct('>hxx')
+s32 = struct.Struct('>i')
unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000)
@@ -142,6 +138,8 @@ def write_piff_header(stream, params):
if fourcc == 'AACL':
sample_entry_box = box(b'mp4a', sample_entry_payload)
+ if fourcc == 'EC-3':
+ sample_entry_box = box(b'ec-3', sample_entry_payload)
elif stream_type == 'video':
sample_entry_payload += u16.pack(0) # pre defined
sample_entry_payload += u16.pack(0) # reserved
@@ -156,7 +154,7 @@ def write_piff_header(stream, params):
sample_entry_payload += u16.pack(0x18) # depth
sample_entry_payload += s16.pack(-1) # pre defined
- codec_private_data = binascii.unhexlify(params['codec_private_data'].encode('utf-8'))
+ codec_private_data = binascii.unhexlify(params['codec_private_data'].encode())
if fourcc in ('H264', 'AVC1'):
sps, pps = codec_private_data.split(u32.pack(1))[1:]
avcc_payload = u8.pack(1) # configuration version
@@ -235,8 +233,6 @@ class IsmFD(FragmentFD):
Download segments in a ISM manifest
"""
- FD_NAME = 'ism'
-
def real_download(self, filename, info_dict):
segments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
@@ -252,7 +248,6 @@ class IsmFD(FragmentFD):
'ism_track_written': False,
})
- fragment_retries = self.params.get('fragment_retries', 0)
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
frag_index = 0
@@ -260,8 +255,10 @@ class IsmFD(FragmentFD):
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
- count = 0
- while count <= fragment_retries:
+
+ retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry,
+ frag_index=frag_index, fatal=not skip_unavailable_fragments)
+ for retry in retry_manager:
try:
success = self._download_fragment(ctx, segment['url'], info_dict)
if not success:
@@ -274,18 +271,13 @@ class IsmFD(FragmentFD):
write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
extra_state['ism_track_written'] = True
self._append_fragment(ctx, frag_content)
- break
- except compat_urllib_error.HTTPError as err:
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- if count > fragment_retries:
- if skip_unavailable_fragments:
- self.report_skip_fragment(frag_index)
+ except urllib.error.HTTPError as err:
+ retry.error = err
continue
- self.report_error('giving up after %s fragment retries' % fragment_retries)
- return False
- self._finish_frag_download(ctx, info_dict)
+ if retry_manager.error:
+ if not skip_unavailable_fragments:
+ return False
+ self.report_skip_fragment(frag_index)
- return True
+ return self._finish_frag_download(ctx, info_dict)
diff --git a/hypervideo_dl/downloader/mhtml.py b/hypervideo_dl/downloader/mhtml.py
index c8332c0..170a78d 100644
--- a/hypervideo_dl/downloader/mhtml.py
+++ b/hypervideo_dl/downloader/mhtml.py
@@ -1,24 +1,15 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import io
import quopri
import re
import uuid
from .fragment import FragmentFD
-from ..utils import (
- escapeHTML,
- formatSeconds,
- srt_subtitles_timecode,
- urljoin,
-)
+from ..compat import imghdr
+from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin
from ..version import __version__ as YT_DLP_VERSION
class MhtmlFD(FragmentFD):
- FD_NAME = 'mhtml'
-
_STYLESHEET = """\
html, body {
margin: 0;
@@ -62,7 +53,7 @@ body > figure > img {
def _escape_mime(s):
return '=?utf-8?Q?' + (b''.join(
bytes((b,)) if b >= 0x20 else b'=%02X' % b
- for b in quopri.encodestring(s.encode('utf-8'), header=True)
+ for b in quopri.encodestring(s.encode(), header=True)
)).decode('us-ascii') + '?='
def _gen_cid(self, i, fragment, frag_boundary):
@@ -159,7 +150,7 @@ body > figure > img {
length=len(stub),
title=self._escape_mime(title),
stub=stub
- ).encode('utf-8'))
+ ).encode())
extra_state['header_written'] = True
for i, fragment in enumerate(fragments):
@@ -176,21 +167,13 @@ body > figure > img {
continue
frag_content = self._read_fragment(ctx)
- mime_type = b'image/jpeg'
- if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
- mime_type = b'image/png'
- if frag_content.startswith((b'GIF87a', b'GIF89a')):
- mime_type = b'image/gif'
- if frag_content.startswith(b'RIFF') and frag_content[8:12] == 'WEBP':
- mime_type = b'image/webp'
-
frag_header = io.BytesIO()
frag_header.write(
b'--%b\r\n' % frag_boundary.encode('us-ascii'))
frag_header.write(
b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii'))
frag_header.write(
- b'Content-type: %b\r\n' % mime_type)
+ b'Content-type: %b\r\n' % f'image/{imghdr.what(h=frag_content) or "jpeg"}'.encode())
frag_header.write(
b'Content-length: %u\r\n' % len(frag_content))
frag_header.write(
@@ -203,5 +186,4 @@ body > figure > img {
ctx['dest_stream'].write(
b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii'))
- self._finish_frag_download(ctx, info_dict)
- return True
+ return self._finish_frag_download(ctx, info_dict)
diff --git a/hypervideo_dl/downloader/niconico.py b/hypervideo_dl/downloader/niconico.py
index 521dfec..77ed39e 100644
--- a/hypervideo_dl/downloader/niconico.py
+++ b/hypervideo_dl/downloader/niconico.py
@@ -1,22 +1,17 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
import threading
+from . import get_suitable_downloader
from .common import FileDownloader
-from ..downloader import get_suitable_downloader
-from ..extractor.niconico import NiconicoIE
from ..utils import sanitized_Request
class NiconicoDmcFD(FileDownloader):
""" Downloading niconico douga from DMC with heartbeat """
- FD_NAME = 'niconico_dmc'
-
def real_download(self, filename, info_dict):
- self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
+ from ..extractor.niconico import NiconicoIE
+ self.to_screen('[%s] Downloading from DMC' % self.FD_NAME)
ie = NiconicoIE(self.ydl)
info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict)
@@ -54,4 +49,4 @@ class NiconicoDmcFD(FileDownloader):
with heartbeat_lock:
timer[0].cancel()
download_complete = True
- return success
+ return success
diff --git a/hypervideo_dl/downloader/rtmp.py b/hypervideo_dl/downloader/rtmp.py
index 90f1acf..0e09525 100644
--- a/hypervideo_dl/downloader/rtmp.py
+++ b/hypervideo_dl/downloader/rtmp.py
@@ -1,18 +1,15 @@
-from __future__ import unicode_literals
-
import os
import re
import subprocess
import time
from .common import FileDownloader
-from ..compat import compat_str
from ..utils import (
+ Popen,
check_executable,
- encodeFilename,
encodeArgument,
+ encodeFilename,
get_exe_version,
- Popen,
)
@@ -94,8 +91,7 @@ class RtmpFD(FileDownloader):
self.to_screen('')
return proc.wait()
except BaseException: # Including KeyboardInterrupt
- proc.kill()
- proc.wait()
+ proc.kill(timeout=None)
raise
url = info_dict['url']
@@ -146,7 +142,7 @@ class RtmpFD(FileDownloader):
if isinstance(conn, list):
for entry in conn:
basic_args += ['--conn', entry]
- elif isinstance(conn, compat_str):
+ elif isinstance(conn, str):
basic_args += ['--conn', conn]
if protocol is not None:
basic_args += ['--protocol', protocol]
diff --git a/hypervideo_dl/downloader/rtsp.py b/hypervideo_dl/downloader/rtsp.py
index 7815d59..e89269f 100644
--- a/hypervideo_dl/downloader/rtsp.py
+++ b/hypervideo_dl/downloader/rtsp.py
@@ -1,13 +1,8 @@
-from __future__ import unicode_literals
-
import os
import subprocess
from .common import FileDownloader
-from ..utils import (
- check_executable,
- encodeFilename,
-)
+from ..utils import check_executable, encodeFilename
class RtspFD(FileDownloader):
@@ -32,7 +27,7 @@ class RtspFD(FileDownloader):
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
- self.to_screen('\r[%s] %s bytes' % (args[0], fsize))
+ self.to_screen(f'\r[{args[0]}] {fsize} bytes')
self.try_rename(tmpfilename, filename)
self._hook_progress({
'downloaded_bytes': fsize,
diff --git a/hypervideo_dl/downloader/websocket.py b/hypervideo_dl/downloader/websocket.py
index 58e2bce..6837ff1 100644
--- a/hypervideo_dl/downloader/websocket.py
+++ b/hypervideo_dl/downloader/websocket.py
@@ -1,19 +1,12 @@
+import asyncio
+import contextlib
import os
import signal
-import asyncio
import threading
-try:
- import websockets
-except (ImportError, SyntaxError):
- # websockets 3.10 on python 3.6 causes SyntaxError
- # See https://github.com/hypervideo/hypervideo/issues/2633
- has_websockets = False
-else:
- has_websockets = True
-
from .common import FileDownloader
from .external import FFmpegFD
+from ..dependencies import websockets
class FFmpegSinkFD(FileDownloader):
@@ -26,14 +19,12 @@ class FFmpegSinkFD(FileDownloader):
async def call_conn(proc, stdin):
try:
await self.real_connection(stdin, info_dict)
- except (BrokenPipeError, OSError):
+ except OSError:
pass
finally:
- try:
+ with contextlib.suppress(OSError):
stdin.flush()
stdin.close()
- except OSError:
- pass
os.kill(os.getpid(), signal.SIGINT)
class FFmpegStdinFD(FFmpegFD):
diff --git a/hypervideo_dl/downloader/youtube_live_chat.py b/hypervideo_dl/downloader/youtube_live_chat.py
index dd21ac8..dfd290a 100644
--- a/hypervideo_dl/downloader/youtube_live_chat.py
+++ b/hypervideo_dl/downloader/youtube_live_chat.py
@@ -1,24 +1,20 @@
-from __future__ import division, unicode_literals
-
import json
import time
+import urllib.error
from .fragment import FragmentFD
-from ..compat import compat_urllib_error
from ..utils import (
- try_get,
+ RegexNotFoundError,
+ RetryManager,
dict_get,
int_or_none,
- RegexNotFoundError,
+ try_get,
)
-from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
class YoutubeLiveChatFD(FragmentFD):
""" Downloads YouTube live chats fragment by fragment """
- FD_NAME = 'youtube_live_chat'
-
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
@@ -26,7 +22,6 @@ class YoutubeLiveChatFD(FragmentFD):
self.report_warning('Live chat download runs until the livestream ends. '
'If you wish to download the video simultaneously, run a separate hypervideo instance')
- fragment_retries = self.params.get('fragment_retries', 0)
test = self.params.get('test', False)
ctx = {
@@ -35,7 +30,9 @@ class YoutubeLiveChatFD(FragmentFD):
'total_frags': None,
}
- ie = YT_BaseIE(self.ydl)
+ from ..extractor.youtube import YoutubeBaseInfoExtractor
+
+ ie = YoutubeBaseInfoExtractor(self.ydl)
start_time = int(time.time() * 1000)
@@ -54,7 +51,7 @@ class YoutubeLiveChatFD(FragmentFD):
replay_chat_item_action = action['replayChatItemAction']
offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
processed_fragment.extend(
- json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+ json.dumps(action, ensure_ascii=False).encode() + b'\n')
if offset is not None:
continuation = try_get(
live_chat_continuation,
@@ -96,7 +93,7 @@ class YoutubeLiveChatFD(FragmentFD):
'isLive': True,
}
processed_fragment.extend(
- json.dumps(pseudo_action, ensure_ascii=False).encode('utf-8') + b'\n')
+ json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
continuation_data_getters = [
lambda x: x['continuations'][0]['invalidationContinuationData'],
lambda x: x['continuations'][0]['timedContinuationData'],
@@ -112,8 +109,7 @@ class YoutubeLiveChatFD(FragmentFD):
return continuation_id, live_offset, click_tracking_params
def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
- count = 0
- while count <= fragment_retries:
+ for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index):
try:
success = dl_fragment(url, request_data, headers)
if not success:
@@ -128,21 +124,15 @@ class YoutubeLiveChatFD(FragmentFD):
live_chat_continuation = try_get(
data,
lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
- if info_dict['protocol'] == 'youtube_live_chat_replay':
- if frag_index == 1:
- continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
- else:
- continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
- elif info_dict['protocol'] == 'youtube_live_chat':
- continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
- return True, continuation_id, offset, click_tracking_params
- except compat_urllib_error.HTTPError as err:
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- if count > fragment_retries:
- self.report_error('giving up after %s fragment retries' % fragment_retries)
- return False, None, None, None
+
+ func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live
+ or frag_index == 1 and try_refresh_replay_beginning
+ or parse_actions_replay)
+ return (True, *func(live_chat_continuation))
+ except urllib.error.HTTPError as err:
+ retry.error = err
+ continue
+ return False, None, None, None
self._prepare_and_start_frag_download(ctx, info_dict)
@@ -190,7 +180,7 @@ class YoutubeLiveChatFD(FragmentFD):
request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
headers.update({'content-type': 'application/json'})
- fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode('utf-8') + b'\n'
+ fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
url, frag_index, fragment_request_data, headers)
else:
@@ -201,8 +191,7 @@ class YoutubeLiveChatFD(FragmentFD):
if test:
break
- self._finish_frag_download(ctx, info_dict)
- return True
+ return self._finish_frag_download(ctx, info_dict)
@staticmethod
def parse_live_timestamp(action):