aboutsummaryrefslogtreecommitdiffstats
path: root/hypervideo_dl/downloader
diff options
context:
space:
mode:
Diffstat (limited to 'hypervideo_dl/downloader')
-rw-r--r--hypervideo_dl/downloader/__init__.py17
-rw-r--r--hypervideo_dl/downloader/common.py99
-rw-r--r--hypervideo_dl/downloader/dash.py68
-rw-r--r--hypervideo_dl/downloader/external.py102
-rw-r--r--hypervideo_dl/downloader/f4m.py2
-rw-r--r--hypervideo_dl/downloader/fc2.py41
-rw-r--r--hypervideo_dl/downloader/fragment.py171
-rw-r--r--hypervideo_dl/downloader/hls.py9
-rw-r--r--hypervideo_dl/downloader/http.py130
-rw-r--r--hypervideo_dl/downloader/ism.py4
-rw-r--r--hypervideo_dl/downloader/mhtml.py13
-rw-r--r--hypervideo_dl/downloader/rtmp.py3
-rw-r--r--hypervideo_dl/downloader/websocket.py7
-rw-r--r--hypervideo_dl/downloader/youtube_live_chat.py9
14 files changed, 447 insertions, 228 deletions
diff --git a/hypervideo_dl/downloader/__init__.py b/hypervideo_dl/downloader/__init__.py
index 2449c74..96d484d 100644
--- a/hypervideo_dl/downloader/__init__.py
+++ b/hypervideo_dl/downloader/__init__.py
@@ -12,10 +12,15 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
info_copy = info_dict.copy()
info_copy['to_stdout'] = to_stdout
- downloaders = [_get_suitable_downloader(info_copy, proto, params, default)
- for proto in (protocol or info_copy['protocol']).split('+')]
+ protocols = (protocol or info_copy['protocol']).split('+')
+ downloaders = [_get_suitable_downloader(info_copy, proto, params, default) for proto in protocols]
+
if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params):
return FFmpegFD
+ elif (set(downloaders) == {DashSegmentsFD}
+ and not (to_stdout and len(protocols) > 1)
+ and set(protocols) == {'http_dash_segments_generator'}):
+ return DashSegmentsFD
elif len(downloaders) == 1:
return downloaders[0]
return None
@@ -25,6 +30,7 @@ def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=N
from .common import FileDownloader
from .dash import DashSegmentsFD
from .f4m import F4mFD
+from .fc2 import FC2LiveFD
from .hls import HlsFD
from .http import HttpFD
from .rtmp import RtmpFD
@@ -41,6 +47,7 @@ from .external import (
PROTOCOL_MAP = {
'rtmp': RtmpFD,
+ 'rtmpe': RtmpFD,
'rtmp_ffmpeg': FFmpegFD,
'm3u8_native': HlsFD,
'm3u8': FFmpegFD,
@@ -48,9 +55,11 @@ PROTOCOL_MAP = {
'rtsp': RtspFD,
'f4m': F4mFD,
'http_dash_segments': DashSegmentsFD,
+ 'http_dash_segments_generator': DashSegmentsFD,
'ism': IsmFD,
'mhtml': MhtmlFD,
'niconico_dmc': NiconicoDmcFD,
+ 'fc2_live': FC2LiveFD,
'websocket_frag': WebSocketFragmentFD,
'youtube_live_chat': YoutubeLiveChatFD,
'youtube_live_chat_replay': YoutubeLiveChatFD,
@@ -62,6 +71,7 @@ def shorten_protocol_name(proto, simplify=False):
'm3u8_native': 'm3u8_n',
'rtmp_ffmpeg': 'rtmp_f',
'http_dash_segments': 'dash',
+ 'http_dash_segments_generator': 'dash_g',
'niconico_dmc': 'dmc',
'websocket_frag': 'WSfrag',
}
@@ -70,6 +80,7 @@ def shorten_protocol_name(proto, simplify=False):
'https': 'http',
'ftps': 'ftp',
'm3u8_native': 'm3u8',
+ 'http_dash_segments_generator': 'dash',
'rtmp_ffmpeg': 'rtmp',
'm3u8_frag_urls': 'm3u8',
'dash_frag_urls': 'dash',
@@ -108,7 +119,7 @@ def _get_suitable_downloader(info_dict, protocol, params, default):
return FFmpegFD
elif (external_downloader or '').lower() == 'native':
return HlsFD
- elif get_suitable_downloader(
+ elif protocol == 'm3u8_native' and get_suitable_downloader(
info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']):
return HlsFD
elif params.get('hls_prefer_native') is True:
diff --git a/hypervideo_dl/downloader/common.py b/hypervideo_dl/downloader/common.py
index 27ca2cd..7cef3e8 100644
--- a/hypervideo_dl/downloader/common.py
+++ b/hypervideo_dl/downloader/common.py
@@ -4,14 +4,17 @@ import os
import re
import time
import random
+import errno
from ..utils import (
decodeArgument,
encodeFilename,
error_to_compat_str,
format_bytes,
+ sanitize_open,
shell_quote,
timeconvert,
+ timetuple_from_msec,
)
from ..minicurses import (
MultilineLogger,
@@ -38,6 +41,7 @@ class FileDownloader(object):
ratelimit: Download speed limit, in bytes/sec.
throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
retries: Number of times to retry for HTTP error 5xx
+ file_access_retries: Number of times to retry on file access error
buffersize: Size of download buffer in bytes.
noresizebuffer: Do not automatically resize the download buffer.
continuedl: Try to continue downloads if possible.
@@ -75,14 +79,12 @@ class FileDownloader(object):
@staticmethod
def format_seconds(seconds):
- (mins, secs) = divmod(seconds, 60)
- (hours, mins) = divmod(mins, 60)
- if hours > 99:
+ time = timetuple_from_msec(seconds * 1000)
+ if time.hours > 99:
return '--:--:--'
- if hours == 0:
- return '%02d:%02d' % (mins, secs)
- else:
- return '%02d:%02d:%02d' % (hours, mins, secs)
+ if not time.hours:
+ return '%02d:%02d' % time[1:-1]
+ return '%02d:%02d:%02d' % time[:-1]
@staticmethod
def calc_percent(byte_counter, data_len):
@@ -94,6 +96,8 @@ class FileDownloader(object):
def format_percent(percent):
if percent is None:
return '---.-%'
+ elif percent == 100:
+ return '100%'
return '%6s' % ('%3.1f%%' % percent)
@staticmethod
@@ -155,7 +159,7 @@ class FileDownloader(object):
return int(round(number * multiplier))
def to_screen(self, *args, **kargs):
- self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs)
+ self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
def to_stderr(self, message):
self.ydl.to_stderr(message)
@@ -206,13 +210,41 @@ class FileDownloader(object):
def ytdl_filename(self, filename):
return filename + '.ytdl'
+ def wrap_file_access(action, *, fatal=False):
+ def outer(func):
+ def inner(self, *args, **kwargs):
+ file_access_retries = self.params.get('file_access_retries', 0)
+ retry = 0
+ while True:
+ try:
+ return func(self, *args, **kwargs)
+ except (IOError, OSError) as err:
+ retry = retry + 1
+ if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL):
+ if not fatal:
+ self.report_error(f'unable to {action} file: {err}')
+ return
+ raise
+ self.to_screen(
+ f'[download] Unable to {action} file due to file access error. '
+ f'Retrying (attempt {retry} of {self.format_retries(file_access_retries)}) ...')
+ time.sleep(0.01)
+ return inner
+ return outer
+
+ @wrap_file_access('open', fatal=True)
+ def sanitize_open(self, filename, open_mode):
+ return sanitize_open(filename, open_mode)
+
+ @wrap_file_access('remove')
+ def try_remove(self, filename):
+ os.remove(filename)
+
+ @wrap_file_access('rename')
def try_rename(self, old_filename, new_filename):
if old_filename == new_filename:
return
- try:
- os.replace(old_filename, new_filename)
- except (IOError, OSError) as err:
- self.report_error(f'unable to rename file: {err}')
+ os.replace(old_filename, new_filename)
def try_utime(self, filename, last_modified_hdr):
"""Try to set the last-modified time of the given file."""
@@ -245,14 +277,32 @@ class FileDownloader(object):
elif self.ydl.params.get('logger'):
self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
elif self.params.get('progress_with_newline'):
- self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines)
+ self._multiline = BreaklineStatusPrinter(self.ydl._out_files['screen'], lines)
else:
- self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet'))
+ self._multiline = MultilinePrinter(self.ydl._out_files['screen'], lines, not self.params.get('quiet'))
+ self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
def _finish_multiline_status(self):
self._multiline.end()
- def _report_progress_status(self, s):
+ _progress_styles = {
+ 'downloaded_bytes': 'light blue',
+ 'percent': 'light blue',
+ 'eta': 'yellow',
+ 'speed': 'green',
+ 'elapsed': 'bold white',
+ 'total_bytes': '',
+ 'total_bytes_estimate': '',
+ }
+
+ def _report_progress_status(self, s, default_template):
+ for name, style in self._progress_styles.items():
+ name = f'_{name}_str'
+ if name not in s:
+ continue
+ s[name] = self._format_progress(s[name], style)
+ s['_default_template'] = default_template % s
+
progress_dict = s.copy()
progress_dict.pop('info_dict')
progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
@@ -265,6 +315,10 @@ class FileDownloader(object):
progress_template.get('download-title') or 'hypervideo %(progress._default_template)s',
progress_dict))
+ def _format_progress(self, *args, **kwargs):
+ return self.ydl._format_text(
+ self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
+
def report_progress(self, s):
if s['status'] == 'finished':
if self.params.get('noprogress'):
@@ -277,8 +331,7 @@ class FileDownloader(object):
s['_elapsed_str'] = self.format_seconds(s['elapsed'])
msg_template += ' in %(_elapsed_str)s'
s['_percent_str'] = self.format_percent(100)
- s['_default_template'] = msg_template % s
- self._report_progress_status(s)
+ self._report_progress_status(s, msg_template)
return
if s['status'] != 'downloading':
@@ -287,7 +340,7 @@ class FileDownloader(object):
if s.get('eta') is not None:
s['_eta_str'] = self.format_eta(s['eta'])
else:
- s['_eta_str'] = 'Unknown ETA'
+ s['_eta_str'] = 'Unknown'
if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
@@ -319,9 +372,12 @@ class FileDownloader(object):
else:
msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
else:
- msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
- s['_default_template'] = msg_template % s
- self._report_progress_status(s)
+ msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
+ if s.get('fragment_index') and s.get('fragment_count'):
+ msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)'
+ elif s.get('fragment_index'):
+ msg_template += ' (frag %(fragment_index)s)'
+ self._report_progress_status(s, msg_template)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
@@ -372,6 +428,7 @@ class FileDownloader(object):
'status': 'finished',
'total_bytes': os.path.getsize(encodeFilename(filename)),
}, info_dict)
+ self._finish_multiline_status()
return True, False
if subtitle is False:
diff --git a/hypervideo_dl/downloader/dash.py b/hypervideo_dl/downloader/dash.py
index 6444ad6..a845ee7 100644
--- a/hypervideo_dl/downloader/dash.py
+++ b/hypervideo_dl/downloader/dash.py
@@ -1,4 +1,5 @@
from __future__ import unicode_literals
+import time
from ..downloader import get_suitable_downloader
from .fragment import FragmentFD
@@ -15,27 +16,53 @@ class DashSegmentsFD(FragmentFD):
FD_NAME = 'dashsegments'
def real_download(self, filename, info_dict):
- if info_dict.get('is_live'):
+ if info_dict.get('is_live') and set(info_dict['protocol'].split('+')) != {'http_dash_segments_generator'}:
self.report_error('Live DASH videos are not supported')
- fragment_base_url = info_dict.get('fragment_base_url')
- fragments = info_dict['fragments'][:1] if self.params.get(
- 'test', False) else info_dict['fragments']
-
+ real_start = time.time()
real_downloader = get_suitable_downloader(
info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-'))
- ctx = {
- 'filename': filename,
- 'total_frags': len(fragments),
- }
+ requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])]
+ args = []
+ for fmt in requested_formats or [info_dict]:
+ try:
+ fragment_count = 1 if self.params.get('test') else len(fmt['fragments'])
+ except TypeError:
+ fragment_count = None
+ ctx = {
+ 'filename': fmt.get('filepath') or filename,
+ 'live': 'is_from_start' if fmt.get('is_from_start') else fmt.get('is_live'),
+ 'total_frags': fragment_count,
+ }
+
+ if real_downloader:
+ self._prepare_external_frag_download(ctx)
+ else:
+ self._prepare_and_start_frag_download(ctx, fmt)
+ ctx['start'] = real_start
+
+ fragments_to_download = self._get_fragments(fmt, ctx)
+
+ if real_downloader:
+ self.to_screen(
+ '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
+ info_dict['fragments'] = list(fragments_to_download)
+ fd = real_downloader(self.ydl, self.params)
+ return fd.real_download(filename, info_dict)
+
+ args.append([ctx, fragments_to_download, fmt])
- if real_downloader:
- self._prepare_external_frag_download(ctx)
- else:
- self._prepare_and_start_frag_download(ctx, info_dict)
+ return self.download_and_append_fragments_multiple(*args)
+
+ def _resolve_fragments(self, fragments, ctx):
+ fragments = fragments(ctx) if callable(fragments) else fragments
+ return [next(iter(fragments))] if self.params.get('test') else fragments
+
+ def _get_fragments(self, fmt, ctx):
+ fragment_base_url = fmt.get('fragment_base_url')
+ fragments = self._resolve_fragments(fmt['fragments'], ctx)
- fragments_to_download = []
frag_index = 0
for i, fragment in enumerate(fragments):
frag_index += 1
@@ -46,17 +73,8 @@ class DashSegmentsFD(FragmentFD):
assert fragment_base_url
fragment_url = urljoin(fragment_base_url, fragment['path'])
- fragments_to_download.append({
+ yield {
'frag_index': frag_index,
'index': i,
'url': fragment_url,
- })
-
- if real_downloader:
- self.to_screen(
- '[%s] Fragment downloads will be delegated to %s' % (self.FD_NAME, real_downloader.get_basename()))
- info_dict['fragments'] = fragments_to_download
- fd = real_downloader(self.ydl, self.params)
- return fd.real_download(filename, info_dict)
-
- return self.download_and_append_fragments(ctx, fragments_to_download, info_dict)
+ }
diff --git a/hypervideo_dl/downloader/external.py b/hypervideo_dl/downloader/external.py
index 74adb05..b99dc37 100644
--- a/hypervideo_dl/downloader/external.py
+++ b/hypervideo_dl/downloader/external.py
@@ -13,17 +13,18 @@ from ..compat import (
)
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
from ..utils import (
+ classproperty,
cli_option,
cli_valueless_option,
cli_bool_option,
_configuration_args,
+ determine_ext,
encodeFilename,
encodeArgument,
handle_youtubedl_headers,
check_executable,
- is_outdated_version,
- process_communicate_or_kill,
- sanitize_open,
+ Popen,
+ remove_end,
)
@@ -73,17 +74,23 @@ class ExternalFD(FragmentFD):
def get_basename(cls):
return cls.__name__[:-2].lower()
+ @classproperty
+ def EXE_NAME(cls):
+ return cls.get_basename()
+
@property
def exe(self):
- return self.get_basename()
+ return self.EXE_NAME
@classmethod
def available(cls, path=None):
- path = check_executable(path or cls.get_basename(), [cls.AVAILABLE_OPT])
- if path:
- cls.exe = path
- return path
- return False
+ path = check_executable(
+ cls.EXE_NAME if path in (None, cls.get_basename()) else path,
+ [cls.AVAILABLE_OPT])
+ if not path:
+ return False
+ cls.exe = path
+ return path
@classmethod
def supports(cls, info_dict):
@@ -106,7 +113,7 @@ class ExternalFD(FragmentFD):
def _configuration_args(self, keys=None, *args, **kwargs):
return _configuration_args(
- self.get_basename(), self.params.get('external_downloader_args'), self.get_basename(),
+ self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME,
keys, *args, **kwargs)
def _call_downloader(self, tmpfilename, info_dict):
@@ -116,9 +123,8 @@ class ExternalFD(FragmentFD):
self._debug_cmd(cmd)
if 'fragments' not in info_dict:
- p = subprocess.Popen(
- cmd, stderr=subprocess.PIPE)
- _, stderr = process_communicate_or_kill(p)
+ p = Popen(cmd, stderr=subprocess.PIPE)
+ _, stderr = p.communicate_or_kill()
if p.returncode != 0:
self.to_stderr(stderr.decode('utf-8', 'replace'))
return p.returncode
@@ -128,9 +134,8 @@ class ExternalFD(FragmentFD):
count = 0
while count <= fragment_retries:
- p = subprocess.Popen(
- cmd, stderr=subprocess.PIPE)
- _, stderr = process_communicate_or_kill(p)
+ p = Popen(cmd, stderr=subprocess.PIPE)
+ _, stderr = p.communicate_or_kill()
if p.returncode == 0:
break
# TODO: Decide whether to retry based on error code
@@ -147,23 +152,23 @@ class ExternalFD(FragmentFD):
return -1
decrypt_fragment = self.decrypter(info_dict)
- dest, _ = sanitize_open(tmpfilename, 'wb')
+ dest, _ = self.sanitize_open(tmpfilename, 'wb')
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (tmpfilename, frag_index)
try:
- src, _ = sanitize_open(fragment_filename, 'rb')
- except IOError:
+ src, _ = self.sanitize_open(fragment_filename, 'rb')
+ except IOError as err:
if skip_unavailable_fragments and frag_index > 1:
- self.to_screen('[%s] Skipping fragment %d ...' % (self.get_basename(), frag_index))
+ self.report_skip_fragment(frag_index, err)
continue
- self.report_error('Unable to open fragment %d' % frag_index)
+ self.report_error(f'Unable to open fragment {frag_index}; {err}')
return -1
dest.write(decrypt_fragment(fragment, src.read()))
src.close()
if not self.params.get('keep_fragments', False):
- os.remove(encodeFilename(fragment_filename))
+ self.try_remove(encodeFilename(fragment_filename))
dest.close()
- os.remove(encodeFilename('%s.frag.urls' % tmpfilename))
+ self.try_remove(encodeFilename('%s.frag.urls' % tmpfilename))
return 0
@@ -171,7 +176,7 @@ class CurlFD(ExternalFD):
AVAILABLE_OPT = '-V'
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '--location', '-o', tmpfilename]
+ cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
@@ -199,8 +204,8 @@ class CurlFD(ExternalFD):
self._debug_cmd(cmd)
# curl writes the progress to stderr so don't capture it.
- p = subprocess.Popen(cmd)
- process_communicate_or_kill(p)
+ p = Popen(cmd)
+ p.communicate_or_kill()
return p.returncode
@@ -221,7 +226,7 @@ class WgetFD(ExternalFD):
AVAILABLE_OPT = '--version'
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
+ cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies', '--compression=auto']
if info_dict.get('http_headers') is not None:
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
@@ -232,7 +237,10 @@ class WgetFD(ExternalFD):
retry[1] = '0'
cmd += retry
cmd += self._option('--bind-address', 'source_address')
- cmd += self._option('--proxy', 'proxy')
+ proxy = self.params.get('proxy')
+ if proxy:
+ for var in ('http_proxy', 'https_proxy'):
+ cmd += ['--execute', '%s=%s' % (var, proxy)]
cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
@@ -255,7 +263,7 @@ class Aria2cFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
cmd = [self.exe, '-c',
'--console-log-level=warn', '--summary-interval=0', '--download-result=hide',
- '--file-allocation=none', '-x16', '-j16', '-s16']
+ '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16']
if 'fragments' in info_dict:
cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true']
else:
@@ -269,6 +277,7 @@ class Aria2cFD(ExternalFD):
cmd += self._option('--all-proxy', 'proxy')
cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
+ cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=')
cmd += self._configuration_args()
# aria2c strips out spaces from the beginning/end of filenames and paths.
@@ -293,7 +302,7 @@ class Aria2cFD(ExternalFD):
for frag_index, fragment in enumerate(info_dict['fragments']):
fragment_filename = '%s-Frag%d' % (os.path.basename(tmpfilename), frag_index)
url_list.append('%s\n\tout=%s' % (fragment['url'], fragment_filename))
- stream, _ = sanitize_open(url_list_file, 'wb')
+ stream, _ = self.sanitize_open(url_list_file, 'wb')
stream.write('\n'.join(url_list).encode('utf-8'))
stream.close()
cmd += ['-i', url_list_file]
@@ -304,10 +313,7 @@ class Aria2cFD(ExternalFD):
class HttpieFD(ExternalFD):
AVAILABLE_OPT = '--version'
-
- @classmethod
- def available(cls, path=None):
- return ExternalFD.available(cls, path or 'http')
+ EXE_NAME = 'http'
def _make_cmd(self, tmpfilename, info_dict):
cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
@@ -446,8 +452,7 @@ class FFmpegFD(ExternalFD):
if info_dict.get('requested_formats') or protocol == 'http_dash_segments':
for (i, fmt) in enumerate(info_dict.get('requested_formats') or [info_dict]):
stream_number = fmt.get('manifest_stream_number', 0)
- a_or_v = 'a' if fmt.get('acodec') != 'none' else 'v'
- args.extend(['-map', f'{i}:{a_or_v}:{stream_number}'])
+ args.extend(['-map', f'{i}:{stream_number}'])
if self.params.get('test', False):
args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
@@ -461,12 +466,21 @@ class FFmpegFD(ExternalFD):
args += ['-f', 'mpegts']
else:
args += ['-f', 'mp4']
- if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
+ if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
args += ['-bsf:a', 'aac_adtstoasc']
elif protocol == 'rtmp':
args += ['-f', 'flv']
elif ext == 'mp4' and tmpfilename == '-':
args += ['-f', 'mpegts']
+ elif ext == 'unknown_video':
+ ext = determine_ext(remove_end(tmpfilename, '.part'))
+ if ext == 'unknown_video':
+ self.report_warning(
+ 'The video format is unknown and cannot be downloaded by ffmpeg. '
+ 'Explicitly set the extension in the filename to attempt download in that format')
+ else:
+ self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename')
+ args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
else:
args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)]
@@ -476,7 +490,7 @@ class FFmpegFD(ExternalFD):
args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
self._debug_cmd(args)
- proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
+ proc = Popen(args, stdin=subprocess.PIPE, env=env)
if url in ('-', 'pipe:'):
self.on_process_started(proc, proc.stdin)
try:
@@ -488,7 +502,7 @@ class FFmpegFD(ExternalFD):
# streams). Note that Windows is not affected and produces playable
# files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and url not in ('-', 'pipe:'):
- process_communicate_or_kill(proc, b'q')
+ proc.communicate_or_kill(b'q')
else:
proc.kill()
proc.wait()
@@ -500,11 +514,13 @@ class AVconvFD(FFmpegFD):
pass
-_BY_NAME = dict(
- (klass.get_basename(), klass)
+_BY_NAME = {
+ klass.get_basename(): klass
for name, klass in globals().items()
if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD')
-)
+}
+
+_BY_EXE = {klass.EXE_NAME: klass for klass in _BY_NAME.values()}
def list_external_downloaders():
@@ -516,4 +532,4 @@ def get_external_downloader(external_downloader):
downloader . """
# Drop .exe extension on Windows
bn = os.path.splitext(os.path.basename(external_downloader))[0]
- return _BY_NAME.get(bn)
+ return _BY_NAME.get(bn, _BY_EXE.get(bn))
diff --git a/hypervideo_dl/downloader/f4m.py b/hypervideo_dl/downloader/f4m.py
index 9da2776..0008b7c 100644
--- a/hypervideo_dl/downloader/f4m.py
+++ b/hypervideo_dl/downloader/f4m.py
@@ -366,7 +366,7 @@ class F4mFD(FragmentFD):
ctx = {
'filename': filename,
'total_frags': total_frags,
- 'live': live,
+ 'live': bool(live),
}
self._prepare_frag_download(ctx)
diff --git a/hypervideo_dl/downloader/fc2.py b/hypervideo_dl/downloader/fc2.py
new file mode 100644
index 0000000..157bcf2
--- /dev/null
+++ b/hypervideo_dl/downloader/fc2.py
@@ -0,0 +1,41 @@
+from __future__ import division, unicode_literals
+
+import threading
+
+from .common import FileDownloader
+from .external import FFmpegFD
+
+
+class FC2LiveFD(FileDownloader):
+ """
+ Downloads FC2 live without being stopped. <br>
+ Note, this is not a part of public API, and will be removed without notice.
+ DO NOT USE
+ """
+
+ def real_download(self, filename, info_dict):
+ ws = info_dict['ws']
+
+ heartbeat_lock = threading.Lock()
+ heartbeat_state = [None, 1]
+
+ def heartbeat():
+ try:
+ heartbeat_state[1] += 1
+ ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1])
+ except Exception:
+ self.to_screen('[fc2:live] Heartbeat failed')
+
+ with heartbeat_lock:
+ heartbeat_state[0] = threading.Timer(30, heartbeat)
+ heartbeat_state[0]._daemonic = True
+ heartbeat_state[0].start()
+
+ heartbeat()
+
+ new_info_dict = info_dict.copy()
+ new_info_dict.update({
+ 'ws': None,
+ 'protocol': 'live_ffmpeg',
+ })
+ return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict)
diff --git a/hypervideo_dl/downloader/fragment.py b/hypervideo_dl/downloader/fragment.py
index 57068db..a991c6d 100644
--- a/hypervideo_dl/downloader/fragment.py
+++ b/hypervideo_dl/downloader/fragment.py
@@ -1,9 +1,10 @@
from __future__ import division, unicode_literals
+import http.client
+import json
+import math
import os
import time
-import json
-from math import ceil
try:
import concurrent.futures
@@ -13,8 +14,9 @@ except ImportError:
from .common import FileDownloader
from .http import HttpFD
-from ..aes import aes_cbc_decrypt_bytes
+from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7
from ..compat import (
+ compat_os_name,
compat_urllib_error,
compat_struct_pack,
)
@@ -22,8 +24,8 @@ from ..utils import (
DownloadError,
error_to_compat_str,
encodeFilename,
- sanitize_open,
sanitized_Request,
+ traverse_obj,
)
@@ -31,6 +33,10 @@ class HttpQuietDownloader(HttpFD):
def to_screen(self, *args, **kargs):
pass
+ def report_retry(self, err, count, retries):
+ super().to_screen(
+ f'[download] Got server HTTP error: {err}. Retrying (attempt {count} of {self.format_retries(retries)}) ...')
+
class FragmentFD(FileDownloader):
"""
@@ -44,6 +50,7 @@ class FragmentFD(FileDownloader):
Skip unavailable fragments (DASH and hlsnative only)
keep_fragments: Keep downloaded fragments on disk after downloading is
finished
+ concurrent_fragment_downloads: The number of threads to use for native hls and dash downloads
_no_ytdl_file: Don't use .ytdl file
For each incomplete fragment download hypervideo keeps on disk a special
@@ -72,8 +79,9 @@ class FragmentFD(FileDownloader):
'\r[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s) ...'
% (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
- def report_skip_fragment(self, frag_index):
- self.to_screen('[download] Skipping fragment %d ...' % frag_index)
+ def report_skip_fragment(self, frag_index, err=None):
+ err = f' {err};' if err else ''
+ self.to_screen(f'[download]{err} Skipping fragment {frag_index:d} ...')
def _prepare_url(self, info_dict, url):
headers = info_dict.get('http_headers')
@@ -84,11 +92,11 @@ class FragmentFD(FileDownloader):
self._start_frag_download(ctx, info_dict)
def __do_ytdl_file(self, ctx):
- return not ctx['live'] and not ctx['tmpfilename'] == '-' and not self.params.get('_no_ytdl_file')
+ return ctx['live'] is not True and ctx['tmpfilename'] != '-' and not self.params.get('_no_ytdl_file')
def _read_ytdl_file(self, ctx):
assert 'ytdl_corrupt' not in ctx
- stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
+ stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
try:
ytdl_data = json.loads(stream.read())
ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index']
@@ -100,7 +108,7 @@ class FragmentFD(FileDownloader):
stream.close()
def _write_ytdl_file(self, ctx):
- frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
+ frag_index_stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
try:
downloader = {
'current_fragment': {
@@ -125,14 +133,19 @@ class FragmentFD(FileDownloader):
}
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success:
- return False, None
+ return False
if fragment_info_dict.get('filetime'):
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
ctx['fragment_filename_sanitized'] = fragment_filename
- return True, self._read_fragment(ctx)
+ return True
def _read_fragment(self, ctx):
- down, frag_sanitized = sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
+ try:
+ down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb')
+ except FileNotFoundError:
+ if ctx.get('live'):
+ return None
+ raise
ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read()
down.close()
@@ -146,7 +159,7 @@ class FragmentFD(FileDownloader):
if self.__do_ytdl_file(ctx):
self._write_ytdl_file(ctx)
if not self.params.get('keep_fragments', False):
- os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
+ self.try_remove(encodeFilename(ctx['fragment_filename_sanitized']))
del ctx['fragment_filename_sanitized']
def _prepare_frag_download(self, ctx):
@@ -165,8 +178,8 @@ class FragmentFD(FileDownloader):
dl = HttpQuietDownloader(
self.ydl,
{
- 'continuedl': True,
- 'quiet': True,
+ 'continuedl': self.params.get('continuedl', True),
+ 'quiet': self.params.get('quiet'),
'noprogress': True,
'ratelimit': self.params.get('ratelimit'),
'retries': self.params.get('retries', 0),
@@ -208,7 +221,7 @@ class FragmentFD(FileDownloader):
self._write_ytdl_file(ctx)
assert ctx['fragment_index'] == 0
- dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
+ dest_stream, tmpfilename = self.sanitize_open(tmpfilename, open_mode)
ctx.update({
'dl': dl,
@@ -236,6 +249,7 @@ class FragmentFD(FileDownloader):
start = time.time()
ctx.update({
'started': start,
+ 'fragment_started': start,
# Amount of fragment's bytes downloaded by the time of the previous
# frag progress hook invocation
'prev_frag_downloaded_bytes': 0,
@@ -266,6 +280,9 @@ class FragmentFD(FileDownloader):
ctx['fragment_index'] = state['fragment_index']
state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+ ctx['speed'] = state['speed'] = self.calc_speed(
+ ctx['fragment_started'], time_now, frag_total_bytes)
+ ctx['fragment_started'] = time.time()
ctx['prev_frag_downloaded_bytes'] = 0
else:
frag_downloaded_bytes = s['downloaded_bytes']
@@ -274,8 +291,8 @@ class FragmentFD(FileDownloader):
state['eta'] = self.calc_eta(
start, time_now, estimated_size - resume_len,
state['downloaded_bytes'] - resume_len)
- state['speed'] = s.get('speed') or ctx.get('speed')
- ctx['speed'] = state['speed']
+ ctx['speed'] = state['speed'] = self.calc_speed(
+ ctx['fragment_started'], time_now, frag_downloaded_bytes)
ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
self._hook_progress(state, info_dict)
@@ -288,7 +305,7 @@ class FragmentFD(FileDownloader):
if self.__do_ytdl_file(ctx):
ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
if os.path.isfile(ytdl_filename):
- os.remove(ytdl_filename)
+ self.try_remove(ytdl_filename)
elapsed = time.time() - ctx['started']
if ctx['tmpfilename'] == '-':
@@ -355,9 +372,7 @@ class FragmentFD(FileDownloader):
# not what it decrypts to.
if self.params.get('test', False):
return frag_content
- padding_len = 16 - (len(frag_content) % 16)
- decrypted_data = aes_cbc_decrypt_bytes(frag_content + bytes([padding_len] * padding_len), decrypt_info['KEY'], iv)
- return decrypted_data[:-decrypted_data[-1]]
+ return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv))
return decrypt_fragment
@@ -366,64 +381,105 @@ class FragmentFD(FileDownloader):
@params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ...
all args must be either tuple or list
'''
+ interrupt_trigger = [True]
max_progress = len(args)
if max_progress == 1:
return self.download_and_append_fragments(*args[0], pack_func=pack_func, finish_func=finish_func)
- max_workers = self.params.get('concurrent_fragment_downloads', max_progress)
- self._prepare_multiline_status(max_progress)
+ max_workers = self.params.get('concurrent_fragment_downloads', 1)
+ if max_progress > 1:
+ self._prepare_multiline_status(max_progress)
+ is_live = any(traverse_obj(args, (..., 2, 'is_live'), default=[]))
def thread_func(idx, ctx, fragments, info_dict, tpe):
ctx['max_progress'] = max_progress
ctx['progress_idx'] = idx
- return self.download_and_append_fragments(ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func, tpe=tpe)
+ return self.download_and_append_fragments(
+ ctx, fragments, info_dict, pack_func=pack_func, finish_func=finish_func,
+ tpe=tpe, interrupt_trigger=interrupt_trigger)
class FTPE(concurrent.futures.ThreadPoolExecutor):
# has to stop this or it's going to wait on the worker thread itself
def __exit__(self, exc_type, exc_val, exc_tb):
pass
+ if compat_os_name == 'nt':
+ def future_result(future):
+ while True:
+ try:
+ return future.result(0.1)
+ except KeyboardInterrupt:
+ raise
+ except concurrent.futures.TimeoutError:
+ continue
+ else:
+ def future_result(future):
+ return future.result()
+
+ def interrupt_trigger_iter(fg):
+ for f in fg:
+ if not interrupt_trigger[0]:
+ break
+ yield f
+
spins = []
for idx, (ctx, fragments, info_dict) in enumerate(args):
- tpe = FTPE(ceil(max_workers / max_progress))
- job = tpe.submit(thread_func, idx, ctx, fragments, info_dict, tpe)
+ tpe = FTPE(math.ceil(max_workers / max_progress))
+ job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe)
spins.append((tpe, job))
result = True
for tpe, job in spins:
try:
- result = result and job.result()
+ result = result and future_result(job)
+ except KeyboardInterrupt:
+ interrupt_trigger[0] = False
finally:
tpe.shutdown(wait=True)
+ if not interrupt_trigger[0] and not is_live:
+ raise KeyboardInterrupt()
+    # We assume the user wants to stop and DOES want the preceding postprocessors to run;
+    # so return an intermediate result here instead of raising KeyboardInterrupt on live streams
return result
- def download_and_append_fragments(self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None, tpe=None):
+ def download_and_append_fragments(
+ self, ctx, fragments, info_dict, *, pack_func=None, finish_func=None,
+ tpe=None, interrupt_trigger=None):
+ if not interrupt_trigger:
+ interrupt_trigger = (True, )
+
fragment_retries = self.params.get('fragment_retries', 0)
- is_fatal = (lambda idx: idx == 0) if self.params.get('skip_unavailable_fragments', True) else (lambda _: True)
+ is_fatal = (
+ ((lambda _: False) if info_dict.get('is_live') else (lambda idx: idx == 0))
+ if self.params.get('skip_unavailable_fragments', True) else (lambda _: True))
+
if not pack_func:
pack_func = lambda frag_content, _: frag_content
def download_fragment(fragment, ctx):
+ if not interrupt_trigger[0]:
+ return
+
frag_index = ctx['fragment_index'] = fragment['frag_index']
+ ctx['last_error'] = None
headers = info_dict.get('http_headers', {}).copy()
byte_range = fragment.get('byte_range')
if byte_range:
headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
# Never skip the first fragment
- fatal = is_fatal(fragment.get('index') or (frag_index - 1))
- count, frag_content = 0, None
+ fatal, count = is_fatal(fragment.get('index') or (frag_index - 1)), 0
while count <= fragment_retries:
try:
- success, frag_content = self._download_fragment(ctx, fragment['url'], info_dict, headers)
- if not success:
- return False, frag_index
- break
- except compat_urllib_error.HTTPError as err:
+ if self._download_fragment(ctx, fragment['url'], info_dict, headers):
+ break
+ return
+ except (compat_urllib_error.HTTPError, http.client.IncompleteRead) as err:
# Unavailable (possibly temporary) fragments may be served.
# First we try to retry then either skip or abort.
# See https://github.com/ytdl-org/youtube-dl/issues/10165,
# https://github.com/ytdl-org/youtube-dl/issues/10448).
count += 1
+ ctx['last_error'] = err
if count <= fragment_retries:
self.report_retry_fragment(err, frag_index, count, fragment_retries)
except DownloadError:
@@ -433,49 +489,46 @@ class FragmentFD(FileDownloader):
break
raise
- if count > fragment_retries:
- if not fatal:
- return False, frag_index
+ if count > fragment_retries and fatal:
ctx['dest_stream'].close()
self.report_error('Giving up after %s fragment retries' % fragment_retries)
- return False, frag_index
- return frag_content, frag_index
def append_fragment(frag_content, frag_index, ctx):
- if not frag_content:
- if not is_fatal(frag_index - 1):
- self.report_skip_fragment(frag_index)
- return True
- else:
- ctx['dest_stream'].close()
- self.report_error(
- 'fragment %s not found, unable to continue' % frag_index)
- return False
- self._append_fragment(ctx, pack_func(frag_content, frag_index))
+ if frag_content:
+ self._append_fragment(ctx, pack_func(frag_content, frag_index))
+ elif not is_fatal(frag_index - 1):
+ self.report_skip_fragment(frag_index, 'fragment not found')
+ else:
+ ctx['dest_stream'].close()
+ self.report_error(f'fragment {frag_index} not found, unable to continue')
+ return False
return True
decrypt_fragment = self.decrypter(info_dict)
- max_workers = self.params.get('concurrent_fragment_downloads', 1)
+ max_workers = math.ceil(
+ self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1))
if can_threaded_download and max_workers > 1:
def _download_fragment(fragment):
ctx_copy = ctx.copy()
- frag_content, frag_index = download_fragment(fragment, ctx_copy)
- return fragment, frag_content, frag_index, ctx_copy.get('fragment_filename_sanitized')
+ download_fragment(fragment, ctx_copy)
+ return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized')
self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome')
with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool:
- for fragment, frag_content, frag_index, frag_filename in pool.map(_download_fragment, fragments):
+ for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments):
ctx['fragment_filename_sanitized'] = frag_filename
ctx['fragment_index'] = frag_index
- result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+ result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx)
if not result:
return False
else:
for fragment in fragments:
- frag_content, frag_index = download_fragment(fragment, ctx)
- result = append_fragment(decrypt_fragment(fragment, frag_content), frag_index, ctx)
+ if not interrupt_trigger[0]:
+ break
+ download_fragment(fragment, ctx)
+ result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx)
if not result:
return False
diff --git a/hypervideo_dl/downloader/hls.py b/hypervideo_dl/downloader/hls.py
index ef8a81b..f3f32b5 100644
--- a/hypervideo_dl/downloader/hls.py
+++ b/hypervideo_dl/downloader/hls.py
@@ -77,6 +77,15 @@ class HlsFD(FragmentFD):
message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodome are available; '
'Decryption will be performed natively, but will be extremely slow')
if not can_download:
+ has_drm = re.search('|'.join([
+ r'#EXT-X-FAXS-CM:', # Adobe Flash Access
+ r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay
+ ]), s)
+ if has_drm and not self.params.get('allow_unplayable_formats'):
+ self.report_error(
+ 'This video is DRM protected; Try selecting another format with --format or '
+ 'add --check-formats to automatically fallback to the next best format')
+ return False
message = message or 'Unsupported features have been detected'
fd = FFmpegFD(self.ydl, self.params)
self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}')
diff --git a/hypervideo_dl/downloader/http.py b/hypervideo_dl/downloader/http.py
index 2e95bb9..591a9b0 100644
--- a/hypervideo_dl/downloader/http.py
+++ b/hypervideo_dl/downloader/http.py
@@ -1,29 +1,30 @@
from __future__ import unicode_literals
-import errno
import os
-import socket
+import ssl
import time
import random
-import re
from .common import FileDownloader
from ..compat import (
- compat_str,
compat_urllib_error,
+ compat_http_client
)
from ..utils import (
ContentTooShortError,
encodeFilename,
int_or_none,
- sanitize_open,
+ parse_http_range,
sanitized_Request,
ThrottledDownload,
+ try_call,
write_xattr,
XAttrMetadataError,
XAttrUnavailableError,
)
+RESPONSE_READ_EXCEPTIONS = (TimeoutError, ConnectionError, ssl.SSLError, compat_http_client.HTTPException)
+
class HttpFD(FileDownloader):
def real_download(self, filename, info_dict):
@@ -54,11 +55,11 @@ class HttpFD(FileDownloader):
ctx.open_mode = 'wb'
ctx.resume_len = 0
- ctx.data_len = None
ctx.block_size = self.params.get('buffersize', 1024)
ctx.start_time = time.time()
- ctx.chunk_size = None
- throttle_start = None
+
+ # parse given Range
+ req_start, req_end, _ = parse_http_range(headers.get('Range'))
if self.params.get('continuedl', True):
# Establish possible resume length
@@ -81,43 +82,50 @@ class HttpFD(FileDownloader):
class NextFragment(Exception):
pass
- def set_range(req, start, end):
- range_header = 'bytes=%d-' % start
- if end:
- range_header += compat_str(end)
- req.add_header('Range', range_header)
-
def establish_connection():
ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
if not is_test and chunk_size else chunk_size)
if ctx.resume_len > 0:
range_start = ctx.resume_len
+ if req_start is not None:
+ # offset the beginning of Range to be within request
+ range_start += req_start
if ctx.is_resume:
self.report_resuming_byte(ctx.resume_len)
ctx.open_mode = 'ab'
+ elif req_start is not None:
+ range_start = req_start
elif ctx.chunk_size > 0:
range_start = 0
else:
range_start = None
ctx.is_resume = False
- range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
- if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
- range_end = ctx.data_len - 1
- has_range = range_start is not None
- ctx.has_range = has_range
+
+ if ctx.chunk_size:
+ chunk_aware_end = range_start + ctx.chunk_size - 1
+ # we're not allowed to download outside Range
+ range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end)
+ elif req_end is not None:
+ # there's no need for chunked downloads, so download until the end of Range
+ range_end = req_end
+ else:
+ range_end = None
+
+ if try_call(lambda: range_start > range_end):
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})'))
+
+ if try_call(lambda: range_end >= ctx.content_len):
+ range_end = ctx.content_len - 1
+
request = sanitized_Request(url, request_data, headers)
+ has_range = range_start is not None
if has_range:
- set_range(request, range_start, range_end)
+ request.add_header('Range', f'bytes={int(range_start)}-{int_or_none(range_end) or ""}')
# Establish connection
try:
- try:
- ctx.data = self.ydl.urlopen(request)
- except (compat_urllib_error.URLError, ) as err:
- # reason may not be available, e.g. for urllib2.HTTPError on python 2.6
- reason = getattr(err, 'reason', None)
- if isinstance(reason, socket.timeout):
- raise RetryDownload(err)
- raise err
+ ctx.data = self.ydl.urlopen(request)
# When trying to resume, Content-Range HTTP header of response has to be checked
# to match the value of requested Range HTTP header. This is due to a webservers
# that don't support resuming and serve a whole file with no Content-Range
@@ -125,31 +133,27 @@ class HttpFD(FileDownloader):
# https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
if has_range:
content_range = ctx.data.headers.get('Content-Range')
- if content_range:
- content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
+ content_range_start, content_range_end, content_len = parse_http_range(content_range)
+ if content_range_start is not None and range_start == content_range_start:
# Content-Range is present and matches requested Range, resume is possible
- if content_range_m:
- if range_start == int(content_range_m.group(1)):
- content_range_end = int_or_none(content_range_m.group(2))
- content_len = int_or_none(content_range_m.group(3))
- accept_content_len = (
- # Non-chunked download
- not ctx.chunk_size
- # Chunked download and requested piece or
- # its part is promised to be served
- or content_range_end == range_end
- or content_len < range_end)
- if accept_content_len:
- ctx.data_len = content_len
- return
+ accept_content_len = (
+ # Non-chunked download
+ not ctx.chunk_size
+ # Chunked download and requested piece or
+ # its part is promised to be served
+ or content_range_end == range_end
+ or content_len < range_end)
+ if accept_content_len:
+ ctx.content_len = content_len
+ ctx.data_len = min(content_len, req_end or content_len) - (req_start or 0)
+ return
# Content-Range is either not present or invalid. Assuming remote webserver is
# trying to send the whole file, resume is not possible, so wiping the local file
# and performing entire redownload
self.report_unable_to_resume()
ctx.resume_len = 0
ctx.open_mode = 'wb'
- ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
- return
+ ctx.data_len = ctx.content_len = int_or_none(ctx.data.info().get('Content-length', None))
except (compat_urllib_error.HTTPError, ) as err:
if err.code == 416:
# Unable to resume (requested range not satisfiable)
@@ -191,14 +195,16 @@ class HttpFD(FileDownloader):
# Unexpected HTTP error
raise
raise RetryDownload(err)
- except socket.error as err:
- if err.errno != errno.ECONNRESET:
- # Connection reset is no problem, just retry
+ except compat_urllib_error.URLError as err:
+ if isinstance(err.reason, ssl.CertificateError):
raise
raise RetryDownload(err)
+ # In urllib.request.AbstractHTTPHandler, the response is partially read on request.
+ # Any errors that occur during this will not be wrapped by URLError
+ except RESPONSE_READ_EXCEPTIONS as err:
+ raise RetryDownload(err)
def download():
- nonlocal throttle_start
data_len = ctx.data.info().get('Content-length', None)
# Range HTTP header may be ignored/unsupported by a webserver
@@ -241,16 +247,8 @@ class HttpFD(FileDownloader):
try:
# Download and write
data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
- # socket.timeout is a subclass of socket.error but may not have
- # errno set
- except socket.timeout as e:
- retry(e)
- except socket.error as e:
- # SSLError on python 2 (inherits socket.error) may have
- # no errno set but this error message
- if e.errno in (errno.ECONNRESET, errno.ETIMEDOUT) or getattr(e, 'message', None) == 'The read operation timed out':
- retry(e)
- raise
+ except RESPONSE_READ_EXCEPTIONS as err:
+ retry(err)
byte_counter += len(data_block)
@@ -261,7 +259,7 @@ class HttpFD(FileDownloader):
# Open destination file just in time
if ctx.stream is None:
try:
- ctx.stream, ctx.tmpfilename = sanitize_open(
+ ctx.stream, ctx.tmpfilename = self.sanitize_open(
ctx.tmpfilename, ctx.open_mode)
assert ctx.stream is not None
ctx.filename = self.undo_temp_name(ctx.tmpfilename)
@@ -321,16 +319,16 @@ class HttpFD(FileDownloader):
if speed and speed < (self.params.get('throttledratelimit') or 0):
# The speed must stay below the limit for 3 seconds
# This prevents raising error when the speed temporarily goes down
- if throttle_start is None:
- throttle_start = now
- elif now - throttle_start > 3:
+ if ctx.throttle_start is None:
+ ctx.throttle_start = now
+ elif now - ctx.throttle_start > 3:
if ctx.stream is not None and ctx.tmpfilename != '-':
ctx.stream.close()
raise ThrottledDownload()
elif speed:
- throttle_start = None
+ ctx.throttle_start = None
- if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
+ if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len:
ctx.resume_len = byte_counter
# ctx.block_size = block_size
raise NextFragment()
diff --git a/hypervideo_dl/downloader/ism.py b/hypervideo_dl/downloader/ism.py
index 09516ab..4d5618c 100644
--- a/hypervideo_dl/downloader/ism.py
+++ b/hypervideo_dl/downloader/ism.py
@@ -263,9 +263,11 @@ class IsmFD(FragmentFD):
count = 0
while count <= fragment_retries:
try:
- success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
+ success = self._download_fragment(ctx, segment['url'], info_dict)
if not success:
return False
+ frag_content = self._read_fragment(ctx)
+
if not extra_state['ism_track_written']:
tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
diff --git a/hypervideo_dl/downloader/mhtml.py b/hypervideo_dl/downloader/mhtml.py
index f0f4dc6..c8332c0 100644
--- a/hypervideo_dl/downloader/mhtml.py
+++ b/hypervideo_dl/downloader/mhtml.py
@@ -114,8 +114,8 @@ body > figure > img {
fragment_base_url = info_dict.get('fragment_base_url')
fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
- title = info_dict['title']
- origin = info_dict['webpage_url']
+ title = info_dict.get('title', info_dict['format_id'])
+ origin = info_dict.get('webpage_url', info_dict['url'])
ctx = {
'filename': filename,
@@ -166,10 +166,15 @@ body > figure > img {
if (i + 1) <= ctx['fragment_index']:
continue
- fragment_url = urljoin(fragment_base_url, fragment['path'])
- success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
+ fragment_url = fragment.get('url')
+ if not fragment_url:
+ assert fragment_base_url
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+
+ success = self._download_fragment(ctx, fragment_url, info_dict)
if not success:
continue
+ frag_content = self._read_fragment(ctx)
mime_type = b'image/jpeg'
if frag_content.startswith(b'\x89PNG\r\n\x1a\n'):
diff --git a/hypervideo_dl/downloader/rtmp.py b/hypervideo_dl/downloader/rtmp.py
index 6dca647..90f1acf 100644
--- a/hypervideo_dl/downloader/rtmp.py
+++ b/hypervideo_dl/downloader/rtmp.py
@@ -12,6 +12,7 @@ from ..utils import (
encodeFilename,
encodeArgument,
get_exe_version,
+ Popen,
)
@@ -26,7 +27,7 @@ class RtmpFD(FileDownloader):
start = time.time()
resume_percent = None
resume_downloaded_data_len = None
- proc = subprocess.Popen(args, stderr=subprocess.PIPE)
+ proc = Popen(args, stderr=subprocess.PIPE)
cursor_in_new_line = True
proc_stderr_closed = False
try:
diff --git a/hypervideo_dl/downloader/websocket.py b/hypervideo_dl/downloader/websocket.py
index 0882220..58e2bce 100644
--- a/hypervideo_dl/downloader/websocket.py
+++ b/hypervideo_dl/downloader/websocket.py
@@ -5,9 +5,12 @@ import threading
try:
import websockets
- has_websockets = True
-except ImportError:
+except (ImportError, SyntaxError):
+ # websockets 3.10 on python 3.6 causes SyntaxError
+ # See https://github.com/hypervideo/hypervideo/issues/2633
has_websockets = False
+else:
+ has_websockets = True
from .common import FileDownloader
from .external import FFmpegFD
diff --git a/hypervideo_dl/downloader/youtube_live_chat.py b/hypervideo_dl/downloader/youtube_live_chat.py
index ef4205e..dd21ac8 100644
--- a/hypervideo_dl/downloader/youtube_live_chat.py
+++ b/hypervideo_dl/downloader/youtube_live_chat.py
@@ -22,6 +22,9 @@ class YoutubeLiveChatFD(FragmentFD):
def real_download(self, filename, info_dict):
video_id = info_dict['video_id']
self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+ if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
+ self.report_warning('Live chat download runs until the livestream ends. '
+ 'If you wish to download the video simultaneously, run a separate hypervideo instance')
fragment_retries = self.params.get('fragment_retries', 0)
test = self.params.get('test', False)
@@ -112,9 +115,10 @@ class YoutubeLiveChatFD(FragmentFD):
count = 0
while count <= fragment_retries:
try:
- success, raw_fragment = dl_fragment(url, request_data, headers)
+ success = dl_fragment(url, request_data, headers)
if not success:
return False, None, None, None
+ raw_fragment = self._read_fragment(ctx)
try:
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError:
@@ -142,9 +146,10 @@ class YoutubeLiveChatFD(FragmentFD):
self._prepare_and_start_frag_download(ctx, info_dict)
- success, raw_fragment = dl_fragment(info_dict['url'])
+ success = dl_fragment(info_dict['url'])
if not success:
return False
+ raw_fragment = self._read_fragment(ctx)
try:
data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
except RegexNotFoundError: