about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- README.md | 14
-rw-r--r-- yt_dlp/YoutubeDL.py | 6
-rw-r--r-- yt_dlp/__init__.py | 55
-rw-r--r-- yt_dlp/options.py | 5
-rw-r--r-- yt_dlp/utils/_utils.py | 22
5 files changed, 72 insertions, 30 deletions
diff --git a/README.md b/README.md
index d10832103..8db2d4f06 100644
--- a/README.md
+++ b/README.md
@@ -610,12 +610,14 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--no-hls-use-mpegts Do not use the mpegts container for HLS
videos. This is default when not downloading
live streams
- --download-sections REGEX Download only chapters whose title matches
- the given regular expression. Time ranges
- prefixed by a "*" can also be used in place
- of chapters to download the specified range.
- Needs ffmpeg. This option can be used
- multiple times to download multiple
+ --download-sections REGEX Download only chapters that match the
+ regular expression. A "*" prefix denotes
+ time-range instead of chapter. Negative
+ timestamps are calculated from the end.
+ "*from-url" can be used to download between
+ the "start_time" and "end_time" extracted
+ from the URL. Needs ffmpeg. This option can
+ be used multiple times to download multiple
sections, e.g. --download-sections
"*10:15-inf" --download-sections "intro"
--downloader [PROTO:]NAME Name or path of the external downloader to
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 79b7d47b0..6dade0b2a 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -2806,11 +2806,13 @@ class YoutubeDL:
new_info.update(fmt)
offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf')
end_time = offset + min(chapter.get('end_time', duration), duration)
+ # duration may not be accurate. So allow deviations <1sec
+ if end_time == float('inf') or end_time > offset + duration + 1:
+ end_time = None
if chapter or offset:
new_info.update({
'section_start': offset + chapter.get('start_time', 0),
- # duration may not be accurate. So allow deviations <1sec
- 'section_end': end_time if end_time <= offset + duration + 1 else None,
+ 'section_end': end_time,
'section_title': chapter.get('title'),
'section_number': chapter.get('index'),
})
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index 46edd88d3..b81277a57 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -320,26 +320,49 @@ def validate_options(opts):
opts.skip_download = None
del opts.outtmpl['default']
- def parse_chapters(name, value):
- chapters, ranges = [], []
+ def parse_chapters(name, value, advanced=False):
parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
+ TIMESTAMP_RE = r'''(?x)(?:
+ (?P<start_sign>-?)(?P<start>[^-]+)
+ )?\s*-\s*(?:
+ (?P<end_sign>-?)(?P<end>[^-]+)
+ )?'''
+
+ chapters, ranges, from_url = [], [], False
for regex in value or []:
- if regex.startswith('*'):
- for range_ in map(str.strip, regex[1:].split(',')):
- mobj = range_ != '-' and re.fullmatch(r'([^-]+)?\s*-\s*([^-]+)?', range_)
- dur = mobj and (parse_timestamp(mobj.group(1) or '0'), parse_timestamp(mobj.group(2) or 'inf'))
- if None in (dur or [None]):
- raise ValueError(f'invalid {name} time range "{regex}". Must be of the form "*start-end"')
- ranges.append(dur)
+ if advanced and regex == '*from-url':
+ from_url = True
+ continue
+ elif not regex.startswith('*'):
+ try:
+ chapters.append(re.compile(regex))
+ except re.error as err:
+ raise ValueError(f'invalid {name} regex "{regex}" - {err}')
continue
- try:
- chapters.append(re.compile(regex))
- except re.error as err:
- raise ValueError(f'invalid {name} regex "{regex}" - {err}')
- return chapters, ranges
- opts.remove_chapters, opts.remove_ranges = parse_chapters('--remove-chapters', opts.remove_chapters)
- opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges))
+ for range_ in map(str.strip, regex[1:].split(',')):
+ mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_)
+ dur = mobj and [parse_timestamp(mobj.group('start') or '0'), parse_timestamp(mobj.group('end') or 'inf')]
+ signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign'))
+
+ err = None
+ if None in (dur or [None]):
+ err = 'Must be of the form "*start-end"'
+ elif not advanced and any(signs):
+ err = 'Negative timestamps are not allowed'
+ else:
+ dur[0] *= -1 if signs[0] else 1
+ dur[1] *= -1 if signs[1] else 1
+ if dur[1] == float('-inf'):
+ err = '"-inf" is not a valid end'
+ if err:
+ raise ValueError(f'invalid {name} time range "{regex}". {err}')
+ ranges.append(dur)
+
+ return chapters, ranges, from_url
+
+ opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters)
+ opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True))
# Cookies from browser
if opts.cookiesfrombrowser:
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index 9d6dbec9f..163809706 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1012,8 +1012,9 @@ def create_parser():
'--download-sections',
metavar='REGEX', dest='download_ranges', action='append',
help=(
- 'Download only chapters whose title matches the given regular expression. '
- 'Time ranges prefixed by a "*" can also be used in place of chapters to download the specified range. '
+ 'Download only chapters that match the regular expression. '
+ 'A "*" prefix denotes time-range instead of chapter. Negative timestamps are calculated from the end. '
+ '"*from-url" can be used to download between the "start_time" and "end_time" extracted from the URL. '
'Needs ffmpeg. This option can be used multiple times to download multiple sections, '
'e.g. --download-sections "*10:15-inf" --download-sections "intro"'))
downloader.add_option(
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index bc1bc9116..56acadd73 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -3753,11 +3753,11 @@ def match_filter_func(filters, breaking_filters=None):
class download_range_func:
- def __init__(self, chapters, ranges):
- self.chapters, self.ranges = chapters, ranges
+ def __init__(self, chapters, ranges, from_info=False):
+ self.chapters, self.ranges, self.from_info = chapters, ranges, from_info
def __call__(self, info_dict, ydl):
- if not self.ranges and not self.chapters:
+ if not any((self.ranges, self.chapters, self.from_info)):
yield {}
warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
@@ -3770,7 +3770,21 @@ class download_range_func:
if self.chapters and warning:
ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
- yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or [])
+ for start, end in self.ranges or []:
+ yield {
+ 'start_time': self._handle_negative_timestamp(start, info_dict),
+ 'end_time': self._handle_negative_timestamp(end, info_dict),
+ }
+
+ if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
+ yield {
+ 'start_time': info_dict.get('start_time'),
+ 'end_time': info_dict.get('end_time'),
+ }
+
+ @staticmethod
+ def _handle_negative_timestamp(time, info):
+ return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time
def __eq__(self, other):
return (isinstance(other, download_range_func)