about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Felix S <felix.von.s@posteo.de> 2021-04-23 10:52:21 +0200
committer: Felix S <felix.von.s@posteo.de> 2021-04-28 17:21:26 +0530
commit: 333217f43e58f93fc8088d4854044b907adddce5 (patch)
tree: 0f279e5deb01ad65b6baa0b8a097cef59ab280cc
parent: 4a2f19abbd61274358211c2e3b1d9658cfbdcdde (diff)
download: hypervideo-pre-333217f43e58f93fc8088d4854044b907adddce5.tar.lz
hypervideo-pre-333217f43e58f93fc8088d4854044b907adddce5.tar.xz
hypervideo-pre-333217f43e58f93fc8088d4854044b907adddce5.zip
[downloader/hls] Remove duplicate cues using a sliding window of candidates
-rw-r--r-- yt_dlp/downloader/hls.py 25
-rw-r--r-- yt_dlp/webvtt.py 10
2 files changed, 35 insertions, 0 deletions
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index cee3807ce..c0e52d35d 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -325,6 +325,31 @@ class HlsFD(FragmentFD):
if isinstance(block, webvtt.CueBlock):
block.start += adjust
block.end += adjust
+
+ dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
+ cue = block.as_json
+
+ # skip the cue if an identical one appears
+ # in the window of potential duplicates
+ # and prune the window of unviable candidates
+ i = 0
+ skip = True
+ while i < len(dedup_window):
+ window_cue = dedup_window[i]
+ if window_cue == cue:
+ break
+ if window_cue['end'] >= cue['start']:
+ i += 1
+ continue
+ del dedup_window[i]
+ else:
+ skip = False
+
+ if skip:
+ continue
+
+ # add the cue to the window
+ dedup_window.append(cue)
elif isinstance(block, webvtt.Magic):
# XXX: we do not handle MPEGTS overflow
if frag_index == 1:
diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py
index 4d026834a..a184ee369 100644
--- a/yt_dlp/webvtt.py
+++ b/yt_dlp/webvtt.py
@@ -322,6 +322,16 @@ class CueBlock(Block):
stream.write(self.text)
stream.write('\n')
+ @property
+ def as_json(self):
+ return {
+ 'id': self.id,
+ 'start': self.start,
+ 'end': self.end,
+ 'text': self.text,
+ 'settings': self.settings,
+ }
+
def parse_fragment(frag_content):
"""