diff options
author | Felix S <felix.von.s@posteo.de> | 2021-08-09 20:22:30 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-10 01:52:30 +0530 |
commit | 25a3f4f5d6de2bd5fb60cd11a46f0ac232882a56 (patch) | |
tree | 3e3bbe3078fe903abbd0e6593a3a0e230564cdda /yt_dlp/downloader/hls.py | |
parent | ad3dc496bbf2e2a574a16244ddde0740778e5daf (diff) | |
download | hypervideo-pre-25a3f4f5d6de2bd5fb60cd11a46f0ac232882a56.tar.lz hypervideo-pre-25a3f4f5d6de2bd5fb60cd11a46f0ac232882a56.tar.xz hypervideo-pre-25a3f4f5d6de2bd5fb60cd11a46f0ac232882a56.zip |
[webvtt] Merge daisy-chained duplicate cues (#638)
Fixes: https://github.com/yt-dlp/yt-dlp/issues/631#issuecomment-893338552
Previous deduplication algorithm only removed duplicate cues with
identical text, styles and timestamps. This change also merges
cues that come in ‘daisy chains’, where sequences of cues with
identical text and styles appear in which the ending timestamp of
one equals the starting timestamp of the next.
This deduplication algorithm has the somewhat unfortunate side effect
that NOTE blocks between cues, if found, will be emitted in a different
order relative to their original cues. This may be unwanted if perfect
fidelity is desired, but then so is daisy-chain deduplication itself.
NOTE blocks ought to be ignored by WebVTT players in any case.
Authored by: fstirlitz
Diffstat (limited to 'yt_dlp/downloader/hls.py')
-rw-r--r-- | yt_dlp/downloader/hls.py | 55 |
1 files changed, 37 insertions, 18 deletions
```diff
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index 9cbd5a584..9cfc191cb 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -260,29 +260,35 @@ class HlsFD(FragmentFD):
                             block.end += adjust

                             dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
-                            cue = block.as_json

-                            # skip the cue if an identical one appears
-                            # in the window of potential duplicates
-                            # and prune the window of unviable candidates
+                            ready = []
+
                             i = 0
-                            skip = True
+                            is_new = True
                             while i < len(dedup_window):
-                                window_cue = dedup_window[i]
-                                if window_cue == cue:
-                                    break
-                                if window_cue['end'] >= cue['start']:
-                                    i += 1
+                                wcue = dedup_window[i]
+                                wblock = webvtt.CueBlock.from_json(wcue)
+                                i += 1
+                                if wblock.hinges(block):
+                                    wcue['end'] = block.end
+                                    is_new = False
+                                    continue
+                                if wblock == block:
+                                    is_new = False
+                                    continue
+                                if wblock.end > block.start:
                                     continue
+                                ready.append(wblock)
+                                i -= 1
                                 del dedup_window[i]
-                            else:
-                                skip = False

-                            if skip:
-                                continue
+                            if is_new:
+                                dedup_window.append(block.as_json)
+                            for block in ready:
+                                block.write_into(output)

-                            # add the cue to the window
-                            dedup_window.append(cue)
+                            # we only emit cues once they fall out of the duplicate window
+                            continue
                         elif isinstance(block, webvtt.Magic):
                             # take care of MPEG PES timestamp overflow
                             if block.mpegts is None:
@@ -317,6 +323,19 @@ class HlsFD(FragmentFD):
                         block.write_into(output)

                     return output.getvalue().encode('utf-8')
+
+            def fin_fragments():
+                dedup_window = extra_state.get('webvtt_dedup_window')
+                if not dedup_window:
+                    return b''
+
+                output = io.StringIO()
+                for cue in dedup_window:
+                    webvtt.CueBlock.from_json(cue).write_into(output)
+
+                return output.getvalue().encode('utf-8')
+
+            self.download_and_append_fragments(
+                ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments)
         else:
-            pack_fragment = None
-            return self.download_and_append_fragments(ctx, fragments, info_dict, pack_fragment)
+            return self.download_and_append_fragments(ctx, fragments, info_dict)
```