From 6a765f135ccb654861336ea27a2c1c24ea8e286f Mon Sep 17 00:00:00 2001
From: vidiot720 <128325907+vidiot720@users.noreply.github.com>
Date: Wed, 19 Apr 2023 09:46:57 +1000
Subject: [extractor/sbs] Overhaul extractor for new API (#6839)

Closes #6543

Authored by: vidiot720, dirkf, bashonly
---
 yt_dlp/utils.py | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'yt_dlp/utils.py')

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 40533c2cb..746a2885d 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -4093,6 +4093,10 @@ def dfxp2srt(dfxp_data):
         def close(self):
             return self._out.strip()
 
+    # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
+    # This will not trigger false positives since only UTF-8 text is being replaced
+    dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')
+
     def parse_node(node):
         target = TTMLPElementParser()
         parser = xml.etree.ElementTree.XMLParser(target=target)
-- 
cgit v1.2.3