aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/utils.py
diff options
context:
space:
mode:
author: pukkandan <pukkandan.ytdlp@gmail.com> 2022-05-18 06:42:43 +0530
committer: pukkandan <pukkandan.ytdlp@gmail.com> 2022-05-18 07:03:50 +0530
commit: 80e8493ee7c3083f4e215794e4a67ba5265f24f7 (patch)
tree: 55708d0baa26f11925c819a053a904ef8755c164 /yt_dlp/utils.py
parent: aedaa455d9874f14662023f21b254168ecd55579 (diff)
download: hypervideo-pre-80e8493ee7c3083f4e215794e4a67ba5265f24f7.tar.lz
hypervideo-pre-80e8493ee7c3083f4e215794e4a67ba5265f24f7.tar.xz
hypervideo-pre-80e8493ee7c3083f4e215794e4a67ba5265f24f7.zip
[utils] `is_html`: Handle double BOM
Closes #2885
Diffstat (limited to 'yt_dlp/utils.py')
-rw-r--r-- yt_dlp/utils.py 11
1 files changed, 5 insertions, 6 deletions
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 48a94415d..3b0e6750c 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3290,14 +3290,13 @@ def is_html(first_bytes):
(b'\xff\xfe', 'utf-16-le'),
(b'\xfe\xff', 'utf-16-be'),
]
+
+ encoding = 'utf-8'
for bom, enc in BOMS:
- if first_bytes.startswith(bom):
- s = first_bytes[len(bom):].decode(enc, 'replace')
- break
- else:
- s = first_bytes.decode('utf-8', 'replace')
+ while first_bytes.startswith(bom):
+ encoding, first_bytes = enc, first_bytes[len(bom):]
- return re.match(r'^\s*<', s)
+ return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
def determine_protocol(info_dict):