diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-05-18 06:42:43 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-05-18 07:03:50 +0530 |
commit | 80e8493ee7c3083f4e215794e4a67ba5265f24f7 (patch) | |
tree | 55708d0baa26f11925c819a053a904ef8755c164 /yt_dlp/utils.py | |
parent | aedaa455d9874f14662023f21b254168ecd55579 (diff) | |
download | hypervideo-pre-80e8493ee7c3083f4e215794e4a67ba5265f24f7.tar.lz hypervideo-pre-80e8493ee7c3083f4e215794e4a67ba5265f24f7.tar.xz hypervideo-pre-80e8493ee7c3083f4e215794e4a67ba5265f24f7.zip |
[utils] `is_html`: Handle double BOM
Closes #2885
Diffstat (limited to 'yt_dlp/utils.py')
-rw-r--r-- | yt_dlp/utils.py | 11 |
1 file changed, 5 insertions, 6 deletions
```diff
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 48a94415d..3b0e6750c 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -3290,14 +3290,13 @@ def is_html(first_bytes):
         (b'\xff\xfe', 'utf-16-le'),
         (b'\xfe\xff', 'utf-16-be'),
     ]
+
+    encoding = 'utf-8'
     for bom, enc in BOMS:
-        if first_bytes.startswith(bom):
-            s = first_bytes[len(bom):].decode(enc, 'replace')
-            break
-    else:
-        s = first_bytes.decode('utf-8', 'replace')
+        while first_bytes.startswith(bom):
+            encoding, first_bytes = enc, first_bytes[len(bom):]
 
-    return re.match(r'^\s*<', s)
+    return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
 
 
 def determine_protocol(info_dict):
```