aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--youtube_dlc/utils.py15
1 files changed, 9 insertions, 6 deletions
diff --git a/youtube_dlc/utils.py b/youtube_dlc/utils.py
index 586ad4150..ae293589b 100644
--- a/youtube_dlc/utils.py
+++ b/youtube_dlc/utils.py
@@ -3892,13 +3892,16 @@ def read_batch_urls(batch_fd):
def fixup(url):
if not isinstance(url, compat_str):
url = url.decode('utf-8', 'replace')
- BOM_UTF8 = '\xef\xbb\xbf'
- if url.startswith(BOM_UTF8):
- url = url[len(BOM_UTF8):]
- url = url.strip()
- if url.startswith(('#', ';', ']')):
+ BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
+ for bom in BOM_UTF8:
+ if url.startswith(bom):
+ url = url[len(bom):]
+ url = url.lstrip()
+ if not url or url.startswith(('#', ';', ']')):
return False
- return url
+ # "#" cannot be stripped out since it is part of the URI
+ # However, it can be safely stripped out if following a whitespace
+ return re.split(r'\s#', url, 1)[0].rstrip()
with contextlib.closing(batch_fd) as fd:
return [url for url in map(fixup, fd) if url]