[cleanup] Misc cleanup

author: pukkandan <pukkandan.ytdlp@gmail.com> 2022-06-28 10:40:54 +0530
committer: pukkandan <pukkandan.ytdlp@gmail.com> 2022-06-29 06:43:27 +0530
commit: ae61d108dd83a951b6e8a27e1fb969682416150d (patch)
tree: 71cdc169890133e0097f2d28f452329315633e4b /yt_dlp/utils.py
parent: 47046464faaa3c72465f52c3c6a6191fbfd6b32c (diff)
download: hypervideo-pre-ae61d108dd83a951b6e8a27e1fb969682416150d.tar.lz
hypervideo-pre-ae61d108dd83a951b6e8a27e1fb969682416150d.tar.xz
hypervideo-pre-ae61d108dd83a951b6e8a27e1fb969682416150d.zip
1 file changed, 4 insertions, 5 deletions
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 9c9be5fe5..32c41a169 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -234,7 +234,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
 ])
 
 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?})\s*</script>'
 
 NUMBER_RE = r'\d+(?:\.\d+)?'
 
@@ -673,8 +673,8 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
     result = ''.join(map(replace_insane, s))
     if is_id is NO_DEFAULT:
-        result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result)  # Remove repeated substitute chars
-        STRIP_RE = '(?:\0.|[ _-])*'
+        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
+        STRIP_RE = r'(?:\0.|[ _-])*'
         result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
     result = result.replace('\0', '') or '_'
 
@@ -2400,8 +2400,7 @@ def remove_quotes(s):
 
 
 def get_domain(url):
-    domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
-    return domain.group('domain') if domain else None
+    return '.'.join(urllib.parse.urlparse(url).netloc.rsplit('.', 2)[-2:])
 
 
 def url_basename(url):
author	pukkandan <pukkandan.ytdlp@gmail.com>	2022-06-28 10:40:54 +0530
committer	pukkandan <pukkandan.ytdlp@gmail.com>	2022-06-29 06:43:27 +0530
commit	ae61d108dd83a951b6e8a27e1fb969682416150d (patch)
tree	71cdc169890133e0097f2d28f452329315633e4b /yt_dlp/utils.py
parent	47046464faaa3c72465f52c3c6a6191fbfd6b32c (diff)
download	hypervideo-pre-ae61d108dd83a951b6e8a27e1fb969682416150d.tar.lz hypervideo-pre-ae61d108dd83a951b6e8a27e1fb969682416150d.tar.xz hypervideo-pre-ae61d108dd83a951b6e8a27e1fb969682416150d.zip