diff options
author | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-06-28 10:40:54 +0530 |
---|---|---|
committer | pukkandan <pukkandan.ytdlp@gmail.com> | 2022-06-29 06:43:27 +0530 |
commit | ae61d108dd83a951b6e8a27e1fb969682416150d (patch) | |
tree | 71cdc169890133e0097f2d28f452329315633e4b /yt_dlp/utils.py | |
parent | 47046464faaa3c72465f52c3c6a6191fbfd6b32c (diff) | |
download | hypervideo-pre-ae61d108dd83a951b6e8a27e1fb969682416150d.tar.lz hypervideo-pre-ae61d108dd83a951b6e8a27e1fb969682416150d.tar.xz hypervideo-pre-ae61d108dd83a951b6e8a27e1fb969682416150d.zip |
[cleanup] Misc cleanup
Diffstat (limited to 'yt_dlp/utils.py')
-rw-r--r-- | yt_dlp/utils.py | 9 |
1 files changed, 4 insertions, 5 deletions
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 9c9be5fe5..32c41a169 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -234,7 +234,7 @@ DATE_FORMATS_MONTH_FIRST.extend([ ]) PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" -JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>' +JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?})\s*</script>' NUMBER_RE = r'\d+(?:\.\d+)?' @@ -673,8 +673,8 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT): s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps result = ''.join(map(replace_insane, s)) if is_id is NO_DEFAULT: - result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result) # Remove repeated substitute chars - STRIP_RE = '(?:\0.|[ _-])*' + result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars + STRIP_RE = r'(?:\0.|[ _-])*' result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end result = result.replace('\0', '') or '_' @@ -2400,8 +2400,7 @@ def remove_quotes(s): def get_domain(url): - domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url) - return domain.group('domain') if domain else None + return '.'.join(urllib.parse.urlparse(url).netloc.rsplit('.', 2)[-2:]) def url_basename(url): |