aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp')
-rw-r--r--yt_dlp/YoutubeDL.py2
-rw-r--r--yt_dlp/compat/_legacy.py38
-rw-r--r--yt_dlp/downloader/__init__.py8
-rw-r--r--yt_dlp/downloader/hls.py2
-rw-r--r--yt_dlp/extractor/generic.py32
-rw-r--r--yt_dlp/extractor/youtube.py9
-rw-r--r--yt_dlp/utils.py9
7 files changed, 50 insertions, 50 deletions
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
index 9ebb0b82a..0711f38c7 100644
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -3531,7 +3531,7 @@ class YoutubeDL:
'none', '' if f.get('vcodec') == 'none'
else self._format_out('video only', self.Styles.SUPPRESS)),
format_field(f, 'abr', '\t%dk'),
- format_field(f, 'asr', '\t%dHz'),
+ format_field(f, 'asr', '\t%s', func=format_decimal_suffix),
join_nonempty(
self._format_out('UNSUPPORTED', 'light red') if f.get('ext') in ('f4f', 'f4m') else None,
format_field(f, 'language', '[%s]'),
diff --git a/yt_dlp/compat/_legacy.py b/yt_dlp/compat/_legacy.py
index 49bb13a3c..e75f79bbf 100644
--- a/yt_dlp/compat/_legacy.py
+++ b/yt_dlp/compat/_legacy.py
@@ -44,14 +44,26 @@ def compat_setenv(key, value, env=os.environ):
compat_basestring = str
+compat_chr = chr
compat_collections_abc = collections.abc
+compat_cookiejar = http.cookiejar
+compat_cookiejar_Cookie = http.cookiejar.Cookie
compat_cookies = http.cookies
+compat_cookies_SimpleCookie = http.cookies.SimpleCookie
compat_etree_Element = etree.Element
compat_etree_register_namespace = etree.register_namespace
compat_filter = filter
+compat_get_terminal_size = shutil.get_terminal_size
compat_getenv = os.getenv
+compat_getpass = getpass.getpass
+compat_html_entities = html.entities
+compat_html_entities_html5 = html.entities.html5
+compat_HTMLParser = html.parser.HTMLParser
+compat_http_client = http.client
+compat_http_server = http.server
compat_input = input
compat_integer_types = (int, )
+compat_itertools_count = itertools.count
compat_kwargs = lambda kwargs: kwargs
compat_map = map
compat_numeric_types = (int, float, complex)
@@ -59,34 +71,22 @@ compat_print = print
compat_shlex_split = shlex.split
compat_socket_create_connection = socket.create_connection
compat_Struct = struct.Struct
+compat_struct_pack = struct.pack
+compat_struct_unpack = struct.unpack
compat_subprocess_get_DEVNULL = lambda: DEVNULL
+compat_tokenize_tokenize = tokenize.tokenize
+compat_urllib_error = urllib.error
+compat_urllib_parse = urllib.parse
compat_urllib_parse_quote = urllib.parse.quote
compat_urllib_parse_quote_plus = urllib.parse.quote_plus
+compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes
compat_urllib_parse_urlunparse = urllib.parse.urlunparse
-compat_urllib_request_DataHandler = urllib.request.DataHandler
compat_urllib_request = urllib.request
+compat_urllib_request_DataHandler = urllib.request.DataHandler
compat_urllib_response = urllib.response
compat_urlretrieve = urllib.request.urlretrieve
compat_xml_parse_error = etree.ParseError
compat_xpath = lambda xpath: xpath
compat_zip = zip
workaround_optparse_bug9161 = lambda: None
-compat_getpass = getpass.getpass
-compat_chr = chr
-compat_urllib_parse = urllib.parse
-compat_itertools_count = itertools.count
-compat_cookiejar = http.cookiejar
-compat_cookiejar_Cookie = http.cookiejar.Cookie
-compat_cookies_SimpleCookie = http.cookies.SimpleCookie
-compat_get_terminal_size = shutil.get_terminal_size
-compat_html_entities = html.entities
-compat_html_entities_html5 = html.entities.html5
-compat_tokenize_tokenize = tokenize.tokenize
-compat_HTMLParser = html.parser.HTMLParser
-compat_http_client = http.client
-compat_http_server = http.server
-compat_struct_pack = struct.pack
-compat_struct_unpack = struct.unpack
-compat_urllib_error = urllib.error
-compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus
diff --git a/yt_dlp/downloader/__init__.py b/yt_dlp/downloader/__init__.py
index a7dc6c9d0..c34dbcea9 100644
--- a/yt_dlp/downloader/__init__.py
+++ b/yt_dlp/downloader/__init__.py
@@ -59,10 +59,11 @@ PROTOCOL_MAP = {
def shorten_protocol_name(proto, simplify=False):
short_protocol_names = {
- 'm3u8_native': 'm3u8_n',
- 'rtmp_ffmpeg': 'rtmp_f',
+ 'm3u8_native': 'm3u8',
+ 'm3u8': 'm3u8F',
+ 'rtmp_ffmpeg': 'rtmpF',
'http_dash_segments': 'dash',
- 'http_dash_segments_generator': 'dash_g',
+ 'http_dash_segments_generator': 'dashG',
'niconico_dmc': 'dmc',
'websocket_frag': 'WSfrag',
}
@@ -70,6 +71,7 @@ def shorten_protocol_name(proto, simplify=False):
short_protocol_names.update({
'https': 'http',
'ftps': 'ftp',
+ 'm3u8': 'm3u8', # Reverse above m3u8 mapping
'm3u8_native': 'm3u8',
'http_dash_segments_generator': 'dash',
'rtmp_ffmpeg': 'rtmp',
diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index 1e75c5e9c..2010f3dc9 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -69,7 +69,7 @@ class HlsFD(FragmentFD):
elif no_crypto:
message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; '
'Decryption will be performed natively, but will be extremely slow')
- elif re.search(r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
+ elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s):
install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and '
message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, '
f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command')
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 49f81e562..b63271c1f 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2825,12 +2825,22 @@ class GenericIE(InfoExtractor):
new_url, {'force_videoid': force_videoid})
return self.url_result(new_url)
- full_response = None
- if head_response is False:
+ def request_webpage():
request = sanitized_Request(url)
+ # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
+ # making it impossible to download only a chunk of the file (yet we need only 512kB to
+ # test whether it's HTML or not). With yt-dlp's default Accept-Encoding, this would
+ # always result in downloading the whole file, which is not desirable.
+ # Therefore, for the extraction pass we have to override Accept-Encoding to any in order
+ # to accept raw bytes and be able to download only a chunk.
+ # It may be better to solve this by checking Content-Type for application/octet-stream
+ # after the HEAD request finishes, but we are not sure if we can rely on this.
request.add_header('Accept-Encoding', '*')
- full_response = self._request_webpage(request, video_id)
- head_response = full_response
+ return self._request_webpage(request, video_id)
+
+ full_response = None
+ if head_response is False:
+ head_response = full_response = request_webpage()
info_dict = {
'id': video_id,
@@ -2868,19 +2878,7 @@ class GenericIE(InfoExtractor):
self.report_warning(
'%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
- if not full_response:
- request = sanitized_Request(url)
- # Some webservers may serve compressed content of rather big size (e.g. gzipped flac)
- # making it impossible to download only chunk of the file (yet we need only 512kB to
- # test whether it's HTML or not). According to yt-dlp default Accept-Encoding
- # that will always result in downloading the whole file that is not desirable.
- # Therefore for extraction pass we have to override Accept-Encoding to any in order
- # to accept raw bytes and being able to download only a chunk.
- # It may probably better to solve this by checking Content-Type for application/octet-stream
- # after HEAD request finishes, but not sure if we can rely on this.
- request.add_header('Accept-Encoding', '*')
- full_response = self._request_webpage(request, video_id)
-
+ full_response = full_response or request_webpage()
first_bytes = full_response.read(512)
# Is it an M3U playlist?
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 1a9c88f35..3e2ac030e 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2467,6 +2467,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
func_id = f'js_{player_id}_{self._signature_cache_id(example_sig)}'
assert os.path.basename(func_id) == func_id
+ self.write_debug(f'Extracting signature function {func_id}')
cache_spec = self.cache.load('youtube-sigfuncs', func_id)
if cache_spec is not None:
return lambda s: ''.join(s[i] for i in cache_spec)
@@ -2714,10 +2715,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
@classmethod
def extract_id(cls, url):
- mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- return mobj.group('id')
+ video_id = cls.get_temp_id(url)
+ if not video_id:
+ raise ExtractorError(f'Invalid URL: {url}')
+ return video_id
def _extract_chapters_from_json(self, data, duration):
chapter_list = traverse_obj(
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 9c9be5fe5..32c41a169 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -234,7 +234,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?})\s*</script>'
NUMBER_RE = r'\d+(?:\.\d+)?'
@@ -673,8 +673,8 @@ def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
result = ''.join(map(replace_insane, s))
if is_id is NO_DEFAULT:
- result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result) # Remove repeated substitute chars
- STRIP_RE = '(?:\0.|[ _-])*'
+ result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars
+ STRIP_RE = r'(?:\0.|[ _-])*'
result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
result = result.replace('\0', '') or '_'
@@ -2400,8 +2400,7 @@ def remove_quotes(s):
def get_domain(url):
- domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
- return domain.group('domain') if domain else None
+ return '.'.join(urllib.parse.urlparse(url).netloc.rsplit('.', 2)[-2:])
def url_basename(url):