about | summary | refs | log | tree | commit | diff | stats
path: root/yt_dlp/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor')
-rw-r--r-- yt_dlp/extractor/common.py 18
-rw-r--r-- yt_dlp/extractor/fc2.py 3
-rw-r--r-- yt_dlp/extractor/voicy.py 6
3 files changed, 16 insertions, 11 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index ebeca4395..6a451c20b 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1343,7 +1343,7 @@ class InfoExtractor:
return self._og_search_property('url', html, **kargs)
def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs):
- return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
+ return self._html_search_regex(r'(?s)<title\b[^>]*>([^<]+)</title>', html, name, fatal=fatal, **kwargs)
def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
name = variadic(name)
@@ -1509,8 +1509,9 @@ class InfoExtractor:
'url': url_or_none(e.get('contentUrl')),
'title': unescapeHTML(e.get('name')),
'description': unescapeHTML(e.get('description')),
- 'thumbnails': [{'url': url_or_none(url)}
- for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))],
+ 'thumbnails': [{'url': url}
+ for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
+ if url_or_none(url)],
'duration': parse_duration(e.get('duration')),
'timestamp': unified_timestamp(e.get('uploadDate')),
# author can be an instance of 'Organization' or 'Person' types.
@@ -2803,13 +2804,18 @@ class InfoExtractor:
mime_type = representation_attrib['mimeType']
content_type = representation_attrib.get('contentType', mime_type.split('/')[0])
- codecs = parse_codecs(representation_attrib.get('codecs', ''))
+ codec_str = representation_attrib.get('codecs', '')
+ # Some kind of binary subtitle found in some youtube livestreams
+ if mime_type == 'application/x-rawcc':
+ codecs = {'scodec': codec_str}
+ else:
+ codecs = parse_codecs(codec_str)
if content_type not in ('video', 'audio', 'text'):
if mime_type == 'image/jpeg':
content_type = mime_type
- elif codecs['vcodec'] != 'none':
+ elif codecs.get('vcodec', 'none') != 'none':
content_type = 'video'
- elif codecs['acodec'] != 'none':
+ elif codecs.get('acodec', 'none') != 'none':
content_type = 'audio'
elif codecs.get('scodec', 'none') != 'none':
content_type = 'text'
diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py
index 225677b00..54b136ec7 100644
--- a/yt_dlp/extractor/fc2.py
+++ b/yt_dlp/extractor/fc2.py
@@ -10,7 +10,6 @@ from ..utils import (
WebSocketsWrapper,
js_to_json,
sanitized_Request,
- std_headers,
traverse_obj,
update_url_query,
urlencode_postdata,
@@ -207,7 +206,7 @@ class FC2LiveIE(InfoExtractor):
'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:],
'Origin': 'https://live.fc2.com',
'Accept': '*/*',
- 'User-Agent': std_headers['User-Agent'],
+ 'User-Agent': self.get_param('http_headers')['User-Agent'],
})
self.write_debug('[debug] Sending HLS server request')
diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py
index e4570a03a..feab79138 100644
--- a/yt_dlp/extractor/voicy.py
+++ b/yt_dlp/extractor/voicy.py
@@ -1,3 +1,5 @@
+import itertools
+
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
@@ -9,8 +11,6 @@ from ..utils import (
unsmuggle_url,
)
-import itertools
-
class VoicyBaseIE(InfoExtractor):
def _extract_from_playlist_data(self, value):
@@ -105,7 +105,7 @@ class VoicyChannelIE(VoicyBaseIE):
@classmethod
def suitable(cls, url):
- return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url)
+ return not VoicyIE.suitable(url) and super().suitable(url)
def _entries(self, channel_id):
pager = ''