aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--yt_dlp/extractor/common.py12
-rw-r--r--yt_dlp/extractor/youtube.py62
2 files changed, 54 insertions, 20 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index b24599d5f..5767662ed 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1033,11 +1033,19 @@ class InfoExtractor:
expected_status=expected_status)
return res if res is False else res[0]
- def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
+ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True, lenient=False):
if transform_source:
json_string = transform_source(json_string)
try:
- return json.loads(json_string, strict=False)
+ try:
+ return json.loads(json_string, strict=False)
+ except json.JSONDecodeError as e:
+ if not lenient:
+ raise
+ try:
+ return json.loads(json_string[:e.pos], strict=False)
+ except ValueError:
+ raise e
except ValueError as ve:
errmsg = '%s: Failed to parse JSON ' % video_id
if fatal:
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 245778dff..6d9659b1d 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -397,8 +397,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if self._LOGIN_REQUIRED and not self._cookies_passed:
self.raise_login_required('Login details are needed to download this content', method='cookies')
- _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
- _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
+ _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+})\s*;'
+ _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+})\s*;'
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
def _get_default_ytcfg(self, client='web'):
@@ -2212,28 +2212,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}, {
# Story. Requires specific player params to work.
# Note: stories get removed after some period of time
- 'url': 'https://www.youtube.com/watch?v=yN3x1t3sieA',
+ 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
'info_dict': {
- 'id': 'yN3x1t3sieA',
+ 'id': 'vv8qTUWmulI',
'ext': 'mp4',
- 'uploader': 'Linus Tech Tips',
- 'duration': 13,
- 'channel': 'Linus Tech Tips',
+ 'availability': 'unlisted',
+ 'view_count': int,
+ 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
+ 'upload_date': '20220526',
+ 'categories': ['Education'],
+ 'title': 'Story',
+ 'channel': 'IT\'S HISTORY',
+ 'description': '',
+ 'uploader_id': 'BlastfromthePast',
+ 'duration': 12,
+ 'uploader': 'IT\'S HISTORY',
'playable_in_embed': True,
- 'tags': [],
'age_limit': 0,
- 'uploader_url': 'http://www.youtube.com/user/LinusTechTips',
- 'upload_date': '20220402',
- 'thumbnail': 'https://i.ytimg.com/vi_webp/yN3x1t3sieA/maxresdefault.webp',
- 'title': 'Story',
'live_status': 'not_live',
- 'uploader_id': 'LinusTechTips',
+ 'tags': [],
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
+ 'uploader_url': 'http://www.youtube.com/user/BlastfromthePast',
+ 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
+ }
+ }, {
+ 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
+ 'info_dict': {
+ 'id': 'tjjjtzRLHvA',
+ 'ext': 'mp4',
+ 'title': 'ハッシュタグ無し };if window.ytcsi',
+ 'upload_date': '20220323',
+ 'like_count': int,
+ 'availability': 'unlisted',
+ 'channel': 'nao20010128nao',
+ 'thumbnail': 'https://i.ytimg.com/vi_webp/tjjjtzRLHvA/maxresdefault.webp',
+ 'age_limit': 0,
+ 'uploader': 'nao20010128nao',
+ 'uploader_id': 'nao20010128nao',
+ 'categories': ['Music'],
'view_count': int,
'description': '',
- 'channel_id': 'UCXuqSBlHAE6Xw-yeJA0Tunw',
- 'categories': ['Science & Technology'],
- 'channel_url': 'https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw',
- 'availability': 'unlisted',
+ 'channel_url': 'https://www.youtube.com/channel/UCdqltm_7iv1Vs6kp6Syke5A',
+ 'channel_id': 'UCdqltm_7iv1Vs6kp6Syke5A',
+ 'live_status': 'not_live',
+ 'playable_in_embed': True,
+ 'channel_follower_count': int,
+ 'duration': 6,
+ 'tags': [],
+ 'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
}
}
]
@@ -2754,7 +2780,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _extract_yt_initial_variable(self, webpage, regex, video_id, name):
return self._parse_json(self._search_regex(
(fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}',
- regex), webpage, name, default='{}'), video_id, fatal=False)
+ regex), webpage, name, default='{}'), video_id, fatal=False, lenient=True)
def _extract_comment(self, comment_renderer, parent=None):
comment_id = comment_renderer.get('commentId')