about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: pukkandan <pukkandan.ytdlp@gmail.com> 2022-10-03 16:50:27 +0530
committer: pukkandan <pukkandan.ytdlp@gmail.com> 2022-10-03 16:56:19 +0530
commit: 8b7fb8b60da78b54a518246b251be3d1829fef38 (patch)
tree: 0a18644076581c900ef769f6319d758749f4a279
parent: a83333c4328591c279a27dd0ec4c7c5addcc411f (diff)
download: hypervideo-pre-8b7fb8b60da78b54a518246b251be3d1829fef38.tar.lz
          hypervideo-pre-8b7fb8b60da78b54a518246b251be3d1829fef38.tar.xz
          hypervideo-pre-8b7fb8b60da78b54a518246b251be3d1829fef38.zip
[extractor] Make search_json able to parse lists
Now `contains_pattern` can be set to `\[.+\]`
-rw-r--r-- yt_dlp/extractor/common.py 4
-rw-r--r-- yt_dlp/extractor/dropbox.py 2
-rw-r--r-- yt_dlp/extractor/radiofrance.py 2
3 files changed, 4 insertions, 4 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 11e715871..caec0ccf6 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1227,7 +1227,7 @@ class InfoExtractor:
return None
def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
- contains_pattern='(?s:.+)', fatal=True, default=NO_DEFAULT, **kwargs):
+ contains_pattern=r'{(?s:.+)}', fatal=True, default=NO_DEFAULT, **kwargs):
"""Searches string for the JSON object specified by start_pattern"""
# NB: end_pattern is only used to reduce the size of the initial match
if default is NO_DEFAULT:
@@ -1236,7 +1236,7 @@ class InfoExtractor:
fatal, has_default = False, True
json_string = self._search_regex(
- rf'(?:{start_pattern})\s*(?P<json>{{\s*(?:{contains_pattern})\s*}})\s*(?:{end_pattern})',
+ rf'(?:{start_pattern})\s*(?P<json>{contains_pattern})\s*(?:{end_pattern})',
string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
if not json_string:
return default
diff --git a/yt_dlp/extractor/dropbox.py b/yt_dlp/extractor/dropbox.py
index 0d12513b2..54d97a25d 100644
--- a/yt_dlp/extractor/dropbox.py
+++ b/yt_dlp/extractor/dropbox.py
@@ -54,7 +54,7 @@ class DropboxIE(InfoExtractor):
raise ExtractorError('Password protected video, use --video-password <password>', expected=True)
info_json = self._search_json(r'InitReact\.mountComponent\(.*?,', webpage, 'mountComponent', video_id,
- contains_pattern=r'.+?"preview".+?', end_pattern=r'\)')['props']
+ contains_pattern=r'{.+?"preview".+?}', end_pattern=r'\)')['props']
transcode_url = traverse_obj(info_json, ((None, 'preview'), 'file', 'preview', 'content', 'transcode_url'), get_all=False)
formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id)
diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py
index 7b60b2617..38420a15d 100644
--- a/yt_dlp/extractor/radiofrance.py
+++ b/yt_dlp/extractor/radiofrance.py
@@ -84,7 +84,7 @@ class FranceCultureIE(InfoExtractor):
webpage = self._download_webpage(url, display_id)
# _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846
- video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'\s*"@type"\s*:\s*"AudioObject"\s*.+')
+ video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'{\s*"@type"\s*:\s*"AudioObject".+}')
return {
'id': video_id,