[extractor] Add `default` parameter to `_search_json` (#4057)

Authored by: pukkandan, coletdjnz
author: coletdev <coletdjnz@protonmail.com> 2022-06-19 00:55:18 +0000
committer: GitHub <noreply@github.com> 2022-06-18 17:55:18 -0700
commit: f0bc6e2019a2f81d358ebddc4ae4cf8e9e4ed905 (patch)
tree: 53d58302032bb3a9cd314591ba3cb872384d1ba3
parent: 9fde8a6b125466419745bfc2afed1f34f9821b3f (diff)
download: hypervideo-pre-f0bc6e2019a2f81d358ebddc4ae4cf8e9e4ed905.tar.lz hypervideo-pre-f0bc6e2019a2f81d358ebddc4ae4cf8e9e4ed905.tar.xz hypervideo-pre-f0bc6e2019a2f81d358ebddc4ae4cf8e9e4ed905.zip
2 files changed, 26 insertions, 7 deletions
diff --git a/yt_dlp/extractor/archiveorg.py b/yt_dlp/extractor/archiveorg.py
index c1c9b0adf..179602d46 100644
--- a/yt_dlp/extractor/archiveorg.py
+++ b/yt_dlp/extractor/archiveorg.py
@@ -486,9 +486,9 @@ class YoutubeWebArchiveIE(InfoExtractor):
         search_meta = ((lambda x: self._html_search_meta(x, webpage, default=None)) if webpage else (lambda x: None))
         player_response = self._search_json(
             self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response',
-            video_id, fatal=False)
+            video_id, default={})
         initial_data = self._search_json(
-            self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, fatal=False)
+            self._YT_INITIAL_DATA_RE, webpage, 'initial data', video_id, default={})
 
         initial_data_video = traverse_obj(
             initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'videoPrimaryInfoRenderer'),
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 601394b41..093a9b5cd 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1188,13 +1188,32 @@ class InfoExtractor:
             self.report_warning('unable to extract %s' % _name + bug_reports_message())
             return None
 
-    def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='', contains_pattern='(?s:.+)', fatal=True, **kwargs):
+    def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
+                     contains_pattern='(?s:.+)', fatal=True, default=NO_DEFAULT, **kwargs):
         """Searches string for the JSON object specified by start_pattern"""
         # NB: end_pattern is only used to reduce the size of the initial match
-        return self._parse_json(
-            self._search_regex(rf'{start_pattern}\s*(?P<json>{{{contains_pattern}}})\s*{end_pattern}',
-                               string, name, group='json', fatal=fatal) or '{}',
-            video_id, fatal=fatal, ignore_extra=True, **kwargs) or {}
+        if default is NO_DEFAULT:
+            default, has_default = {}, False
+        else:
+            fatal, has_default = False, True
+
+        json_string = self._search_regex(
+            rf'{start_pattern}\s*(?P<json>{{\s*{contains_pattern}\s*}})\s*{end_pattern}',
+            string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
+        if not json_string:
+            return default
+
+        _name = self._downloader._format_err(name, self._downloader.Styles.EMPHASIS)
+        try:
+            return self._parse_json(json_string, video_id, ignore_extra=True, **kwargs)
+        except ExtractorError as e:
+            if fatal:
+                raise ExtractorError(
+                    f'Unable to extract {_name} - Failed to parse JSON', cause=e.cause, video_id=video_id)
+            elif not has_default:
+                self.report_warning(
+                    f'Unable to extract {_name} - Failed to parse JSON: {e}', video_id=video_id)
+        return default
 
     def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
         """
author	coletdev <coletdjnz@protonmail.com>	2022-06-19 00:55:18 +0000
committer	GitHub <noreply@github.com>	2022-06-18 17:55:18 -0700
commit	f0bc6e2019a2f81d358ebddc4ae4cf8e9e4ed905 (patch)
tree	53d58302032bb3a9cd314591ba3cb872384d1ba3
parent	9fde8a6b125466419745bfc2afed1f34f9821b3f (diff)
download	hypervideo-pre-f0bc6e2019a2f81d358ebddc4ae4cf8e9e4ed905.tar.lz hypervideo-pre-f0bc6e2019a2f81d358ebddc4ae4cf8e9e4ed905.tar.xz hypervideo-pre-f0bc6e2019a2f81d358ebddc4ae4cf8e9e4ed905.zip