about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author siikamiika <siikamiika@users.noreply.github.com> 2020-08-11 00:13:43 +0300
committer siikamiika <siikamiika@users.noreply.github.com> 2020-08-11 00:13:43 +0300
commit 15eae44d74c80cca29cd5b24129585ad2d1e535f (patch)
tree 188a50abe2b66eae909e6bc390bb1f9393092d29
parent eaedbfd97e860214399b0028fc47a487762e8294 (diff)
download hypervideo-pre-15eae44d74c80cca29cd5b24129585ad2d1e535f.tar.lz
hypervideo-pre-15eae44d74c80cca29cd5b24129585ad2d1e535f.tar.xz
hypervideo-pre-15eae44d74c80cca29cd5b24129585ad2d1e535f.zip
harden regex with lookbehind
-rw-r--r-- youtube_dl/downloader/youtube_live_chat.py 4
-rw-r--r-- youtube_dl/extractor/youtube.py 4
2 files changed, 4 insertions, 4 deletions
diff --git a/youtube_dl/downloader/youtube_live_chat.py b/youtube_dl/downloader/youtube_live_chat.py
index 697e52550..4932dd9c5 100644
--- a/youtube_dl/downloader/youtube_live_chat.py
+++ b/youtube_dl/downloader/youtube_live_chat.py
@@ -28,8 +28,8 @@ class YoutubeLiveChatReplayFD(FragmentFD):
return self._download_fragment(ctx, url, info_dict, headers)
def parse_yt_initial_data(data):
- window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?);'
- var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?);'
+ window_patt = b'window\\["ytInitialData"\\]\\s*=\\s*(.*?)(?<=});'
+ var_patt = b'var\\s+ytInitialData\\s*=\\s*(.*?)(?<=});'
for patt in window_patt, var_patt:
try:
raw_json = re.search(patt, data).group(1)
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index e143bbee7..9fff8bdf4 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1495,8 +1495,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_yt_initial_data(self, video_id, webpage):
config = self._search_regex(
- (r'window\["ytInitialData"\]\s*=\s*(.*);',
- r'var\s+ytInitialData\s*=\s*(.*?);'),
+ (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
+ r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
webpage, 'ytInitialData', default=None)
if config:
return self._parse_json(