diff options
author | Zenon Mousmoulas <zmousm@users.noreply.github.com> | 2022-01-30 15:35:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-30 19:05:39 +0530 |
commit | 19afd9ea513fc2cd29b7242544cfe0dec1db892e (patch) | |
tree | 0e1756aaf7f136cd05e544a958f1e2c3b12da455 /yt_dlp/extractor/glomex.py | |
parent | b72270d27eb8086b1038bf21d9c6cf88ce20e211 (diff) | |
download | hypervideo-pre-19afd9ea513fc2cd29b7242544cfe0dec1db892e.tar.lz hypervideo-pre-19afd9ea513fc2cd29b7242544cfe0dec1db892e.tar.xz hypervideo-pre-19afd9ea513fc2cd29b7242544cfe0dec1db892e.zip |
[GlomexEmbed] Avoid large match objects
Closes #2512
Authored by: zmousm
Diffstat (limited to 'yt_dlp/extractor/glomex.py')
-rw-r--r-- | yt_dlp/extractor/glomex.py | 9 |
1 files changed, 7 insertions, 2 deletions
```diff
diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py
index 1d387bdfd..a6477faab 100644
--- a/yt_dlp/extractor/glomex.py
+++ b/yt_dlp/extractor/glomex.py
@@ -198,8 +198,13 @@ class GlomexEmbedIE(GlomexBaseIE):
                 )+</script>
             )''' % {'quot_re': r'["\']', 'url_re': VALID_SRC}
 
-        for mobj in re.finditer(EMBED_RE, webpage):
-            mdict = mobj.groupdict()
+        for mtup in re.findall(EMBED_RE, webpage):
+            # re.finditer causes a memory spike. See https://github.com/yt-dlp/yt-dlp/issues/2512
+            mdict = dict(zip((
+                'url', '_',
+                'html_tag', '_', 'integration_html', '_', 'id_html', '_', 'glomex_player',
+                'script_tag', '_', '_', 'integration_js', '_', 'id_js',
+            ), mtup))
             if mdict.get('url'):
                 url = unescapeHTML(mdict['url'])
                 if not cls.suitable(url):
```