aboutsummaryrefslogtreecommitdiffstats
path: root/yt_dlp/extractor/glomex.py
diff options
context:
space:
mode:
author Zenon Mousmoulas <zmousm@users.noreply.github.com> 2022-01-30 15:35:39 +0200
committer GitHub <noreply@github.com> 2022-01-30 19:05:39 +0530
commit 19afd9ea513fc2cd29b7242544cfe0dec1db892e (patch)
tree 0e1756aaf7f136cd05e544a958f1e2c3b12da455 /yt_dlp/extractor/glomex.py
parent b72270d27eb8086b1038bf21d9c6cf88ce20e211 (diff)
download hypervideo-pre-19afd9ea513fc2cd29b7242544cfe0dec1db892e.tar.lz
hypervideo-pre-19afd9ea513fc2cd29b7242544cfe0dec1db892e.tar.xz
hypervideo-pre-19afd9ea513fc2cd29b7242544cfe0dec1db892e.zip
[GlomexEmbed] Avoid large match objects
Closes #2512
Authored by: zmousm
Diffstat (limited to 'yt_dlp/extractor/glomex.py')
-rw-r--r-- yt_dlp/extractor/glomex.py 9
1 files changed, 7 insertions, 2 deletions
diff --git a/yt_dlp/extractor/glomex.py b/yt_dlp/extractor/glomex.py
index 1d387bdfd..a6477faab 100644
--- a/yt_dlp/extractor/glomex.py
+++ b/yt_dlp/extractor/glomex.py
@@ -198,8 +198,13 @@ class GlomexEmbedIE(GlomexBaseIE):
)+</script>
)''' % {'quot_re': r'["\']', 'url_re': VALID_SRC}
- for mobj in re.finditer(EMBED_RE, webpage):
- mdict = mobj.groupdict()
+ for mtup in re.findall(EMBED_RE, webpage):
+ # re.finditer causes a memory spike. See https://github.com/yt-dlp/yt-dlp/issues/2512
+ mdict = dict(zip((
+ 'url', '_',
+ 'html_tag', '_', 'integration_html', '_', 'id_html', '_', 'glomex_player',
+ 'script_tag', '_', '_', 'integration_js', '_', 'id_js',
+ ), mtup))
if mdict.get('url'):
url = unescapeHTML(mdict['url'])
if not cls.suitable(url):