From 4c9a1a3ba56c2906f9ef8d768de7f8e5a2361144 Mon Sep 17 00:00:00 2001 From: Matthew Date: Sun, 9 Oct 2022 18:55:26 +1300 Subject: [extractor/wordpress:mb.miniAudioPlayer] Add embed extractor (#5087) Closes https://github.com/yt-dlp/yt-dlp/issues/4994 Authored by: coletdjnz --- yt_dlp/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'yt_dlp/utils.py') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index cb14908c7..5a88a928d 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -408,7 +408,7 @@ def get_elements_html_by_attribute(*args, **kwargs): return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)] -def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True): +def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True): """ Return the text (content) and the html (whole) of the tag with the specified attribute in the passed HTML document @@ -419,7 +419,7 @@ def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value value = re.escape(value) if escape_value else value partial_element_re = rf'''(?x) - <(?P<tag>[a-zA-Z0-9:._-]+) + <(?P<tag>{tag}) (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)? \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q) ''' -- cgit v1.2.3