diff options
Diffstat (limited to 'yt_dlp/utils.py')
-rw-r--r-- | yt_dlp/utils.py | 26 |
1 files changed, 26 insertions, 0 deletions
class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser to gather the attributes for the elements of a list

    After feeding markup, ``items`` holds one attribute dict per top-level
    <li> element, in document order. <li> elements nested inside another
    element are ignored.
    """

    # Void elements never have a closing tag, so they must not take part in
    # depth tracking; otherwise a bare <br> or <img> would leave the depth
    # permanently above zero and hide every following top-level <li>.
    _VOID_ELEMENTS = frozenset((
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'param', 'source', 'track', 'wbr',
    ))

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag in self._VOID_ELEMENTS:
            return
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        # Self-closed void elements (<br/>) arrive here via
        # handle_startendtag; skip them to mirror handle_starttag.
        if tag in self._VOID_ELEMENTS:
            return
        # Never go below zero: a stray closing tag must not hide the
        # top-level <li> elements that follow it.
        if self._level > 0:
            self._level -= 1


def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a list of dictionaries of their attributes

    Only top-level <li> elements are reported; the result is an empty list
    when there are none.
    """
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items