From 64fa820ccf61a7aea6c2a48b1362b3a4ec270cad Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 25 May 2022 17:53:46 +0530 Subject: [cleanup] Misc fixes (see desc) * [tvver] Fix bug in 6837633a4a614920b6e43ffc6b4b8590dca8c9d7 - Closes #4054 * [rumble] Fix tests - Closes #3976 * [make] Remove `cat` abuse - Closes #3989 * [make] Revert #3684 - Closes #3814 * [utils] Improve `get_elements_by_class` - Closes #3993 * [utils] Inherit `Namespace` from `types.SimpleNamespace` * [utils] Use `re.fullmatch` for matching filters * [jsinterp] Handle quotes in `_separate` * [make_readme] Allow overshooting last line Authored by: pukkandan, kwconder, MrRawes, Lesmiscore --- yt_dlp/utils.py | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) (limited to 'yt_dlp/utils.py') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 137d29d0a..e6e6d2759 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -34,6 +34,7 @@ import sys import tempfile import time import traceback +import types import urllib.parse import xml.etree.ElementTree import zlib @@ -397,14 +398,14 @@ def get_element_html_by_attribute(attribute, value, html, **kargs): def get_elements_by_class(class_name, html, **kargs): """Return the content of all tags with the specified class in the passed HTML document as a list""" return get_elements_by_attribute( - 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), + 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name), html, escape_value=False) def get_elements_html_by_class(class_name, html): """Return the html of all tags with the specified class in the passed HTML document as a list""" return get_elements_html_by_attribute( - 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name), + 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name), html, escape_value=False) @@ -3404,16 +3405,15 @@ def _match_one(filter_part, dct, incomplete): else: is_incomplete = lambda k: k in incomplete - 
operator_rex = re.compile(r'''(?x)\s* + operator_rex = re.compile(r'''(?x) (?P<key>[a-z_]+) \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* (?: (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)| (?P<strval>.+?) ) - \s*$ ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) - m = operator_rex.search(filter_part) + m = operator_rex.fullmatch(filter_part.strip()) if m: m = m.groupdict() unnegated_op = COMPARISON_OPERATORS[m['op']] @@ -3449,11 +3449,10 @@ def _match_one(filter_part, dct, incomplete): '': lambda v: (v is True) if isinstance(v, bool) else (v is not None), '!': lambda v: (v is False) if isinstance(v, bool) else (v is None), } - operator_rex = re.compile(r'''(?x)\s* + operator_rex = re.compile(r'''(?x) (?P<op>%s)\s*(?P<key>[a-z_]+) - \s*$ ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys()))) - m = operator_rex.search(filter_part) + m = operator_rex.fullmatch(filter_part.strip()) if m: op = UNARY_OPERATORS[m.group('op')] actual_value = dct.get(m.group('key')) @@ -5395,23 +5394,15 @@ class classproperty: return self.func(cls) -class Namespace: +class Namespace(types.SimpleNamespace): """Immutable namespace""" - def __init__(self, **kwargs): - self._dict = kwargs - - def __getattr__(self, attr): - return self._dict[attr] - - def __contains__(self, item): - return item in self._dict.values() - def __iter__(self): - return iter(self._dict.items()) + return iter(self.__dict__.values()) - def __repr__(self): - return f'{type(self).__name__}({", ".join(f"{k}={v}" for k, v in self)})' + @property + def items_(self): + return self.__dict__.items() # Deprecated -- cgit v1.2.3