From bfd973ece3369c593b5e82a88cc16de80088a73e Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 1 Aug 2022 06:53:25 +0530 Subject: [extractors] Use new framework for existing embeds (#4307) `Brightcove` is difficult to migrate because its subclasses may depend on the signature of the current functions. So it is left as-is for now Note: Tests have not been migrated --- yt_dlp/extractor/mangomolo.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'yt_dlp/extractor/mangomolo.py') diff --git a/yt_dlp/extractor/mangomolo.py b/yt_dlp/extractor/mangomolo.py index a392e9b54..568831aa8 100644 --- a/yt_dlp/extractor/mangomolo.py +++ b/yt_dlp/extractor/mangomolo.py @@ -3,11 +3,29 @@ from ..compat import ( compat_b64decode, compat_urllib_parse_unquote, ) -from ..utils import int_or_none +from ..utils import classproperty, int_or_none class MangomoloBaseIE(InfoExtractor): - _BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)' + _BASE_REGEX = r'(?:https?:)?//(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)' + _SLUG = None + + @classproperty + def _VALID_URL(cls): + return f'{cls._BASE_REGEX}{cls._SLUG}' + + @classproperty + def _EMBED_REGEX(cls): + return [rf'<iframe[^>]+src=(["\'])(?P<url>{cls._VALID_URL}.+?)\1'] + + def _extract_from_webpage(self, url, webpage): + for res in super()._extract_from_webpage(url, webpage): + yield { + **res, + '_type': 'url_transparent', + 'id': self._search_regex(self._SLUG, res['url'], 'id', group='id'), + 'uploader': self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader'), + } def _get_real_id(self, page_id): return page_id @@ -41,14 +59,15 @@ class MangomoloBaseIE(InfoExtractor): class MangomoloVideoIE(MangomoloBaseIE): _TYPE = 'video' IE_NAME = 'mangomolo:' + _TYPE - _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)' + _SLUG = r'video\?.*?\bid=(?P<id>\d+)' + _IS_LIVE = False
class MangomoloLiveIE(MangomoloBaseIE): _TYPE = 'live' IE_NAME = 'mangomolo:' + _TYPE - _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' + _SLUG = r'(?:live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)' _IS_LIVE = True def _get_real_id(self, page_id): -- cgit v1.2.3