From eaeeef9c1d1bedb76fea953c332ef84d53bffe2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs?= Date: Fri, 2 Dec 2022 05:21:10 +0800 Subject: update from upstream --- hypervideo_dl/extractor/gedidigital.py | 36 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 24 deletions(-) (limited to 'hypervideo_dl/extractor/gedidigital.py') diff --git a/hypervideo_dl/extractor/gedidigital.py b/hypervideo_dl/extractor/gedidigital.py index ec386c2..1878d63 100644 --- a/hypervideo_dl/extractor/gedidigital.py +++ b/hypervideo_dl/extractor/gedidigital.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import re from .common import InfoExtractor @@ -14,7 +11,7 @@ from ..utils import ( class GediDigitalIE(InfoExtractor): - _VALID_URL = r'''(?x)(?P(?:https?:)//video\. + _VALID_URL = r'''(?x:(?P(?:https?:)//video\. (?: (?: (?:espresso\.)?repubblica @@ -36,7 +33,13 @@ class GediDigitalIE(InfoExtractor): |corrierealpi |lasentinella )\.gelocal - )\.it(?:/[^/]+){2,4}/(?P\d+))(?:$|[?&].*)''' + )\.it(?:/[^/]+){2,4}/(?P\d+))(?:$|[?&].*))''' + _EMBED_REGEX = [rf'''(?x) + (?: + data-frame-src=| + {_VALID_URL})\1'''] _TESTS = [{ 'url': 'https://video.lastampa.it/politica/il-paradosso-delle-regionali-la-lega-vince-ma-sembra-aver-perso/121559/121683', 'md5': '84658d7fb9e55a6e57ecc77b73137494', @@ -112,22 +115,9 @@ class GediDigitalIE(InfoExtractor): urls[i] = urljoin(base_url(e), url_basename(e)) return urls - @staticmethod - def _extract_urls(webpage): - entries = [ - mobj.group('eurl') - for mobj in re.finditer(r'''(?x) - (?: - data-frame-src=| - %s)\1''' % GediDigitalIE._VALID_URL, webpage)] - return GediDigitalIE._sanitize_urls(entries) - - @staticmethod - def _extract_url(webpage): - urls = GediDigitalIE._extract_urls(webpage) - return urls[0] if urls else None + @classmethod + def _extract_embed_urls(cls, url, webpage): + return cls._sanitize_urls(tuple(super()._extract_embed_urls(url, webpage))) @staticmethod def _clean_formats(formats): @@ -142,8 +132,7 @@ class GediDigitalIE(InfoExtractor): formats[:] = clean_formats def _real_extract(self, url): - video_id = self._match_id(url) - url = self._match_valid_url(url).group('url') + video_id, url = self._match_valid_url(url).group('id', 'base_url') webpage = self._download_webpage(url, video_id) title = self._html_search_meta( ['twitter:title', 'og:title'], webpage, fatal=True) @@ -197,7 +186,6 @@ class GediDigitalIE(InfoExtractor): duration = int_or_none(v) self._clean_formats(formats) - self._sort_formats(formats) return { 'id': video_id, -- cgit v1.2.3