diff options
Diffstat (limited to 'hypervideo_dl/extractor/ant1newsgr.py')
-rw-r--r-- | hypervideo_dl/extractor/ant1newsgr.py | 19 |
1 files changed, 2 insertions, 17 deletions
diff --git a/hypervideo_dl/extractor/ant1newsgr.py b/hypervideo_dl/extractor/ant1newsgr.py index 1075b46..7b384b2 100644 --- a/hypervideo_dl/extractor/ant1newsgr.py +++ b/hypervideo_dl/extractor/ant1newsgr.py @@ -1,7 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re import urllib.parse from .common import InfoExtractor @@ -10,7 +6,6 @@ from ..utils import ( ExtractorError, determine_ext, scale_thumbnails_to_max_format_width, - unescapeHTML, ) @@ -24,7 +19,6 @@ class Ant1NewsGrBaseIE(InfoExtractor): raise ExtractorError('no source found for %s' % video_id) formats, subs = (self._extract_m3u8_formats_and_subtitles(source, video_id, 'mp4') if determine_ext(source) == 'm3u8' else ([{'url': source}], {})) - self._sort_formats(formats) thumbnails = scale_thumbnails_to_max_format_width( formats, [{'url': info['thumb']}], r'(?<=/imgHandler/)\d+') return { @@ -94,7 +88,7 @@ class Ant1NewsGrArticleIE(Ant1NewsGrBaseIE): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) info = self._search_json_ld(webpage, video_id, expected_type='NewsArticle') - embed_urls = list(Ant1NewsGrEmbedIE._extract_urls(webpage)) + embed_urls = list(Ant1NewsGrEmbedIE._extract_embed_urls(url, webpage)) if not embed_urls: raise ExtractorError('no videos found for %s' % video_id, expected=True) return self.playlist_from_matches( @@ -107,6 +101,7 @@ class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE): IE_DESC = 'ant1news.gr embedded videos' _BASE_PLAYER_URL_RE = r'(?:https?:)?//(?:[a-zA-Z0-9\-]+\.)?(?:antenna|ant1news)\.gr/templates/pages/player' _VALID_URL = rf'{_BASE_PLAYER_URL_RE}\?([^#]+&)?cid=(?P<id>[^#&]+)' + _EMBED_REGEX = [rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+)(?P=_q1)'] _API_PATH = '/news/templates/data/jsonPlayer' _TESTS = [{ @@ -120,16 +115,6 @@ class Ant1NewsGrEmbedIE(Ant1NewsGrBaseIE): }, }] - @classmethod - def _extract_urls(cls, webpage): - _EMBED_URL_RE = rf'{cls._BASE_PLAYER_URL_RE}\?(?:(?!(?P=_q1)).)+' - _EMBED_RE = rf'<iframe[^>]+?src=(?P<_q1>["\'])(?P<url>{_EMBED_URL_RE})(?P=_q1)' - for mobj in re.finditer(_EMBED_RE, webpage): - url = unescapeHTML(mobj.group('url')) - if not cls.suitable(url): - continue - yield url - def _real_extract(self, url): video_id = self._match_id(url) |