diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-01-18 22:05:11 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-01-18 22:17:41 +0700 | 
| commit | 538b17a09c6546d58babc5eb4a3abc08dcff2d89 (patch) | |
| tree | a23f08944fecaf4fa82cb65aaa6e3936987e78f9 | |
| parent | 4e44598547b02d42aa628506245c40c3d633814e (diff) | |
| download | hypervideo-pre-538b17a09c6546d58babc5eb4a3abc08dcff2d89.tar.lz hypervideo-pre-538b17a09c6546d58babc5eb4a3abc08dcff2d89.tar.xz hypervideo-pre-538b17a09c6546d58babc5eb4a3abc08dcff2d89.zip | |
[20min] Improve
| -rw-r--r-- | youtube_dl/extractor/twentymin.py | 122 | 
1 files changed, 47 insertions, 75 deletions
| diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dl/extractor/twentymin.py index 68d5a0cb5..4fd1aa4bf 100644 --- a/youtube_dl/extractor/twentymin.py +++ b/youtube_dl/extractor/twentymin.py @@ -4,116 +4,88 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from ..utils import remove_end +from ..utils import ( +    int_or_none, +    try_get, +)  class TwentyMinutenIE(InfoExtractor):      IE_NAME = '20min' -    _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P<id>\d+)|(?:[^/]+/)*(?P<display_id>[^/#?]+))' +    _VALID_URL = r'''(?x) +                    https?:// +                        (?:www\.)?20min\.ch/ +                        (?: +                            videotv/*\?.*?\bvid=| +                            videoplayer/videoplayer\.html\?.*?\bvideoId@ +                        ) +                        (?P<id>\d+) +                    '''      _TESTS = [{ -        # regular video          'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',          'md5': 'e7264320db31eed8c38364150c12496e',          'info_dict': {              'id': '469148',              'ext': 'mp4',              'title': '85 000 Franken für 15 perfekte Minuten', -            'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)', -            'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg' -        } -    }, { -        # news article with video -        'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469', -        'md5': 'cd4cbb99b94130cff423e967cd275e5e', -        'info_dict': { -            'id': '469408', -            'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469', -            'ext': 'flv', -            'title': '«Wir müssen mutig nach vorne schauen»', -            'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.', -            'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg' +            'thumbnail': r're:https?://.*\.jpg$',          }, -        'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.',      }, { -        # news article with video -        'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552', -        'md5': '372917ba85ed969e176d287ae54b2f94', +        'url': 'http://www.20min.ch/videoplayer/videoplayer.html?params=client@twentyDE|videoId@523629',          'info_dict': {              'id': '523629', -            'display_id': 'So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',              'ext': 'mp4',              'title': 'So kommen Sie bei Eis und Schnee sicher an', -            'description': 'Schneegestöber und Glatteis führten in den letzten Tagen zu zahlreichen Strassenunfällen. Ein Experte erklärt, worauf man nun beim Autofahren achten muss.', -            'thumbnail': 'http://www.20min.ch/images/content/2/7/0/27032552/83/teaserbreit.jpg', -        } -    }, { -        # YouTube embed -        'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184', -        'md5': 'e7e237fd98da2a3cc1422ce683df234d', -        'info_dict': { -            'id': 'ivM7A7SpDOs', -            'ext': 'mp4', -            'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016', -            'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a', -            'upload_date': '20160424', -            'uploader': 'CMM Castilla-La Mancha Media', -            'uploader_id': 'RTVCM', +            'description': 'md5:117c212f64b25e3d95747e5276863f7d', +            'thumbnail': r're:https?://.*\.jpg$', +        }, +        'params': { +            'skip_download': True,          }, -        'add_ie': ['Youtube'],      }, {          'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',          'only_matching': True, -    }, { -        'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411', -        'only_matching': True,      }] +    @staticmethod +    def _extract_urls(webpage): +        return [m.group('url') for m in re.finditer( +            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:www\.)?20min\.ch/videoplayer/videoplayer.html\?.*?\bvideoId@\d+.*?)\1', +            webpage)] +      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') -        display_id = mobj.group('display_id') or video_id +        video_id = self._match_id(url) -        webpage = self._download_webpage(url, display_id) +        video = self._download_json( +            'http://api.20min.ch/video/%s/show' % video_id, +            video_id)['content'] -        youtube_url = self._html_search_regex( -            r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"', -            webpage, 'YouTube embed URL', default=None) -        if youtube_url is not None: -            return self.url_result(youtube_url, 'Youtube') +        title = video['title'] -        title = self._html_search_regex( -            r'<h1>.*?<span>(.+?)</span></h1>', -            webpage, 'title', default=None) -        if not title: -            title = remove_end(re.sub( -                r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News') +        formats = [{ +            'format_id': format_id, +            'url': 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, p), +            'quality': quality, +        } for quality, (format_id, p) in enumerate([('sd', ''), ('hd', 'h')])] +        self._sort_formats(formats) -        if not video_id: -            params = self._html_search_regex( -                r'<iframe[^>]+src="(?:https?:)?//www\.20min\.ch/videoplayer/videoplayer\.html\?params=(.+?[^"])"', -                webpage, '20min embed URL') -            video_id = self._search_regex( -                r'.*videoId@(\d+)', -                params, 'Video Id') +        description = video.get('lead') +        thumbnail = video.get('thumbnail') -        description = self._html_search_meta( -            'description', webpage, 'description') -        thumbnail = self._og_search_thumbnail(webpage) +        def extract_count(kind): +            return try_get( +                video, +                lambda x: int_or_none(x['communityobject']['thumbs_%s' % kind])) -        formats = [] -        format_preferences = [('sd', ''), ('hd', 'h')] -        for format_id, url_extension in format_preferences: -            format_url = 'http://podcast.20min-tv.ch/podcast/20min/%s%s.mp4' % (video_id, url_extension) -            formats.append({ -                'format_id': format_id, -                'url': format_url, -            }) +        like_count = extract_count('up') +        dislike_count = extract_count('down')          return {              'id': video_id, -            'display_id': display_id,              'title': title,              'description': description,              'thumbnail': thumbnail, +            'like_count': like_count, +            'dislike_count': dislike_count,              'formats': formats,          } | 
