diff options
Diffstat (limited to 'youtube_dl')
| -rw-r--r-- | youtube_dl/extractor/common.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/prosiebensat1.py | 58 | 
2 files changed, 55 insertions, 7 deletions
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index cb6081dd0..2faaf6226 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -439,7 +439,7 @@ class InfoExtractor(object):          return video_info      @staticmethod -    def playlist_result(entries, playlist_id=None, playlist_title=None): +    def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):          """Returns a playlist"""          video_info = {'_type': 'playlist',                        'entries': entries} @@ -447,6 +447,8 @@ class InfoExtractor(object):              video_info['id'] = playlist_id          if playlist_title:              video_info['title'] = playlist_title +        if playlist_description: +            video_info['description'] = playlist_description          return video_info      def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 32d747ede..1262793c8 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -85,7 +85,7 @@ class ProSiebenSat1IE(InfoExtractor):                  'ext': 'mp4',                  'title': 'Im Interview: Kai Wiesinger',                  'description': 'md5:e4e5370652ec63b95023e914190b4eb9', -                'upload_date': '20140225', +                'upload_date': '20140203',                  'duration': 522.56,              },              'params': { @@ -100,7 +100,7 @@ class ProSiebenSat1IE(InfoExtractor):                  'ext': 'mp4',                  'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',                  'description': 'md5:2669cde3febe9bce13904f701e774eb6', -                'upload_date': '20140225', +                'upload_date': '20141014',                  'duration': 2410.44,              },              'params': { @@ -152,12 +152,22 @@ class ProSiebenSat1IE(InfoExtractor):                  'skip_download': True,              },          }, +        { +            'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist', +            'info_dict': { +                'id': '439664', +                'title': 'Episode 8 - Ganze Folge - Playlist', +                'description': 'md5:63b8963e71f481782aeea877658dec84', +            }, +            'playlist_count': 2, +        },      ]      _CLIPID_REGEXES = [          r'"clip_id"\s*:\s+"(\d+)"',          r'clipid: "(\d+)"',          r'clip[iI]d=(\d+)', +        r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",      ]      _TITLE_REGEXES = [          r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>', @@ -178,11 +188,19 @@ class ProSiebenSat1IE(InfoExtractor):          r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',          r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',      ] +    _PAGE_TYPE_REGEXES = [ +        r'<meta name="page_type" content="([^"]+)">', +        r"'itemType'\s*:\s*'([^']*)'", +    ] +    _PLAYLIST_ID_REGEXES = [ +        r'content[iI]d=(\d+)', +        r"'itemId'\s*:\s*'([^']*)'", +    ] +    _PLAYLIST_CLIP_REGEXES = [ +        r'(?s)data-qvt=.+?<a href="([^"]+)"', +    ] -    def _real_extract(self, url): -        video_id = self._match_id(url) -        webpage = self._download_webpage(url, video_id) - +    def _extract_clip(self, url, webpage):          clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id')          access_token = 'testclient' @@ -281,3 +299,31 @@ class ProSiebenSat1IE(InfoExtractor):              'duration': duration,              'formats': formats,          } + +    def _extract_playlist(self, url, webpage): +        playlist_id = self._html_search_regex( +            self._PLAYLIST_ID_REGEXES, webpage, 'playlist id') +        for regex in self._PLAYLIST_CLIP_REGEXES: +            playlist_clips = re.findall(regex, webpage) +            if playlist_clips: +                title = self._html_search_regex( +                    self._TITLE_REGEXES, webpage, 'title') +                description = self._html_search_regex( +                    self._DESCRIPTION_REGEXES, webpage, 'description', fatal=False) +                entries = [ +                    self.url_result( +                        re.match('(.+?//.+?)/', url).group(1) + clip_path, +                        'ProSiebenSat1') +                    for clip_path in playlist_clips] +                return self.playlist_result(entries, playlist_id, title, description) + +    def _real_extract(self, url): +        video_id = self._match_id(url) +        webpage = self._download_webpage(url, video_id) +        page_type = self._search_regex( +            self._PAGE_TYPE_REGEXES, webpage, +            'page type', default='clip').lower() +        if page_type == 'clip': +            return self._extract_clip(url, webpage) +        elif page_type == 'playlist': +            return self._extract_playlist(url, webpage) | 
