diff options
Diffstat (limited to 'hypervideo_dl/extractor/yahoo.py')
-rw-r--r-- | hypervideo_dl/extractor/yahoo.py | 46 |
1 files changed, 30 insertions, 16 deletions
diff --git a/hypervideo_dl/extractor/yahoo.py b/hypervideo_dl/extractor/yahoo.py index 53556de..20504de 100644 --- a/hypervideo_dl/extractor/yahoo.py +++ b/hypervideo_dl/extractor/yahoo.py @@ -264,7 +264,7 @@ class YahooIE(InfoExtractor): return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, + 'title': title, 'formats': formats, 'thumbnails': thumbnails, 'description': clean_html(video.get('description')), @@ -414,11 +414,14 @@ class YahooGyaOIE(InfoExtractor): IE_NAME = 'yahoo:gyao' _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _TESTS = [{ - 'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/', + 'url': 'https://gyao.yahoo.co.jp/title/%E3%82%BF%E3%82%A4%E3%83%A0%E3%83%9C%E3%82%AB%E3%83%B3%E3%82%B7%E3%83%AA%E3%83%BC%E3%82%BA%20%E3%83%A4%E3%83%83%E3%82%BF%E3%83%BC%E3%83%9E%E3%83%B3/5f60ceb3-6e5e-40ef-ba40-d68b598d067f', 'info_dict': { - 'id': '00449:v03102', + 'id': '5f60ceb3-6e5e-40ef-ba40-d68b598d067f', }, - 'playlist_count': 2, + 'playlist_mincount': 80, + }, { + 'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/', + 'only_matching': True, }, { 'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/', 'only_matching': True, @@ -430,19 +433,30 @@ class YahooGyaOIE(InfoExtractor): 'only_matching': True, }] + def _entries(self, program_id): + page = 1 + while True: + playlist = self._download_json( + f'https://gyao.yahoo.co.jp/api/programs/{program_id}/videos?page={page}', program_id, + note=f'Downloading JSON metadata page {page}') + if not playlist: + break + for video in playlist['videos']: + video_id = video.get('id') + if not video_id: + continue + if video.get('streamingAvailability') == 'notYet': + continue + yield self.url_result( + 'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'), + YahooGyaOPlayerIE.ie_key(), video_id) + if playlist.get('ended'): + break + page += 1 + def _real_extract(self, url): program_id = self._match_id(url).replace('/', ':') - videos = self._download_json( - 'https://gyao.yahoo.co.jp/api/programs/%s/videos' % program_id, program_id)['videos'] - entries = [] - for video in videos: - video_id = video.get('id') - if not video_id: - continue - entries.append(self.url_result( - 'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'), - YahooGyaOPlayerIE.ie_key(), video_id)) - return self.playlist_result(entries, program_id) + return self.playlist_result(self._entries(program_id), program_id) class YahooJapanNewsIE(InfoExtractor): @@ -519,7 +533,7 @@ class YahooJapanNewsIE(InfoExtractor): title = self._html_search_meta( ['og:title', 'twitter:title'], webpage, 'title', default=None - ) or self._html_search_regex('<title>([^<]+)</title>', webpage, 'title') + ) or self._html_extract_title(webpage) if display_id == host: # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...) |