diff options
author | Mevious <benbryant1@gmail.com> | 2021-06-22 18:57:53 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-23 07:27:53 +0530 |
commit | 29f63c96720caa4272ad79aaedc4d436e4a7976a (patch) | |
tree | 38bcbd249bffa174dd403efedf8724798572308c /yt_dlp/extractor/funimation.py | |
parent | 9fc0de579658de0c73ea03d61a76df8b0e154e23 (diff) | |
download | hypervideo-pre-29f63c96720caa4272ad79aaedc4d436e4a7976a.tar.lz hypervideo-pre-29f63c96720caa4272ad79aaedc4d436e4a7976a.tar.xz hypervideo-pre-29f63c96720caa4272ad79aaedc4d436e4a7976a.zip |
[funimation] Extract subtitles (#434)
Closes #420, https://github.com/ytdl-org/youtube-dl/issues/25645
Related: https://github.com/ytdl-org/youtube-dl/pull/24906
Authored by: Mevious
Diffstat (limited to 'yt_dlp/extractor/funimation.py')
-rw-r--r-- | yt_dlp/extractor/funimation.py | 22 |
1 file changed, 21 insertions, 1 deletion
diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py
index d8f1e169a..9ec1627fa 100644
--- a/yt_dlp/extractor/funimation.py
+++ b/yt_dlp/extractor/funimation.py
@@ -10,8 +10,9 @@ from ..utils import (
     determine_ext,
     int_or_none,
     js_to_json,
+    urlencode_postdata,
+    urljoin,
     ExtractorError,
-    urlencode_postdata
 )
 
 
@@ -109,6 +110,7 @@ class FunimationIE(InfoExtractor):
         if series:
             title = '%s - %s' % (series, title)
         description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
+        subtitles = self.extract_subtitles(url, video_id, display_id)
 
         try:
             headers = {}
@@ -153,6 +155,24 @@ class FunimationIE(InfoExtractor):
             'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
             'episode_number': int_or_none(title_data.get('episodeNum')),
             'episode': episode,
+            'subtitles': subtitles,
             'season_id': title_data.get('seriesId'),
             'formats': formats,
         }
+
+    def _get_subtitles(self, url, video_id, display_id):
+        player_url = urljoin(url, '/player/' + video_id)
+        player_page = self._download_webpage(player_url, display_id)
+        text_tracks_json_string = self._search_regex(
+            r'"textTracks": (\[{.+?}\])',
+            player_page, 'subtitles data', default='')
+        text_tracks = self._parse_json(
+            text_tracks_json_string, display_id, js_to_json, fatal=False) or []
+        subtitles = {}
+        for text_track in text_tracks:
+            url_element = {'url': text_track.get('src')}
+            language = text_track.get('language')
+            if text_track.get('type') == 'CC':
+                language += '_CC'
+            subtitles.setdefault(language, []).append(url_element)
+        return subtitles