[funimation] Extract subtitles (#434)

Closes #420, https://github.com/ytdl-org/youtube-dl/issues/25645
Related: https://github.com/ytdl-org/youtube-dl/pull/24906
Authored by: Mevious
author: Mevious <benbryant1@gmail.com> 2021-06-22 18:57:53 -0700
committer: GitHub <noreply@github.com> 2021-06-23 07:27:53 +0530
commit: 29f63c96720caa4272ad79aaedc4d436e4a7976a (patch)
tree: 38bcbd249bffa174dd403efedf8724798572308c /yt_dlp/extractor/funimation.py
parent: 9fc0de579658de0c73ea03d61a76df8b0e154e23 (diff)
download: hypervideo-pre-29f63c96720caa4272ad79aaedc4d436e4a7976a.tar.lz
hypervideo-pre-29f63c96720caa4272ad79aaedc4d436e4a7976a.tar.xz
hypervideo-pre-29f63c96720caa4272ad79aaedc4d436e4a7976a.zip
1 file changed, 21 insertions, 1 deletion
diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py
index d8f1e169a..9ec1627fa 100644
--- a/yt_dlp/extractor/funimation.py
+++ b/yt_dlp/extractor/funimation.py
@@ -10,8 +10,9 @@ from ..utils import (
     determine_ext,
     int_or_none,
     js_to_json,
+    urlencode_postdata,
+    urljoin,
     ExtractorError,
-    urlencode_postdata
 )
 
 
@@ -109,6 +110,7 @@ class FunimationIE(InfoExtractor):
         if series:
             title = '%s - %s' % (series, title)
         description = self._html_search_meta(['description', 'og:description'], webpage, fatal=True)
+        subtitles = self.extract_subtitles(url, video_id, display_id)
 
         try:
             headers = {}
@@ -153,6 +155,24 @@ class FunimationIE(InfoExtractor):
             'season_number': int_or_none(title_data.get('seasonNum') or _search_kane('season')),
             'episode_number': int_or_none(title_data.get('episodeNum')),
             'episode': episode,
+            'subtitles': subtitles,
             'season_id': title_data.get('seriesId'),
             'formats': formats,
         }
+
+    def _get_subtitles(self, url, video_id, display_id):
+        player_url = urljoin(url, '/player/' + video_id)
+        player_page = self._download_webpage(player_url, display_id)
+        text_tracks_json_string = self._search_regex(
+            r'"textTracks": (\[{.+?}\])',
+            player_page, 'subtitles data', default='')
+        text_tracks = self._parse_json(
+            text_tracks_json_string, display_id, js_to_json, fatal=False) or []
+        subtitles = {}
+        for text_track in text_tracks:
+            url_element = {'url': text_track.get('src')}
+            language = text_track.get('language')
+            if text_track.get('type') == 'CC':
+                language += '_CC'
+            subtitles.setdefault(language, []).append(url_element)
+        return subtitles
author	Mevious <benbryant1@gmail.com>	2021-06-22 18:57:53 -0700
committer	GitHub <noreply@github.com>	2021-06-23 07:27:53 +0530
commit	29f63c96720caa4272ad79aaedc4d436e4a7976a (patch)
tree	38bcbd249bffa174dd403efedf8724798572308c /yt_dlp/extractor/funimation.py
parent	9fc0de579658de0c73ea03d61a76df8b0e154e23 (diff)
download	hypervideo-pre-29f63c96720caa4272ad79aaedc4d436e4a7976a.tar.lz hypervideo-pre-29f63c96720caa4272ad79aaedc4d436e4a7976a.tar.xz hypervideo-pre-29f63c96720caa4272ad79aaedc4d436e4a7976a.zip