From a504ced097e703a9bc6c18b6e31bcafb4783ed80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 15 Feb 2015 18:03:41 +0100 Subject: Improve subtitles support For each language the extractor builds a list with the available formats sorted (like for video formats), then YoutubeDL selects one of them using the '--sub-format' option which now allows giving the format preferences (for example 'ass/srt/best'). For each format the 'url' field can be set so that we only download the contents if needed, or if the contents needs to be processed (like in crunchyroll) the 'data' field can be used. The reasons for this change are: * We weren't checking that the format given with '--sub-format' was available, checking it in each extractor would be repetitive. * It allows to easily support giving a format preference. * The subtitles were automatically downloaded in the extractor, but I think that if you use for example the '--dump-json' option you want to finish as fast as possible. Currently only the ted extractor has been updated, but the old system still works. --- youtube_dl/extractor/common.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c784eedb9..161c623eb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -151,8 +151,14 @@ class InfoExtractor(object): If not explicitly set, calculated from timestamp. uploader_id: Nickname or id of the video uploader. location: Physical location where the video was filmed. - subtitles: The subtitle file contents as a dictionary in the format - {language: subtitles}. + subtitles: The available subtitles as a dictionary in the format + {language: subformats}. "subformats" is a list sorted from + lower to higher preference, each element is a dictionary + with the "ext" entry and one of: + * "data": The subtitles file contents + * "url": A url pointing to the subtitles file + Note: YoutubeDL.extract_info will get the requested + format and replace the "subformats" list with it. duration: Length of the video in seconds, as an integer. view_count: How many users have watched the video on the platform. like_count: Number of positive ratings of the video @@ -993,6 +999,16 @@ class InfoExtractor(object): any_restricted = any_restricted or is_restricted return not any_restricted + def extract_subtitles(self, *args, **kwargs): + subtitles = {} + list_subtitles = self._downloader.params.get('listsubtitles') + if self._downloader.params.get('writesubtitles', False) or list_subtitles: + subtitles.update(self._get_subtitles(*args, **kwargs)) + return subtitles + + def _get_subtitles(self, *args, **kwargs): + raise NotImplementedError("This method must be implemented by subclasses") + class SearchInfoExtractor(InfoExtractor): """ -- cgit v1.2.3 From c84dd8a90dcc75547b343449b921b644a2119c4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 16 Feb 2015 21:12:31 +0100 Subject: [YoutubeDL] store the subtitles to download in the 'requested_subtitles' field We need to keep the orginal subtitles information, so that the '--load-info' option can be used to list or select the subtitles again. We'll also be able to have a separate field for storing the automatic captions info. --- youtube_dl/extractor/common.py | 2 -- 1 file changed, 2 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 161c623eb..d149e0f92 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -157,8 +157,6 @@ class InfoExtractor(object): with the "ext" entry and one of: * "data": The subtitles file contents * "url": A url pointing to the subtitles file - Note: YoutubeDL.extract_info will get the requested - format and replace the "subformats" list with it. duration: Length of the video in seconds, as an integer. view_count: How many users have watched the video on the platform. like_count: Number of positive ratings of the video -- cgit v1.2.3 From 360e1ca5ccabcb5d48228d9472b09f1bce68bbc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Mon, 16 Feb 2015 21:44:17 +0100 Subject: [youtube] Convert to new subtitles system The automatic captions are stored in the 'automactic_captions' field, which is used if no normal subtitles are found for an specific language. --- youtube_dl/extractor/common.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index d149e0f92..fe7d8dbc9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -157,6 +157,8 @@ class InfoExtractor(object): with the "ext" entry and one of: * "data": The subtitles file contents * "url": A url pointing to the subtitles file + automatic_captions: Like 'subtitles', used by the YoutubeIE for + automatically generated captions duration: Length of the video in seconds, as an integer. view_count: How many users have watched the video on the platform. like_count: Number of positive ratings of the video @@ -1007,6 +1009,16 @@ class InfoExtractor(object): def _get_subtitles(self, *args, **kwargs): raise NotImplementedError("This method must be implemented by subclasses") + def extract_automatic_captions(self, *args, **kwargs): + automatic_captions = {} + list_subtitles = self._downloader.params.get('listsubtitles') + if self._downloader.params.get('writeautomaticsub', False) or list_subtitles: + automatic_captions.update(self._get_automatic_captions(*args, **kwargs)) + return automatic_captions + + def _get_automatic_captions(self, *args, **kwargs): + raise NotImplementedError("This method must be implemented by subclasses") + class SearchInfoExtractor(InfoExtractor): """ -- cgit v1.2.3 From 9868ea493626a3a81d30d084fd00d22982a0f86a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Tue, 17 Feb 2015 22:16:29 +0100 Subject: [extractor/common] Simplify subtitles handling methods Initially I was going to use a single method for handling both subtitles and automatic captions, that's why I used the 'list_subtitles' and the 'subtitles' variables. --- youtube_dl/extractor/common.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index fe7d8dbc9..7d8ce1808 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1000,21 +1000,19 @@ class InfoExtractor(object): return not any_restricted def extract_subtitles(self, *args, **kwargs): - subtitles = {} - list_subtitles = self._downloader.params.get('listsubtitles') - if self._downloader.params.get('writesubtitles', False) or list_subtitles: - subtitles.update(self._get_subtitles(*args, **kwargs)) - return subtitles + if (self._downloader.params.get('writesubtitles', False) or + self._downloader.params.get('listsubtitles')): + return self._get_subtitles(*args, **kwargs) + return {} def _get_subtitles(self, *args, **kwargs): raise NotImplementedError("This method must be implemented by subclasses") def extract_automatic_captions(self, *args, **kwargs): - automatic_captions = {} - list_subtitles = self._downloader.params.get('listsubtitles') - if self._downloader.params.get('writeautomaticsub', False) or list_subtitles: - automatic_captions.update(self._get_automatic_captions(*args, **kwargs)) - return automatic_captions + if (self._downloader.params.get('writeautomaticsub', False) or + self._downloader.params.get('listsubtitles')): + return self._get_automatic_captions(*args, **kwargs) + return {} def _get_automatic_captions(self, *args, **kwargs): raise NotImplementedError("This method must be implemented by subclasses") -- cgit v1.2.3