diff options
author | Felix S <felix.von.s@posteo.de> | 2021-04-18 12:47:42 +0200 |
---|---|---|
committer | Felix S <felix.von.s@posteo.de> | 2021-04-28 17:20:20 +0530 |
commit | fd76a14259ed4f9685e0cddae5a111ac8b4aa300 (patch) | |
tree | fdcab89673452bbafd10f8e903ff0c10fa6619b4 /yt_dlp/extractor/common.py | |
parent | 171e59edd465f6c295bf8264f7024e243ff464d6 (diff) | |
download | hypervideo-pre-fd76a14259ed4f9685e0cddae5a111ac8b4aa300.tar.lz hypervideo-pre-fd76a14259ed4f9685e0cddae5a111ac8b4aa300.tar.xz hypervideo-pre-fd76a14259ed4f9685e0cddae5a111ac8b4aa300.zip |
[extractor/common, downloader/ism] Extract SSTR subtitle tracks
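_parse_ism_formats was extended into _parse_ism_formats_and_subtitles,
and _extract_ism_formats into _extract_ism_formats_and_subtitles; both
new methods return a (formats, subtitles) pair. All direct users were
updated, though _extract_ism_formats was left as a compatibility wrapper
that returns only the formats and emits a warning if the manifest
contains subtitle tracks that are being ignored.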
The SSTR downloader was also modified in order to prepare for muxing
subtitle streams, although no support for any subtitle codecs was
added in this commit.
Diffstat (limited to 'yt_dlp/extractor/common.py')
-rw-r--r-- | yt_dlp/extractor/common.py | 105 |
1 files changed, 69 insertions, 36 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 0c56a9015..045d463d6 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2750,26 +2750,38 @@ class InfoExtractor(object): else: # Assuming direct URL to unfragmented media. f['url'] = base_url - formats.append(f) + if content_type in ('video', 'audio'): + formats.append(f) + elif content_type == 'text': + subtitles.setdefault(lang or 'und', []).append(f) else: self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats, subtitles - def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): + def _extract_ism_formats(self, *args, **kwargs): + fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs) + if subs: + self.report_warning(bug_reports_message( + "Ignoring subtitle tracks found in the ISM manifest; " + "if any subtitle tracks are missing," + )) + return fmts + + def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): res = self._download_xml_handle( ism_url, video_id, note=note or 'Downloading ISM manifest', errnote=errnote or 'Failed to download ISM manifest', fatal=fatal, data=data, headers=headers, query=query) if res is False: - return [] + return [], {} ism_doc, urlh = res if ism_doc is None: - return [] + return [], {} - return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id) + return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id) - def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None): + def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None): """ Parse formats from ISM manifest. 
References: @@ -2777,22 +2789,24 @@ class InfoExtractor(object): https://msdn.microsoft.com/en-us/library/ff469518.aspx """ if ism_doc.get('IsLive') == 'TRUE': - return [] + return [], {} if (not self._downloader.params.get('allow_unplayable_formats') and ism_doc.find('Protection') is not None): - return [] + return [], {} duration = int(ism_doc.attrib['Duration']) timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000 formats = [] + subtitles = {} for stream in ism_doc.findall('StreamIndex'): stream_type = stream.get('Type') - if stream_type not in ('video', 'audio'): + if stream_type not in ('video', 'audio', 'text'): continue url_pattern = stream.attrib['Url'] stream_timescale = int_or_none(stream.get('TimeScale')) or timescale stream_name = stream.get('Name') + stream_language = stream.get('Language', 'und') for track in stream.findall('QualityLevel'): fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None) # TODO: add support for WVC1 and WMAP @@ -2839,33 +2853,52 @@ class InfoExtractor(object): format_id.append(stream_name) format_id.append(compat_str(tbr)) - formats.append({ - 'format_id': '-'.join(format_id), - 'url': ism_url, - 'manifest_url': ism_url, - 'ext': 'ismv' if stream_type == 'video' else 'isma', - 'width': width, - 'height': height, - 'tbr': tbr, - 'asr': sampling_rate, - 'vcodec': 'none' if stream_type == 'audio' else fourcc, - 'acodec': 'none' if stream_type == 'video' else fourcc, - 'protocol': 'ism', - 'fragments': fragments, - '_download_params': { - 'duration': duration, - 'timescale': stream_timescale, - 'width': width or 0, - 'height': height or 0, - 'fourcc': fourcc, - 'codec_private_data': track.get('CodecPrivateData'), - 'sampling_rate': sampling_rate, - 'channels': int_or_none(track.get('Channels', 2)), - 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)), - 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)), - }, - }) - return formats + if stream_type == 'text': + 
subtitles.setdefault(stream_language, []).append({ + 'ext': 'ismt', + 'protocol': 'ism', + 'url': ism_url, + 'manifest_url': ism_url, + 'fragments': fragments, + '_download_params': { + 'stream_type': stream_type, + 'duration': duration, + 'timescale': stream_timescale, + 'fourcc': fourcc, + 'language': stream_language, + 'codec_private_data': track.get('CodecPrivateData'), + } + }) + elif stream_type in ('video', 'audio'): + formats.append({ + 'format_id': '-'.join(format_id), + 'url': ism_url, + 'manifest_url': ism_url, + 'ext': 'ismv' if stream_type == 'video' else 'isma', + 'width': width, + 'height': height, + 'tbr': tbr, + 'asr': sampling_rate, + 'vcodec': 'none' if stream_type == 'audio' else fourcc, + 'acodec': 'none' if stream_type == 'video' else fourcc, + 'protocol': 'ism', + 'fragments': fragments, + '_download_params': { + 'stream_type': stream_type, + 'duration': duration, + 'timescale': stream_timescale, + 'width': width or 0, + 'height': height or 0, + 'fourcc': fourcc, + 'language': stream_language, + 'codec_private_data': track.get('CodecPrivateData'), + 'sampling_rate': sampling_rate, + 'channels': int_or_none(track.get('Channels', 2)), + 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)), + 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)), + }, + }) + return formats, subtitles def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None): def absolute_url(item_url): |