diff options
Diffstat (limited to 'yt_dlp/extractor/common.py')
-rw-r--r-- | yt_dlp/extractor/common.py | 105 |
1 files changed, 69 insertions, 36 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 0c56a9015..045d463d6 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -2750,26 +2750,38 @@ class InfoExtractor(object): else: # Assuming direct URL to unfragmented media. f['url'] = base_url - formats.append(f) + if content_type in ('video', 'audio'): + formats.append(f) + elif content_type == 'text': + subtitles.setdefault(lang or 'und', []).append(f) else: self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) return formats, subtitles - def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): + def _extract_ism_formats(self, *args, **kwargs): + fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs) + if subs: + self.report_warning(bug_reports_message( + "Ignoring subtitle tracks found in the ISM manifest; " + "if any subtitle tracks are missing," + )) + return fmts + + def _extract_ism_formats_and_subtitles(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): res = self._download_xml_handle( ism_url, video_id, note=note or 'Downloading ISM manifest', errnote=errnote or 'Failed to download ISM manifest', fatal=fatal, data=data, headers=headers, query=query) if res is False: - return [] + return [], {} ism_doc, urlh = res if ism_doc is None: - return [] + return [], {} - return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id) + return self._parse_ism_formats_and_subtitles(ism_doc, urlh.geturl(), ism_id) - def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None): + def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None): """ Parse formats from ISM manifest. References: @@ -2777,22 +2789,24 @@ class InfoExtractor(object): https://msdn.microsoft.com/en-us/library/ff469518.aspx """ if ism_doc.get('IsLive') == 'TRUE': - return [] + return [], {} if (not self._downloader.params.get('allow_unplayable_formats') and ism_doc.find('Protection') is not None): - return [] + return [], {} duration = int(ism_doc.attrib['Duration']) timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000 formats = [] + subtitles = {} for stream in ism_doc.findall('StreamIndex'): stream_type = stream.get('Type') - if stream_type not in ('video', 'audio'): + if stream_type not in ('video', 'audio', 'text'): continue url_pattern = stream.attrib['Url'] stream_timescale = int_or_none(stream.get('TimeScale')) or timescale stream_name = stream.get('Name') + stream_language = stream.get('Language', 'und') for track in stream.findall('QualityLevel'): fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None) # TODO: add support for WVC1 and WMAP @@ -2839,33 +2853,52 @@ class InfoExtractor(object): format_id.append(stream_name) format_id.append(compat_str(tbr)) - formats.append({ - 'format_id': '-'.join(format_id), - 'url': ism_url, - 'manifest_url': ism_url, - 'ext': 'ismv' if stream_type == 'video' else 'isma', - 'width': width, - 'height': height, - 'tbr': tbr, - 'asr': sampling_rate, - 'vcodec': 'none' if stream_type == 'audio' else fourcc, - 'acodec': 'none' if stream_type == 'video' else fourcc, - 'protocol': 'ism', - 'fragments': fragments, - '_download_params': { - 'duration': duration, - 'timescale': stream_timescale, - 'width': width or 0, - 'height': height or 0, - 'fourcc': fourcc, - 'codec_private_data': track.get('CodecPrivateData'), - 'sampling_rate': sampling_rate, - 'channels': int_or_none(track.get('Channels', 2)), - 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)), - 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)), - }, - }) - return formats + if stream_type == 'text': + subtitles.setdefault(stream_language, []).append({ + 'ext': 'ismt', + 'protocol': 'ism', + 'url': ism_url, + 'manifest_url': ism_url, + 'fragments': fragments, + '_download_params': { + 'stream_type': stream_type, + 'duration': duration, + 'timescale': stream_timescale, + 'fourcc': fourcc, + 'language': stream_language, + 'codec_private_data': track.get('CodecPrivateData'), + } + }) + elif stream_type in ('video', 'audio'): + formats.append({ + 'format_id': '-'.join(format_id), + 'url': ism_url, + 'manifest_url': ism_url, + 'ext': 'ismv' if stream_type == 'video' else 'isma', + 'width': width, + 'height': height, + 'tbr': tbr, + 'asr': sampling_rate, + 'vcodec': 'none' if stream_type == 'audio' else fourcc, + 'acodec': 'none' if stream_type == 'video' else fourcc, + 'protocol': 'ism', + 'fragments': fragments, + '_download_params': { + 'stream_type': stream_type, + 'duration': duration, + 'timescale': stream_timescale, + 'width': width or 0, + 'height': height or 0, + 'fourcc': fourcc, + 'language': stream_language, + 'codec_private_data': track.get('CodecPrivateData'), + 'sampling_rate': sampling_rate, + 'channels': int_or_none(track.get('Channels', 2)), + 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)), + 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)), + }, + }) + return formats, subtitles def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None, quality=None): def absolute_url(item_url): |