Subtitle extraction from streaming media manifests #247

Authored by fstirlitz Modified from: https://github.com/ytdl-org/youtube-dl/pull/6144 Closes: #73 Fixes: https://github.com/ytdl-org/youtube-dl/issues/6106 https://github.com/ytdl-org/youtube-dl/issues/14977 https://github.com/ytdl-org/youtube-dl/issues/21438 https://github.com/ytdl-org/youtube-dl/issues/23609 https://github.com/ytdl-org/youtube-dl/issues/28132 Might also fix (untested): https://github.com/ytdl-org/youtube-dl/issues/15424 https://github.com/ytdl-org/youtube-dl/issues/18267 https://github.com/ytdl-org/youtube-dl/issues/23899 https://github.com/ytdl-org/youtube-dl/issues/24375 https://github.com/ytdl-org/youtube-dl/issues/24595 https://github.com/ytdl-org/youtube-dl/issues/27899 Related: https://github.com/ytdl-org/youtube-dl/issues/22379 https://github.com/ytdl-org/youtube-dl/pull/24517 https://github.com/ytdl-org/youtube-dl/pull/24886 https://github.com/ytdl-org/youtube-dl/pull/27215 Notes: * The functions `extractor.common._extract_..._formats` are still kept for compatibility * Only some extractors have currently been moved to using `_extract_..._formats_and_subtitles` * Direct subtitle manifests (without a master) are not supported and are wrongly identified as containing video formats * AES support is untested * The fragmented TTML subtitles extracted from DASH/ISM are valid, but are unsupported by `ffmpeg` and most video players * Their XML fragments can be dumped using `ffmpeg -i in.mp4 -f data -map 0 -c copy out.ttml`. Once the unnecessary headers are stripped out of this, it becomes a valid self-contained ttml file * The ttml subs downloaded from DASH manifests can also be directly opened with <https://github.com/SubtitleEdit> * Fragmented WebVTT files extracted from DASH/ISM are also unsupported by most tools * Unlike the ttml files, the XML fragments of these cannot be dumped using `ffmpeg` * The webtt subs extracted from DASH can be parsed by <https://github.com/gpac/gpac> * But validity of the those extracted from ISM are untested
author: pukkandan <pukkandan.ytdlp@gmail.com> 2021-04-28 19:02:43 +0530
committer: GitHub <noreply@github.com> 2021-04-28 19:02:43 +0530
commit: be6202f12b97858b9d716e608394b51065d0419f (patch)
tree: 71f920777c24d9d81c990f0bf57d66e9d5bbaff7 /yt_dlp/downloader/ism.py
parent: db9a564b6a5c31472f8298969584eead0b59fa1c (diff)
parent: e8f834cd8dfc07011d1080321e42bc130e7201bb (diff)
download: hypervideo-pre-be6202f12b97858b9d716e608394b51065d0419f.tar.lz
hypervideo-pre-be6202f12b97858b9d716e608394b51065d0419f.tar.xz
hypervideo-pre-be6202f12b97858b9d716e608394b51065d0419f.zip
1 files changed, 42 insertions, 12 deletions
diff --git a/yt_dlp/downloader/ism.py b/yt_dlp/downloader/ism.py
index 1ca666b4a..07d74aef0 100644
--- a/yt_dlp/downloader/ism.py
+++ b/yt_dlp/downloader/ism.py
@@ -48,7 +48,7 @@ def write_piff_header(stream, params):
     language = params.get('language', 'und')
     height = params.get('height', 0)
     width = params.get('width', 0)
-    is_audio = width == 0 and height == 0
+    stream_type = params['stream_type']
     creation_time = modification_time = int(time.time())
 
     ftyp_payload = b'isml'  # major brand
@@ -77,7 +77,7 @@ def write_piff_header(stream, params):
     tkhd_payload += u32.pack(0) * 2  # reserved
     tkhd_payload += s16.pack(0)  # layer
     tkhd_payload += s16.pack(0)  # alternate group
-    tkhd_payload += s88.pack(1 if is_audio else 0)  # volume
+    tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0)  # volume
     tkhd_payload += u16.pack(0)  # reserved
     tkhd_payload += unity_matrix
     tkhd_payload += u1616.pack(width)
@@ -93,19 +93,34 @@ def write_piff_header(stream, params):
     mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload)  # Media Header Box
 
     hdlr_payload = u32.pack(0)  # pre defined
-    hdlr_payload += b'soun' if is_audio else b'vide'  # handler type
-    hdlr_payload += u32.pack(0) * 3  # reserved
-    hdlr_payload += (b'Sound' if is_audio else b'Video') + b'Handler\0'  # name
+    if stream_type == 'audio':  # handler type
+        hdlr_payload += b'soun'
+        hdlr_payload += u32.pack(0) * 3  # reserved
+        hdlr_payload += b'SoundHandler\0'  # name
+    elif stream_type == 'video':
+        hdlr_payload += b'vide'
+        hdlr_payload += u32.pack(0) * 3  # reserved
+        hdlr_payload += b'VideoHandler\0'  # name
+    elif stream_type == 'text':
+        hdlr_payload += b'subt'
+        hdlr_payload += u32.pack(0) * 3  # reserved
+        hdlr_payload += b'SubtitleHandler\0'  # name
+    else:
+        assert False
     mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload)  # Handler Reference Box
 
-    if is_audio:
+    if stream_type == 'audio':
         smhd_payload = s88.pack(0)  # balance
         smhd_payload += u16.pack(0)  # reserved
         media_header_box = full_box(b'smhd', 0, 0, smhd_payload)  # Sound Media Header
-    else:
+    elif stream_type == 'video':
         vmhd_payload = u16.pack(0)  # graphics mode
         vmhd_payload += u16.pack(0) * 3  # opcolor
         media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload)  # Video Media Header
+    elif stream_type == 'text':
+        media_header_box = full_box(b'sthd', 0, 0, b'')  # Subtitle Media Header
+    else:
+        assert False
     minf_payload = media_header_box
 
     dref_payload = u32.pack(1)  # entry count
@@ -117,7 +132,7 @@ def write_piff_header(stream, params):
 
     sample_entry_payload = u8.pack(0) * 6  # reserved
     sample_entry_payload += u16.pack(1)  # data reference index
-    if is_audio:
+    if stream_type == 'audio':
         sample_entry_payload += u32.pack(0) * 2  # reserved
         sample_entry_payload += u16.pack(params.get('channels', 2))
         sample_entry_payload += u16.pack(params.get('bits_per_sample', 16))
@@ -127,7 +142,7 @@ def write_piff_header(stream, params):
 
         if fourcc == 'AACL':
             sample_entry_box = box(b'mp4a', sample_entry_payload)
-    else:
+    elif stream_type == 'video':
         sample_entry_payload += u16.pack(0)  # pre defined
         sample_entry_payload += u16.pack(0)  # reserved
         sample_entry_payload += u32.pack(0) * 3  # pre defined
@@ -155,6 +170,18 @@ def write_piff_header(stream, params):
             avcc_payload += pps
             sample_entry_payload += box(b'avcC', avcc_payload)  # AVC Decoder Configuration Record
             sample_entry_box = box(b'avc1', sample_entry_payload)  # AVC Simple Entry
+        else:
+            assert False
+    elif stream_type == 'text':
+        if fourcc == 'TTML':
+            sample_entry_payload += b'http://www.w3.org/ns/ttml\0'  # namespace
+            sample_entry_payload += b'\0'  # schema location
+            sample_entry_payload += b'\0'  # auxilary mime types(??)
+            sample_entry_box = box(b'stpp', sample_entry_payload)
+        else:
+            assert False
+    else:
+        assert False
     stsd_payload += sample_entry_box
 
     stbl_payload = full_box(b'stsd', 0, 0, stsd_payload)  # Sample Description Box
@@ -221,10 +248,13 @@ class IsmFD(FragmentFD):
 
         self._prepare_and_start_frag_download(ctx)
 
+        extra_state = ctx.setdefault('extra_state', {
+            'ism_track_written': False,
+        })
+
         fragment_retries = self.params.get('fragment_retries', 0)
         skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
 
-        track_written = False
         frag_index = 0
         for i, segment in enumerate(segments):
             frag_index += 1
@@ -236,11 +266,11 @@ class IsmFD(FragmentFD):
                     success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
                     if not success:
                         return False
-                    if not track_written:
+                    if not extra_state['ism_track_written']:
                         tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd'])
                         info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0]
                         write_piff_header(ctx['dest_stream'], info_dict['_download_params'])
-                        track_written = True
+                        extra_state['ism_track_written'] = True
                     self._append_fragment(ctx, frag_content)
                     break
                 except compat_urllib_error.HTTPError as err:
author	pukkandan <pukkandan.ytdlp@gmail.com>	2021-04-28 19:02:43 +0530
committer	GitHub <noreply@github.com>	2021-04-28 19:02:43 +0530
commit	be6202f12b97858b9d716e608394b51065d0419f (patch)
tree	71f920777c24d9d81c990f0bf57d66e9d5bbaff7 /yt_dlp/downloader/ism.py
parent	db9a564b6a5c31472f8298969584eead0b59fa1c (diff)
parent	e8f834cd8dfc07011d1080321e42bc130e7201bb (diff)
download	hypervideo-pre-be6202f12b97858b9d716e608394b51065d0419f.tar.lz hypervideo-pre-be6202f12b97858b9d716e608394b51065d0419f.tar.xz hypervideo-pre-be6202f12b97858b9d716e608394b51065d0419f.zip