diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2015-06-21 19:16:59 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2015-06-21 19:24:39 +0800 | 
| commit | 4e3357717312ac56145ba166a1ae2806f6db8337 (patch) | |
| tree | 1e2f293754e36bcd4aa0773b8738dde27068605d | |
| parent | 607841af64d308eaf577e528fd7317a8b382b8e6 (diff) | |
| download | hypervideo-pre-4e3357717312ac56145ba166a1ae2806f6db8337.tar.lz hypervideo-pre-4e3357717312ac56145ba166a1ae2806f6db8337.tar.xz hypervideo-pre-4e3357717312ac56145ba166a1ae2806f6db8337.zip | |
[utils] Support ttaf1 namespace in TTML
The ttaf1 namespace is used by TTML subtitles served on bbc.co.uk. See #6038
| -rw-r--r-- | youtube_dl/utils.py | 11 | 
1 file changed, 7 insertions, 4 deletions
| diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 259a9d634..a2746b2d1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1841,7 +1841,10 @@ def srt_subtitles_timecode(seconds):  def dfxp2srt(dfxp_data): -    _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'}) +    _x = functools.partial(xpath_with_ns, ns_map={ +        'ttml': 'http://www.w3.org/ns/ttml', +        'ttaf1': 'http://www.w3.org/2006/10/ttaf1', +    })      def parse_node(node):          str_or_empty = functools.partial(str_or_none, default='') @@ -1849,9 +1852,9 @@ def dfxp2srt(dfxp_data):          out = str_or_empty(node.text)          for child in node: -            if child.tag in (_x('ttml:br'), 'br'): +            if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):                  out += '\n' + str_or_empty(child.tail) -            elif child.tag in (_x('ttml:span'), 'span'): +            elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):                  out += str_or_empty(parse_node(child))              else:                  out += str_or_empty(xml.etree.ElementTree.tostring(child)) @@ -1860,7 +1863,7 @@ def dfxp2srt(dfxp_data):      dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))      out = [] -    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') +    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')      if not paras:          raise ValueError('Invalid dfxp/TTML subtitle') | 
