diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2015-06-21 19:16:59 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2015-06-21 19:24:39 +0800 |
commit | 4e3357717312ac56145ba166a1ae2806f6db8337 (patch) | |
tree | 1e2f293754e36bcd4aa0773b8738dde27068605d | |
parent | 607841af64d308eaf577e528fd7317a8b382b8e6 (diff) |
[utils] Support ttaf1 namespace in TTML
It's found in bbc.co.uk. See #6038
-rw-r--r-- | youtube_dl/utils.py | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 259a9d634..a2746b2d1 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1841,7 +1841,10 @@ def srt_subtitles_timecode(seconds): def dfxp2srt(dfxp_data): - _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'}) + _x = functools.partial(xpath_with_ns, ns_map={ + 'ttml': 'http://www.w3.org/ns/ttml', + 'ttaf1': 'http://www.w3.org/2006/10/ttaf1', + }) def parse_node(node): str_or_empty = functools.partial(str_or_none, default='') @@ -1849,9 +1852,9 @@ def dfxp2srt(dfxp_data): out = str_or_empty(node.text) for child in node: - if child.tag in (_x('ttml:br'), 'br'): + if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'): out += '\n' + str_or_empty(child.tail) - elif child.tag in (_x('ttml:span'), 'span'): + elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'): out += str_or_empty(parse_node(child)) else: out += str_or_empty(xml.etree.ElementTree.tostring(child)) @@ -1860,7 +1863,7 @@ def dfxp2srt(dfxp_data): dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) out = [] - paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') + paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p') if not paras: raise ValueError('Invalid dfxp/TTML subtitle') |