aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
author: Yen Chi Hsuan <yan12125@gmail.com> 2015-06-21 19:16:59 +0800
committer: Yen Chi Hsuan <yan12125@gmail.com> 2015-06-21 19:24:39 +0800
commit: 4e3357717312ac56145ba166a1ae2806f6db8337 (patch)
tree: 1e2f293754e36bcd4aa0773b8738dde27068605d /youtube_dl
parent: 607841af64d308eaf577e528fd7317a8b382b8e6 (diff)
[utils] Support ttaf1 namespace in TTML
This namespace is found on bbc.co.uk. See #6038
Diffstat (limited to 'youtube_dl')
-rw-r--r-- youtube_dl/utils.py 11
1 file changed, 7 insertions, 4 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 259a9d634..a2746b2d1 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1841,7 +1841,10 @@ def srt_subtitles_timecode(seconds):
def dfxp2srt(dfxp_data):
- _x = functools.partial(xpath_with_ns, ns_map={'ttml': 'http://www.w3.org/ns/ttml'})
+ _x = functools.partial(xpath_with_ns, ns_map={
+ 'ttml': 'http://www.w3.org/ns/ttml',
+ 'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
+ })
def parse_node(node):
str_or_empty = functools.partial(str_or_none, default='')
@@ -1849,9 +1852,9 @@ def dfxp2srt(dfxp_data):
out = str_or_empty(node.text)
for child in node:
- if child.tag in (_x('ttml:br'), 'br'):
+ if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
out += '\n' + str_or_empty(child.tail)
- elif child.tag in (_x('ttml:span'), 'span'):
+ elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
out += str_or_empty(parse_node(child))
else:
out += str_or_empty(xml.etree.ElementTree.tostring(child))
@@ -1860,7 +1863,7 @@ def dfxp2srt(dfxp_data):
dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
out = []
- paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+ paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
if not paras:
raise ValueError('Invalid dfxp/TTML subtitle')