diff options
| -rw-r--r-- | test/test_utils.py | 15 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 9 | 
2 files changed, 21 insertions, 3 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index b40107037..e13e11b59 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -621,6 +621,21 @@ Line
 '''
         self.assertEqual(dfxp2srt(dfxp_data), srt_data)
 
+        dfxp_data_no_default_namespace = '''<?xml version="1.0" encoding="UTF-8"?>
+            <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter">
+            <body>
+                <div xml:lang="en">
+                    <p begin="0" end="1">The first line</p>
+                </div>
+            </body>
+            </tt>'''
+        srt_data = '''1
+00:00:00,000 --> 00:00:01,000
+The first line
+
+'''
+        self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ed9ed9ed6..507f07383 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1848,9 +1848,9 @@ def dfxp2srt(dfxp_data):
         out = str_or_empty(node.text)
 
         for child in node:
-            if child.tag == _x('ttml:br'):
+            if child.tag in (_x('ttml:br'), 'br'):
                 out += '\n' + str_or_empty(child.tail)
-            elif child.tag == _x('ttml:span'):
+            elif child.tag in (_x('ttml:span'), 'span'):
                 out += str_or_empty(parse_node(child))
             else:
                 out += str_or_empty(xml.etree.ElementTree.tostring(child))
@@ -1859,7 +1859,10 @@ def dfxp2srt(dfxp_data):
 
     dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
     out = []
-    paras = dfxp.findall(_x('.//ttml:p'))
+    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+
+    if not paras:
+        raise ValueError('Invalid dfxp/TTML subtitle')
 
     for para, index in zip(paras, itertools.count(1)):
         begin_time = parse_dfxp_time_expr(para.attrib['begin'])
