diff options
-rw-r--r-- | test/test_compat.py | 6 | ||||
-rw-r--r-- | youtube_dl/compat.py | 17 |
2 files changed, 18 insertions, 5 deletions
diff --git a/test/test_compat.py b/test/test_compat.py index 539b30540..f5317ac3e 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase): self.assertTrue(isinstance(doc.find('chinese').text, compat_str)) self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) + def test_compat_etree_fromstring_doctype(self): + xml = '''<?xml version="1.0"?> +<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd"> +<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>''' + compat_etree_fromstring(xml) + def test_struct_unpack(self): self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,)) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 1392361a1..06e5f3ff6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -245,13 +245,20 @@ try: except ImportError: # Python 2.6 from xml.parsers.expat import ExpatError as compat_xml_parse_error + +etree = xml.etree.ElementTree + + +class _TreeBuilder(etree.TreeBuilder): + def doctype(self, name, pubid, system): + pass + if sys.version_info[0] >= 3: - compat_etree_fromstring = xml.etree.ElementTree.fromstring + def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) else: # python 2.x tries to encode unicode strings with ascii (see the # XMLParser._fixtext method) - etree = xml.etree.ElementTree - try: _etree_iter = etree.Element.iter except AttributeError: # Python <=2.6 @@ -265,7 +272,7 @@ else: # 2.7 source def _XML(text, parser=None): if not parser: - parser = etree.XMLParser(target=etree.TreeBuilder()) + parser = etree.XMLParser(target=_TreeBuilder()) parser.feed(text) return parser.close() @@ -277,7 +284,7 @@ else: return el def compat_etree_fromstring(text): - doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) + doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory))) for el in _etree_iter(doc): if el.text is not None and isinstance(el.text, bytes): el.text = el.text.decode('utf-8') |