diff options
| -rw-r--r-- | test/test_compat.py | 6 | ||||
| -rw-r--r-- | youtube_dl/compat.py | 17 | 
2 files changed, 18 insertions, 5 deletions
diff --git a/test/test_compat.py b/test/test_compat.py index 539b30540..f5317ac3e 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase):          self.assertTrue(isinstance(doc.find('chinese').text, compat_str))          self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str)) +    def test_compat_etree_fromstring_doctype(self): +        xml = '''<?xml version="1.0"?> +<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd"> +<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>''' +        compat_etree_fromstring(xml) +      def test_struct_unpack(self):          self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,)) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 1392361a1..06e5f3ff6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -245,13 +245,20 @@ try:  except ImportError:  # Python 2.6      from xml.parsers.expat import ExpatError as compat_xml_parse_error + +etree = xml.etree.ElementTree + + +class _TreeBuilder(etree.TreeBuilder): +    def doctype(self, name, pubid, system): +        pass +  if sys.version_info[0] >= 3: -    compat_etree_fromstring = xml.etree.ElementTree.fromstring +    def compat_etree_fromstring(text): +        return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))  else:      # python 2.x tries to encode unicode strings with ascii (see the      # XMLParser._fixtext method) -    etree = xml.etree.ElementTree -      try:          _etree_iter = etree.Element.iter      except AttributeError:  # Python <=2.6 @@ -265,7 +272,7 @@ else:      # 2.7 source      def _XML(text, parser=None):          if not parser: -            parser = etree.XMLParser(target=etree.TreeBuilder()) +            parser = etree.XMLParser(target=_TreeBuilder())          parser.feed(text)          return parser.close() @@ -277,7 +284,7 @@ else:          return el      def compat_etree_fromstring(text): -        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) +        doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))          for el in _etree_iter(doc):              if el.text is not None and isinstance(el.text, bytes):                  el.text = el.text.decode('utf-8')  | 
