aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/test_compat.py6
-rw-r--r--youtube_dl/compat.py17
2 files changed, 18 insertions, 5 deletions
diff --git a/test/test_compat.py b/test/test_compat.py
index 539b30540..f5317ac3e 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -103,6 +103,12 @@ class TestCompat(unittest.TestCase):
self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
+ def test_compat_etree_fromstring_doctype(self):
+ xml = '''<?xml version="1.0"?>
+<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
+<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
+ compat_etree_fromstring(xml)
+
def test_struct_unpack(self):
self.assertEqual(compat_struct_unpack('!B', b'\x00'), (0,))
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 1392361a1..06e5f3ff6 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -245,13 +245,20 @@ try:
except ImportError: # Python 2.6
from xml.parsers.expat import ExpatError as compat_xml_parse_error
+
+etree = xml.etree.ElementTree
+
+
+class _TreeBuilder(etree.TreeBuilder):
+ def doctype(self, name, pubid, system):
+ pass
+
if sys.version_info[0] >= 3:
- compat_etree_fromstring = xml.etree.ElementTree.fromstring
+ def compat_etree_fromstring(text):
+ return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
else:
# python 2.x tries to encode unicode strings with ascii (see the
# XMLParser._fixtext method)
- etree = xml.etree.ElementTree
-
try:
_etree_iter = etree.Element.iter
except AttributeError: # Python <=2.6
@@ -265,7 +272,7 @@ else:
# 2.7 source
def _XML(text, parser=None):
if not parser:
- parser = etree.XMLParser(target=etree.TreeBuilder())
+ parser = etree.XMLParser(target=_TreeBuilder())
parser.feed(text)
return parser.close()
@@ -277,7 +284,7 @@ else:
return el
def compat_etree_fromstring(text):
- doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+ doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
for el in _etree_iter(doc):
if el.text is not None and isinstance(el.text, bytes):
el.text = el.text.decode('utf-8')