diff options
author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-05-23 01:34:08 +0800 |
---|---|---|
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-05-23 01:40:11 +0800 |
commit | eb7941e3e6e92bac26f5d21525fc8ac89c934abe (patch) | |
tree | 8eae588751968cb8293de55b1957d4cecbdcc8dc /youtube_dl/compat.py | |
parent | db3b8b2103099a8859402f2167d7ad1a8fa66829 (diff) |
[compat] Fix for XML with <!DOCTYPE> in Python 2.7 and 3.2
Such XML documents cause DeprecationWarning if python is run
with `-W error`
Diffstat (limited to 'youtube_dl/compat.py')
-rw-r--r-- | youtube_dl/compat.py | 17 |
1 files changed, 12 insertions, 5 deletions
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 1392361a1..06e5f3ff6 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -245,13 +245,20 @@ try: except ImportError: # Python 2.6 from xml.parsers.expat import ExpatError as compat_xml_parse_error + +etree = xml.etree.ElementTree + + +class _TreeBuilder(etree.TreeBuilder): + def doctype(self, name, pubid, system): + pass + if sys.version_info[0] >= 3: - compat_etree_fromstring = xml.etree.ElementTree.fromstring + def compat_etree_fromstring(text): + return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) else: # python 2.x tries to encode unicode strings with ascii (see the # XMLParser._fixtext method) - etree = xml.etree.ElementTree - try: _etree_iter = etree.Element.iter except AttributeError: # Python <=2.6 @@ -265,7 +272,7 @@ else: # 2.7 source def _XML(text, parser=None): if not parser: - parser = etree.XMLParser(target=etree.TreeBuilder()) + parser = etree.XMLParser(target=_TreeBuilder()) parser.feed(text) return parser.close() @@ -277,7 +284,7 @@ else: return el def compat_etree_fromstring(text): - doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) + doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory))) for el in _etree_iter(doc): if el.text is not None and isinstance(el.text, bytes): el.text = el.text.decode('utf-8') |