diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-10-26 16:41:24 +0100 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-10-26 16:41:24 +0100 |
commit | f78546272cf7c4b10c8003870728ab69bec982fc (patch) | |
tree | 35449b6fd8c220411a30403396ef705d149650a8 /youtube_dl/compat.py | |
parent | 387db16a789fea25795433538d80513c18d0f699 (diff) |
[compat] compat_etree_fromstring: also decode the text attribute
Deletes parse_xml from utils, because compat_etree_fromstring now also decodes the text of each element.
Diffstat (limited to 'youtube_dl/compat.py')
-rw-r--r-- | youtube_dl/compat.py | 18 |
1 files changed, 16 insertions, 2 deletions
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index f39d4e9a9..2d43ec852 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -216,9 +216,19 @@ except ImportError: # Python 2.6 if sys.version_info[0] >= 3: compat_etree_fromstring = xml.etree.ElementTree.fromstring else: - # on python 2.x the the attributes of a node aren't always unicode objects + # on python 2.x the attributes and text of a node aren't always unicode + # objects etree = xml.etree.ElementTree + try: + _etree_iter = etree.Element.iter + except AttributeError: # Python <=2.6 + def _etree_iter(root): + for el in root.findall('*'): + yield el + for sub in _etree_iter(el): + yield sub + # on 2.6 XML doesn't have a parser argument, function copied from CPython # 2.7 source def _XML(text, parser=None): @@ -235,7 +245,11 @@ else: return el def compat_etree_fromstring(text): - return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) + doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) + for el in _etree_iter(doc): + if el.text is not None and isinstance(el.text, bytes): + el.text = el.text.decode('utf-8') + return doc try: from urllib.parse import parse_qs as compat_parse_qs |