diff options
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-10-26 16:41:24 +0100 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-10-26 16:41:24 +0100 | 
| commit | f78546272cf7c4b10c8003870728ab69bec982fc (patch) | |
| tree | 35449b6fd8c220411a30403396ef705d149650a8 /youtube_dl/compat.py | |
| parent | 387db16a789fea25795433538d80513c18d0f699 (diff) | |
[compat] compat_etree_fromstring: also decode the text attribute
Deletes parse_xml from utils, which is now redundant because compat_etree_fromstring performs the same text decoding.
Diffstat (limited to 'youtube_dl/compat.py')
| -rw-r--r-- | youtube_dl/compat.py | 18 | 
1 file changed, 16 insertions, 2 deletions
```diff
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index f39d4e9a9..2d43ec852 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -216,9 +216,19 @@ except ImportError:  # Python 2.6
 if sys.version_info[0] >= 3:
     compat_etree_fromstring = xml.etree.ElementTree.fromstring
 else:
-    # on python 2.x the the attributes of a node aren't always unicode objects
+    # on python 2.x the attributes and text of a node aren't always unicode
+    # objects
     etree = xml.etree.ElementTree
 
+    try:
+        _etree_iter = etree.Element.iter
+    except AttributeError:  # Python <=2.6
+        def _etree_iter(root):
+            for el in root.findall('*'):
+                yield el
+                for sub in _etree_iter(el):
+                    yield sub
+
     # on 2.6 XML doesn't have a parser argument, function copied from CPython
     # 2.7 source
     def _XML(text, parser=None):
@@ -235,7 +245,11 @@ else:
         return el
 
     def compat_etree_fromstring(text):
-        return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+        for el in _etree_iter(doc):
+            if el.text is not None and isinstance(el.text, bytes):
+                el.text = el.text.decode('utf-8')
+        return doc
 
 try:
     from urllib.parse import parse_qs as compat_parse_qs
```
