diff options
| -rw-r--r-- | test/test_compat.py | 7 | ||||
| -rw-r--r-- | test/test_utils.py | 11 | ||||
| -rw-r--r-- | youtube_dl/compat.py | 25 | ||||
| -rw-r--r-- | youtube_dl/downloader/f4m.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/bbc.py | 8 | ||||
| -rw-r--r-- | youtube_dl/extractor/bilibili.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/brightcove.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/crunchyroll.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/vevo.py | 6 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 3 | 
11 files changed, 61 insertions, 21 deletions
| diff --git a/test/test_compat.py b/test/test_compat.py index 4ee0dc99d..2b0860479 100644 --- a/test/test_compat.py +++ b/test/test_compat.py @@ -13,8 +13,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  from youtube_dl.utils import get_filesystem_encoding  from youtube_dl.compat import (      compat_getenv, +    compat_etree_fromstring,      compat_expanduser,      compat_shlex_split, +    compat_str,      compat_urllib_parse_unquote,      compat_urllib_parse_unquote_plus,  ) @@ -71,5 +73,10 @@ class TestCompat(unittest.TestCase):      def test_compat_shlex_split(self):          self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) +    def test_compat_etree_fromstring(self): +        xml = '<el foo="bar"></el>' +        doc = compat_etree_fromstring(xml.encode('utf-8')) +        self.assertTrue(isinstance(doc.attrib['foo'], compat_str)) +  if __name__ == '__main__':      unittest.main() diff --git a/test/test_utils.py b/test/test_utils.py index 918a7a9ef..a9e0fed7e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -68,6 +68,9 @@ from youtube_dl.utils import (      cli_valueless_option,      cli_bool_option,  ) +from youtube_dl.compat import ( +    compat_etree_fromstring, +)  class TestUtil(unittest.TestCase): @@ -242,7 +245,7 @@ class TestUtil(unittest.TestCase):              <node x="b" y="d" />              <node x="" />          </root>''' -        doc = xml.etree.ElementTree.fromstring(testxml) +        doc = compat_etree_fromstring(testxml)          self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None)          self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) @@ -263,7 +266,7 @@ class TestUtil(unittest.TestCase):                  <url>http://server.com/download.mp3</url>              </media:song>          </root>''' -        doc = xml.etree.ElementTree.fromstring(testxml) +        doc = compat_etree_fromstring(testxml)          find = lambda p: 
doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))          self.assertTrue(find('media:song') is not None)          self.assertEqual(find('media:song/media:author').text, 'The Author') @@ -285,7 +288,7 @@ class TestUtil(unittest.TestCase):                  <p>Foo</p>              </div>          </root>''' -        doc = xml.etree.ElementTree.fromstring(testxml) +        doc = compat_etree_fromstring(testxml)          self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')          self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')          self.assertTrue(xpath_text(doc, 'div/bar') is None) @@ -297,7 +300,7 @@ class TestUtil(unittest.TestCase):                  <p x="a">Foo</p>              </div>          </root>''' -        doc = xml.etree.ElementTree.fromstring(testxml) +        doc = compat_etree_fromstring(testxml)          self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')          self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)          self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index d103ab9ad..cf10835ca 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -14,6 +14,7 @@ import socket  import subprocess  import sys  import itertools +import xml.etree.ElementTree  try: @@ -212,6 +213,29 @@ try:  except ImportError:  # Python 2.6      from xml.parsers.expat import ExpatError as compat_xml_parse_error +if sys.version_info[0] >= 3: +    compat_etree_fromstring = xml.etree.ElementTree.fromstring +else: +    # on python 2.x the attributes of a node are str objects instead of +    # unicode +    etree = xml.etree.ElementTree + +    # on 2.6 XML doesn't have a parser argument, function copied from CPython +    # 2.7 source +    def _XML(text, parser=None): +        if not parser: +            parser = etree.XMLParser(target=etree.TreeBuilder()) +        parser.feed(text) +        return parser.close() + +    def _element_factory(*args, 
**kwargs): +        el = etree.Element(*args, **kwargs) +        for k, v in el.items(): +            el.set(k, v.decode('utf-8')) +        return el + +    def compat_etree_fromstring(text): +        return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))  try:      from urllib.parse import parse_qs as compat_parse_qs @@ -507,6 +531,7 @@ __all__ = [      'compat_chr',      'compat_cookiejar',      'compat_cookies', +    'compat_etree_fromstring',      'compat_expanduser',      'compat_get_terminal_size',      'compat_getenv', diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 7f6143954..6170cc155 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -5,10 +5,10 @@ import io  import itertools  import os  import time -import xml.etree.ElementTree as etree  from .fragment import FragmentFD  from ..compat import ( +    compat_etree_fromstring,      compat_urlparse,      compat_urllib_error,      compat_urllib_parse_urlparse, @@ -290,7 +290,7 @@ class F4mFD(FragmentFD):          man_url = urlh.geturl()          manifest = urlh.read() -        doc = etree.fromstring(manifest) +        doc = compat_etree_fromstring(manifest)          formats = [(int(f.attrib.get('bitrate', -1)), f)                     for f in self._get_unencrypted_media(doc)]          if requested_bitrate is None: diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 2cdce1eb9..a55a6dbc9 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -2,7 +2,6 @@  from __future__ import unicode_literals  import re -import xml.etree.ElementTree  from .common import InfoExtractor  from ..utils import ( @@ -14,7 +13,10 @@ from ..utils import (      remove_end,      unescapeHTML,  ) -from ..compat import compat_HTTPError +from ..compat import ( +    compat_etree_fromstring, +    compat_HTTPError, +)  class BBCCoUkIE(InfoExtractor): @@ -344,7 +346,7 @@ class 
BBCCoUkIE(InfoExtractor):                  url, programme_id, 'Downloading media selection XML')          except ExtractorError as ee:              if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: -                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8')) +                media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))              else:                  raise          return self._process_media_selector(media_selection, programme_id) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index ecc17ebeb..6c66a1236 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals  import re  import itertools  import json -import xml.etree.ElementTree as ET  from .common import InfoExtractor +from ..compat import ( +    compat_etree_fromstring, +)  from ..utils import (      int_or_none,      unified_strdate, @@ -88,7 +90,7 @@ class BiliBiliIE(InfoExtractor):          except ValueError:              pass -        lq_doc = ET.fromstring(lq_page) +        lq_doc = compat_etree_fromstring(lq_page)          lq_durls = lq_doc.findall('./durl')          hq_doc = self._download_xml( diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 4721c2293..1686cdde1 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -3,10 +3,10 @@ from __future__ import unicode_literals  import re  import json -import xml.etree.ElementTree  from .common import InfoExtractor  from ..compat import ( +    compat_etree_fromstring,      compat_parse_qs,      compat_str,      compat_urllib_parse, @@ -119,7 +119,7 @@ class BrightcoveIE(InfoExtractor):          object_str = fix_xml_ampersands(object_str)          try: -            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8')) +            object_doc = 
compat_etree_fromstring(object_str.encode('utf-8'))          except compat_xml_parse_error:              return diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 10c0d5d1f..52523d7b2 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -10,7 +10,6 @@ import re  import socket  import sys  import time -import xml.etree.ElementTree  from ..compat import (      compat_cookiejar, @@ -23,6 +22,7 @@ from ..compat import (      compat_urllib_request,      compat_urlparse,      compat_str, +    compat_etree_fromstring,  )  from ..utils import (      NO_DEFAULT, @@ -461,7 +461,7 @@ class InfoExtractor(object):              return xml_string          if transform_source:              xml_string = transform_source(xml_string) -        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) +        return compat_etree_fromstring(xml_string.encode('utf-8'))      def _download_json(self, url_or_request, video_id,                         note='Downloading JSON metadata', diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index f8ce10111..0c9b8ca02 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -5,12 +5,12 @@ import re  import json  import base64  import zlib -import xml.etree.ElementTree  from hashlib import sha1  from math import pow, sqrt, floor  from .common import InfoExtractor  from ..compat import ( +    compat_etree_fromstring,      compat_urllib_parse,      compat_urllib_parse_unquote,      compat_urllib_request, @@ -234,7 +234,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text          return output      def _extract_subtitles(self, subtitle): -        sub_root = xml.etree.ElementTree.fromstring(subtitle) +        sub_root = compat_etree_fromstring(subtitle)          return [{              'ext': 'srt',              'data': self._convert_subtitles_to_srt(sub_root), diff --git 
a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c17094f81..4c0de354f 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -1,10 +1,10 @@  from __future__ import unicode_literals  import re -import xml.etree.ElementTree  from .common import InfoExtractor  from ..compat import ( +    compat_etree_fromstring,      compat_urllib_request,  )  from ..utils import ( @@ -97,7 +97,7 @@ class VevoIE(InfoExtractor):          if last_version['version'] == -1:              raise ExtractorError('Unable to extract last version of the video') -        renditions = xml.etree.ElementTree.fromstring(last_version['data']) +        renditions = compat_etree_fromstring(last_version['data'])          formats = []          # Already sorted from worst to best quality          for rend in renditions.findall('rendition'): @@ -114,7 +114,7 @@ class VevoIE(InfoExtractor):      def _formats_from_smil(self, smil_xml):          formats = [] -        smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8')) +        smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8'))          els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')          for el in els:              src = el.attrib['src'] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a61e47646..7d846d680 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -36,6 +36,7 @@ import zlib  from .compat import (      compat_basestring,      compat_chr, +    compat_etree_fromstring,      compat_html_entities,      compat_http_client,      compat_kwargs, @@ -1974,7 +1975,7 @@ def dfxp2srt(dfxp_data):          return out -    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) +    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))      out = []      paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p') | 
