Merge pull request #7296 from jaimeMF/xml_attrib_unicode

Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (…
author: Sergey M <dstftw@gmail.com> 2015-10-31 18:15:21 +0000
committer: Sergey M <dstftw@gmail.com> 2015-10-31 18:15:21 +0000
commit: 30eecc6a044d4070d0e8ea4a6e0637867f0d3a28 (patch)
tree: c120cc9df90a468bb0d58bb7a7601cb8418eb32f /youtube_dl/extractor
parent: dbd82a1d4fff1655920e111cc25a7fd526d7bf9a (diff)
parent: ae37338e681319a28d98dc551253d9fa1830969a (diff)
download: youtube-dl-30eecc6a044d4070d0e8ea4a6e0637867f0d3a28.tar.xz
8 files changed, 22 insertions, 18 deletions
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 6f465789b..73be6d204 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -14,8 +14,8 @@ from ..utils import (
     parse_duration,
     unified_strdate,
     xpath_text,
-    parse_xml,
 )
+from ..compat import compat_etree_fromstring
 
 
 class ARDMediathekIE(InfoExtractor):
@@ -161,7 +161,7 @@ class ARDMediathekIE(InfoExtractor):
             raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
 
         if re.search(r'[\?&]rss($|[=&])', url):
-            doc = parse_xml(webpage)
+            doc = compat_etree_fromstring(webpage.encode('utf-8'))
             if doc.tag == 'rss':
                 return GenericIE()._extract_rss(url, video_id, doc)
 
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 2cdce1eb9..a55a6dbc9 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -14,7 +13,10 @@ from ..utils import (
     remove_end,
     unescapeHTML,
 )
-from ..compat import compat_HTTPError
+from ..compat import (
+    compat_etree_fromstring,
+    compat_HTTPError,
+)
 
 
 class BBCCoUkIE(InfoExtractor):
@@ -344,7 +346,7 @@ class BBCCoUkIE(InfoExtractor):
                 url, programme_id, 'Downloading media selection XML')
         except ExtractorError as ee:
             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
-                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
+                media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
             else:
                 raise
         return self._process_media_selector(media_selection, programme_id)
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index ecc17ebeb..6c66a1236 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -4,9 +4,11 @@ from __future__ import unicode_literals
 import re
 import itertools
 import json
-import xml.etree.ElementTree as ET
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_etree_fromstring,
+)
 from ..utils import (
     int_or_none,
     unified_strdate,
@@ -88,7 +90,7 @@ class BiliBiliIE(InfoExtractor):
         except ValueError:
             pass
 
-        lq_doc = ET.fromstring(lq_page)
+        lq_doc = compat_etree_fromstring(lq_page)
         lq_durls = lq_doc.findall('./durl')
 
         hq_doc = self._download_xml(
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 4721c2293..1686cdde1 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -3,10 +3,10 @@ from __future__ import unicode_literals
 
 import re
 import json
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_etree_fromstring,
     compat_parse_qs,
     compat_str,
     compat_urllib_parse,
@@ -119,7 +119,7 @@ class BrightcoveIE(InfoExtractor):
         object_str = fix_xml_ampersands(object_str)
 
         try:
-            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+            object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
         except compat_xml_parse_error:
             return
 
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 1f09fbb47..5e263f8b5 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -10,7 +10,6 @@ import re
 import socket
 import sys
 import time
-import xml.etree.ElementTree
 
 from ..compat import (
     compat_cookiejar,
@@ -23,6 +22,7 @@ from ..compat import (
     compat_urllib_request,
     compat_urlparse,
     compat_str,
+    compat_etree_fromstring,
 )
 from ..utils import (
     NO_DEFAULT,
@@ -461,7 +461,7 @@ class InfoExtractor(object):
             return xml_string
         if transform_source:
             xml_string = transform_source(xml_string)
-        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
+        return compat_etree_fromstring(xml_string.encode('utf-8'))
 
     def _download_json(self, url_or_request, video_id,
                        note='Downloading JSON metadata',
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index f8ce10111..0c9b8ca02 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -5,12 +5,12 @@ import re
 import json
 import base64
 import zlib
-import xml.etree.ElementTree
 
 from hashlib import sha1
 from math import pow, sqrt, floor
 from .common import InfoExtractor
 from ..compat import (
+    compat_etree_fromstring,
     compat_urllib_parse,
     compat_urllib_parse_unquote,
     compat_urllib_request,
@@ -234,7 +234,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         return output
 
     def _extract_subtitles(self, subtitle):
-        sub_root = xml.etree.ElementTree.fromstring(subtitle)
+        sub_root = compat_etree_fromstring(subtitle)
         return [{
             'ext': 'srt',
             'data': self._convert_subtitles_to_srt(sub_root),
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index a84135032..ee5419f51 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -9,6 +9,7 @@ import sys
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..compat import (
+    compat_etree_fromstring,
     compat_urllib_parse_unquote,
     compat_urllib_request,
     compat_urlparse,
@@ -21,7 +22,6 @@ from ..utils import (
     HEADRequest,
     is_html,
     orderedSet,
-    parse_xml,
     smuggle_url,
     unescapeHTML,
     unified_strdate,
@@ -1238,7 +1238,7 @@ class GenericIE(InfoExtractor):
 
         # Is it an RSS feed, a SMIL file or a XSPF playlist?
         try:
-            doc = parse_xml(webpage)
+            doc = compat_etree_fromstring(webpage.encode('utf-8'))
             if doc.tag == 'rss':
                 return self._extract_rss(url, video_id, doc)
             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index c17094f81..4c0de354f 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -1,10 +1,10 @@
 from __future__ import unicode_literals
 
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_etree_fromstring,
     compat_urllib_request,
 )
 from ..utils import (
@@ -97,7 +97,7 @@ class VevoIE(InfoExtractor):
         if last_version['version'] == -1:
             raise ExtractorError('Unable to extract last version of the video')
 
-        renditions = xml.etree.ElementTree.fromstring(last_version['data'])
+        renditions = compat_etree_fromstring(last_version['data'])
         formats = []
         # Already sorted from worst to best quality
         for rend in renditions.findall('rendition'):
@@ -114,7 +114,7 @@ class VevoIE(InfoExtractor):
 
     def _formats_from_smil(self, smil_xml):
         formats = []
-        smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
+        smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8'))
         els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
         for el in els:
             src = el.attrib['src']
author	Sergey M <dstftw@gmail.com>	2015-10-31 18:15:21 +0000
committer	Sergey M <dstftw@gmail.com>	2015-10-31 18:15:21 +0000
commit	30eecc6a044d4070d0e8ea4a6e0637867f0d3a28 (patch)
tree	c120cc9df90a468bb0d58bb7a7601cb8418eb32f /youtube_dl/extractor
parent	dbd82a1d4fff1655920e111cc25a7fd526d7bf9a (diff)
parent	ae37338e681319a28d98dc551253d9fa1830969a (diff)
download	youtube-dl-30eecc6a044d4070d0e8ea4a6e0637867f0d3a28.tar.xz