diff options
field | value | date |
---|---|---|
author | Sergey M <dstftw@gmail.com> | 2015-10-31 18:15:21 +0000 |
committer | Sergey M <dstftw@gmail.com> | 2015-10-31 18:15:21 +0000 |
commit | 30eecc6a044d4070d0e8ea4a6e0637867f0d3a28 (patch) | |
tree | c120cc9df90a468bb0d58bb7a7601cb8418eb32f /youtube_dl/extractor | |
parent | dbd82a1d4fff1655920e111cc25a7fd526d7bf9a (diff) | |
parent | ae37338e681319a28d98dc551253d9fa1830969a (diff) | |
Merge pull request #7296 from jaimeMF/xml_attrib_unicode
Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (…
Diffstat (limited to 'youtube_dl/extractor')
mode | file | lines changed |
---|---|---|
-rw-r--r-- | youtube_dl/extractor/ard.py | 4 |
-rw-r--r-- | youtube_dl/extractor/bbc.py | 8 |
-rw-r--r-- | youtube_dl/extractor/bilibili.py | 6 |
-rw-r--r-- | youtube_dl/extractor/brightcove.py | 4 |
-rw-r--r-- | youtube_dl/extractor/common.py | 4 |
-rw-r--r-- | youtube_dl/extractor/crunchyroll.py | 4 |
-rw-r--r-- | youtube_dl/extractor/generic.py | 4 |
-rw-r--r-- | youtube_dl/extractor/vevo.py | 6 |
8 files changed, 22 insertions, 18 deletions
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 6f465789b..73be6d204 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -14,8 +14,8 @@ from ..utils import ( parse_duration, unified_strdate, xpath_text, - parse_xml, ) +from ..compat import compat_etree_fromstring class ARDMediathekIE(InfoExtractor): @@ -161,7 +161,7 @@ class ARDMediathekIE(InfoExtractor): raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True) if re.search(r'[\?&]rss($|[=&])', url): - doc = parse_xml(webpage) + doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': return GenericIE()._extract_rss(url, video_id, doc) diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py index 2cdce1eb9..a55a6dbc9 100644 --- a/youtube_dl/extractor/bbc.py +++ b/youtube_dl/extractor/bbc.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import re -import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( @@ -14,7 +13,10 @@ from ..utils import ( remove_end, unescapeHTML, ) -from ..compat import compat_HTTPError +from ..compat import ( + compat_etree_fromstring, + compat_HTTPError, +) class BBCCoUkIE(InfoExtractor): @@ -344,7 +346,7 @@ class BBCCoUkIE(InfoExtractor): url, programme_id, 'Downloading media selection XML') except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: - media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8')) + media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8')) else: raise return self._process_media_selector(media_selection, programme_id) diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py index ecc17ebeb..6c66a1236 100644 --- a/youtube_dl/extractor/bilibili.py +++ b/youtube_dl/extractor/bilibili.py @@ -4,9 +4,11 @@ from __future__ import 
unicode_literals import re import itertools import json -import xml.etree.ElementTree as ET from .common import InfoExtractor +from ..compat import ( + compat_etree_fromstring, +) from ..utils import ( int_or_none, unified_strdate, @@ -88,7 +90,7 @@ class BiliBiliIE(InfoExtractor): except ValueError: pass - lq_doc = ET.fromstring(lq_page) + lq_doc = compat_etree_fromstring(lq_page) lq_durls = lq_doc.findall('./durl') hq_doc = self._download_xml( diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 4721c2293..1686cdde1 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -3,10 +3,10 @@ from __future__ import unicode_literals import re import json -import xml.etree.ElementTree from .common import InfoExtractor from ..compat import ( + compat_etree_fromstring, compat_parse_qs, compat_str, compat_urllib_parse, @@ -119,7 +119,7 @@ class BrightcoveIE(InfoExtractor): object_str = fix_xml_ampersands(object_str) try: - object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8')) + object_doc = compat_etree_fromstring(object_str.encode('utf-8')) except compat_xml_parse_error: return diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 1f09fbb47..5e263f8b5 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -10,7 +10,6 @@ import re import socket import sys import time -import xml.etree.ElementTree from ..compat import ( compat_cookiejar, @@ -23,6 +22,7 @@ from ..compat import ( compat_urllib_request, compat_urlparse, compat_str, + compat_etree_fromstring, ) from ..utils import ( NO_DEFAULT, @@ -461,7 +461,7 @@ class InfoExtractor(object): return xml_string if transform_source: xml_string = transform_source(xml_string) - return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) + return compat_etree_fromstring(xml_string.encode('utf-8')) def _download_json(self, url_or_request, video_id, note='Downloading JSON 
metadata', diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index f8ce10111..0c9b8ca02 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -5,12 +5,12 @@ import re import json import base64 import zlib -import xml.etree.ElementTree from hashlib import sha1 from math import pow, sqrt, floor from .common import InfoExtractor from ..compat import ( + compat_etree_fromstring, compat_urllib_parse, compat_urllib_parse_unquote, compat_urllib_request, @@ -234,7 +234,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text return output def _extract_subtitles(self, subtitle): - sub_root = xml.etree.ElementTree.fromstring(subtitle) + sub_root = compat_etree_fromstring(subtitle) return [{ 'ext': 'srt', 'data': self._convert_subtitles_to_srt(sub_root), diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a84135032..ee5419f51 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -9,6 +9,7 @@ import sys from .common import InfoExtractor from .youtube import YoutubeIE from ..compat import ( + compat_etree_fromstring, compat_urllib_parse_unquote, compat_urllib_request, compat_urlparse, @@ -21,7 +22,6 @@ from ..utils import ( HEADRequest, is_html, orderedSet, - parse_xml, smuggle_url, unescapeHTML, unified_strdate, @@ -1238,7 +1238,7 @@ class GenericIE(InfoExtractor): # Is it an RSS feed, a SMIL file or a XSPF playlist? 
try: - doc = parse_xml(webpage) + doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': return self._extract_rss(url, video_id, doc) elif re.match(r'^(?:{[^}]+})?smil$', doc.tag): diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index c17094f81..4c0de354f 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -1,10 +1,10 @@ from __future__ import unicode_literals import re -import xml.etree.ElementTree from .common import InfoExtractor from ..compat import ( + compat_etree_fromstring, compat_urllib_request, ) from ..utils import ( @@ -97,7 +97,7 @@ class VevoIE(InfoExtractor): if last_version['version'] == -1: raise ExtractorError('Unable to extract last version of the video') - renditions = xml.etree.ElementTree.fromstring(last_version['data']) + renditions = compat_etree_fromstring(last_version['data']) formats = [] # Already sorted from worst to best quality for rend in renditions.findall('rendition'): @@ -114,7 +114,7 @@ class VevoIE(InfoExtractor): def _formats_from_smil(self, smil_xml): formats = [] - smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8')) + smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8')) els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') for el in els: src = el.attrib['src'] |