Use the new '_download_xml' helper in more extractors

author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-11-26 18:48:52 +0100
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-11-26 19:17:25 +0100
commit: e26f8712289c727a43d74a4669aee4924b9f75f2 (patch)
tree: ce2e34f10baacc0eb24d0527154e5374560901cd
parent: 6e47b51eef26dbaa3634b73914e4ee7213ad38f7 (diff)
28 files changed, 38 insertions, 105 deletions
diff --git a/youtube_dl/extractor/anitube.py b/youtube_dl/extractor/anitube.py
index 691d5a844..2b019daa9 100644
--- a/youtube_dl/extractor/anitube.py
+++ b/youtube_dl/extractor/anitube.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 
@@ -28,9 +27,8 @@ class AnitubeIE(InfoExtractor):
         key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)',
                                       webpage, u'key')
 
-        webpage_config = self._download_webpage('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
+        config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key,
                                                 key)
-        config_xml = xml.etree.ElementTree.fromstring(webpage_config.encode('utf-8'))
 
         video_title = config_xml.find('title').text
 
diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py
index 44d0b5d70..8b62ee774 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dl/extractor/arte.py
@@ -1,7 +1,6 @@
 # encoding: utf-8
 import re
 import json
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -78,8 +77,7 @@ class ArteTvIE(InfoExtractor):
         """Extract from videos.arte.tv"""
         ref_xml_url = url.replace('/videos/', '/do_delegate/videos/')
         ref_xml_url = ref_xml_url.replace('.html', ',view,asPlayerXml.xml')
-        ref_xml = self._download_webpage(ref_xml_url, video_id, note=u'Downloading metadata')
-        ref_xml_doc = xml.etree.ElementTree.fromstring(ref_xml)
+        ref_xml_doc = self._download_xml(ref_xml_url, video_id, note=u'Downloading metadata')
         config_node = find_xpath_attr(ref_xml_doc, './/video', 'lang', lang)
         config_xml_url = config_node.attrib['ref']
         config_xml = self._download_webpage(config_xml_url, video_id, note=u'Downloading configuration')
@@ -109,9 +107,8 @@ class ArteTvIE(InfoExtractor):
         """Extract form http://liveweb.arte.tv/"""
         webpage = self._download_webpage(url, name)
         video_id = self._search_regex(r'eventId=(\d+?)("|&)', webpage, u'event id')
-        config_xml = self._download_webpage('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
+        config_doc = self._download_xml('http://download.liveweb.arte.tv/o21/liveweb/events/event-%s.xml' % video_id,
                                             video_id, u'Downloading information')
-        config_doc = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
         event_doc = config_doc.find('event')
         url_node = event_doc.find('video').find('urlHd')
         if url_node is None:
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dl/extractor/canalplus.py
index bfa2a8b40..7cdcd8399 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dl/extractor/canalplus.py
@@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import unified_strdate
@@ -31,11 +30,10 @@ class CanalplusIE(InfoExtractor):
             webpage = self._download_webpage(url, mobj.group('path'))
             video_id = self._search_regex(r'videoId = "(\d+)";', webpage, u'video id')
         info_url = self._VIDEO_INFO_TEMPLATE % video_id
-        info_page = self._download_webpage(info_url,video_id, 
+        doc = self._download_xml(info_url,video_id, 
                                            u'Downloading video info')
 
         self.report_extraction(video_id)
-        doc = xml.etree.ElementTree.fromstring(info_page.encode('utf-8'))
         video_info = [video for video in doc if video.find('ID').text == video_id][0]
         infos = video_info.find('INFOS')
         media = video_info.find('MEDIA')
diff --git a/youtube_dl/extractor/clipfish.py b/youtube_dl/extractor/clipfish.py
index 95449da3c..5f0b5602f 100644
--- a/youtube_dl/extractor/clipfish.py
+++ b/youtube_dl/extractor/clipfish.py
@@ -1,6 +1,5 @@
 import re
 import time
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 
@@ -25,9 +24,8 @@ class ClipfishIE(InfoExtractor):
 
         info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
                     (video_id, int(time.time())))
-        info_xml = self._download_webpage(
+        doc = self._download_xml(
             info_url, video_id, note=u'Downloading info page')
-        doc = xml.etree.ElementTree.fromstring(info_xml)
         title = doc.find('title').text
         video_url = doc.find('filename').text
         thumbnail = doc.find('imageurl').text
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dl/extractor/cnn.py
index 34adf6dda..a034bb2fb 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dl/extractor/cnn.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import determine_ext
@@ -33,8 +32,7 @@ class CNNIE(InfoExtractor):
         path = mobj.group('path')
         page_title = mobj.group('title')
         info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
-        info_xml = self._download_webpage(info_url, page_title)
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
+        info = self._download_xml(info_url, page_title)
 
         formats = []
         for f in info.findall('files/file'):
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dl/extractor/comedycentral.py
index 725849d2e..23647f99e 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dl/extractor/comedycentral.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from .mtv import MTVIE, _media_xml_tag
@@ -158,13 +157,12 @@ class ComedyCentralShowsIE(InfoExtractor):
 
         uri = mMovieParams[0][1]
         indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri})
-        indexXml = self._download_webpage(indexUrl, epTitle,
+        idoc = self._download_xml(indexUrl, epTitle,
                                           u'Downloading show index',
                                           u'unable to download episode index')
 
         results = []
 
-        idoc = xml.etree.ElementTree.fromstring(indexXml)
         itemEls = idoc.findall('.//item')
         for partNum,itemEl in enumerate(itemEls):
             mediaId = itemEl.findall('./guid')[0].text
@@ -175,10 +173,9 @@ class ComedyCentralShowsIE(InfoExtractor):
 
             configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
                         compat_urllib_parse.urlencode({'uri': mediaId}))
-            configXml = self._download_webpage(configUrl, epTitle,
+            cdoc = self._download_xml(configUrl, epTitle,
                                                u'Downloading configuration for %s' % shortMediaId)
 
-            cdoc = xml.etree.ElementTree.fromstring(configXml)
             turls = []
             for rendition in cdoc.findall('.//rendition'):
                 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py
index a804e83bd..3d1dcb793 100644
--- a/youtube_dl/extractor/daum.py
+++ b/youtube_dl/extractor/daum.py
@@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -32,14 +31,12 @@ class DaumIE(InfoExtractor):
         full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
             webpage, u'full id')
         query = compat_urllib_parse.urlencode({'vid': full_id})
-        info_xml = self._download_webpage(
+        info = self._download_xml(
             'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
             u'Downloading video info')
-        urls_xml = self._download_webpage(
+        urls = self._download_xml(
             'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
             video_id, u'Downloading video formats info')
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
-        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
 
         self.to_screen(u'%s: Getting video urls' % video_id)
         formats = []
@@ -49,10 +46,9 @@ class DaumIE(InfoExtractor):
                 'vid': full_id,
                 'profile': profile,
             })
-            url_xml = self._download_webpage(
+            url_doc = self._download_xml(
                 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
                 video_id, note=False)
-            url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
             format_url = url_doc.find('result/url').text
             formats.append({
                 'url': format_url,
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py
index 765cb1f37..3cb382e12 100644
--- a/youtube_dl/extractor/dreisat.py
+++ b/youtube_dl/extractor/dreisat.py
@@ -1,7 +1,6 @@
 # coding: utf-8
 
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -30,8 +29,7 @@ class DreiSatIE(InfoExtractor):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
         details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?ak=web&id=%s' % video_id
-        details_xml = self._download_webpage(details_url, video_id, note=u'Downloading video details')
-        details_doc = xml.etree.ElementTree.fromstring(details_xml.encode('utf-8'))
+        details_doc = self._download_xml(details_url, video_id, note=u'Downloading video details')
 
         thumbnail_els = details_doc.findall('.//teaserimage')
         thumbnails = [{
diff --git a/youtube_dl/extractor/ebaumsworld.py b/youtube_dl/extractor/ebaumsworld.py
index f02c6998b..877113d63 100644
--- a/youtube_dl/extractor/ebaumsworld.py
+++ b/youtube_dl/extractor/ebaumsworld.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import determine_ext
@@ -21,9 +20,8 @@ class EbaumsWorldIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        config_xml = self._download_webpage(
+        config = self._download_xml(
             'http://www.ebaumsworld.com/video/player/%s' % video_id, video_id)
-        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
         video_url = config.find('file').text
 
         return {
diff --git a/youtube_dl/extractor/faz.py b/youtube_dl/extractor/faz.py
index 89ed08db4..c0169de04 100644
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dl/extractor/faz.py
@@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -28,9 +27,8 @@ class FazIE(InfoExtractor):
         webpage = self._download_webpage(url, video_id)
         config_xml_url = self._search_regex(r'writeFLV\(\'(.+?)\',', webpage,
             u'config xml url')
-        config_xml = self._download_webpage(config_xml_url, video_id,
+        config = self._download_xml(config_xml_url, video_id,
             u'Downloading config xml')
-        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
 
         encodings = config.find('ENCODINGS')
         formats = []
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
index 086cafca0..6e1971043 100644
--- a/youtube_dl/extractor/francetv.py
+++ b/youtube_dl/extractor/francetv.py
@@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree
 import json
 
 from .common import InfoExtractor
@@ -11,11 +10,10 @@ from ..utils import (
 
 class FranceTVBaseInfoExtractor(InfoExtractor):
     def _extract_video(self, video_id):
-        xml_desc = self._download_webpage(
+        info = self._download_xml(
             'http://www.francetvinfo.fr/appftv/webservices/video/'
             'getInfosOeuvre.php?id-diffusion='
             + video_id, video_id, 'Downloading XML config')
-        info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
 
         manifest_url = info.find('videos/video/url').text
         video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py
index be8e05f53..16a6f73c8 100644
--- a/youtube_dl/extractor/internetvideoarchive.py
+++ b/youtube_dl/extractor/internetvideoarchive.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -43,9 +42,8 @@ class InternetVideoArchiveIE(InfoExtractor):
         video_id = query_dic['publishedid'][0]
         url = self._build_url(query)
 
-        flashconfiguration_xml = self._download_webpage(url, video_id,
+        flashconfiguration = self._download_xml(url, video_id,
             u'Downloading flash configuration')
-        flashconfiguration = xml.etree.ElementTree.fromstring(flashconfiguration_xml.encode('utf-8'))
         file_url = flashconfiguration.find('file').text
         file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
         # Replace some of the parameters in the query to get the best quality
@@ -53,9 +51,8 @@ class InternetVideoArchiveIE(InfoExtractor):
         file_url = re.sub(r'(?<=\?)(.+)$',
             lambda m: self._clean_query(m.group()),
             file_url)
-        info_xml = self._download_webpage(file_url, video_id,
+        info = self._download_xml(file_url, video_id,
             u'Downloading video info')
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
         item = info.find('channel/item')
 
         def _bp(p):
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py
index 0020c47cf..caf9d8c85 100644
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dl/extractor/jeuxvideo.py
@@ -2,7 +2,6 @@
 
 import json
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 
@@ -32,12 +31,9 @@ class JeuxVideoIE(InfoExtractor):
             r'http://www\.jeuxvideo\.com/config/\w+/\d+/(.*?)/\d+_player\.xml',
             xml_link, u'video ID')
 
-        xml_config = self._download_webpage(
+        config = self._download_xml(
             xml_link, title, u'Downloading XML config')
-        config = xml.etree.ElementTree.fromstring(xml_config.encode('utf-8'))
-        info_json = self._search_regex(
-            r'(?sm)<format\.json>(.*?)</format\.json>',
-            xml_config, u'JSON information')
+        info_json = config.find('format.json').text
         info = json.loads(info_json)['versions'][0]
         
         video_url = 'http://video720.jeuxvideo.com/' + info['file']
diff --git a/youtube_dl/extractor/justintv.py b/youtube_dl/extractor/justintv.py
index f60017992..e9bde0c18 100644
--- a/youtube_dl/extractor/justintv.py
+++ b/youtube_dl/extractor/justintv.py
@@ -1,7 +1,6 @@
 import json
 import os
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -94,10 +93,9 @@ class JustinTVIE(InfoExtractor):
             archive_id = m.group(1)
 
             api = api_base + '/broadcast/by_chapter/%s.xml' % chapter_id
-            chapter_info_xml = self._download_webpage(api, chapter_id,
+            doc = self._download_xml(api, chapter_id,
                                              note=u'Downloading chapter information',
                                              errnote=u'Chapter information download failed')
-            doc = xml.etree.ElementTree.fromstring(chapter_info_xml)
             for a in doc.findall('.//archive'):
                 if archive_id == a.find('./id').text:
                     break
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 5f548437c..9bc35b115 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dl/extractor/livestream.py
@@ -1,6 +1,5 @@
 import re
 import json
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -80,8 +79,7 @@ class LivestreamOriginalIE(InfoExtractor):
         user = mobj.group('user')
         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id)
 
-        api_response = self._download_webpage(api_url, video_id)
-        info = xml.etree.ElementTree.fromstring(api_response.encode('utf-8'))
+        info = self._download_xml(api_url, video_id)
         item = info.find('channel').find('item')
         ns = {'media': 'http://search.yahoo.com/mrss'}
         thumbnail_url = item.find(xpath_with_ns('media:thumbnail', ns)).attrib['url']
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index 04afd6c4c..42aee58be 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -109,9 +109,8 @@ class MTVIE(InfoExtractor):
     def _get_videos_info(self, uri):
         video_id = self._id_from_uri(uri)
         data = compat_urllib_parse.urlencode({'uri': uri})
-        infoXml = self._download_webpage(self._FEED_URL +'?' + data, video_id,
+        idoc = self._download_xml(self._FEED_URL +'?' + data, video_id,
                                          u'Downloading info')
-        idoc = xml.etree.ElementTree.fromstring(infoXml.encode('utf-8'))
         return [self._get_video_info(item) for item in idoc.findall('.//item')]
 
     def _real_extract(self, url):
diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py
index 107665d15..0067bf134 100644
--- a/youtube_dl/extractor/myspass.py
+++ b/youtube_dl/extractor/myspass.py
@@ -1,5 +1,4 @@
 import os.path
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -33,8 +32,7 @@ class MySpassIE(InfoExtractor):
 
         # get metadata
         metadata_url = META_DATA_URL_TEMPLATE % video_id
-        metadata_text = self._download_webpage(metadata_url, video_id)
-        metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
+        metadata = self._download_xml(metadata_url, video_id)
 
         # extract values from metadata
         url_flv_el = metadata.find('url_flv')
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
index 9df236d69..d290397c7 100644
--- a/youtube_dl/extractor/naver.py
+++ b/youtube_dl/extractor/naver.py
@@ -1,6 +1,5 @@
 # encoding: utf-8
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -38,14 +37,12 @@ class NaverIE(InfoExtractor):
             'protocol': 'p2p',
             'inKey': key,
         })
-        info_xml = self._download_webpage(
+        info = self._download_xml(
             'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
             video_id, u'Downloading video info')
-        urls_xml = self._download_webpage(
+        urls = self._download_xml(
             'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
             video_id, u'Downloading video formats info')
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
-        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))
 
         formats = []
         for format_el in urls.findall('EncodingOptions/EncodingOption'):
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
index 3bc9dae6d..e8bbfff7b 100644
--- a/youtube_dl/extractor/nbc.py
+++ b/youtube_dl/extractor/nbc.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import find_xpath_attr, compat_str
@@ -21,8 +20,8 @@ class NBCNewsIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        info_xml = self._download_webpage('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
-        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8')).find('video')
+        all_info = self._download_xml('http://www.nbcnews.com/id/%s/displaymode/1219' % video_id, video_id)
+        info = all_info.find('video')
 
         return {'id': video_id,
                 'title': info.find('headline').text,
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dl/extractor/nhl.py
index 458fe4063..2edd806a3 100644
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dl/extractor/nhl.py
@@ -1,6 +1,5 @@
 import re
 import json
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -26,9 +25,8 @@ class NHLBaseInfoExtractor(InfoExtractor):
             'path': initial_video_url.replace('.mp4', '_sd.mp4'),
         })
         path_url = 'http://video.nhl.com/videocenter/servlets/encryptvideopath?' + data
-        path_response = self._download_webpage(path_url, video_id,
+        path_doc = self._download_xml(path_url, video_id,
             u'Downloading final video url')
-        path_doc = xml.etree.ElementTree.fromstring(path_response)
         video_url = path_doc.find('path').text
 
         join = compat_urlparse.urljoin
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 729607ea3..46774317c 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -2,7 +2,6 @@
 
 import re
 import socket
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -81,7 +80,7 @@ class NiconicoIE(InfoExtractor):
         # the cookies in order to be able to download the info webpage
         self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id)
 
-        video_info_webpage = self._download_webpage(
+        video_info = self._download_xml(
             'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id,
             note=u'Downloading video info page')
 
@@ -92,7 +91,6 @@ class NiconicoIE(InfoExtractor):
         video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0]
 
         # Start extracting information
-        video_info = xml.etree.ElementTree.fromstring(video_info_webpage)
         video_title = video_info.find('.//title').text
         video_extension = video_info.find('.//movie_type').text
         video_format = video_extension.upper()
@@ -107,13 +105,11 @@ class NiconicoIE(InfoExtractor):
         video_uploader = video_uploader_id
         url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id
         try:
-            user_info_webpage = self._download_webpage(
+            user_info = self._download_xml(
                 url, video_id, note=u'Downloading user information')
+            video_uploader = user_info.find('.//nickname').text
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.report_warning(u'Unable to download user info webpage: %s' % compat_str(err))
-        else:
-            user_info = xml.etree.ElementTree.fromstring(user_info_webpage)
-            video_uploader = user_info.find('.//nickname').text
 
         return {
             'id':          video_id,
diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py
index 14b1c656c..74a87fe56 100644
--- a/youtube_dl/extractor/sina.py
+++ b/youtube_dl/extractor/sina.py
@@ -1,7 +1,6 @@
 # coding: utf-8
 
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -35,12 +34,11 @@ class SinaIE(InfoExtractor):
 
     def _extract_video(self, video_id):
         data = compat_urllib_parse.urlencode({'vid': video_id})
-        url_page = self._download_webpage('http://v.iask.com/v_play.php?%s' % data,
+        url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data,
             video_id, u'Downloading video url')
         image_page = self._download_webpage(
             'http://interface.video.sina.com.cn/interface/common/getVideoImage.php?%s' % data,
             video_id, u'Downloading thumbnail info')
-        url_doc = xml.etree.ElementTree.fromstring(url_page.encode('utf-8'))
 
         return {'id': video_id,
                 'url': url_doc.find('./durl/url').text,
diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py
index 19ce585cf..695520524 100644
--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dl/extractor/spiegel.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 
@@ -33,12 +32,10 @@ class SpiegelIE(InfoExtractor):
             r'<div class="module-title">(.*?)</div>', webpage, u'title')
 
         xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
-        xml_code = self._download_webpage(
+        idoc = self._download_xml(
             xml_url, video_id,
             note=u'Downloading XML', errnote=u'Failed to download XML')
 
-        idoc = xml.etree.ElementTree.fromstring(xml_code)
-
         formats = [
             {
                 'format_id': n.tag.rpartition('type')[2],
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 165d9f88b..2bf26d056 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -32,8 +31,7 @@ class TeamcocoIE(InfoExtractor):
         self.report_extraction(video_id)
 
         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id
-        data_xml = self._download_webpage(data_url, video_id, 'Downloading data webpage')
-        data = xml.etree.ElementTree.fromstring(data_xml.encode('utf-8'))
+        data = self._download_xml(data_url, video_id, 'Downloading data webpage')
 
 
         qualities = ['500k', '480p', '1000k', '720p', '1080p']
diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py
index 2f728d3dc..1e9598ef6 100644
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dl/extractor/toutv.py
@@ -1,6 +1,5 @@
 # coding: utf-8
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -40,11 +39,9 @@ class TouTvIE(InfoExtractor):
             r'"idMedia":\s*"([^"]+)"', webpage, u'media ID')
 
         streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId
-        streams_webpage = self._download_webpage(
+        streams_doc = self._download_xml(
             streams_url, video_id, note=u'Downloading stream list')
 
-        streams_doc = xml.etree.ElementTree.fromstring(
-            streams_webpage.encode('utf-8'))
         video_url = next(n.text
                          for n in streams_doc.findall('.//choice/url')
                          if u'//ad.doubleclick' not in n.text)
diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dl/extractor/trilulilu.py
index 0bf028f61..1c49e580d 100644
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dl/extractor/trilulilu.py
@@ -1,6 +1,5 @@
 import json
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 
@@ -36,12 +35,10 @@ class TriluliluIE(InfoExtractor):
 
         format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
                       u'video-formats2' % log)
-        format_str = self._download_webpage(
+        format_doc = self._download_xml(
             format_url, video_id,
             note=u'Downloading formats',
             errnote=u'Error while downloading formats')
-
-        format_doc = xml.etree.ElementTree.fromstring(format_str)
  
         video_url_template = (
             u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
diff --git a/youtube_dl/extractor/videofyme.py b/youtube_dl/extractor/videofyme.py
index 94f64ffa5..912802d9a 100644
--- a/youtube_dl/extractor/videofyme.py
+++ b/youtube_dl/extractor/videofyme.py
@@ -1,5 +1,4 @@
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -27,9 +26,8 @@ class VideofyMeIE(InfoExtractor):
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
-        config_xml = self._download_webpage('http://sunshine.videofy.me/?videoId=%s' % video_id,
+        config = self._download_xml('http://sunshine.videofy.me/?videoId=%s' % video_id,
                                             video_id)
-        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
         video = config.find('video')
         sources = video.find('sources')
         url_node = next(node for node in [find_xpath_attr(sources, 'source', 'id', 'HQ %s' % key) 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 4c43d5739..a76a9071a 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -11,7 +11,6 @@ import socket
 import string
 import struct
 import traceback
-import xml.etree.ElementTree
 import zlib
 
 from .common import InfoExtractor, SearchInfoExtractor
@@ -1144,8 +1143,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'asrs': 1,
             })
             list_url = caption_url + '&' + list_params
-            list_page = self._download_webpage(list_url, video_id)
-            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
+            caption_list = self._download_xml(list_url, video_id)
             original_lang_node = caption_list.find('track')
             if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                 self._downloader.report_warning(u'Video doesn\'t have automatic captions')
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2013-11-26 18:48:52 +0100
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2013-11-26 19:17:25 +0100
commit	e26f8712289c727a43d74a4669aee4924b9f75f2 (patch)
tree	ce2e34f10baacc0eb24d0527154e5374560901cd
parent	6e47b51eef26dbaa3634b73914e4ee7213ad38f7 (diff)