diff options
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-11-24 14:59:19 +0100 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-11-24 14:59:19 +0100 | 
| commit | 267ed0c5d3547c68f1d34203c2ae4b0d826a29d9 (patch) | |
| tree | dc86b8beea51b1e445280992ec2db99ed128b016 | |
| parent | f459d17018812dc896324f8208cdfe2ada04ea50 (diff) | |
[collegehumor] Encode the xml before calling xml.etree.ElementTree.fromstring (fixes #1822)
Uses a new helper method in InfoExtractor: _download_xml
| -rw-r--r-- | youtube_dl/extractor/collegehumor.py | 7 |
| -rw-r--r-- | youtube_dl/extractor/common.py | 6 | 
2 files changed, 8 insertions, 5 deletions
diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index 0c29acfb1..b27c1dfc5 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -1,5 +1,4 @@  import re -import xml.etree.ElementTree  from .common import InfoExtractor  from ..utils import ( @@ -46,11 +45,10 @@ class CollegeHumorIE(InfoExtractor):          self.report_extraction(video_id)          xmlUrl = 'http://www.collegehumor.com/moogaloop/video/' + video_id -        metaXml = self._download_webpage(xmlUrl, video_id, +        mdoc = self._download_xml(xmlUrl, video_id,                                           u'Downloading info XML',                                           u'Unable to download video info XML') -        mdoc = xml.etree.ElementTree.fromstring(metaXml)          try:              videoNode = mdoc.findall('./video')[0]              youtubeIdNode = videoNode.find('./youtubeID') @@ -65,11 +63,10 @@ class CollegeHumorIE(InfoExtractor):          if next_url.endswith(u'manifest.f4m'):              manifest_url = next_url + '?hdcore=2.10.3' -            manifestXml = self._download_webpage(manifest_url, video_id, +            adoc = self._download_xml(manifest_url, video_id,                                           u'Downloading XML manifest',                                           u'Unable to download video info XML') -            adoc = xml.etree.ElementTree.fromstring(manifestXml)              try:                  video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text              except IndexError: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 3cebeaf29..482a231ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -4,6 +4,7 @@ import re  import socket  import sys  import netrc +import xml.etree.ElementTree  from ..utils import (      compat_http_client, @@ -208,6 +209,11 @@ class InfoExtractor(object):          """ Returns the 
data of the page as a string """          return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0] +    def _download_xml(self, url_or_request, video_id, note=u'Downloading XML', errnote=u'Unable to downloand XML'): +        """Return the xml as an xml.etree.ElementTree.Element""" +        xml_string = self._download_webpage(url_or_request, video_id, note, errnote) +        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) +      def to_screen(self, msg):          """Print msg to screen, prefixing it with '[ie_name]'"""          self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))  | 
