diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2013-12-10 12:45:22 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2013-12-10 12:45:22 +0100 |
commit | e2b38da93112c97d46d612bf89c329b22ac2d00d (patch) | |
tree | 1bf6d351696e3cff54a3b247f0c842b04dcb1cbc /youtube_dl/extractor | |
parent | a30a60d8eb027a55ec14c912bad4359b3128997e (diff) |
[mtv] Fixup incorrectly encoded XML documents
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/common.py | 5 | ||||
-rw-r--r-- | youtube_dl/extractor/mtv.py | 9 |
2 files changed, 11 insertions, 3 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 534908a2b..69a083b68 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -230,9 +230,12 @@ class InfoExtractor(object): return content def _download_xml(self, url_or_request, video_id, - note=u'Downloading XML', errnote=u'Unable to download XML'): + note=u'Downloading XML', errnote=u'Unable to download XML', + transform_source=None): """Return the xml as an xml.etree.ElementTree.Element""" xml_string = self._download_webpage(url_or_request, video_id, note, errnote) + if transform_source: + xml_string = transform_source(xml_string) return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) def to_screen(self, msg): diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 6b3feb560..5b2bd9633 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -82,8 +82,13 @@ class MTVServicesInfoExtractor(InfoExtractor): def _get_videos_info(self, uri): video_id = self._id_from_uri(uri) data = compat_urllib_parse.urlencode({'uri': uri}) - idoc = self._download_xml(self._FEED_URL +'?' + data, video_id, - u'Downloading info') + + def fix_ampersand(s): + """ Fix unencoded ampersand in XML """ + return s.replace(u'& ', '&amp; ') + idoc = self._download_xml( + self._FEED_URL + '?' + data, video_id, + u'Downloading info', transform_source=fix_ampersand) return [self._get_video_info(item) for item in idoc.findall('.//item')] |