diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2014-08-25 18:03:01 +0200 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2014-08-25 18:03:01 +0200 |
commit | 0990305d2acc4c1b7869dae2773c1f24125804bd (patch) | |
tree | 629d42ba5f42c637d875299f95a864b34d27fba7 /youtube_dl | |
parent | 829476b80a86819c79511f60f4fc25f09ab186b7 (diff) |
[generic] Fix rss under Python 2.x and move test to extractor
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/generic.py | 10 | ||||
-rw-r--r-- | youtube_dl/utils.py | 15 |
2 files changed, 24 insertions, 1 deletion
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 0ec23a365..44f7ea3fd 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -341,6 +341,16 @@ class GenericIE(InfoExtractor): 'uploader': 'www.handjobhub.com', 'title': 'Busty Blonde Siri Tit Fuck While Wank at Handjob Hub', } + }, + # RSS feed + { + 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml', + 'info_dict': { + 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', + 'title': 'Zero Punctuation', + 'description': 're:' + }, + 'playlist_mincount': 11, } ] diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 53977cd2a..16bc7408a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1458,6 +1458,12 @@ def urlencode_postdata(*args, **kargs): return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii') +try: + etree_iter = xml.etree.ElementTree.Element.iter +except AttributeError: # Python <=2.6 + etree_iter = lambda n: n.findall('.//*') + + def parse_xml(s): class TreeBuilder(xml.etree.ElementTree.TreeBuilder): def doctype(self, name, pubid, system): @@ -1465,7 +1471,14 @@ def parse_xml(s): parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder()) kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {} - return xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs) + tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs) + # Fix up XML parser in Python 2.x + if sys.version_info < (3, 0): + for n in etree_iter(tree): + if n.text is not None: + if not isinstance(n.text, compat_str): + n.text = n.text.decode('utf-8') + return tree if sys.version_info < (3, 0) and sys.platform == 'win32': |