diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2015-03-02 15:21:11 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2015-03-02 18:21:31 +0100 |
commit | 76c73715fb1e0eee61ace5ff7855d8237abdcd54 (patch) | |
tree | 18b355f14ccb055e1bc540f36b64a261a0deac78 /youtube_dl/extractor/generic.py | |
parent | c75f0b361a1b00f6ac1298615d6fee101994b2b9 (diff) |
[generic] Parse RSS enclosure URLs (Fixes #5091)
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r-- | youtube_dl/extractor/generic.py | 34 |
1 files changed, 29 insertions, 5 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 27e2bc300..5dc53685c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -26,6 +26,7 @@ from ..utils import ( unsmuggle_url, UnsupportedError, url_basename, + xpath_text, ) from .brightcove import BrightcoveIE from .ooyala import OoyalaIE @@ -569,6 +570,16 @@ class GenericIE(InfoExtractor): 'title': 'John Carlson Postgame 2/25/15', }, }, + # RSS feed with enclosure + { + 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', + 'info_dict': { + 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + 'ext': 'm4v', + 'upload_date': '20150228', + 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + } + } ] def report_following_redirect(self, new_url): @@ -580,11 +591,24 @@ class GenericIE(InfoExtractor): playlist_desc_el = doc.find('./channel/description') playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text - entries = [{ - '_type': 'url', - 'url': e.find('link').text, - 'title': e.find('title').text, - } for e in doc.findall('./channel/item')] + entries = [] + for it in doc.findall('./channel/item'): + next_url = xpath_text(it, 'link', fatal=False) + if not next_url: + enclosure_nodes = it.findall('./enclosure') + for e in enclosure_nodes: + next_url = e.attrib.get('url') + if next_url: + break + + if not next_url: + continue + + entries.append({ + '_type': 'url', + 'url': next_url, + 'title': it.find('title').text, + }) return { '_type': 'playlist', |