aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/generic.py
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2015-03-02 15:21:11 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2015-03-02 18:21:31 +0100
commit 76c73715fb1e0eee61ace5ff7855d8237abdcd54 (patch)
tree 18b355f14ccb055e1bc540f36b64a261a0deac78 /youtube_dl/extractor/generic.py
parent c75f0b361a1b00f6ac1298615d6fee101994b2b9 (diff)
[generic] Parse RSS enclosure URLs (Fixes #5091)
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r-- youtube_dl/extractor/generic.py 34
1 files changed, 29 insertions, 5 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 27e2bc300..5dc53685c 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -26,6 +26,7 @@ from ..utils import (
unsmuggle_url,
UnsupportedError,
url_basename,
+ xpath_text,
)
from .brightcove import BrightcoveIE
from .ooyala import OoyalaIE
@@ -569,6 +570,16 @@ class GenericIE(InfoExtractor):
'title': 'John Carlson Postgame 2/25/15',
},
},
+ # RSS feed with enclosure
+ {
+ 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+ 'info_dict': {
+ 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ 'ext': 'm4v',
+ 'upload_date': '20150228',
+ 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ }
+ }
]
def report_following_redirect(self, new_url):
@@ -580,11 +591,24 @@ class GenericIE(InfoExtractor):
playlist_desc_el = doc.find('./channel/description')
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
- entries = [{
- '_type': 'url',
- 'url': e.find('link').text,
- 'title': e.find('title').text,
- } for e in doc.findall('./channel/item')]
+ entries = []
+ for it in doc.findall('./channel/item'):
+ next_url = xpath_text(it, 'link', fatal=False)
+ if not next_url:
+ enclosure_nodes = it.findall('./enclosure')
+ for e in enclosure_nodes:
+ next_url = e.attrib.get('url')
+ if next_url:
+ break
+
+ if not next_url:
+ continue
+
+ entries.append({
+ '_type': 'url',
+ 'url': next_url,
+ 'title': it.find('title').text,
+ })
return {
'_type': 'playlist',