about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2014-02-20 13:14:05 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2014-02-20 13:14:09 +0100
commit 4fc946b546c2a471774646f7da291105f8a0cb99 (patch)
tree 06fc9716e83bcb2e92a30a3ebf3bad58267e5d1e
parent 280bc5dad651728e493b3b25a672a9aaef590683 (diff)
download youtube-dl-4fc946b546c2a471774646f7da291105f8a0cb99.tar.xz
[generic] Add support for RSS feeds (Fixes #667)
-rw-r--r-- test/test_playlists.py 9
-rw-r--r-- youtube_dl/extractor/generic.py 28
2 files changed, 37 insertions, 0 deletions
diff --git a/test/test_playlists.py b/test/test_playlists.py
index 1de9e8ec1..25bec9f1c 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -250,5 +250,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], 'python language')
self.assertTrue(len(result['entries']) == 15)
+ def test_generic_rss_feed(self):
+ dl = FakeYDL()
+ ie = GenericIE(dl)
+ result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
+ self.assertEqual(result['title'], 'Zero Punctuation')
+ self.assertTrue(len(result['entries']) > 10)
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 5bcc78bf7..30160d59d 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
import os
import re
+import xml.etree.ElementTree
from .common import InfoExtractor
from .youtube import YoutubeIE
@@ -159,6 +160,25 @@ class GenericIE(InfoExtractor):
raise ExtractorError('Invalid URL protocol')
return response
+ def _extract_rss(self, url, video_id, doc):
+ playlist_title = doc.find('./channel/title').text
+ playlist_desc_el = doc.find('./channel/description')
+ playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
+
+ entries = [{
+ '_type': 'url',
+ 'url': e.find('link').text,
+ 'title': e.find('title').text,
+ } for e in doc.findall('./channel/item')]
+
+ return {
+ '_type': 'playlist',
+ 'id': url,
+ 'title': playlist_title,
+ 'description': playlist_desc,
+ 'entries': entries,
+ }
+
def _real_extract(self, url):
parsed_url = compat_urlparse.urlparse(url)
if not parsed_url.scheme:
@@ -219,6 +239,14 @@ class GenericIE(InfoExtractor):
self.report_extraction(video_id)
+ # Is it an RSS feed?
+ try:
+ doc = xml.etree.ElementTree.fromstring(webpage)
+ if doc.tag == 'rss':
+ return self._extract_rss(url, video_id, doc)
+ except xml.etree.ElementTree.ParseError:
+ pass
+
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name