diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2014-02-20 13:14:05 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2014-02-20 13:14:09 +0100 | 
| commit | 4fc946b546c2a471774646f7da291105f8a0cb99 (patch) | |
| tree | 06fc9716e83bcb2e92a30a3ebf3bad58267e5d1e | |
| parent | 280bc5dad651728e493b3b25a672a9aaef590683 (diff) | |
[generic] Add support for RSS feeds (Fixes #667)
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | test/test_playlists.py | 9 |
| -rw-r--r-- | youtube_dl/extractor/generic.py | 28 |
2 files changed, 37 insertions, 0 deletions
```diff
diff --git a/test/test_playlists.py b/test/test_playlists.py
index 1de9e8ec1..25bec9f1c 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -250,5 +250,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['title'], 'python language')
         self.assertTrue(len(result['entries']) == 15)
 
+    def test_generic_rss_feed(self):
+        dl = FakeYDL()
+        ie = GenericIE(dl)
+        result = ie.extract('http://www.escapistmagazine.com/rss/videos/list/1.xml')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], 'http://www.escapistmagazine.com/rss/videos/list/1.xml')
+        self.assertEqual(result['title'], 'Zero Punctuation')
+        self.assertTrue(len(result['entries']) > 10)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 5bcc78bf7..30160d59d 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 
 import os
 import re
+import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
@@ -159,6 +160,25 @@ class GenericIE(InfoExtractor):
             raise ExtractorError('Invalid URL protocol')
         return response
 
+    def _extract_rss(self, url, video_id, doc):
+        playlist_title = doc.find('./channel/title').text
+        playlist_desc_el = doc.find('./channel/description')
+        playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
+
+        entries = [{
+            '_type': 'url',
+            'url': e.find('link').text,
+            'title': e.find('title').text,
+        } for e in doc.findall('./channel/item')]
+
+        return {
+            '_type': 'playlist',
+            'id': url,
+            'title': playlist_title,
+            'description': playlist_desc,
+            'entries': entries,
+        }
+
     def _real_extract(self, url):
         parsed_url = compat_urlparse.urlparse(url)
         if not parsed_url.scheme:
@@ -219,6 +239,14 @@ class GenericIE(InfoExtractor):
 
         self.report_extraction(video_id)
 
+        # Is it an RSS feed?
+        try:
+            doc = xml.etree.ElementTree.fromstring(webpage)
+            if doc.tag == 'rss':
+                return self._extract_rss(url, video_id, doc)
+        except xml.etree.ElementTree.ParseError:
+            pass
+
         # it's tempting to parse this further, but you would
         # have to take into account all the variations like
         #   Video Title - Site Name
```
