[vimeo] add an extractor for channels

author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-07-29 13:12:09 +0200
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-07-29 13:12:09 +0200
commit: caeefc29ebce8dbfb0c25a79887719055276c9eb (patch)
tree: ad086bcdc90395225fc6cd88ada58e293542521f /youtube_dl/extractor
parent: a3c736def2058547e2c9e9fa4013f7c547c72c55 (diff)
2 files changed, 30 insertions, 1 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 063a3d065..9f4dd3c24 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -71,7 +71,7 @@ from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .veoh import VeohIE
 from .vevo import VevoIE
-from .vimeo import VimeoIE
+from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .c56 import C56IE
 from .wat import WatIE
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index ac32043c1..cc9c8d018 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -1,5 +1,6 @@
 import json
 import re
+import itertools
 
 from .common import InfoExtractor
 from ..utils import (
@@ -171,3 +172,31 @@ class VimeoIE(InfoExtractor):
             'thumbnail':    video_thumbnail,
             'description':  video_description,
         }]
+
+
+class VimeoChannelIE(InfoExtractor):
+    """Extract all the videos listed on a Vimeo channel as a playlist."""
+    IE_NAME = u'vimeo:channel'
+    _VALID_URL = r'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)'
+    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
+
+    def _real_extract(self, url):
+        """Return a playlist dict with one url_result per video id found."""
+        channel_id = re.match(self._VALID_URL, url).group('id')
+        video_ids = []
+        # Walk the numbered listing pages until no rel="next" link remains.
+        for pagenum in itertools.count(1):
+            webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
+                                             channel_id, u'Downloading page %s' % pagenum)
+            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
+            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
+                break
+
+        entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
+                   for video_id in video_ids]
+        # The title is searched in the last page fetched. The id is escaped
+        # so that regex metacharacters in it are matched literally.
+        channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % re.escape(channel_id),
+                                                webpage, u'channel title')
+        return {'_type': 'playlist', 'id': channel_id,
+                'title': channel_title, 'entries': entries}
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2013-07-29 13:12:09 +0200
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2013-07-29 13:12:09 +0200
commit	caeefc29ebce8dbfb0c25a79887719055276c9eb (patch)
tree	ad086bcdc90395225fc6cd88ada58e293542521f /youtube_dl/extractor
parent	a3c736def2058547e2c9e9fa4013f7c547c72c55 (diff)