diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2015-02-10 04:53:21 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2015-02-10 04:53:21 +0100 |
commit | bf8f082a90f4396a0a70464b55bf5c11fdb36c47 (patch) | |
tree | 9705cd92310dba38ab92fbdd7db405098614b49f | |
parent | 2f543a2142e05bac988ccd6566e0c9fc5cadca90 (diff) |
[vimeo:album] Add support for album passwords (Fixes #4917)
-rw-r--r-- | youtube_dl/extractor/vimeo.py | 59 |
1 files changed, 52 insertions, 7 deletions
```diff
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 1bb473514..303e81447 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -188,9 +188,9 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
         password_request = compat_urllib_request.Request(pass_url + '/password', data)
         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
         password_request.add_header('Cookie', 'xsrft=%s' % token)
-        self._download_webpage(password_request, video_id,
-                               'Verifying the password',
-                               'Wrong password')
+        return self._download_webpage(
+            password_request, video_id,
+            'Verifying the password', 'Wrong password')
 
     def _verify_player_video_password(self, url, video_id):
         password = self._downloader.params.get('videopassword', None)
@@ -266,7 +266,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
             if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
                 raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
 
-            if re.search('<form[^>]+?id="pw_form"', webpage) is not None:
+            if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
                 self._verify_video_password(url, video_id, webpage)
                 return self._real_extract(url)
             else:
@@ -412,12 +412,47 @@ class VimeoChannelIE(InfoExtractor):
     def _extract_list_title(self, webpage):
         return self._html_search_regex(self._TITLE_RE, webpage, 'list title')
 
+    def _login_list_password(self, page_url, list_id, webpage):
+        login_form = self._search_regex(
+            r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
+            webpage, 'login form', default=None)
+        if not login_form:
+            return webpage
+
+        password = self._downloader.params.get('videopassword', None)
+        if password is None:
+            raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
+        fields = dict(re.findall(r'''(?x)<input\s+
+            type="hidden"\s+
+            name="([^"]+)"\s+
+            value="([^"]*)"
+            ''', login_form))
+        token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token')
+        fields['token'] = token
+        fields['password'] = password
+        post = compat_urllib_parse.urlencode(fields)
+        password_path = self._search_regex(
+            r'action="([^"]+)"', login_form, 'password URL')
+        password_url = compat_urlparse.urljoin(page_url, password_path)
+        password_request = compat_urllib_request.Request(password_url, post)
+        password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
+        self._set_cookie('vimeo.com', 'xsrft', token)
+
+        return self._download_webpage(
+            password_request, list_id,
+            'Verifying the password', 'Wrong password')
+
     def _extract_videos(self, list_id, base_url):
         video_ids = []
         for pagenum in itertools.count(1):
+            page_url = self._page_url(base_url, pagenum)
             webpage = self._download_webpage(
-                self._page_url(base_url, pagenum), list_id,
+                page_url, list_id,
                 'Downloading page %s' % pagenum)
+
+            if pagenum == 1:
+                webpage = self._login_list_password(page_url, list_id, webpage)
+
             video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                 break
@@ -464,14 +499,24 @@ class VimeoAlbumIE(VimeoChannelIE):
             'title': 'Staff Favorites: November 2013',
         },
         'playlist_mincount': 13,
+    }, {
+        'note': 'Password-protected album',
+        'url': 'https://vimeo.com/album/3253534',
+        'info_dict': {
+            'title': 'test',
+            'id': '3253534',
+        },
+        'playlist_count': 1,
+        'params': {
+            'videopassword': 'youtube-dl',
+        }
     }]
 
     def _page_url(self, base_url, pagenum):
         return '%s/page:%d/' % (base_url, pagenum)
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        album_id = mobj.group('id')
+        album_id = self._match_id(url)
         return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id)
 
 
```