aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/vimeo.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/vimeo.py')
-rw-r--r--youtube_dl/extractor/vimeo.py138
1 files changed, 108 insertions, 30 deletions
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index c52986af6..7e854f326 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -16,6 +16,7 @@ from ..utils import (
ExtractorError,
InAdvancePagedList,
int_or_none,
+ NO_DEFAULT,
RegexNotFoundError,
sanitized_Request,
smuggle_url,
@@ -56,6 +57,26 @@ class VimeoBaseInfoExtractor(InfoExtractor):
self._set_vimeo_cookie('vuid', vuid)
self._download_webpage(login_request, None, False, 'Wrong login info')
+ def _verify_video_password(self, url, video_id, webpage):
+ password = self._downloader.params.get('videopassword')
+ if password is None:
+ raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
+ token, vuid = self._extract_xsrft_and_vuid(webpage)
+ data = urlencode_postdata({
+ 'password': password,
+ 'token': token,
+ })
+ if url.startswith('http://'):
+ # vimeo only supports https now, but the user can give an http url
+ url = url.replace('http://', 'https://')
+ password_request = sanitized_Request(url + '/password', data)
+ password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ password_request.add_header('Referer', url)
+ self._set_vimeo_cookie('vuid', vuid)
+ return self._download_webpage(
+ password_request, video_id,
+ 'Verifying the password', 'Wrong password')
+
def _extract_xsrft_and_vuid(self, webpage):
xsrft = self._search_regex(
r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
@@ -146,7 +167,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
\.
)?
vimeo(?P<pro>pro)?\.com/
- (?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/)
+ (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
(?:.*?/)?
(?:
(?:
@@ -227,8 +248,6 @@ class VimeoIE(VimeoBaseInfoExtractor):
{
'url': 'http://vimeo.com/channels/keypeele/75629013',
'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
- 'note': 'Video is freely available via original URL '
- 'and protected with password when accessed via http://vimeo.com/75629013',
'info_dict': {
'id': '75629013',
'ext': 'mp4',
@@ -272,7 +291,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
{
# contains original format
'url': 'https://vimeo.com/33951933',
- 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
+ 'md5': '2d9f5475e0537f013d0073e812ab89e6',
'info_dict': {
'id': '33951933',
'ext': 'mp4',
@@ -285,6 +304,29 @@ class VimeoIE(VimeoBaseInfoExtractor):
},
},
{
+ # only available via https://vimeo.com/channels/tributes/6213729 and
+ # not via https://vimeo.com/6213729
+ 'url': 'https://vimeo.com/channels/tributes/6213729',
+ 'info_dict': {
+ 'id': '6213729',
+ 'ext': 'mp4',
+ 'title': 'Vimeo Tribute: The Shining',
+ 'uploader': 'Casey Donahue',
+ 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/caseydonahue',
+ 'uploader_id': 'caseydonahue',
+ 'upload_date': '20090821',
+ 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ },
+ {
+ 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
+ 'only_matching': True,
+ },
+ {
'url': 'https://vimeo.com/109815029',
'note': 'Video not completely processed, "failed" seed status',
'only_matching': True,
@@ -294,6 +336,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'only_matching': True,
},
{
+ 'url': 'https://vimeo.com/album/2632481/video/79010983',
+ 'only_matching': True,
+ },
+ {
# source file returns 403: Forbidden
'url': 'https://vimeo.com/7809605',
'only_matching': True,
@@ -318,26 +364,11 @@ class VimeoIE(VimeoBaseInfoExtractor):
r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
if mobj:
return mobj.group(1)
-
- def _verify_video_password(self, url, video_id, webpage):
- password = self._downloader.params.get('videopassword')
- if password is None:
- raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
- token, vuid = self._extract_xsrft_and_vuid(webpage)
- data = urlencode_postdata({
- 'password': password,
- 'token': token,
- })
- if url.startswith('http://'):
- # vimeo only supports https now, but the user can give an http url
- url = url.replace('http://', 'https://')
- password_request = sanitized_Request(url + '/password', data)
- password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- password_request.add_header('Referer', url)
- self._set_vimeo_cookie('vuid', vuid)
- return self._download_webpage(
- password_request, video_id,
- 'Verifying the password', 'Wrong password')
+ # Look more for non-standard embedded Vimeo player
+ mobj = re.search(
+ r'<video[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)(?P=q1)', webpage)
+ if mobj:
+ return mobj.group('url')
def _verify_player_video_password(self, url, video_id):
password = self._downloader.params.get('videopassword')
@@ -369,7 +400,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
orig_url = url
if mobj.group('pro') or mobj.group('player'):
url = 'https://player.vimeo.com/video/' + video_id
- else:
+ elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
@@ -630,8 +661,21 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
webpage = self._login_list_password(page_url, list_id, webpage)
yield self._extract_list_title(webpage)
- for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
- yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
+ # Try extracting href first since not all videos are available via
+ # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
+ clips = re.findall(
+ r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
+ if clips:
+ for video_id, video_url in clips:
+ yield self.url_result(
+ compat_urlparse.urljoin(base_url, video_url),
+ VimeoIE.ie_key(), video_id=video_id)
+ # More relaxed fallback
+ else:
+ for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
+ yield self.url_result(
+ 'https://vimeo.com/%s' % video_id,
+ VimeoIE.ie_key(), video_id=video_id)
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
@@ -668,7 +712,7 @@ class VimeoUserIE(VimeoChannelIE):
class VimeoAlbumIE(VimeoChannelIE):
IE_NAME = 'vimeo:album'
- _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
+ _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
_TESTS = [{
'url': 'https://vimeo.com/album/2632481',
@@ -688,6 +732,13 @@ class VimeoAlbumIE(VimeoChannelIE):
'params': {
'videopassword': 'youtube-dl',
}
+ }, {
+ 'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
+ 'only_matching': True,
+ }, {
+ # TODO: respect page number
+ 'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
+ 'only_matching': True,
}]
def _page_url(self, base_url, pagenum):
@@ -746,12 +797,39 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
'thumbnail': 're:^https?://.*\.jpg$',
'uploader_id': 'user22258446',
}
+ }, {
+ 'note': 'Password protected',
+ 'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
+ 'info_dict': {
+ 'id': '138823582',
+ 'ext': 'mp4',
+ 'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
+ 'uploader': 'TMB',
+ 'uploader_id': 'user37284429',
+ },
+ 'params': {
+ 'videopassword': 'holygrail',
+ },
}]
+ def _real_initialize(self):
+ self._login()
+
+ def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
+ webpage = self._download_webpage(webpage_url, video_id)
+ config_url = self._html_search_regex(
+ r'data-config-url="([^"]+)"', webpage, 'config URL',
+ default=NO_DEFAULT if video_password_verified else None)
+ if config_url is None:
+ self._verify_video_password(webpage_url, video_id, webpage)
+ config_url = self._get_config_url(
+ webpage_url, video_id, video_password_verified=True)
+ return config_url
+
def _real_extract(self, url):
video_id = self._match_id(url)
- config = self._download_json(
- 'https://player.vimeo.com/video/%s/config' % video_id, video_id)
+ config_url = self._get_config_url(url, video_id)
+ config = self._download_json(config_url, video_id)
info_dict = self._parse_config(config, video_id)
self._vimeo_sort_formats(info_dict['formats'])
info_dict['id'] = video_id