aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/vimeo.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/vimeo.py')
-rw-r--r-- youtube_dl/extractor/vimeo.py 139
1 files changed, 71 insertions, 68 deletions
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index fa1b22049..f392ccf1c 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -8,15 +8,15 @@ import itertools
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
- compat_urllib_parse,
- compat_urllib_request,
compat_urlparse,
)
from ..utils import (
+ encode_dict,
ExtractorError,
InAdvancePagedList,
int_or_none,
RegexNotFoundError,
+ sanitized_Request,
smuggle_url,
std_headers,
unified_strdate,
@@ -40,17 +40,17 @@ class VimeoBaseInfoExtractor(InfoExtractor):
self.report_login()
webpage = self._download_webpage(self._LOGIN_URL, None, False)
token, vuid = self._extract_xsrft_and_vuid(webpage)
- data = urlencode_postdata({
+ data = urlencode_postdata(encode_dict({
'action': 'login',
'email': username,
'password': password,
'service': 'vimeo',
'token': token,
- })
- login_request = compat_urllib_request.Request(self._LOGIN_URL, data)
+ }))
+ login_request = sanitized_Request(self._LOGIN_URL, data)
login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- login_request.add_header('Cookie', 'vuid=%s' % vuid)
login_request.add_header('Referer', self._LOGIN_URL)
+ self._set_vimeo_cookie('vuid', vuid)
self._download_webpage(login_request, None, False, 'Wrong login info')
def _extract_xsrft_and_vuid(self, webpage):
@@ -62,6 +62,9 @@ class VimeoBaseInfoExtractor(InfoExtractor):
webpage, 'vuid', group='vuid')
return xsrft, vuid
+ def _set_vimeo_cookie(self, name, value):
+ self._set_cookie('vimeo.com', name, value)
+
class VimeoIE(VimeoBaseInfoExtractor):
"""Information extractor for vimeo.com."""
@@ -133,7 +136,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_id': 'user18948128',
'uploader': 'Jaime Marquínez Ferrándiz',
'duration': 10,
- 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people who love them.',
+ 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people\u2026',
},
'params': {
'videopassword': 'youtube-dl',
@@ -181,6 +184,15 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_id': 'user28849593',
},
},
+ {
+ 'url': 'https://vimeo.com/109815029',
+ 'note': 'Video not completely processed, "failed" seed status',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
+ 'only_matching': True,
+ },
]
@staticmethod
@@ -203,17 +215,17 @@ class VimeoIE(VimeoBaseInfoExtractor):
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
token, vuid = self._extract_xsrft_and_vuid(webpage)
- data = urlencode_postdata({
+ data = urlencode_postdata(encode_dict({
'password': password,
'token': token,
- })
+ }))
if url.startswith('http://'):
# vimeo only supports https now, but the user can give an http url
url = url.replace('http://', 'https://')
- password_request = compat_urllib_request.Request(url + '/password', data)
+ password_request = sanitized_Request(url + '/password', data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- password_request.add_header('Cookie', 'clip_test2=1; vuid=%s' % vuid)
password_request.add_header('Referer', url)
+ self._set_vimeo_cookie('vuid', vuid)
return self._download_webpage(
password_request, video_id,
'Verifying the password', 'Wrong password')
@@ -222,9 +234,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
password = self._downloader.params.get('videopassword', None)
if password is None:
raise ExtractorError('This video is protected by a password, use the --video-password option')
- data = compat_urllib_parse.urlencode({'password': password})
+ data = urlencode_postdata(encode_dict({'password': password}))
pass_url = url + '/check-password'
- password_request = compat_urllib_request.Request(pass_url, data)
+ password_request = sanitized_Request(pass_url, data)
password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
return self._download_json(
password_request, video_id,
@@ -253,7 +265,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
url = 'https://vimeo.com/' + video_id
# Retrieve video webpage to extract further information
- request = compat_urllib_request.Request(url, None, headers)
+ request = sanitized_Request(url, None, headers)
try:
webpage = self._download_webpage(request, video_id)
except ExtractorError as ee:
@@ -273,20 +285,30 @@ class VimeoIE(VimeoBaseInfoExtractor):
self.report_extraction(video_id)
vimeo_config = self._search_regex(
- r'vimeo\.config\s*=\s*({.+?});', webpage,
+ r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
'vimeo config', default=None)
if vimeo_config:
seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
if seed_status.get('state') == 'failed':
raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, seed_status['title']),
+ '%s said: %s' % (self.IE_NAME, seed_status['title']),
expected=True)
# Extract the config JSON
try:
try:
config_url = self._html_search_regex(
- r' data-config-url="(.+?)"', webpage, 'config URL')
+ r' data-config-url="(.+?)"', webpage,
+ 'config URL', default=None)
+ if not config_url:
+ # Sometimes a new React-based page is served instead of the old one; it requires
+ # a different config URL extraction approach (see
+ # https://github.com/rg3/youtube-dl/pull/7209)
+ vimeo_clip_page_config = self._search_regex(
+ r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
+ 'vimeo clip page config')
+ config_url = self._parse_json(
+ vimeo_clip_page_config, video_id)['player']['config_url']
config_json = self._download_webpage(config_url, video_id)
config = json.loads(config_json)
except RegexNotFoundError:
@@ -369,47 +391,29 @@ class VimeoIE(VimeoBaseInfoExtractor):
like_count = None
comment_count = None
- # Vimeo specific: extract request signature and timestamp
- sig = config['request']['signature']
- timestamp = config['request']['timestamp']
-
- # Vimeo specific: extract video codec and quality information
- # First consider quality, then codecs, then take everything
- codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')]
- files = {'hd': [], 'sd': [], 'other': []}
- config_files = config["video"].get("files") or config["request"].get("files")
- for codec_name, codec_extension in codecs:
- for quality in config_files.get(codec_name, []):
- format_id = '-'.join((codec_name, quality)).lower()
- key = quality if quality in files else 'other'
- video_url = None
- if isinstance(config_files[codec_name], dict):
- file_info = config_files[codec_name][quality]
- video_url = file_info.get('url')
- else:
- file_info = {}
- if video_url is None:
- video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
- % (video_id, sig, timestamp, quality, codec_name.upper())
-
- files[key].append({
- 'ext': codec_extension,
- 'url': video_url,
- 'format_id': format_id,
- 'width': int_or_none(file_info.get('width')),
- 'height': int_or_none(file_info.get('height')),
- 'tbr': int_or_none(file_info.get('bitrate')),
- })
formats = []
- m3u8_url = config_files.get('hls', {}).get('all')
+ config_files = config['video'].get('files') or config['request'].get('files', {})
+ for f in config_files.get('progressive', []):
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http-%s' % f.get('quality'),
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'fps': int_or_none(f.get('fps')),
+ 'tbr': int_or_none(f.get('bitrate')),
+ })
+ m3u8_url = config_files.get('hls', {}).get('url')
if m3u8_url:
m3u8_formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', 0, 'hls', fatal=False)
if m3u8_formats:
formats.extend(m3u8_formats)
- for key in ('other', 'sd', 'hd'):
- formats += files[key]
- self._sort_formats(formats)
+ # Bitrates are completely broken. A single m3u8 may contain entries in kbps and bps
+ # at the same time without actual units specified. This leads to wrong sorting.
+ self._sort_formats(formats, field_preference=('height', 'width', 'fps', 'format_id'))
subtitles = {}
text_tracks = config['request'].get('text_tracks')
@@ -473,21 +477,20 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
token, vuid = self._extract_xsrft_and_vuid(webpage)
fields['token'] = token
fields['password'] = password
- post = urlencode_postdata(fields)
+ post = urlencode_postdata(encode_dict(fields))
password_path = self._search_regex(
r'action="([^"]+)"', login_form, 'password URL')
password_url = compat_urlparse.urljoin(page_url, password_path)
- password_request = compat_urllib_request.Request(password_url, post)
+ password_request = sanitized_Request(password_url, post)
password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
- password_request.add_header('Cookie', 'vuid=%s' % vuid)
- self._set_cookie('vimeo.com', 'xsrft', token)
+ self._set_vimeo_cookie('vuid', vuid)
+ self._set_vimeo_cookie('xsrft', token)
return self._download_webpage(
password_request, list_id,
'Verifying the password', 'Wrong password')
- def _extract_videos(self, list_id, base_url):
- video_ids = []
+ def _title_and_entries(self, list_id, base_url):
for pagenum in itertools.count(1):
page_url = self._page_url(base_url, pagenum)
webpage = self._download_webpage(
@@ -496,18 +499,18 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
if pagenum == 1:
webpage = self._login_list_password(page_url, list_id, webpage)
+ yield self._extract_list_title(webpage)
+
+ for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
+ yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
- video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
- entries = [self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo')
- for video_id in video_ids]
- return {'_type': 'playlist',
- 'id': list_id,
- 'title': self._extract_list_title(webpage),
- 'entries': entries,
- }
+ def _extract_videos(self, list_id, base_url):
+ title_and_entries = self._title_and_entries(list_id, base_url)
+ list_title = next(title_and_entries)
+ return self.playlist_result(title_and_entries, list_id, list_title)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
@@ -568,7 +571,7 @@ class VimeoAlbumIE(VimeoChannelIE):
class VimeoGroupsIE(VimeoAlbumIE):
IE_NAME = 'vimeo:group'
- _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)'
+ _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)'
_TESTS = [{
'url': 'https://vimeo.com/groups/rolexawards',
'info_dict': {
@@ -637,7 +640,7 @@ class VimeoWatchLaterIE(VimeoChannelIE):
def _page_url(self, base_url, pagenum):
url = '%s/page:%d/' % (base_url, pagenum)
- request = compat_urllib_request.Request(url)
+ request = sanitized_Request(url)
# Set the header to get a partial html page with the ids,
# the normal page doesn't contain them.
request.add_header('X-Requested-With', 'XMLHttpRequest')