aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2016-06-23 01:28:36 +0700
committerSergey M․ <dstftw@gmail.com>2016-06-23 01:28:36 +0700
commitc8e3e0974b4ffb6792694336664f90eff38fc762 (patch)
treec81aea2d60aa649d1664fa0ca2cea06d134d19b5
parentdfc8f46e1c0e47a3b080d2e38d7d6da279f18fd2 (diff)
[vimeo:channel] Improve playlist extraction
-rw-r--r--youtube_dl/extractor/vimeo.py25
1 files changed, 21 insertions, 4 deletions
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 32490a8ed..26a3d9931 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -146,7 +146,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
\.
)?
vimeo(?P<pro>pro)?\.com/
- (?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/)
+ (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
(?:.*?/)?
(?:
(?:
@@ -315,6 +315,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
'only_matching': True,
},
{
+ 'url': 'https://vimeo.com/album/2632481/video/79010983',
+ 'only_matching': True,
+ },
+ {
# source file returns 403: Forbidden
'url': 'https://vimeo.com/7809605',
'only_matching': True,
@@ -651,8 +655,21 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
webpage = self._login_list_password(page_url, list_id, webpage)
yield self._extract_list_title(webpage)
- for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
- yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo', video_id=video_id)
+ # Try extracting href first since not all videos are available via
+ # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
+ clips = re.findall(
+ r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
+ if clips:
+ for video_id, video_url in clips:
+ yield self.url_result(
+ compat_urlparse.urljoin(base_url, video_url),
+ VimeoIE.ie_key(), video_id=video_id)
+ # More relaxed fallback
+ else:
+ for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
+ yield self.url_result(
+ 'https://vimeo.com/%s' % video_id,
+ VimeoIE.ie_key(), video_id=video_id)
if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
break
@@ -689,7 +706,7 @@ class VimeoUserIE(VimeoChannelIE):
class VimeoAlbumIE(VimeoChannelIE):
IE_NAME = 'vimeo:album'
- _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
+ _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)/?(?:$|[?#])'
_TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
_TESTS = [{
'url': 'https://vimeo.com/album/2632481',