diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-06-23 01:28:36 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-06-23 01:28:36 +0700 |
commit | c8e3e0974b4ffb6792694336664f90eff38fc762 (patch) | |
tree | c81aea2d60aa649d1664fa0ca2cea06d134d19b5 /youtube_dl/extractor | |
parent | dfc8f46e1c0e47a3b080d2e38d7d6da279f18fd2 (diff) |
[vimeo:channel] Improve playlist extraction
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/vimeo.py | 25 |
1 file changed, 21 insertions, 4 deletions
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 32490a8ed..26a3d9931 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -146,7 +146,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
                             \.
                         )?
                         vimeo(?P<pro>pro)?\.com/
-                        (?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/)
+                        (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
                         (?:.*?/)?
                         (?:
                             (?:
@@ -315,6 +315,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
             'only_matching': True,
         },
         {
+            'url': 'https://vimeo.com/album/2632481/video/79010983',
+            'only_matching': True,
+        },
+        {
             # source file returns 403: Forbidden
             'url': 'https://vimeo.com/7809605',
             'only_matching': True,
@@ -651,8 +655,21 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
             webpage = self._login_list_password(page_url, list_id, webpage)
             yield self._extract_list_title(webpage)
 
-            for video_id in re.findall(r'id="clip_(\d+?)"', webpage):
-                yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo', video_id=video_id)
+            # Try extracting href first since not all videos are available via
+            # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
+            clips = re.findall(
+                r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
+            if clips:
+                for video_id, video_url in clips:
+                    yield self.url_result(
+                        compat_urlparse.urljoin(base_url, video_url),
+                        VimeoIE.ie_key(), video_id=video_id)
+            # More relaxed fallback
+            else:
+                for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
+                    yield self.url_result(
+                        'https://vimeo.com/%s' % video_id,
+                        VimeoIE.ie_key(), video_id=video_id)
 
             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                 break
@@ -689,7 +706,7 @@ class VimeoUserIE(VimeoChannelIE):
 
 class VimeoAlbumIE(VimeoChannelIE):
     IE_NAME = 'vimeo:album'
-    _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
+    _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)/?(?:$|[?#])'
     _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
     _TESTS = [{
         'url': 'https://vimeo.com/album/2632481',