1 files changed, 98 insertions, 20 deletions
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index bc01d7fbf..06b0bed41 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -7,18 +7,20 @@ import itertools
 
 from .common import InfoExtractor
 from .subtitles import SubtitlesInfoExtractor
-from ..utils import (
+from ..compat import (
     compat_HTTPError,
     compat_urllib_parse,
     compat_urllib_request,
-    clean_html,
-    get_element_by_attribute,
+    compat_urlparse,
+)
+from ..utils import (
     ExtractorError,
+    InAdvancePagedList,
+    int_or_none,
     RegexNotFoundError,
     std_headers,
     unsmuggle_url,
     urlencode_postdata,
-    int_or_none,
 )
 
 
@@ -54,7 +56,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
 
     # _VALID_URL matches Vimeo URLs
     _VALID_URL = r'''(?x)
-        (?P<proto>(?:https?:)?//)?
+        https?://
         (?:(?:www|(?P<player>player))\.)?
         vimeo(?P<pro>pro)?\.com/
         (?!channels/[^/?#]+/?(?:$|[?#])|album/)
@@ -89,6 +91,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 'uploader_id': 'openstreetmapus',
                 'uploader': 'OpenStreetMap US',
                 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
+                'description': 'md5:380943ec71b89736ff4bf27183233d09',
                 'duration': 1595,
             },
         },
@@ -103,6 +106,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 'uploader': 'The BLN & Business of Software',
                 'uploader_id': 'theblnbusinessofsoftware',
                 'duration': 3610,
+                'description': None,
             },
         },
         {
@@ -117,6 +121,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 'uploader_id': 'user18948128',
                 'uploader': 'Jaime Marquínez Ferrándiz',
                 'duration': 10,
+                'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people who love them.',
             },
             'params': {
                 'videopassword': 'youtube-dl',
@@ -152,6 +157,18 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 'duration': 62,
             }
         },
+        {
+            # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
+            'url': 'https://player.vimeo.com/video/98044508',
+            'note': 'The js code contains assignments to the same variable as the config',
+            'info_dict': {
+                'id': '98044508',
+                'ext': 'mp4',
+                'title': 'Pier Solar OUYA Official Trailer',
+                'uploader': 'Tulio Gonçalves',
+                'uploader_id': 'user28849593',
+            },
+        },
     ]
 
     def _verify_video_password(self, url, video_id, webpage):
@@ -203,6 +220,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
         # Extract ID from URL
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('id')
+        orig_url = url
         if mobj.group('pro') or mobj.group('player'):
             url = 'http://player.vimeo.com/video/' + video_id
 
@@ -238,11 +256,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 # We try to find out to which variable is assigned the config dic
                 m_variable_name = re.search('(\w)\.video\.id', webpage)
                 if m_variable_name is not None:
-                    config_re = r'%s=({.+?});' % re.escape(m_variable_name.group(1))
+                    config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
                 else:
                     config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
                 config = self._search_regex(config_re, webpage, 'info section',
-                    flags=re.DOTALL)
+                                            flags=re.DOTALL)
                 config = json.loads(config)
         except Exception as e:
             if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
@@ -273,18 +291,23 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
                 _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
 
         # Extract video description
-        video_description = None
-        try:
-            video_description = get_element_by_attribute("class", "description_wrapper", webpage)
-            if video_description:
-                video_description = clean_html(video_description)
-        except AssertionError as err:
-            # On some pages like (http://player.vimeo.com/video/54469442) the
-            # html tags are not closed, python 2.6 cannot handle it
-            if err.args[0] == 'we should not get here!':
-                pass
-            else:
-                raise
+
+        video_description = self._html_search_regex(
+            r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
+            webpage, 'description', default=None)
+        if not video_description:
+            video_description = self._html_search_meta(
+                'description', webpage, default=None)
+        if not video_description and mobj.group('pro'):
+            orig_webpage = self._download_webpage(
+                orig_url, video_id,
+                note='Downloading webpage for description',
+                fatal=False)
+            if orig_webpage:
+                video_description = self._html_search_meta(
+                    'description', orig_webpage, default=None)
+        if not video_description and not mobj.group('player'):
+            self._downloader.report_warning('Cannot find video description')
 
         # Extract video duration
         video_duration = int_or_none(config["video"].get("duration"))
@@ -491,7 +514,7 @@ class VimeoReviewIE(InfoExtractor):
         'info_dict': {
             'id': '91613211',
             'ext': 'mp4',
-            'title': 'Death by dogma versus assembling agile - Sander Hoogendoorn',
+            'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
             'uploader': 'DevWeek Events',
             'duration': 2773,
             'thumbnail': 're:^https?://.*\.jpg$',
@@ -529,3 +552,58 @@ class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE):
 
     def _real_extract(self, url):
         return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater')
+
+
+class VimeoLikesIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
+    IE_NAME = 'vimeo:likes'
+    IE_DESC = 'Vimeo user likes'
+    _TEST = {
+        'url': 'https://vimeo.com/user755559/likes/',
+        'playlist_mincount': 293,
+        "info_dict": {
+            "description": "See all the videos urza likes",
+            "title": 'Videos urza likes',
+        },
+    }
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+        webpage = self._download_webpage(url, user_id)
+        page_count = self._int(
+            self._search_regex(
+                r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)">
+                    .*?</a></li>\s*<li\s+class="pagination_next">
+                ''', webpage, 'page count'),
+            'page count', fatal=True)
+        PAGE_SIZE = 12
+        title = self._html_search_regex(
+            r'(?s)<h1>(.+?)</h1>', webpage, 'title', fatal=False)
+        description = self._html_search_meta('description', webpage)
+
+        def _get_page(idx):
+            page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % (
+                self.http_scheme(), user_id, idx + 1)
+            webpage = self._download_webpage(
+                page_url, user_id,
+                note='Downloading page %d/%d' % (idx + 1, page_count))
+            video_list = self._search_regex(
+                r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>',
+                webpage, 'video content')
+            paths = re.findall(
+                r'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list)
+            for path in paths:
+                yield {
+                    '_type': 'url',
+                    'url': compat_urlparse.urljoin(page_url, path),
+                }
+
+        pl = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
+
+        return {
+            '_type': 'playlist',
+            'id': 'user%s_likes' % user_id,
+            'title': title,
+            'description': description,
+            'entries': pl,
+        }