diff options
| -rw-r--r-- | youtube_dl/extractor/wimp.py | 15 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 30 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
3 files changed, 25 insertions, 22 deletions
| diff --git a/youtube_dl/extractor/wimp.py b/youtube_dl/extractor/wimp.py index 9a6bb0c76..79fd53e0c 100644 --- a/youtube_dl/extractor/wimp.py +++ b/youtube_dl/extractor/wimp.py @@ -6,14 +6,15 @@ from .common import InfoExtractor  class WimpIE(InfoExtractor): -    _VALID_URL = r'(?:http://)?(?:www\.)?wimp\.com/([^/]+)/' +    _VALID_URL = r'http://(?:www\.)?wimp\.com/([^/]+)/'      _TEST = { -        'url': 'http://www.wimp.com/deerfence/', -        'file': 'deerfence.flv', -        'md5': '8b215e2e0168c6081a1cf84b2846a2b5', +        'url': 'http://www.wimp.com/maruexhausted/', +        'md5': 'f1acced123ecb28d9bb79f2479f2b6a1',          'info_dict': { -            "title": "Watch Till End: Herd of deer jump over a fence.", -            "description": "These deer look as fluid as running water when they jump over this fence as a herd. This video is one that needs to be watched until the very end for the true majesty to be witnessed, but once it comes, it's sure to take your breath away.", +            'id': 'maruexhausted', +            'ext': 'flv', +            'title': 'Maru is exhausted.', +            'description': 'md5:57e099e857c0a4ea312542b684a869b8',          }      } @@ -30,4 +31,4 @@ class WimpIE(InfoExtractor):              'title': self._og_search_title(webpage),              'thumbnail': self._og_search_thumbnail(webpage),              'description': self._og_search_description(webpage), -        } +        }
\ No newline at end of file diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5b0d30ed1..49cca4c63 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1457,9 +1457,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):                       |                          ((?:PL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,})                       )""" -    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s' +    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'      _MORE_PAGES_INDICATOR = r'data-link-type="next"' -    _VIDEO_RE = r'href="/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)' +    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)'      IE_NAME = u'youtube:playlist'      def _real_initialize(self): @@ -1507,29 +1507,31 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):              raise ExtractorError(u'For downloading YouTube.com top lists, use '                  u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) +        url = self._TEMPLATE_URL % playlist_id +        page = self._download_webpage(url, playlist_id) +        more_widget_html = content_html = page +          # Extract the video ids from the playlist pages          ids = []          for page_num in itertools.count(1): -            url = self._TEMPLATE_URL % (playlist_id, page_num) -            page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) -            matches = re.finditer(self._VIDEO_RE, page) +            matches = re.finditer(self._VIDEO_RE, content_html)              # We remove the duplicates and the link with index 0              # (it's not the first video of the playlist)              new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0')              ids.extend(new_ids) -            if re.search(self._MORE_PAGES_INDICATOR, page) is None: +            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html) +            if not mobj:                  break -        try: -            playlist_title = self._og_search_title(page) -        except RegexNotFoundError: -            self.report_warning( -                u'Playlist page is missing OpenGraph title, falling back ...', -                playlist_id) -            playlist_title = self._html_search_regex( -                r'<h1 class="pl-header-title">(.*?)</h1>', page, u'title') +            more = self._download_json( +                'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num) +            content_html = more['content_html'] +            more_widget_html = more['load_more_widget_html'] + +        playlist_title = self._html_search_regex( +                r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')          url_results = self._ids_to_results(ids)          return self.playlist_result(url_results, playlist_id, playlist_title) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index b722f8175..c2660a316 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.02.20' +__version__ = '2014.02.21' | 
