diff options
| -rw-r--r-- | test/test_youtube_lists.py | 8 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 48 | 
3 files changed, 57 insertions, 0 deletions
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 95f07d129..33db09f43 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -15,6 +15,7 @@ from youtube_dl.extractor import (
     YoutubeIE,
     YoutubeChannelIE,
     YoutubeShowIE,
+    YoutubeTopListIE,
 )
 
 
@@ -116,5 +117,12 @@ class TestYoutubeLists(unittest.TestCase):
         original_video = entries[0]
         self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
 
+    def test_youtube_toplist(self):
+        dl = FakeYDL()
+        ie = YoutubeTopListIE(dl)
+        result = ie.extract('yttoplist:music:Top Tracks')
+        entries = result['entries']
+        self.assertTrue(len(entries) >= 9)
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 664639b53..0abf86e44 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -194,6 +194,7 @@ from .youtube import (
     YoutubeWatchLaterIE,
     YoutubeFavouritesIE,
     YoutubeHistoryIE,
+    YoutubeTopListIE,
 )
 from .zdf import ZDFIE
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 765b4a9bf..a1a4d896d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1576,6 +1576,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         if len(playlist_id) == 13:  # 'RD' + 11 characters for the video id
             # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
+        if playlist_id.startswith('TL'):
+            raise ExtractorError(u'For downloading YouTube.com top lists, use '
+                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
         # Extract the video ids from the playlist pages
         ids = []
@@ -1598,6 +1601,51 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
+class YoutubeTopListIE(YoutubePlaylistIE):
+    """Extractor for YouTube.com top lists ("yttoplist:{channel}:{list title}")."""
+
+    IE_NAME = u'youtube:toplist'
+    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
+        u' (Example: "yttoplist:music:Top Tracks")')
+    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
+    # Upper bound on list-page downloads, so a page that never contains
+    # the videos cannot make the extractor loop forever
+    _MAX_TRIES = 10
+
+    def _real_extract(self, url):
+        """Return a playlist result with the videos of the requested top list.
+
+        The channel page is searched for a link whose href contains the
+        url-encoded list title; the video ids are read from the linked page.
+        Raises ExtractorError if no video is found after _MAX_TRIES downloads.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        channel = mobj.group('chann')
+        title = mobj.group('title')
+        query = compat_urllib_parse.urlencode({'title': title})
+        playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query)
+        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
+        link = self._html_search_regex(playlist_re, channel_page, u'list')
+        url = compat_urlparse.urljoin('https://www.youtube.com/', link)
+
+        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
+        # sometimes the webpage doesn't contain the videos,
+        # so retry a bounded number of times until we get them
+        for i in range(self._MAX_TRIES):
+            msg = u'Downloading Youtube top list'
+            if i > 0:
+                msg += u', retry #%d' % i
+            webpage = self._download_webpage(url, title, msg)
+            ids = orderedSet(re.findall(video_re, webpage))
+            if ids:
+                break
+        else:
+            raise ExtractorError(
+                u'Unable to find any video in the top list "%s"' % title)
+        url_results = self._ids_to_results(ids)
+        return self.playlist_result(url_results, playlist_title=title)
+
+
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = u'YouTube.com channels'
     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
