diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2013-12-09 04:49:32 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2013-12-09 04:49:32 +0100 |
commit | ffa8f0df0a878463078467709f615b1e57c61ec1 (patch) | |
tree | ca1d84977f876fcc72e6989cd4de60bc4f834167 /youtube_dl/extractor | |
parent | 693b8b2d310e119417787e7b06d1e1832d16f05d (diff) | |
parent | 0a688bc0b28c970e9af965b3fa0c7927507eeb97 (diff) |
Merge remote-tracking branch 'jaimeMF/yt-toplists'
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/youtube.py | 35 |
2 files changed, 36 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 2b78cc84d..3f740baa1 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -211,6 +211,7 @@ from .youtube import (
     YoutubeWatchLaterIE,
     YoutubeFavouritesIE,
     YoutubeHistoryIE,
+    YoutubeTopListIE,
 )
 from .zdf import ZDFIE
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 7f7508c74..874429b78 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1572,6 +1572,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         if playlist_id.startswith('RD'):
             # Mixes require a custom extraction process
             return self._extract_mix(playlist_id)
+        if playlist_id.startswith('TL'):
+            raise ExtractorError(u'For downloading YouTube.com top lists, use '
+                u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
 
         # Extract the video ids from the playlist pages
         ids = []
@@ -1594,6 +1597,38 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
+class YoutubeTopListIE(YoutubePlaylistIE):
+    IE_NAME = u'youtube:toplist'
+    IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
+        u' (Example: "yttoplist:music:Top Tracks")')
+    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        channel = mobj.group('chann')
+        title = mobj.group('title')
+        query = compat_urllib_parse.urlencode({'title': title})
+        playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query)
+        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
+        link = self._html_search_regex(playlist_re, channel_page, u'list')
+        url = compat_urlparse.urljoin('https://www.youtube.com/', link)
+
+        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
+        ids = []
+        # sometimes the webpage doesn't contain the videos
+        # retry until we get them
+        for i in itertools.count(0):
+            msg = u'Downloading Youtube mix'
+            if i > 0:
+                msg += ', retry #%d' % i
+            webpage = self._download_webpage(url, title, msg)
+            ids = orderedSet(re.findall(video_re, webpage))
+            if ids:
+                break
+        url_results = self._ids_to_results(ids)
+        return self.playlist_result(url_results, playlist_title=title)
+
+
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = u'YouTube.com channels'
     _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
```