diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-11-30 14:56:51 +0100 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-11-30 14:56:51 +0100 |
commit | 0a688bc0b28c970e9af965b3fa0c7927507eeb97 (patch) | |
tree | c4f6db17e33552a5dd58699ebe0e73bfaa284bb8 /youtube_dl/extractor/youtube.py | |
parent | b138de72f2f0fc197fe46154bcaeceddb5713e7f (diff) |
[youtube] Add support for downloading top lists (fixes #1868)
It needs to know the channel and the title of the list, because the ids change every time you browse the channels and are attached to a 'VISITOR_INFO1_LIVE' cookie.
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 35 |
1 files changed, 35 insertions, 0 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 765b4a9bf..a1a4d896d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1576,6 +1576,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): if len(playlist_id) == 13: # 'RD' + 11 characters for the video id # Mixes require a custom extraction process return self._extract_mix(playlist_id) + if playlist_id.startswith('TL'): + raise ExtractorError(u'For downloading YouTube.com top lists, use ' + u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True) # Extract the video ids from the playlist pages ids = [] @@ -1598,6 +1601,38 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): return self.playlist_result(url_results, playlist_id, playlist_title) +class YoutubeTopListIE(YoutubePlaylistIE): + IE_NAME = u'youtube:toplist' + IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"' + u' (Example: "yttoplist:music:Top Tracks")') + _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + channel = mobj.group('chann') + title = mobj.group('title') + query = compat_urllib_parse.urlencode({'title': title}) + playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query) + channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title) + link = self._html_search_regex(playlist_re, channel_page, u'list') + url = compat_urlparse.urljoin('https://www.youtube.com/', link) + + video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' + ids = [] + # sometimes the webpage doesn't contain the videos + # retry until we get them + for i in itertools.count(0): + msg = u'Downloading Youtube mix' + if i > 0: + msg += ', retry #%d' % i + webpage = self._download_webpage(url, title, msg) + ids = orderedSet(re.findall(video_re, webpage)) + if ids: + break + url_results = self._ids_to_results(ids) + return self.playlist_result(url_results, playlist_title=title) + + class YoutubeChannelIE(InfoExtractor): IE_DESC = u'YouTube.com channels' _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" |