aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/youtube.py
diff options
context:
space:
mode:
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2013-11-30 14:56:51 +0100
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2013-11-30 14:56:51 +0100
commit0a688bc0b28c970e9af965b3fa0c7927507eeb97 (patch)
treec4f6db17e33552a5dd58699ebe0e73bfaa284bb8 /youtube_dl/extractor/youtube.py
parentb138de72f2f0fc197fe46154bcaeceddb5713e7f (diff)
[youtube] Add support for downloading top lists (fixes #1868)
It needs to know the channel and the title of the list, because the ids change every time you browse the channels and are attached to a 'VISITOR_INFO1_LIVE' cookie.
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r--youtube_dl/extractor/youtube.py35
1 files changed, 35 insertions, 0 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 765b4a9bf..a1a4d896d 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1576,6 +1576,9 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
if len(playlist_id) == 13: # 'RD' + 11 characters for the video id
# Mixes require a custom extraction process
return self._extract_mix(playlist_id)
+ if playlist_id.startswith('TL'):
+ raise ExtractorError(u'For downloading YouTube.com top lists, use '
+ u'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)
# Extract the video ids from the playlist pages
ids = []
@@ -1598,6 +1601,38 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
return self.playlist_result(url_results, playlist_id, playlist_title)
+class YoutubeTopListIE(YoutubePlaylistIE):
+ IE_NAME = u'youtube:toplist'
+ IE_DESC = (u'YouTube.com top lists, "yttoplist:{channel}:{list title}"'
+ u' (Example: "yttoplist:music:Top Tracks")')
+ _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ channel = mobj.group('chann')
+ title = mobj.group('title')
+ query = compat_urllib_parse.urlencode({'title': title})
+ playlist_re = 'href="([^"]+?%s[^"]+?)"' % re.escape(query)
+ channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title)
+ link = self._html_search_regex(playlist_re, channel_page, u'list')
+ url = compat_urlparse.urljoin('https://www.youtube.com/', link)
+
+ video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"'
+ ids = []
+ # sometimes the webpage doesn't contain the videos
+ # retry until we get them
+ for i in itertools.count(0):
+ msg = u'Downloading Youtube mix'
+ if i > 0:
+ msg += ', retry #%d' % i
+ webpage = self._download_webpage(url, title, msg)
+ ids = orderedSet(re.findall(video_re, webpage))
+ if ids:
+ break
+ url_results = self._ids_to_results(ids)
+ return self.playlist_result(url_results, playlist_title=title)
+
+
class YoutubeChannelIE(InfoExtractor):
IE_DESC = u'YouTube.com channels'
_VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"