diff options
author | Gergely Imreh <imrehg@gmail.com> | 2011-01-31 18:54:47 +0800 |
---|---|---|
committer | Gergely Imreh <imrehg@gmail.com> | 2011-01-31 19:00:51 +0800 |
commit | f74e22ae280ac1680251350c4672abfeb2a047fe (patch) | |
tree | 4d5de9396c4368885e82af6efda91359412348d3 | |
parent | 16c73c2e513829197c4af5ee62bde88b2b2272e4 (diff) |
Enable artist playlists in YoutubePlaylistIE
Artist playlist pages have different format compared to user playlists,
thus more format checking is needed to construct the correct URL.
From the artist playlist this method downloads all listed below the
"Videos by [Artist Name]" header, plus usually there's one more
video on the side, titled "Youtube Mix for [Artist Name]", which
has a link format that currently cannot be distinguished from the other
videos in the list.
-rwxr-xr-x | youtube-dl | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/youtube-dl b/youtube-dl index a4c8f2494..dd875a38e 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2096,8 +2096,8 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*' - _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/)([^&]+).*' + _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' _youtube_ie = None @@ -2125,13 +2125,19 @@ class YoutubePlaylistIE(InfoExtractor): return # Download playlist pages - playlist_id = mobj.group(1) + # prefix is 'p' as default for playlists but there are other types that need extra care + playlist_prefix = mobj.group(1) + if playlist_prefix == 'a': + playlist_access = 'artist' + else: + playlist_access = 'view_play_list' + playlist_id = mobj.group(2) video_ids = [] pagenum = 1 while True: self.report_download_page(playlist_id, pagenum) - request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum)) + request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)) try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: |