aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGergely Imreh <imrehg@gmail.com>2011-01-31 18:54:47 +0800
committerGergely Imreh <imrehg@gmail.com>2011-01-31 19:00:51 +0800
commitf74e22ae280ac1680251350c4672abfeb2a047fe (patch)
tree4d5de9396c4368885e82af6efda91359412348d3
parent16c73c2e513829197c4af5ee62bde88b2b2272e4 (diff)
Enable artist playlists in YoutubePlaylistIE
Artist playlist pages have a different format compared to user playlists, thus more format checking is needed to construct the correct URL. From the artist playlist, this method downloads all videos listed below the "Videos by [Artist Name]" header. In addition, there is usually one more video on the side, titled "Youtube Mix for [Artist Name]", whose link format currently cannot be distinguished from that of the other videos in the list, so it is downloaded as well.
-rwxr-xr-xyoutube-dl14
1 files changed, 10 insertions, 4 deletions
diff --git a/youtube-dl b/youtube-dl
index a4c8f2494..dd875a38e 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -2096,8 +2096,8 @@ class YahooSearchIE(InfoExtractor):
class YoutubePlaylistIE(InfoExtractor):
"""Information Extractor for YouTube playlists."""
- _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*'
- _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
+ _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/)([^&]+).*'
+ _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
_youtube_ie = None
@@ -2125,13 +2125,19 @@ class YoutubePlaylistIE(InfoExtractor):
return
# Download playlist pages
- playlist_id = mobj.group(1)
+ # prefix is 'p' as default for playlists but there are other types that need extra care
+ playlist_prefix = mobj.group(1)
+ if playlist_prefix == 'a':
+ playlist_access = 'artist'
+ else:
+ playlist_access = 'view_play_list'
+ playlist_id = mobj.group(2)
video_ids = []
pagenum = 1
while True:
self.report_download_page(playlist_id, pagenum)
- request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum))
+ request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err: