aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl
diff options
context:
space:
mode:
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-11-13 16:21:24 +0100
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-11-13 16:26:50 +0100
commit dcbb45803f9b70041ec0ef9c3c6547340bd1ef7a (patch)
tree b72bf97d64f68dac6453ca2920d504a80edae030 /youtube_dl
parent 80b9bbce8687f800b79edb36edf8c193dcf26a78 (diff)
[youtube:playlist] Don't use the gdata api (closes #1508)
Parse the playlist pages instead
Diffstat (limited to 'youtube_dl')
-rw-r--r-- youtube_dl/extractor/youtube.py 43
1 files changed, 13 insertions, 30 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index c992cba97..d97ea8c83 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1506,8 +1506,9 @@ class YoutubePlaylistIE(InfoExtractor):
|
((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
)"""
- _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
- _MAX_RESULTS = 50
+ _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
+ _MORE_PAGES_INDICATOR = r'data-link-type="next"'
+ _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
IE_NAME = u'youtube:playlist'
@classmethod
@@ -1532,41 +1533,23 @@ class YoutubePlaylistIE(InfoExtractor):
else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
- # Download playlist videos from API
- videos = []
+ # Extract the video ids from the playlist pages
+ ids = []
for page_num in itertools.count(1):
- start_index = self._MAX_RESULTS * (page_num - 1) + 1
- if start_index >= 1000:
- self._downloader.report_warning(u'Max number of results reached')
- break
- url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
+ url = self._TEMPLATE_URL % (playlist_id, page_num)
page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
+ # The ids are duplicated
+ new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
+ ids.extend(new_ids)
- try:
- response = json.loads(page)
- except ValueError as err:
- raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
-
- if 'feed' not in response:
- raise ExtractorError(u'Got a malformed response from YouTube API')
- playlist_title = response['feed']['title']['$t']
- if 'entry' not in response['feed']:
- # Number of videos is a multiple of self._MAX_RESULTS
+ if re.search(self._MORE_PAGES_INDICATOR, page) is None:
break
- for entry in response['feed']['entry']:
- index = entry['yt$position']['$t']
- if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
- videos.append((
- index,
- 'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
- ))
-
- videos = [v[1] for v in sorted(videos)]
+ playlist_title = self._og_search_title(page)
- url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
- return [self.playlist_result(url_results, playlist_id, playlist_title)]
+ url_results = [self.url_result(vid, 'Youtube') for vid in ids]
+ return self.playlist_result(url_results, playlist_id, playlist_title)
class YoutubeChannelIE(InfoExtractor):