diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2013-11-22 20:11:54 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2013-11-22 20:11:54 +0100 |
commit | 3f8ced5144a76a3f9ab7ee8cd06cc79bb75dc564 (patch) | |
tree | 240d40bd76180dafa9dccf4e9b33e5f87456185d /youtube_dl/extractor | |
parent | 00ea0f11eb76e7a67648790524a50f7254b9578f (diff) | |
parent | 880e1c529de1d0f7f0a065afc4148320894a25b4 (diff) |
Merge remote-tracking branch 'jaimeMF/yt-playlists'
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 48 |
1 files changed, 17 insertions, 31 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 41838237c..9b09793eb 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1510,7 +1510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): }) return results -class YoutubePlaylistIE(InfoExtractor): +class YoutubePlaylistIE(YoutubeBaseInfoExtractor): IE_DESC = u'YouTube.com playlists' _VALID_URL = r"""(?: (?:https?://)? @@ -1526,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor): | ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,}) )""" - _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none' - _MAX_RESULTS = 50 + _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s' + _MORE_PAGES_INDICATOR = r'data-link-type="next"' + _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&' IE_NAME = u'youtube:playlist' @classmethod @@ -1535,6 +1536,9 @@ class YoutubePlaylistIE(InfoExtractor): """Receives a URL and returns True if suitable for this IE.""" return re.match(cls._VALID_URL, url, re.VERBOSE) is not None + def _real_initialize(self): + self._login() + def _real_extract(self, url): # Extract playlist id mobj = re.match(self._VALID_URL, url, re.VERBOSE) @@ -1552,41 +1556,23 @@ class YoutubePlaylistIE(InfoExtractor): else: self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) - # Download playlist videos from API - videos = [] + # Extract the video ids from the playlist pages + ids = [] for page_num in itertools.count(1): - start_index = self._MAX_RESULTS * (page_num - 1) + 1 - if start_index >= 1000: - self._downloader.report_warning(u'Max number of results reached') - break - url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index) + url = self._TEMPLATE_URL % (playlist_id, page_num) page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num) + # The ids are duplicated + new_ids = orderedSet(re.findall(self._VIDEO_RE, page)) + ids.extend(new_ids) - try: - response = json.loads(page) - except ValueError as err: - raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) - - if 'feed' not in response: - raise ExtractorError(u'Got a malformed response from YouTube API') - playlist_title = response['feed']['title']['$t'] - if 'entry' not in response['feed']: - # Number of videos is a multiple of self._MAX_RESULTS + if re.search(self._MORE_PAGES_INDICATOR, page) is None: break - for entry in response['feed']['entry']: - index = entry['yt$position']['$t'] - if 'media$group' in entry and 'yt$videoid' in entry['media$group']: - videos.append(( - index, - 'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t'] - )) - - videos = [v[1] for v in sorted(videos)] + playlist_title = self._og_search_title(page) - url_results = [self.url_result(vurl, 'Youtube') for vurl in videos] - return [self.playlist_result(url_results, playlist_id, playlist_title)] + url_results = [self.url_result(vid, 'Youtube') for vid in ids] + return self.playlist_result(url_results, playlist_id, playlist_title) class YoutubeChannelIE(InfoExtractor): |