Merge remote-tracking branch 'jaimeMF/yt-playlists'

author: Philipp Hagemeister <phihag@phihag.de> 2013-11-22 20:11:54 +0100
committer: Philipp Hagemeister <phihag@phihag.de> 2013-11-22 20:11:54 +0100
commit: 3f8ced5144a76a3f9ab7ee8cd06cc79bb75dc564 (patch)
tree: 240d40bd76180dafa9dccf4e9b33e5f87456185d /youtube_dl/extractor
parent: 00ea0f11eb76e7a67648790524a50f7254b9578f (diff)
parent: 880e1c529de1d0f7f0a065afc4148320894a25b4 (diff)
1 file changed, 17 insertions, 31 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 41838237c..9b09793eb 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1510,7 +1510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             })
         return results
 
-class YoutubePlaylistIE(InfoExtractor):
+class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     IE_DESC = u'YouTube.com playlists'
     _VALID_URL = r"""(?:
                         (?:https?://)?
@@ -1526,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor):
                      |
                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                      )"""
-    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
-    _MAX_RESULTS = 50
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
+    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
+    _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&amp;'
     IE_NAME = u'youtube:playlist'
 
     @classmethod
@@ -1535,6 +1536,9 @@ class YoutubePlaylistIE(InfoExtractor):
         """Receives a URL and returns True if suitable for this IE."""
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 
+    def _real_initialize(self):
+        self._login()
+
     def _real_extract(self, url):
         # Extract playlist id
         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1552,41 +1556,23 @@ class YoutubePlaylistIE(InfoExtractor):
             else:
                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
-        # Download playlist videos from API
-        videos = []
+        # Extract the video ids from the playlist pages
+        ids = []
 
         for page_num in itertools.count(1):
-            start_index = self._MAX_RESULTS * (page_num - 1) + 1
-            if start_index >= 1000:
-                self._downloader.report_warning(u'Max number of results reached')
-                break
-            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
+            url = self._TEMPLATE_URL % (playlist_id, page_num)
             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
+            # The ids are duplicated
+            new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
+            ids.extend(new_ids)
 
-            try:
-                response = json.loads(page)
-            except ValueError as err:
-                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
-
-            if 'feed' not in response:
-                raise ExtractorError(u'Got a malformed response from YouTube API')
-            playlist_title = response['feed']['title']['$t']
-            if 'entry' not in response['feed']:
-                # Number of videos is a multiple of self._MAX_RESULTS
+            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                 break
 
-            for entry in response['feed']['entry']:
-                index = entry['yt$position']['$t']
-                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
-                    videos.append((
-                        index,
-                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
-                    ))
-
-        videos = [v[1] for v in sorted(videos)]
+        playlist_title = self._og_search_title(page)
 
-        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
-        return [self.playlist_result(url_results, playlist_id, playlist_title)]
+        url_results = [self.url_result(vid, 'Youtube') for vid in ids]
+        return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
 class YoutubeChannelIE(InfoExtractor):
author	Philipp Hagemeister <phihag@phihag.de>	2013-11-22 20:11:54 +0100
committer	Philipp Hagemeister <phihag@phihag.de>	2013-11-22 20:11:54 +0100
commit	3f8ced5144a76a3f9ab7ee8cd06cc79bb75dc564 (patch)
tree	240d40bd76180dafa9dccf4e9b33e5f87456185d /youtube_dl/extractor
parent	00ea0f11eb76e7a67648790524a50f7254b9578f (diff)
parent	880e1c529de1d0f7f0a065afc4148320894a25b4 (diff)