Add infrastructure for paged lists

This commit allows pages in playlists to be downloaded as needed instead of all at once. Before this commit, `youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download` took quite some time; now it is almost instantaneous. As an example, the youtube:user extractor has been converted. Fixes #2175
author: Philipp Hagemeister <phihag@phihag.de> 2014-01-20 11:36:47 +0100
committer: Philipp Hagemeister <phihag@phihag.de> 2014-01-20 11:36:47 +0100
commit: b7ab05908440915c6c5faa541abe00c62a88bc27 (patch)
tree: 3b7e87361b7dce60ff7bdbe13bd33844fcb7d18e /youtube_dl/extractor/youtube.py
parent: c91778f8c0ba120378cb806f694fdc3f94a5634c (diff)
download: youtube-dl-b7ab05908440915c6c5faa541abe00c62a88bc27.tar.xz
1 files changed, 10 insertions, 18 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 248b30ffb..dd1a58f3f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -27,6 +27,7 @@ from ..utils import (
     get_element_by_id,
     get_element_by_attribute,
     ExtractorError,
+    PagedList,
     RegexNotFoundError,
     unescapeHTML,
     unified_strdate,
@@ -1580,44 +1581,35 @@ class YoutubeUserIE(InfoExtractor):
         # page by page until there are no video ids - it means we got
         # all of them.
 
-        url_results = []
-
-        for pagenum in itertools.count(0):
+        def download_page(pagenum):
             start_index = pagenum * self._GDATA_PAGE_SIZE + 1
 
             gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
-            page = self._download_webpage(gdata_url, username,
-                                          u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
+            page = self._download_webpage(
+                gdata_url, username,
+                u'Downloading video ids from %d to %d' % (
+                    start_index, start_index + self._GDATA_PAGE_SIZE))
 
             try:
                 response = json.loads(page)
             except ValueError as err:
                 raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
             if 'entry' not in response['feed']:
-                # Number of videos is a multiple of self._MAX_RESULTS
-                break
+                return
 
             # Extract video identifiers
             entries = response['feed']['entry']
             for entry in entries:
                 title = entry['title']['$t']
                 video_id = entry['id']['$t'].split('/')[-1]
-                url_results.append({
+                yield {
                     '_type': 'url',
                     'url': video_id,
                     'ie_key': 'Youtube',
                     'id': 'video_id',
                     'title': title,
-                })
-
-            # A little optimization - if current page is not
-            # "full", ie. does not contain PAGE_SIZE video ids then
-            # we can assume that this page is the last one - there
-            # are no more ids on further pages - no need to query
-            # again.
-
-            if len(entries) < self._GDATA_PAGE_SIZE:
-                break
+                }
+        url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
 
         return self.playlist_result(url_results, playlist_title=username)
author	Philipp Hagemeister <phihag@phihag.de>	2014-01-20 11:36:47 +0100
committer	Philipp Hagemeister <phihag@phihag.de>	2014-01-20 11:36:47 +0100
commit	b7ab05908440915c6c5faa541abe00c62a88bc27 (patch)
tree	3b7e87361b7dce60ff7bdbe13bd33844fcb7d18e /youtube_dl/extractor/youtube.py
parent	c91778f8c0ba120378cb806f694fdc3f94a5634c (diff)
download	youtube-dl-b7ab05908440915c6c5faa541abe00c62a88bc27.tar.xz