Add infrastructure for paged lists

This commit makes it possible to download the pages of a playlist as needed instead of all at once. Before this commit, youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download took quite some time - now it's almost instantaneous. As an example, the youtube:user extractor has been converted. Fixes #2175
author: Philipp Hagemeister <phihag@phihag.de> 2014-01-20 11:36:47 +0100
committer: Philipp Hagemeister <phihag@phihag.de> 2014-01-20 11:36:47 +0100
commit: b7ab05908440915c6c5faa541abe00c62a88bc27 (patch)
tree: 3b7e87361b7dce60ff7bdbe13bd33844fcb7d18e /youtube_dl/utils.py
parent: c91778f8c0ba120378cb806f694fdc3f94a5634c (diff)
1 files changed, 44 insertions, 0 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 73fe1ad0a..ff124d9e8 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -6,6 +6,7 @@ import datetime
 import email.utils
 import errno
 import gzip
+import itertools
 import io
 import json
 import locale
@@ -1161,3 +1162,46 @@ def check_executable(exe, args=[]):
     except OSError:
         return False
     return exe
+
+
+class PagedList(object):
+    """List-like view over results that are fetched one page at a time.
+
+    pagefunc is called as pagefunc(pagenum) with a 0-based page number and
+    must return an iterable with the entries of that page. pagesize is the
+    number of entries on a full page; a page with fewer entries is treated
+    as the last one.
+    """
+
+    def __init__(self, pagefunc, pagesize):
+        self._pagefunc = pagefunc
+        self._pagesize = pagesize
+
+    def getslice(self, start=0, end=None):
+        """Return the entries with 0-based index in [start, end) as a list.
+
+        Only the pages overlapping the requested range are fetched. With
+        end=None everything from start up to the last page is returned.
+        An empty or inverted range (end <= start) yields [] without
+        fetching any page.
+        """
+        # Without this guard an end that is <= start is either never matched
+        # by any fetched page, or (on a page boundary) its in-page offset
+        # wraps around to a full page; both cases used to download and
+        # return everything from start onwards.
+        if end is not None and end <= start:
+            return []
+
+        res = []
+        # Begin with the page that contains start; earlier pages are never
+        # needed.
+        for pagenum in itertools.count(start // self._pagesize):
+            firstid = pagenum * self._pagesize
+            nextfirstid = firstid + self._pagesize
+
+            page_results = list(self._pagefunc(pagenum))
+
+            # In-page offset of the first wanted entry; non-zero only on the
+            # page that contains start.
+            startv = (
+                start % self._pagesize
+                if firstid <= start < nextfirstid
+                else 0)
+
+            # In-page offset just past the last wanted entry; only set on
+            # the page that contains the end of the range.
+            endv = (
+                ((end - 1) % self._pagesize) + 1
+                if (end is not None and firstid <= end <= nextfirstid)
+                else None)
+
+            if startv != 0 or endv is not None:
+                page_results = page_results[startv:endv]
+            res.extend(page_results)
+
+            # A little optimization - if current page is not "full", ie. does
+            # not contain page_size videos then we can assume that this page
+            # is the last one - there are no more ids on further pages -
+            # i.e. no need to query again.
+            # (This also ends the loop when the slice was cut at endv.)
+            if len(page_results) + startv < self._pagesize:
+                break
+
+            # If we got the whole page, but the next page is not interesting,
+            # break out early as well
+            if end == nextfirstid:
+                break
+        return res
author	Philipp Hagemeister <phihag@phihag.de>	2014-01-20 11:36:47 +0100
committer	Philipp Hagemeister <phihag@phihag.de>	2014-01-20 11:36:47 +0100
commit	b7ab05908440915c6c5faa541abe00c62a88bc27 (patch)
tree	3b7e87361b7dce60ff7bdbe13bd33844fcb7d18e /youtube_dl/utils.py
parent	c91778f8c0ba120378cb806f694fdc3f94a5634c (diff)