aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
authorPhilipp Hagemeister <phihag@phihag.de>2014-01-22 20:00:16 +0100
committerPhilipp Hagemeister <phihag@phihag.de>2014-01-22 20:00:16 +0100
commit65697b3bf3bf6eaeb91a34e5308a6d2239118071 (patch)
treee174cfbd658bd57da9d0fa990dc10949065db42b /youtube_dl/extractor
parent50317b111dadccba73bcdd828d9997d1da78a5f1 (diff)
parentb7ab05908440915c6c5faa541abe00c62a88bc27 (diff)
downloadyoutube-dl-65697b3bf3bf6eaeb91a34e5308a6d2239118071.tar.xz
Merge branch 'paged-lists'
Conflicts: test/test_utils.py youtube_dl/extractor/youtube.py
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/youtube.py28
1 files changed, 10 insertions, 18 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 870b7c4ca..57b8fdff7 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -28,6 +28,7 @@ from ..utils import (
get_element_by_attribute,
ExtractorError,
int_or_none,
+ PagedList,
RegexNotFoundError,
unescapeHTML,
unified_strdate,
@@ -1626,44 +1627,35 @@ class YoutubeUserIE(InfoExtractor):
# page by page until there are no video ids - it means we got
# all of them.
- url_results = []
-
- for pagenum in itertools.count(0):
+ def download_page(pagenum):
start_index = pagenum * self._GDATA_PAGE_SIZE + 1
gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)
- page = self._download_webpage(gdata_url, username,
- u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE))
+ page = self._download_webpage(
+ gdata_url, username,
+ u'Downloading video ids from %d to %d' % (
+ start_index, start_index + self._GDATA_PAGE_SIZE))
try:
response = json.loads(page)
except ValueError as err:
raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
if 'entry' not in response['feed']:
- # Number of videos is a multiple of self._MAX_RESULTS
- break
+ return
# Extract video identifiers
entries = response['feed']['entry']
for entry in entries:
title = entry['title']['$t']
video_id = entry['id']['$t'].split('/')[-1]
- url_results.append({
+ yield {
'_type': 'url',
'url': video_id,
'ie_key': 'Youtube',
'id': 'video_id',
'title': title,
- })
-
- # A little optimization - if current page is not
- # "full", ie. does not contain PAGE_SIZE video ids then
- # we can assume that this page is the last one - there
- # are no more ids on further pages - no need to query
- # again.
-
- if len(entries) < self._GDATA_PAGE_SIZE:
- break
+ }
+ url_results = PagedList(download_page, self._GDATA_PAGE_SIZE)
return self.playlist_result(url_results, playlist_title=username)