diff options
author | Philipp Hagemeister <phihag@phihag.de> | 2014-01-22 20:00:16 +0100 |
---|---|---|
committer | Philipp Hagemeister <phihag@phihag.de> | 2014-01-22 20:00:16 +0100 |
commit | 65697b3bf3bf6eaeb91a34e5308a6d2239118071 (patch) | |
tree | e174cfbd658bd57da9d0fa990dc10949065db42b /youtube_dl/extractor/youtube.py | |
parent | 50317b111dadccba73bcdd828d9997d1da78a5f1 (diff) | |
parent | b7ab05908440915c6c5faa541abe00c62a88bc27 (diff) |
Merge branch 'paged-lists'
Conflicts:
test/test_utils.py
youtube_dl/extractor/youtube.py
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 28 |
1 files changed, 10 insertions, 18 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 870b7c4ca..57b8fdff7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -28,6 +28,7 @@ from ..utils import ( get_element_by_attribute, ExtractorError, int_or_none, + PagedList, RegexNotFoundError, unescapeHTML, unified_strdate, @@ -1626,44 +1627,35 @@ class YoutubeUserIE(InfoExtractor): # page by page until there are no video ids - it means we got # all of them. - url_results = [] - - for pagenum in itertools.count(0): + def download_page(pagenum): start_index = pagenum * self._GDATA_PAGE_SIZE + 1 gdata_url = self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index) - page = self._download_webpage(gdata_url, username, - u'Downloading video ids from %d to %d' % (start_index, start_index + self._GDATA_PAGE_SIZE)) + page = self._download_webpage( + gdata_url, username, + u'Downloading video ids from %d to %d' % ( + start_index, start_index + self._GDATA_PAGE_SIZE)) try: response = json.loads(page) except ValueError as err: raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err)) if 'entry' not in response['feed']: - # Number of videos is a multiple of self._MAX_RESULTS - break + return # Extract video identifiers entries = response['feed']['entry'] for entry in entries: title = entry['title']['$t'] video_id = entry['id']['$t'].split('/')[-1] - url_results.append({ + yield { '_type': 'url', 'url': video_id, 'ie_key': 'Youtube', 'id': 'video_id', 'title': title, - }) - - # A little optimization - if current page is not - # "full", ie. does not contain PAGE_SIZE video ids then - # we can assume that this page is the last one - there - # are no more ids on further pages - no need to query - # again. - - if len(entries) < self._GDATA_PAGE_SIZE: - break + } + url_results = PagedList(download_page, self._GDATA_PAGE_SIZE) return self.playlist_result(url_results, playlist_title=username) |