about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2014-12-06 14:02:19 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2014-12-06 14:02:19 +0100
commit b82f815f373818ba99ee43660e9255e8f4ecac62 (patch)
tree a55b858e743c06742aa44aa71f9be51468eb3456
parent 158f8cadc0c29e9d43e1440a672a3f72f0e454c7 (diff)
download youtube-dl-b82f815f373818ba99ee43660e9255e8f4ecac62.tar.xz
Allow iterators for playlist result entries
-rwxr-xr-x youtube_dl/YoutubeDL.py 20
-rw-r--r-- youtube_dl/extractor/common.py 4
-rw-r--r-- youtube_dl/extractor/youtube.py 18
3 files changed, 26 insertions, 16 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index f89ac4e1d..56dc3d461 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -7,6 +7,7 @@ import collections
import datetime
import errno
import io
+import itertools
import json
import locale
import os
@@ -654,21 +655,28 @@ class YoutubeDL(object):
if playlistend == -1:
playlistend = None
- if isinstance(ie_result['entries'], list):
- n_all_entries = len(ie_result['entries'])
- entries = ie_result['entries'][playliststart:playlistend]
+ ie_entries = ie_result['entries']
+ if isinstance(ie_entries, list):
+ n_all_entries = len(ie_entries)
+ entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
(ie_result['extractor'], playlist, n_all_entries, n_entries))
- else:
- assert isinstance(ie_result['entries'], PagedList)
- entries = ie_result['entries'].getslice(
+ elif isinstance(ie_entries, PagedList):
+ entries = ie_entries.getslice(
playliststart, playlistend)
n_entries = len(entries)
self.to_screen(
"[%s] playlist %s: Downloading %d videos" %
(ie_result['extractor'], playlist, n_entries))
+ else: # iterable
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
+ n_entries = len(entries)
+ self.to_screen(
+ "[%s] playlist %s: Downloading %d videos" %
+ (ie_result['extractor'], playlist, n_entries))
for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 5b5e33cea..cb6081dd0 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -158,8 +158,8 @@ class InfoExtractor(object):
_type "playlist" indicates multiple videos.
- There must be a key "entries", which is a list or a PagedList object, each
- element of which is a valid dictionary under this specfication.
+ There must be a key "entries", which is a list, an iterable, or a PagedList
+ object, each element of which is a valid dictionary by this specification.
Additionally, playlists can have "title" and "id" attributes with the same
semantics as videos (see above).
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index addef9594..8b6e591a4 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1262,8 +1262,12 @@ class YoutubeChannelIE(InfoExtractor):
# The videos are contained in a single page
# the ajax pages can't be used, they are empty
video_ids = self.extract_videos_from_page(channel_page)
- else:
- # Download all channel pages using the json-based channel_ajax query
+ entries = [
+ self.url_result(video_id, 'Youtube', video_id=video_id)
+ for video_id in video_ids]
+ return self.playlist_result(entries, channel_id)
+
+ def _entries():
for pagenum in itertools.count(1):
url = self._MORE_PAGES_URL % (pagenum, channel_id)
page = self._download_json(
@@ -1271,16 +1275,14 @@ class YoutubeChannelIE(InfoExtractor):
transform_source=uppercase_escape)
ids_in_page = self.extract_videos_from_page(page['content_html'])
- video_ids.extend(ids_in_page)
+ for video_id in ids_in_page:
+ yield self.url_result(
+ video_id, 'Youtube', video_id=video_id)
if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
break
- self._downloader.to_screen('[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
-
- url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
- for video_id in video_ids]
- return self.playlist_result(url_entries, channel_id)
+ return self.playlist_result(_entries(), channel_id)
class YoutubeUserIE(InfoExtractor):