[youtube:feed] Implement lazy playlist extraction (closes #10184)

author: Sergey M․ <dstftw@gmail.com> 2018-04-22 06:07:32 +0700
committer: Sergey M․ <dstftw@gmail.com> 2018-04-22 06:07:32 +0700
commit: 3853309fe238bb709b7c5db261724c33b48a8693 (patch)
tree: a629d2b5884039eb601a2f76a48f7928bf26b0f2
parent: 6cdaaf703149f1d6f1d24cfdb5a538ca41d08a26 (diff)
1 file changed, 10 insertions, 6 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 617be8e96..e9965509c 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2699,10 +2699,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
     def _real_initialize(self):
         self._login()
 
-    def _real_extract(self, url):
-        page = self._download_webpage(
-            'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
-
+    def _entries(self, page):
         # The extraction process is the same as for playlists, but the regex
         # for the video ids doesn't contain an index
         ids = []
@@ -2713,12 +2710,15 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
             # 'recommended' feed has infinite 'load more' and each new portion spins
             # the same videos in (sometimes) slightly different order, so we'll check
             # for unicity and break when portion has no new videos
-            new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
+            new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
             if not new_ids:
                 break
 
             ids.extend(new_ids)
 
+            for entry in self._ids_to_results(new_ids):
+                yield entry
+
             mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
             if not mobj:
                 break
@@ -2730,8 +2730,12 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
             content_html = more['content_html']
             more_widget_html = more['load_more_widget_html']
 
+    def _real_extract(self, url):
+        page = self._download_webpage(
+            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
+            self._PLAYLIST_TITLE)
         return self.playlist_result(
-            self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
+            self._entries(page), playlist_title=self._PLAYLIST_TITLE)
 
 
 class YoutubeWatchLaterIE(YoutubePlaylistIE):
author	Sergey M․ <dstftw@gmail.com>	2018-04-22 06:07:32 +0700
committer	Sergey M․ <dstftw@gmail.com>	2018-04-22 06:07:32 +0700
commit	3853309fe238bb709b7c5db261724c33b48a8693 (patch)
tree	a629d2b5884039eb601a2f76a48f7928bf26b0f2
parent	6cdaaf703149f1d6f1d24cfdb5a538ca41d08a26 (diff)