about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2021-02-04 04:36:57 +0700
committer Sergey M․ <dstftw@gmail.com> 2021-02-04 04:42:14 +0700
commit 89c5a7d5aabd138a14c76453d79d5d66ef573bde (patch)
tree 66c02f3f699d8d267b4235810c21046ceb6dce8c
parent 2adc0c51cdf38e039fba0ede11f65bbd9c71bde8 (diff)
[pornhub] Implement lazy playlist extraction
-rw-r--r-- youtube_dl/extractor/pornhub.py 21
1 files changed, 11 insertions, 10 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 83773aebb..b7631e4e1 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -547,13 +547,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
<button[^>]+\bid=["\']moreDataBtn
''', webpage) is not None
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- host = mobj.group('host')
- item_id = mobj.group('id')
-
- self._login(host)
-
+ def _entries(self, url, host, item_id):
page = self._extract_page(url)
VIDEOS = '/videos'
@@ -566,7 +560,6 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
def is_404(e):
return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
- entries = []
base_url = url
has_page = page is not None
first_page = page if has_page else 1
@@ -590,11 +583,19 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
page_entries = self._extract_entries(webpage, host)
if not page_entries:
break
- entries.extend(page_entries)
+ for e in page_entries:
+ yield e
if not self._has_more(webpage):
break
- return self.playlist_result(orderedSet(entries), item_id)
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ item_id = mobj.group('id')
+
+ self._login(host)
+
+ return self.playlist_result(self._entries(url, host, item_id), item_id)
class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):