diff options
author | Sergey M․ <dstftw@gmail.com> | 2021-02-04 04:36:57 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2021-02-04 04:42:14 +0700 |
commit | 89c5a7d5aabd138a14c76453d79d5d66ef573bde (patch) | |
tree | 66c02f3f699d8d267b4235810c21046ceb6dce8c | |
parent | 2adc0c51cdf38e039fba0ede11f65bbd9c71bde8 (diff) |
[pornhub] Implement lazy playlist extraction
-rw-r--r-- | youtube_dl/extractor/pornhub.py | 21 |
1 file changed, 11 insertions, 10 deletions
```diff
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 83773aebb..b7631e4e1 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -547,13 +547,7 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
                 <button[^>]+\bid=["\']moreDataBtn
             ''', webpage) is not None
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        host = mobj.group('host')
-        item_id = mobj.group('id')
-
-        self._login(host)
-
+    def _entries(self, url, host, item_id):
         page = self._extract_page(url)
 
         VIDEOS = '/videos'
@@ -566,7 +560,6 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
         def is_404(e):
             return isinstance(e.cause, compat_HTTPError) and e.cause.code == 404
 
-        entries = []
         base_url = url
         has_page = page is not None
         first_page = page if has_page else 1
@@ -590,11 +583,19 @@ class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
             page_entries = self._extract_entries(webpage, host)
             if not page_entries:
                 break
-            entries.extend(page_entries)
+            for e in page_entries:
+                yield e
             if not self._has_more(webpage):
                 break
 
-        return self.playlist_result(orderedSet(entries), item_id)
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
+        item_id = mobj.group('id')
+
+        self._login(host)
+
+        return self.playlist_result(self._entries(url, host, item_id), item_id)
 
 
 class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
```