aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/pornhub.py
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2017-01-04 05:32:18 +0700
committerSergey M․ <dstftw@gmail.com>2017-01-04 05:32:18 +0700
commit96d315c2be1c00339b4cb77e80b3d5f63770a735 (patch)
tree3c38b84a0b1d7164b75489c40c4f9405d9bd28d6 /youtube_dl/extractor/pornhub.py
parent1911d77d28594db62d803e5e5e5e5fb7171adae5 (diff)
[pornhub:playlist] Improve extraction (closes #11594)
Diffstat (limited to 'youtube_dl/extractor/pornhub.py')
-rw-r--r--youtube_dl/extractor/pornhub.py17
1 files changed, 12 insertions, 5 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 40dbe6967..3eaf56973 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -229,7 +229,14 @@ class PornHubPlaylistBaseIE(InfoExtractor):
webpage = self._download_webpage(url, playlist_id)
- entries = self._extract_entries(webpage)
+ # Only process container div with main playlist content skipping
+ # drop-down menu that uses similar pattern for videos (see
+ # https://github.com/rg3/youtube-dl/issues/11594).
+ container = self._search_regex(
+ r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+ 'container', default=webpage)
+
+ entries = self._extract_entries(container)
playlist = self._parse_json(
self._search_regex(
@@ -243,12 +250,12 @@ class PornHubPlaylistBaseIE(InfoExtractor):
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
_VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
_TESTS = [{
- 'url': 'http://www.pornhub.com/playlist/6201671',
+ 'url': 'http://www.pornhub.com/playlist/4667351',
'info_dict': {
- 'id': '6201671',
- 'title': 'P0p4',
+ 'id': '4667351',
+ 'title': 'Nataly Hot',
},
- 'playlist_mincount': 35,
+ 'playlist_mincount': 2,
}]