diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-01-04 05:32:18 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-01-04 05:32:18 +0700 |
commit | 96d315c2be1c00339b4cb77e80b3d5f63770a735 (patch) | |
tree | 3c38b84a0b1d7164b75489c40c4f9405d9bd28d6 /youtube_dl | |
parent | 1911d77d28594db62d803e5e5e5e5fb7171adae5 (diff) |
[pornhub:playlist] Improve extraction (closes #11594)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/pornhub.py | 17 |
1 files changed, 12 insertions, 5 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 40dbe6967..3eaf56973 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -229,7 +229,14 @@ class PornHubPlaylistBaseIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) - entries = self._extract_entries(webpage) + # Only process container div with main playlist content skipping + # drop-down menu that uses similar pattern for videos (see + # https://github.com/rg3/youtube-dl/issues/11594). + container = self._search_regex( + r'(?s)(<div[^>]+class=["\']container.+)', webpage, + 'container', default=webpage) + + entries = self._extract_entries(container) playlist = self._parse_json( self._search_regex( @@ -243,12 +250,12 @@ class PornHubPlaylistBaseIE(InfoExtractor): class PornHubPlaylistIE(PornHubPlaylistBaseIE): _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)' _TESTS = [{ - 'url': 'http://www.pornhub.com/playlist/6201671', + 'url': 'http://www.pornhub.com/playlist/4667351', 'info_dict': { - 'id': '6201671', - 'title': 'P0p4', + 'id': '4667351', + 'title': 'Nataly Hot', }, - 'playlist_mincount': 35, + 'playlist_mincount': 2, }] |