diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-02-18 22:29:17 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-02-18 22:29:17 +0600 |
commit | 40e146aa1e1a8fd57d3f84b0a541174f56fa5dba (patch) | |
tree | 39c21294e36ce4e014cbf291deb29f7d9a80b83f | |
parent | f3f9cd9234aada1d677517e3042bd9312610081d (diff) |
[pornhub:user:videos] Add extractor (Closes #8548)
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/pornhub.py | 53 |
2 files changed, 39 insertions, 15 deletions
```diff
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 4049141d9..1edbfbd28 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -555,6 +555,7 @@ from .pornhd import PornHdIE
 from .pornhub import (
     PornHubIE,
     PornHubPlaylistIE,
+    PornHubUserVideosIE,
 )
 from .pornotube import PornotubeIE
 from .pornovoisines import PornoVoisinesIE
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 91e574dc2..405dbf006 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -129,27 +129,20 @@ class PornHubIE(InfoExtractor):
         }
 
 
-class PornHubPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
-    _TESTS = [{
-        'url': 'http://www.pornhub.com/playlist/6201671',
-        'info_dict': {
-            'id': '6201671',
-            'title': 'P0p4',
-        },
-        'playlist_mincount': 35,
-    }]
+class PornHubPlaylistBaseIE(InfoExtractor):
+    def _extract_entries(self, webpage):
+        return [
+            self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
+            for video_url in set(re.findall(
+                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage))
+        ]
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
 
-        entries = [
-            self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
-            for video_url in set(re.findall(
-                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage))
-        ]
+        entries = self._extract_entries(webpage)
 
         playlist = self._parse_json(
             self._search_regex(
@@ -158,3 +151,33 @@ class PornHubPlaylistIE(InfoExtractor):
 
         return self.playlist_result(
             entries, playlist_id, playlist.get('title'), playlist.get('description'))
+
+
+class PornHubPlaylistIE(PornHubPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/playlist/6201671',
+        'info_dict': {
+            'id': '6201671',
+            'title': 'P0p4',
+        },
+        'playlist_mincount': 35,
+    }]
+
+
+class PornHubUserVideosIE(PornHubPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos'
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/users/rushandlia/videos',
+        'info_dict': {
+            'id': 'rushandlia',
+        },
+        'playlist_mincount': 13,
+    }]
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, user_id)
+
+        return self.playlist_result(self._extract_entries(webpage), user_id)
```