diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-02-18 22:29:17 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-02-18 22:29:17 +0600 | 
| commit | 40e146aa1e1a8fd57d3f84b0a541174f56fa5dba (patch) | |
| tree | 39c21294e36ce4e014cbf291deb29f7d9a80b83f | |
| parent | f3f9cd9234aada1d677517e3042bd9312610081d (diff) | |
[pornhub:user:videos] Add extractor (Closes #8548)
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 |
| -rw-r--r-- | youtube_dl/extractor/pornhub.py | 53 |
2 files changed, 39 insertions, 15 deletions
```diff
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 4049141d9..1edbfbd28 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -555,6 +555,7 @@ from .pornhd import PornHdIE
 from .pornhub import (
     PornHubIE,
     PornHubPlaylistIE,
+    PornHubUserVideosIE,
 )
 from .pornotube import PornotubeIE
 from .pornovoisines import PornoVoisinesIE
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 91e574dc2..405dbf006 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -129,27 +129,20 @@ class PornHubIE(InfoExtractor):
         }
 
 
-class PornHubPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
-    _TESTS = [{
-        'url': 'http://www.pornhub.com/playlist/6201671',
-        'info_dict': {
-            'id': '6201671',
-            'title': 'P0p4',
-        },
-        'playlist_mincount': 35,
-    }]
+class PornHubPlaylistBaseIE(InfoExtractor):
+    def _extract_entries(self, webpage):
+        return [
+            self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
+            for video_url in set(re.findall(
+                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage))
+        ]
 
     def _real_extract(self, url):
         playlist_id = self._match_id(url)
 
         webpage = self._download_webpage(url, playlist_id)
 
-        entries = [
-            self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub')
-            for video_url in set(re.findall(
-                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"', webpage))
-        ]
+        entries = self._extract_entries(webpage)
 
         playlist = self._parse_json(
             self._search_regex(
@@ -158,3 +151,33 @@ class PornHubPlaylistIE(InfoExtractor):
 
         return self.playlist_result(
             entries, playlist_id, playlist.get('title'), playlist.get('description'))
+
+
+class PornHubPlaylistIE(PornHubPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/playlist/6201671',
+        'info_dict': {
+            'id': '6201671',
+            'title': 'P0p4',
+        },
+        'playlist_mincount': 35,
+    }]
+
+
+class PornHubUserVideosIE(PornHubPlaylistBaseIE):
+    _VALID_URL = r'https?://(?:www\.)?pornhub\.com/users/(?P<id>[^/]+)/videos'
+    _TESTS = [{
+        'url': 'http://www.pornhub.com/users/rushandlia/videos',
+        'info_dict': {
+            'id': 'rushandlia',
+        },
+        'playlist_mincount': 13,
+    }]
+
+    def _real_extract(self, url):
+        user_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, user_id)
+
+        return self.playlist_result(self._extract_entries(webpage), user_id)
```
