diff options
| -rw-r--r-- | youtube_dl/extractor/pornhub.py | 155 | 
1 files changed, 131 insertions, 24 deletions
| diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index cb59d526f..72c351d56 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -387,18 +387,82 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):      }] -class PornHubUserVideosIE(PornHubPlaylistBaseIE): -    _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos' +class PornHubUserIE(PornHubPlaylistBaseIE): +    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))'      _TESTS = [{ -        'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', +        'url': 'https://www.pornhub.com/model/zoe_ph', +        'playlist_mincount': 118, +    }, { +        'url': 'https://www.pornhub.com/pornstar/liz-vicious',          'info_dict': { -            'id': 'zoe_ph', +            'id': 'liz-vicious',          }, -        'playlist_mincount': 171, +        'playlist_mincount': 118, +    }, { +        'url': 'https://www.pornhub.com/users/russianveet69', +        'playlist_mincount': 0, +    }, { +        'url': 'https://www.pornhub.com/channels/povd', +        'playlist_mincount': 0, +    }] + +    @classmethod +    def suitable(cls, url): +        return (False +                if PornHubUserVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) +                else super(PornHubUserIE, cls).suitable(url)) + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        user_id = mobj.group('id') +        return self.url_result( +            '%s/videos' % mobj.group('url'), ie=PornHubUserVideosIE.ie_key(), +            video_id=user_id) + + +class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        host = mobj.group('host') +        user_id = mobj.group('id') + +        page_url = self._make_page_url(url) + +        entries = [] +        for page_num in itertools.count(1): +            try: +                webpage = self._download_webpage( +                    page_url, user_id, 'Downloading page %d' % page_num, +                    query={'page': page_num}) +            except ExtractorError as e: +                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: +                    break +                raise +            page_entries = self._extract_entries(webpage, host) +            if not page_entries: +                break +            entries.extend(page_entries) +            if not self._has_more(webpage): +                break + +        return self.playlist_result(orderedSet(entries), user_id) + + +class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE): +    _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos' +    _TESTS = [{ +        'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', +        'only_matching': True,      }, {          'url': 'http://www.pornhub.com/users/rushandlia/videos',          'only_matching': True,      }, { +        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', +        'info_dict': { +            'id': 'jenny-blighe', +        }, +        'playlist_mincount': 149, +    }, {          # default sorting as Top Rated Videos          'url': 'https://www.pornhub.com/channels/povd/videos',          'info_dict': { @@ -426,26 +490,69 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):      }, {          'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',          'only_matching': True, +    }, { +        # Most Viewed Videos +        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', +        'only_matching': True, +    }, { +        # Top Rated Videos +        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr', +        'only_matching': True, +    }, { +        # Longest Videos +        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg', +        'only_matching': True, +    }, { +        # Newest Videos +        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', +        'only_matching': True, +    }, { +        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/upload', +        'only_matching': True, +    }, { +        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', +        'only_matching': True, +    }, { +        'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', +        'only_matching': True,      }] -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        host = mobj.group('host') -        user_id = mobj.group('id') +    @classmethod +    def suitable(cls, url): +        return (False +                if PornHubUserVideosUploadIE.suitable(url) +                else super(PornHubUserVideosIE, cls).suitable(url)) + +    def _make_page_url(self, url): +        return url + +    @staticmethod +    def _has_more(webpage): +        return re.search( +            r'''(?x) +                <li[^>]+\bclass=["\']page_next| +                <link[^>]+\brel=["\']next| +                <button[^>]+\bid=["\']moreDataBtn +            ''', webpage) is not None -        entries = [] -        for page_num in itertools.count(1): -            try: -                webpage = self._download_webpage( -                    url, user_id, 'Downloading page %d' % page_num, -                    query={'page': page_num}) -            except ExtractorError as e: -                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: -                    break -                raise -            page_entries = self._extract_entries(webpage, host) -            if not page_entries: -                break -            entries.extend(page_entries) -        return self.playlist_result(entries, user_id) +class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): +    _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' +    _TESTS = [{ +        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', +        'info_dict': { +            'id': 'jenny-blighe', +        }, +        'playlist_mincount': 129, +    }, { +        'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', +        'only_matching': True, +    }] + +    def _make_page_url(self, url): +        mobj = re.match(self._VALID_URL, url) +        return '%s/ajax' % mobj.group('url') + +    @staticmethod +    def _has_more(webpage): +        return True | 
