aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2019-06-22 05:34:46 +0700
committer: Sergey M․ <dstftw@gmail.com> 2019-06-22 05:34:46 +0700
commit: 21b08463a777a79876721e49d3d07a19bc3fe05e (patch)
tree: 3e438d789905771d103d3e84be672b186cb9f50b
parent: 31ce6e996666e7512990da01ef58785933dcb2be (diff)
download: youtube-dl-21b08463a777a79876721e49d3d07a19bc3fe05e.tar.xz
[pornhub] Rework extractors (closes #11922, closes #16078, closes #17454, closes #17936)
-rw-r--r-- youtube_dl/extractor/pornhub.py 155
1 files changed, 131 insertions, 24 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index cb59d526f..72c351d56 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -387,18 +387,82 @@ class PornHubPlaylistIE(PornHubPlaylistBaseIE):
}]
-class PornHubUserVideosIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
+class PornHubUserIE(PornHubPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))'
_TESTS = [{
- 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
+ 'url': 'https://www.pornhub.com/model/zoe_ph',
+ 'playlist_mincount': 118,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious',
'info_dict': {
- 'id': 'zoe_ph',
+ 'id': 'liz-vicious',
},
- 'playlist_mincount': 171,
+ 'playlist_mincount': 118,
+ }, {
+ 'url': 'https://www.pornhub.com/users/russianveet69',
+ 'playlist_mincount': 0,
+ }, {
+ 'url': 'https://www.pornhub.com/channels/povd',
+ 'playlist_mincount': 0,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if PornHubUserVideosIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
+ else super(PornHubUserIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ user_id = mobj.group('id')
+ return self.url_result(
+ '%s/videos' % mobj.group('url'), ie=PornHubUserVideosIE.ie_key(),
+ video_id=user_id)
+
+
+class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ user_id = mobj.group('id')
+
+ page_url = self._make_page_url(url)
+
+ entries = []
+ for page_num in itertools.count(1):
+ try:
+ webpage = self._download_webpage(
+ page_url, user_id, 'Downloading page %d' % page_num,
+ query={'page': page_num})
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+ break
+ raise
+ page_entries = self._extract_entries(webpage, host)
+ if not page_entries:
+ break
+ entries.extend(page_entries)
+ if not self._has_more(webpage):
+ break
+
+ return self.playlist_result(orderedSet(entries), user_id)
+
+
+class PornHubUserVideosIE(PornHubPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
+ 'only_matching': True,
}, {
'url': 'http://www.pornhub.com/users/rushandlia/videos',
'only_matching': True,
}, {
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
+ 'info_dict': {
+ 'id': 'jenny-blighe',
+ },
+ 'playlist_mincount': 149,
+ }, {
# default sorting as Top Rated Videos
'url': 'https://www.pornhub.com/channels/povd/videos',
'info_dict': {
@@ -426,26 +490,69 @@ class PornHubUserVideosIE(PornHubPlaylistBaseIE):
}, {
'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
'only_matching': True,
+ }, {
+ # Most Viewed Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
+ 'only_matching': True,
+ }, {
+ # Top Rated Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
+ 'only_matching': True,
+ }, {
+ # Longest Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
+ 'only_matching': True,
+ }, {
+ # Newest Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/upload',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
+ 'only_matching': True,
}]
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- host = mobj.group('host')
- user_id = mobj.group('id')
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if PornHubUserVideosUploadIE.suitable(url)
+ else super(PornHubUserVideosIE, cls).suitable(url))
+
+ def _make_page_url(self, url):
+ return url
+
+ @staticmethod
+ def _has_more(webpage):
+ return re.search(
+ r'''(?x)
+ <li[^>]+\bclass=["\']page_next|
+ <link[^>]+\brel=["\']next|
+ <button[^>]+\bid=["\']moreDataBtn
+ ''', webpage) is not None
- entries = []
- for page_num in itertools.count(1):
- try:
- webpage = self._download_webpage(
- url, user_id, 'Downloading page %d' % page_num,
- query={'page': page_num})
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
- break
- raise
- page_entries = self._extract_entries(webpage, host)
- if not page_entries:
- break
- entries.extend(page_entries)
- return self.playlist_result(entries, user_id)
+class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
+ 'info_dict': {
+ 'id': 'jenny-blighe',
+ },
+ 'playlist_mincount': 129,
+ }, {
+ 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
+ 'only_matching': True,
+ }]
+
+ def _make_page_url(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ return '%s/ajax' % mobj.group('url')
+
+ @staticmethod
+ def _has_more(webpage):
+ return True