aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2019-01-23 03:51:29 +0700
committer Sergey M․ <dstftw@gmail.com> 2019-01-23 03:51:29 +0700
commit 278d061a0c5eae20963c0a6df4b9b13fd1537186 (patch)
tree c85679efac405ec39ea5944993fce07c67ad4c6d
parent 503b604a316837b9dd6ef32045e4e9bbfb6a1363 (diff)
[pornhub] Bypass scrape detection (closes #5930)
-rw-r--r-- youtube_dl/extractor/pornhub.py | 22
1 files changed, 22 insertions, 0 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index e377de196..f5f3e6593 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -10,7 +10,9 @@ from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
+ compat_urllib_request,
)
+from .openload import PhantomJSwrapper
from ..utils import (
ExtractorError,
int_or_none,
@@ -126,6 +128,26 @@ class PornHubIE(InfoExtractor):
'only_matching': True,
}]
+ def _download_webpage_handle(self, *args, **kwargs):
+ def dl(*args, **kwargs):
+ return super(PornHubIE, self)._download_webpage_handle(*args, **kwargs)
+
+ webpage, urlh = dl(*args, **kwargs)
+
+ if any(re.search(p, webpage) for p in (
+ r'<body\b[^>]+\bonload=["\']go\(\)',
+ r'document\.cookie\s*=\s*["\']RNKEY=',
+ r'document\.location\.reload\(true\)')):
+ url_or_request = args[0]
+ url = (url_or_request.get_full_url()
+ if isinstance(url_or_request, compat_urllib_request.Request)
+ else url_or_request)
+ phantom = PhantomJSwrapper(self, required_version='2.0')
+ phantom.get(url, html=webpage)
+ webpage, urlh = dl(*args, **kwargs)
+
+ return webpage, urlh
+
@staticmethod
def _extract_urls(webpage):
return re.findall(