[pornhub] Apply scrape detection bypass for all extractors

author: Sergey M․ <dstftw@gmail.com> 2019-01-23 04:12:06 +0700
committer: Sergey M․ <dstftw@gmail.com> 2019-01-23 04:12:06 +0700
commit: 71a1f61700789fb0d61fc6ad9681b6f0899d2f51 (patch)
tree: 0b63ed4378dfeff18effd359a13a509ad49f8a84 /youtube_dl/extractor
parent: 6510a3aa971c00525969040ad654249c0c73f125 (diff)
1 file changed, 24 insertions, 22 deletions
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index f5f3e6593..be93d5d48 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -24,7 +24,29 @@ from ..utils import (
 )
 
 
-class PornHubIE(InfoExtractor):
+class PornHubBaseIE(InfoExtractor):  # shared base: subclasses inherit the scrape-detection retry below
+    def _download_webpage_handle(self, *args, **kwargs):  # same call signature as the parent method it overrides
+        def dl(*args, **kwargs):  # helper so the parent download can be invoked twice with identical arguments
+            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
+
+        webpage, urlh = dl(*args, **kwargs)  # first attempt; NOTE(review): assumes the parent returns a (webpage, urlh) pair - confirm what it returns on a non-fatal failure
+
+        if any(re.search(p, webpage) for p in (  # true when the page matches any of the marker patterns below
+                r'<body\b[^>]+\bonload=["\']go\(\)',  # <body> tag whose onload attribute calls go()
+                r'document\.cookie\s*=\s*["\']RNKEY=',  # script assigning an RNKEY cookie
+                r'document\.location\.reload\(true\)')):  # script forcing a page reload
+            url_or_request = args[0]  # first positional argument: either a URL or a Request object (see the isinstance check below)
+            url = (url_or_request.get_full_url()  # unwrap the URL when a Request object was passed
+                   if isinstance(url_or_request, compat_urllib_request.Request)
+                   else url_or_request)
+            phantom = PhantomJSwrapper(self, required_version='2.0')  # asks the wrapper for PhantomJS version 2.0
+            phantom.get(url, html=webpage)  # NOTE(review): presumably runs the page's JS so the needed cookies get set - confirm in PhantomJSwrapper
+            webpage, urlh = dl(*args, **kwargs)  # second attempt, made after the PhantomJS pass
+
+        return webpage, urlh  # result of the first attempt, or of the retry when a marker matched
+
+
+class PornHubIE(PornHubBaseIE):
     IE_DESC = 'PornHub and Thumbzilla'
     _VALID_URL = r'''(?x)
                     https?://
@@ -128,26 +150,6 @@ class PornHubIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    def _download_webpage_handle(self, *args, **kwargs):
-        def dl(*args, **kwargs):
-            return super(PornHubIE, self)._download_webpage_handle(*args, **kwargs)
-
-        webpage, urlh = dl(*args, **kwargs)
-
-        if any(re.search(p, webpage) for p in (
-                r'<body\b[^>]+\bonload=["\']go\(\)',
-                r'document\.cookie\s*=\s*["\']RNKEY=',
-                r'document\.location\.reload\(true\)')):
-            url_or_request = args[0]
-            url = (url_or_request.get_full_url()
-                   if isinstance(url_or_request, compat_urllib_request.Request)
-                   else url_or_request)
-            phantom = PhantomJSwrapper(self, required_version='2.0')
-            phantom.get(url, html=webpage)
-            webpage, urlh = dl(*args, **kwargs)
-
-        return webpage, urlh
-
     @staticmethod
     def _extract_urls(webpage):
         return re.findall(
@@ -329,7 +331,7 @@ class PornHubIE(InfoExtractor):
         }
 
 
-class PornHubPlaylistBaseIE(InfoExtractor):
+class PornHubPlaylistBaseIE(PornHubBaseIE):
     def _extract_entries(self, webpage, host):
         # Only process container div with main playlist content skipping
         # drop-down menu that uses similar pattern for videos (see
author	Sergey M․ <dstftw@gmail.com>	2019-01-23 04:12:06 +0700
committer	Sergey M․ <dstftw@gmail.com>	2019-01-23 04:12:06 +0700
commit	71a1f61700789fb0d61fc6ad9681b6f0899d2f51 (patch)
tree	0b63ed4378dfeff18effd359a13a509ad49f8a84 /youtube_dl/extractor
parent	6510a3aa971c00525969040ad654249c0c73f125 (diff)