about | summary | refs | log | tree | commit | diff
path: root/youtube_dl
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2016-11-06 21:52:00 +0700
committer: Sergey M․ <dstftw@gmail.com> 2016-11-06 21:52:00 +0700
commit: b52c9ef1655042688a4822d241af398592b951f9 (patch)
tree: c48be224b93ba02740406ee6db8f0f1ce239e060 /youtube_dl
parent: e28ed498e64545f02f2d3dbccf97ecf0e47aa82a (diff)
download: youtube-dl-b52c9ef1655042688a4822d241af398592b951f9.tar.xz
[extractor/generic] Improve support for pornhub embeds (closes #11100)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- youtube_dl/extractor/generic.py 10
-rw-r--r-- youtube_dl/extractor/pornhub.py 13
2 files changed, 11 insertions, 12 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index b1315a9c8..bde65fa27 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1983,11 +1983,6 @@ class GenericIE(InfoExtractor):
if sportbox_urls:
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
- # Look for embedded PornHub player
- pornhub_url = PornHubIE._extract_url(webpage)
- if pornhub_url:
- return self.url_result(pornhub_url, 'PornHub')
-
# Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if xhamster_urls:
@@ -1998,6 +1993,11 @@ class GenericIE(InfoExtractor):
if tnaflix_urls:
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
+ # Look for embedded PornHub player
+ pornhub_urls = PornHubIE._extract_urls(webpage)
+ if pornhub_urls:
+ return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
+
# Look for embedded DrTuber player
drtuber_urls = DrTuberIE._extract_urls(webpage)
if drtuber_urls:
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
index 0724efc09..40dbe6967 100644
--- a/youtube_dl/extractor/pornhub.py
+++ b/youtube_dl/extractor/pornhub.py
@@ -33,7 +33,7 @@ class PornHubIE(InfoExtractor):
(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)|
(?:www\.)?thumbzilla\.com/video/
)
- (?P<id>[0-9a-z]+)
+ (?P<id>[\da-z]+)
'''
_TESTS = [{
'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
@@ -96,12 +96,11 @@ class PornHubIE(InfoExtractor):
'only_matching': True,
}]
- @classmethod
- def _extract_url(cls, webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/\d+)\1', webpage)
- if mobj:
- return mobj.group('url')
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.com/embed/[\da-z]+)',
+ webpage)
def _extract_count(self, pattern, webpage, name):
return str_to_int(self._search_regex(