diff options
Diffstat (limited to 'youtube_dl/extractor/slutload.py')
-rw-r--r-- | youtube_dl/extractor/slutload.py | 57 |
1 file changed, 29 insertions, 28 deletions
diff --git a/youtube_dl/extractor/slutload.py b/youtube_dl/extractor/slutload.py
index 095adfc15..ecc0abfda 100644
--- a/youtube_dl/extractor/slutload.py
+++ b/youtube_dl/extractor/slutload.py
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 import re
 
 from .common import InfoExtractor
@@ -5,42 +7,41 @@ from ..utils import (
     ExtractorError,
 )
 
+
 class SlutloadIE(InfoExtractor):
-    _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<videoid>[^/]+)/?$'
+    _VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$'
     _TEST = {
-        u'url': u'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
-        u'file': u'TD73btpBqSxc.mp4',
-        u'md5': u'0cf531ae8006b530bd9df947a6a0df77',
-        u'info_dict': {
-            u"title": u"virginie baisee en cam",
-            u"age_limit": 18,
+        'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
+        'md5': '0cf531ae8006b530bd9df947a6a0df77',
+        'info_dict': {
+            'id': 'TD73btpBqSxc',
+            'ext': 'mp4',
+            "title": "virginie baisee en cam",
+            "age_limit": 18,
+            'thumbnail': 're:https?://.*?\.jpg'
         }
     }
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
 
-        video_id = mobj.group('videoid')
-
-        # Get webpage content
         webpage = self._download_webpage(url, video_id)
 
-        # Get the video title
         video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>',
-            webpage, u'title').strip()
-
-        # Get the video url
-        result = re.compile(r'<div id="vidPlayer"\s+data-url="([^"]+)"\s+previewer-file="([^"]+)"', re.S).search(webpage)
-        if result is None:
-            raise ExtractorError(u'ERROR: unable to extract video_url')
-
-        video_url, video_thumb = result.group(1,2)
-
-        info = {'id': video_id,
-                'url': video_url,
-                'title': video_title,
-                'thumbnail': video_thumb,
-                'ext': 'mp4',
-                'age_limit': 18}
-
-        return [info]
+            webpage, 'title').strip()
+
+        video_url = self._html_search_regex(
+            r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"',
+            webpage, 'video URL')
+        thumbnail = self._html_search_regex(
+            r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
+            webpage, 'thumbnail', fatal=False)
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': video_title,
+            'thumbnail': thumbnail,
+            'age_limit': 18
+        }