diff options
-rw-r--r-- | youtube_dl/extractor/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/extractor/beeg.py | 53 | ||||
-rw-r--r-- | youtube_dl/extractor/eporner.py | 55 | ||||
-rw-r--r-- | youtube_dl/utils.py | 1 |
4 files changed, 111 insertions, 0 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index f8cbfddc2..9f43bb8f4 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -24,6 +24,7 @@ from .auengine import AUEngineIE
 from .bambuser import BambuserIE, BambuserChannelIE
 from .bandcamp import BandcampIE, BandcampAlbumIE
 from .bbccouk import BBCCoUkIE
+from .beeg import BeegIE
 from .bilibili import BiliBiliIE
 from .blinkx import BlinkxIE
 from .bliptv import BlipTVIE, BlipTVUserIE
@@ -86,6 +87,7 @@ from .ellentv import (
 from .elpais import ElPaisIE
 from .empflix import EmpflixIE
 from .engadget import EngadgetIE
+from .eporner import EpornerIE
 from .escapist import EscapistIE
 from .everyonesmixtape import EveryonesMixtapeIE
 from .exfm import ExfmIE
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dl/extractor/beeg.py
new file mode 100644
index 000000000..c2692cfdc
--- /dev/null
+++ b/youtube_dl/extractor/beeg.py
@@ -0,0 +1,53 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class BeegIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)'
+    _TEST = {
+        'url': 'http://beeg.com/5416503',
+        'md5': '634526ae978711f6b748fe0dd6c11f57',
+        'info_dict': {
+            'id': '5416503',
+            'ext': 'mp4',
+            'title': 'Sultry Striptease',
+            'description': 'md5:6db3c6177972822aaba18652ff59c773',
+            'categories': list,  # NSFW
+            'thumbnail': r're:https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        video_url = self._html_search_regex(
+            r"'480p'\s*:\s*'([^']+)'", webpage, 'video URL')
+
+        title = self._html_search_regex(
+            r'<title>([^<]+)\s*-\s*beeg\.?</title>', webpage, 'title')
+
+        description = self._html_search_regex(
+            r'<meta name="description" content="([^"]*)"',
+            webpage, 'description', fatal=False)
+        thumbnail = self._html_search_regex(
+            r'\'previewer.url\'\s*:\s*"([^"]*)"',
+            webpage, 'thumbnail', fatal=False)
+
+        categories_str = self._html_search_regex(
+            r'<meta name="keywords" content="([^"]+)"', webpage, 'categories', fatal=False)
+        categories = categories_str.split(',') if categories_str else None  # keywords tag may be absent
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'categories': categories,
+        }
diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py
new file mode 100644
index 000000000..4c2c074cb
--- /dev/null
+++ b/youtube_dl/extractor/eporner.py
@@ -0,0 +1,55 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    str_to_int,
+)
+
+
+class EpornerIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?eporner\.com/hd-porn/(?P<id>\d+)/(?P<title_dash>[\w-]+)/?'
+    _TEST = {
+        'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
+        'md5': '3b427ae4b9d60619106de3185c2987cd',
+        'info_dict': {
+            'id': '95008',
+            'ext': 'flv',
+            'title': 'Infamous Tiffany Teen Strip Tease Video',
+            'duration': 194,
+            'view_count': int,
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_regex(
+            r'<title>(.*?) - EPORNER', webpage, 'title')
+
+        redirect_code = self._html_search_regex(
+            r'<script type="text/javascript" src="/config5/%s/([a-f\d]+)/">' % video_id,
+            webpage, 'redirect_code')
+        redirect_url = 'http://www.eporner.com/config5/%s/%s' % (video_id, redirect_code)
+        webpage2 = self._download_webpage(redirect_url, video_id)
+        video_url = self._html_search_regex(
+            r'file: "(.*?)",', webpage2, 'video_url')
+
+        duration = parse_duration(self._search_regex(
+            r'class="mbtim">([0-9:]+)</div>', webpage, 'duration',
+            fatal=False))
+        view_count = str_to_int(self._search_regex(
+            r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
+            webpage, 'view count', fatal=False))
+
+        return {
+            'id': video_id,
+            'url': video_url,
+            'title': title,
+            'duration': duration,
+            'view_count': view_count,
+        }
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e07750434..8a36e619a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1318,6 +1318,7 @@ def str_or_none(v, default=None):
 
 
 def str_to_int(int_str):
+    """ A more relaxed version of int_or_none """
     if int_str is None:
         return None
     int_str = re.sub(r'[,\.]', u'', int_str)