diff options
-rw-r--r-- | test/helper.py | 2 | ||||
-rw-r--r-- | test/test_download.py | 4 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/eroprofile.py | 45 | ||||
-rw-r--r-- | youtube_dl/extractor/sunporno.py | 18 |
5 files changed, 60 insertions, 10 deletions
diff --git a/test/helper.py b/test/helper.py index 8a820526a..96d58b7c1 100644 --- a/test/helper.py +++ b/test/helper.py @@ -99,7 +99,7 @@ def gettestcases(include_onlymatching=False): md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest() -def expect_info_dict(self, expected_dict, got_dict): +def expect_info_dict(self, got_dict, expected_dict): for info_field, expected in expected_dict.items(): if isinstance(expected, compat_str) and expected.startswith('re:'): got = got_dict.get(info_field) diff --git a/test/test_download.py b/test/test_download.py index a009aa475..412f3dbce 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -155,7 +155,7 @@ def generator(test_case): if is_playlist: self.assertEqual(res_dict['_type'], 'playlist') self.assertTrue('entries' in res_dict) - expect_info_dict(self, test_case.get('info_dict', {}), res_dict) + expect_info_dict(self, res_dict, test_case.get('info_dict', {})) if 'playlist_mincount' in test_case: assertGreaterEqual( @@ -204,7 +204,7 @@ def generator(test_case): with io.open(info_json_fn, encoding='utf-8') as infof: info_dict = json.load(infof) - expect_info_dict(self, tc.get('info_dict', {}), info_dict) + expect_info_dict(self, info_dict, tc.get('info_dict', {})) finally: try_rm_tcs_files() if is_playlist and res_dict is not None and res_dict.get('entries'): diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3fd0240c3..32d616397 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -109,6 +109,7 @@ from .elpais import ElPaisIE from .empflix import EMPFlixIE from .engadget import EngadgetIE from .eporner import EpornerIE +from .eroprofile import EroProfileIE from .escapist import EscapistIE from .everyonesmixtape import EveryonesMixtapeIE from .exfm import ExfmIE diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py new file mode 100644 index 000000000..79e2fbd39 --- /dev/null +++ b/youtube_dl/extractor/eroprofile.py @@ -0,0 +1,45 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class EroProfileIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)' + _TEST = { + 'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore', + 'md5': 'c26f351332edf23e1ea28ce9ec9de32f', + 'info_dict': { + 'id': '3733775', + 'display_id': 'sexy-babe-softcore', + 'ext': 'm4v', + 'title': 'sexy babe softcore', + 'thumbnail': 're:https?://.*\.jpg', + 'age_limit': 18, + } + } + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + video_id = self._search_regex( + [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'], + webpage, 'video id', default=None) + + video_url = self._search_regex( + r'<source src="([^"]+)', webpage, 'video url') + title = self._html_search_regex( + r'Title:</th><td>([^<]+)</td>', webpage, 'title') + thumbnail = self._search_regex( + r'onclick="showVideoPlayer\(\)"><img src="([^"]+)', + webpage, 'thumbnail', fatal=False) + + return { + 'id': video_id, + 'display_id': display_id, + 'url': video_url, + 'title': title, + 'thumbnail': thumbnail, + 'age_limit': 18, + } diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py index 263f09b46..8a333f1d2 100644 --- a/youtube_dl/extractor/sunporno.py +++ b/youtube_dl/extractor/sunporno.py @@ -28,23 +28,27 @@ class SunPornoIE(InfoExtractor): } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') + video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title') - description = self._html_search_meta('description', webpage, 'description') + title = self._html_search_regex( + r'<title>([^<]+)</title>', webpage, 'title') + description = self._html_search_meta( + 'description', webpage, 'description') thumbnail = self._html_search_regex( r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False) duration = parse_duration(self._search_regex( - r'Duration:\s*(\d+:\d+)\s*<', webpage, 'duration', fatal=False)) + r'itemprop="duration">\s*(\d+:\d+)\s*<', + webpage, 'duration', fatal=False)) view_count = int_or_none(self._html_search_regex( - r'class="views">\s*(\d+)\s*<', webpage, 'view count', fatal=False)) + r'class="views">\s*(\d+)\s*<', + webpage, 'view count', fatal=False)) comment_count = int_or_none(self._html_search_regex( - r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False)) + r'(\d+)</b> Comments?', + webpage, 'comment count', fatal=False)) formats = [] quality = qualities(['mp4', 'flv']) |