aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/helper.py2
-rw-r--r--test/test_download.py4
-rw-r--r--youtube_dl/extractor/__init__.py1
-rw-r--r--youtube_dl/extractor/eroprofile.py45
-rw-r--r--youtube_dl/extractor/sunporno.py18
5 files changed, 60 insertions, 10 deletions
diff --git a/test/helper.py b/test/helper.py
index 8a820526a..96d58b7c1 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -99,7 +99,7 @@ def gettestcases(include_onlymatching=False):
md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()
-def expect_info_dict(self, expected_dict, got_dict):
+def expect_info_dict(self, got_dict, expected_dict):
for info_field, expected in expected_dict.items():
if isinstance(expected, compat_str) and expected.startswith('re:'):
got = got_dict.get(info_field)
diff --git a/test/test_download.py b/test/test_download.py
index a009aa475..412f3dbce 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -155,7 +155,7 @@ def generator(test_case):
if is_playlist:
self.assertEqual(res_dict['_type'], 'playlist')
self.assertTrue('entries' in res_dict)
- expect_info_dict(self, test_case.get('info_dict', {}), res_dict)
+ expect_info_dict(self, res_dict, test_case.get('info_dict', {}))
if 'playlist_mincount' in test_case:
assertGreaterEqual(
@@ -204,7 +204,7 @@ def generator(test_case):
with io.open(info_json_fn, encoding='utf-8') as infof:
info_dict = json.load(infof)
- expect_info_dict(self, tc.get('info_dict', {}), info_dict)
+ expect_info_dict(self, info_dict, tc.get('info_dict', {}))
finally:
try_rm_tcs_files()
if is_playlist and res_dict is not None and res_dict.get('entries'):
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 3fd0240c3..32d616397 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -109,6 +109,7 @@ from .elpais import ElPaisIE
from .empflix import EMPFlixIE
from .engadget import EngadgetIE
from .eporner import EpornerIE
+from .eroprofile import EroProfileIE
from .escapist import EscapistIE
from .everyonesmixtape import EveryonesMixtapeIE
from .exfm import ExfmIE
diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dl/extractor/eroprofile.py
new file mode 100644
index 000000000..79e2fbd39
--- /dev/null
+++ b/youtube_dl/extractor/eroprofile.py
@@ -0,0 +1,45 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class EroProfileIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?eroprofile\.com/m/videos/view/(?P<id>[^/]+)'
+ _TEST = {
+ 'url': 'http://www.eroprofile.com/m/videos/view/sexy-babe-softcore',
+ 'md5': 'c26f351332edf23e1ea28ce9ec9de32f',
+ 'info_dict': {
+ 'id': '3733775',
+ 'display_id': 'sexy-babe-softcore',
+ 'ext': 'm4v',
+ 'title': 'sexy babe softcore',
+ 'thumbnail': 're:https?://.*\.jpg',
+ 'age_limit': 18,
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ video_id = self._search_regex(
+ [r"glbUpdViews\s*\('\d*','(\d+)'", r'p/report/video/(\d+)'],
+ webpage, 'video id', default=None)
+
+ video_url = self._search_regex(
+ r'<source src="([^"]+)', webpage, 'video url')
+ title = self._html_search_regex(
+ r'Title:</th><td>([^<]+)</td>', webpage, 'title')
+ thumbnail = self._search_regex(
+ r'onclick="showVideoPlayer\(\)"><img src="([^"]+)',
+ webpage, 'thumbnail', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'age_limit': 18,
+ }
diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dl/extractor/sunporno.py
index 263f09b46..8a333f1d2 100644
--- a/youtube_dl/extractor/sunporno.py
+++ b/youtube_dl/extractor/sunporno.py
@@ -28,23 +28,27 @@ class SunPornoIE(InfoExtractor):
}
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
+ video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
- description = self._html_search_meta('description', webpage, 'description')
+ title = self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title')
+ description = self._html_search_meta(
+ 'description', webpage, 'description')
thumbnail = self._html_search_regex(
r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
duration = parse_duration(self._search_regex(
- r'Duration:\s*(\d+:\d+)\s*<', webpage, 'duration', fatal=False))
+ r'itemprop="duration">\s*(\d+:\d+)\s*<',
+ webpage, 'duration', fatal=False))
view_count = int_or_none(self._html_search_regex(
- r'class="views">\s*(\d+)\s*<', webpage, 'view count', fatal=False))
+ r'class="views">\s*(\d+)\s*<',
+ webpage, 'view count', fatal=False))
comment_count = int_or_none(self._html_search_regex(
- r'(\d+)</b> Comments?', webpage, 'comment count', fatal=False))
+ r'(\d+)</b> Comments?',
+ webpage, 'comment count', fatal=False))
formats = []
quality = qualities(['mp4', 'flv'])