about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2015-04-04 19:14:01 +0600
committer Sergey M․ <dstftw@gmail.com> 2015-04-04 19:14:01 +0600
commit 3fde134791fa920c98a99610c2200a6b3572cca0 (patch)
tree 21d7a70a9c8e6d23a30438177bf4fe320a1480db
parent 536b94e56fdf0fd2dd1c6d6d92112866949505c4 (diff)
parent 7c39a65543b809b681434246b84710349f5837aa (diff)
download youtube-dl-3fde134791fa920c98a99610c2200a6b3572cca0.tar.xz
Merge branch 'Roman2K-pornovoisines'
-rw-r--r-- test/test_utils.py 1
-rw-r--r-- youtube_dl/extractor/__init__.py 1
-rw-r--r-- youtube_dl/extractor/pornovoisines.py 96
-rw-r--r-- youtube_dl/utils.py 5
4 files changed, 102 insertions, 1 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index 4e524aca3..2e3a6480c 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -227,6 +227,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(
unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
'20150202')
+ self.assertEqual(unified_strdate('25-09-2014'), '20140925')
def test_find_xpath_attr(self):
testxml = '''<root>
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index c05a43641..0f7d44616 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -388,6 +388,7 @@ from .pornhub import (
PornHubPlaylistIE,
)
from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
from .pornoxo import PornoXOIE
from .primesharetv import PrimeShareTVIE
from .promptfile import PromptFileIE
diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py
new file mode 100644
index 000000000..9688ed948
--- /dev/null
+++ b/youtube_dl/extractor/pornovoisines.py
@@ -0,0 +1,96 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import random
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ unified_strdate,
+)
+
+
+class PornoVoisinesIE(InfoExtractor):
+    """Information extractor for pornovoisines.com ``showvideo`` pages."""
+    _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)'
+    _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \
+        '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4'
+    _SERVER_NUMBERS = (1, 2)
+
+    _TEST = {
+        'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/',
+        'md5': '5ac670803bc12e9e7f9f662ce64cf1d1',
+        'info_dict': {
+            'id': '1285',
+            'display_id': 'recherche-appartement',
+            'ext': 'mp4',
+            'title': 'Recherche appartement',
+            'description': 'md5:819ea0b785e2a04667a1a01cdc89594e',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'upload_date': '20140925',
+            'duration': 120,
+            'view_count': int,
+            'average_rating': float,
+            'categories': ['Débutante', 'Scénario', 'Sodomie'],
+            'age_limit': 18,
+        }
+    }
+
+    @classmethod
+    def build_video_url(cls, num):
+        """Build the MP4 URL for video ``num`` on a randomly picked stream server."""
+        return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num)
+
+    def _real_extract(self, url):
+        """Scrape the page at ``url`` and return the video's info dict."""
+        mobj = re.match(self._VALID_URL, url)
+        video_id, display_id = mobj.group('id', 'display_id')
+
+        webpage = self._download_webpage(url, video_id)
+        video_url = self.build_video_url(video_id)
+
+        title = self._html_search_regex(
+            r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL)
+        description = self._html_search_regex(
+            r'<article id="descriptif">(.+?)</article>',
+            webpage, 'description', fatal=False, flags=re.DOTALL)
+
+        # The captured src has its leading slash stripped; prefix the site root.
+        thumbnail = self._search_regex(
+            r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id,
+            webpage, 'thumbnail', fatal=False)
+        if thumbnail:
+            thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail
+
+        upload_date = unified_strdate(self._search_regex(
+            r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False))
+        duration = int_or_none(self._search_regex(
+            r'Durée (\d+)', webpage, 'duration', fatal=False))
+        view_count = int_or_none(self._search_regex(
+            r'(\d+) vues', webpage, 'view count', fatal=False))
+        # The rating is matched with a decimal comma; swap it for a dot.
+        average_rating = self._search_regex(
+            r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False)
+        if average_rating:
+            average_rating = float_or_none(average_rating.replace(',', '.'))
+
+        categories = self._html_search_meta(
+            'keywords', webpage, 'categories', fatal=False)
+        if categories:
+            categories = [category.strip() for category in categories.split(',')]
+        return {
+            'id': video_id,
+            'display_id': display_id,
+            'url': video_url,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+            'upload_date': upload_date,
+            'duration': duration,
+            'view_count': view_count,
+            'average_rating': average_rating,
+            'categories': categories,
+            'age_limit': 18,
+        }
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e1761265c..52f0dd09a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -730,7 +730,8 @@ def unified_strdate(date_str, day_first=True):
# Replace commas
date_str = date_str.replace(',', ' ')
# %z (UTC offset) is only supported in python>=3.2
- date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
+ if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
+ date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone
date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
@@ -759,6 +760,7 @@ def unified_strdate(date_str, day_first=True):
]
if day_first:
format_expressions.extend([
+ '%d-%m-%Y',
'%d.%m.%Y',
'%d/%m/%Y',
'%d/%m/%y',
@@ -766,6 +768,7 @@ def unified_strdate(date_str, day_first=True):
])
else:
format_expressions.extend([
+ '%m-%d-%Y',
'%m.%d.%Y',
'%m/%d/%Y',
'%m/%d/%y',