diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-04-04 19:14:01 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-04-04 19:14:01 +0600 |
commit | 3fde134791fa920c98a99610c2200a6b3572cca0 (patch) | |
tree | 21d7a70a9c8e6d23a30438177bf4fe320a1480db | |
parent | 536b94e56fdf0fd2dd1c6d6d92112866949505c4 (diff) | |
parent | 7c39a65543b809b681434246b84710349f5837aa (diff) |
Merge branch 'Roman2K-pornovoisines'
-rw-r--r-- | test/test_utils.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
-rw-r--r-- | youtube_dl/extractor/pornovoisines.py | 96 | ||||
-rw-r--r-- | youtube_dl/utils.py | 5 |
4 files changed, 102 insertions, 1 deletion
diff --git a/test/test_utils.py b/test/test_utils.py index 4e524aca3..2e3a6480c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -227,6 +227,7 @@ class TestUtil(unittest.TestCase): self.assertEqual( unified_strdate('2/2/2015 6:47:40 PM', day_first=False), '20150202') + self.assertEqual(unified_strdate('25-09-2014'), '20140925') def test_find_xpath_attr(self): testxml = '''<root> diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c05a43641..0f7d44616 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -388,6 +388,7 @@ from .pornhub import ( PornHubPlaylistIE, ) from .pornotube import PornotubeIE +from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE from .primesharetv import PrimeShareTVIE from .promptfile import PromptFileIE diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py new file mode 100644 index 000000000..9688ed948 --- /dev/null +++ b/youtube_dl/extractor/pornovoisines.py @@ -0,0 +1,96 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import random + +from .common import InfoExtractor +from ..utils import ( + int_or_none, + float_or_none, + unified_strdate, +) + + +class PornoVoisinesIE(InfoExtractor): + _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)' + + _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ + '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' + + _SERVER_NUMBERS = (1, 2) + + _TEST = { + 'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/', + 'md5': '5ac670803bc12e9e7f9f662ce64cf1d1', + 'info_dict': { + 'id': '1285', + 'display_id': 'recherche-appartement', + 'ext': 'mp4', + 'title': 'Recherche appartement', + 'description': 'md5:819ea0b785e2a04667a1a01cdc89594e', + 'thumbnail': 're:^https?://.*\.jpg$', + 'upload_date': '20140925', + 'duration': 120, + 'view_count': int, + 'average_rating': 
float, + 'categories': ['Débutante', 'Scénario', 'Sodomie'], + 'age_limit': 18, + } + } + + @classmethod + def build_video_url(cls, num): + return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + display_id = mobj.group('display_id') + + webpage = self._download_webpage(url, video_id) + + video_url = self.build_video_url(video_id) + + title = self._html_search_regex( + r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL) + description = self._html_search_regex( + r'<article id="descriptif">(.+?)</article>', + webpage, "description", fatal=False, flags=re.DOTALL) + + thumbnail = self._search_regex( + r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id, + webpage, 'thumbnail', fatal=False) + if thumbnail: + thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail + + upload_date = unified_strdate(self._search_regex( + r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False)) + duration = int_or_none(self._search_regex( + 'Durée (\d+)', webpage, 'duration', fatal=False)) + view_count = int_or_none(self._search_regex( + r'(\d+) vues', webpage, 'view count', fatal=False)) + average_rating = self._search_regex( + r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False) + if average_rating: + average_rating = float_or_none(average_rating.replace(',', '.')) + + categories = self._html_search_meta( + 'keywords', webpage, 'categories', fatal=False) + if categories: + categories = [category.strip() for category in categories.split(',')] + + return { + 'id': video_id, + 'display_id': display_id, + 'url': video_url, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'upload_date': upload_date, + 'duration': duration, + 'view_count': view_count, + 'average_rating': average_rating, + 'categories': categories, + 'age_limit': 18, + } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e1761265c..52f0dd09a 
100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -730,7 +730,8 @@ def unified_strdate(date_str, day_first=True): # Replace commas date_str = date_str.replace(',', ' ') # %z (UTC offset) is only supported in python>=3.2 - date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) + if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str): + date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) # Remove AM/PM + timezone date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) @@ -759,6 +760,7 @@ def unified_strdate(date_str, day_first=True): ] if day_first: format_expressions.extend([ + '%d-%m-%Y', '%d.%m.%Y', '%d/%m/%Y', '%d/%m/%y', @@ -766,6 +768,7 @@ def unified_strdate(date_str, day_first=True): ]) else: format_expressions.extend([ + '%m-%d-%Y', '%m.%d.%Y', '%m/%d/%Y', '%m/%d/%y', |