diff options
| author | Sergey M․ <dstftw@gmail.com> | 2015-04-04 19:14:01 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-04-04 19:14:01 +0600 | 
| commit | 3fde134791fa920c98a99610c2200a6b3572cca0 (patch) | |
| tree | 21d7a70a9c8e6d23a30438177bf4fe320a1480db | |
| parent | 536b94e56fdf0fd2dd1c6d6d92112866949505c4 (diff) | |
| parent | 7c39a65543b809b681434246b84710349f5837aa (diff) | |
Merge branch 'Roman2K-pornovoisines'
| -rw-r--r-- | test/test_utils.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/pornovoisines.py | 96 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 5 | 
4 files changed, 102 insertions, 1 deletion
| diff --git a/test/test_utils.py b/test/test_utils.py index 4e524aca3..2e3a6480c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -227,6 +227,7 @@ class TestUtil(unittest.TestCase):          self.assertEqual(              unified_strdate('2/2/2015 6:47:40 PM', day_first=False),              '20150202') +        self.assertEqual(unified_strdate('25-09-2014'), '20140925')      def test_find_xpath_attr(self):          testxml = '''<root> diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index c05a43641..0f7d44616 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -388,6 +388,7 @@ from .pornhub import (      PornHubPlaylistIE,  )  from .pornotube import PornotubeIE +from .pornovoisines import PornoVoisinesIE  from .pornoxo import PornoXOIE  from .primesharetv import PrimeShareTVIE  from .promptfile import PromptFileIE diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dl/extractor/pornovoisines.py new file mode 100644 index 000000000..9688ed948 --- /dev/null +++ b/youtube_dl/extractor/pornovoisines.py @@ -0,0 +1,96 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re +import random + +from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    float_or_none, +    unified_strdate, +) + + +class PornoVoisinesIE(InfoExtractor): +    _VALID_URL = r'http://(?:www\.)?pornovoisines\.com/showvideo/(?P<id>\d+)/(?P<display_id>[^/]+)' + +    _VIDEO_URL_TEMPLATE = 'http://stream%d.pornovoisines.com' \ +        '/static/media/video/transcoded/%s-640x360-1000-trscded.mp4' + +    _SERVER_NUMBERS = (1, 2) + +    _TEST = { +        'url': 'http://www.pornovoisines.com/showvideo/1285/recherche-appartement/', +        'md5': '5ac670803bc12e9e7f9f662ce64cf1d1', +        'info_dict': { +            'id': '1285', +            'display_id': 'recherche-appartement', +            'ext': 'mp4', +            'title': 'Recherche appartement', +            
'description': 'md5:819ea0b785e2a04667a1a01cdc89594e', +            'thumbnail': 're:^https?://.*\.jpg$', +            'upload_date': '20140925', +            'duration': 120, +            'view_count': int, +            'average_rating': float, +            'categories': ['Débutante', 'Scénario', 'Sodomie'], +            'age_limit': 18, +        } +    } + +    @classmethod +    def build_video_url(cls, num): +        return cls._VIDEO_URL_TEMPLATE % (random.choice(cls._SERVER_NUMBERS), num) + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        display_id = mobj.group('display_id') + +        webpage = self._download_webpage(url, video_id) + +        video_url = self.build_video_url(video_id) + +        title = self._html_search_regex( +            r'<h1>(.+?)</h1>', webpage, 'title', flags=re.DOTALL) +        description = self._html_search_regex( +            r'<article id="descriptif">(.+?)</article>', +            webpage, "description", fatal=False, flags=re.DOTALL) + +        thumbnail = self._search_regex( +            r'<div id="mediaspace%s">\s*<img src="/?([^"]+)"' % video_id, +            webpage, 'thumbnail', fatal=False) +        if thumbnail: +            thumbnail = 'http://www.pornovoisines.com/%s' % thumbnail + +        upload_date = unified_strdate(self._search_regex( +            r'Publié le ([\d-]+)', webpage, 'upload date', fatal=False)) +        duration = int_or_none(self._search_regex( +            'Durée (\d+)', webpage, 'duration', fatal=False)) +        view_count = int_or_none(self._search_regex( +            r'(\d+) vues', webpage, 'view count', fatal=False)) +        average_rating = self._search_regex( +            r'Note : (\d+,\d+)', webpage, 'average rating', fatal=False) +        if average_rating: +            average_rating = float_or_none(average_rating.replace(',', '.')) + +        categories = self._html_search_meta( +            'keywords', 
webpage, 'categories', fatal=False) +        if categories: +            categories = [category.strip() for category in categories.split(',')] + +        return { +            'id': video_id, +            'display_id': display_id, +            'url': video_url, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'upload_date': upload_date, +            'duration': duration, +            'view_count': view_count, +            'average_rating': average_rating, +            'categories': categories, +            'age_limit': 18, +        } diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e1761265c..52f0dd09a 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -730,7 +730,8 @@ def unified_strdate(date_str, day_first=True):      # Replace commas      date_str = date_str.replace(',', ' ')      # %z (UTC offset) is only supported in python>=3.2 -    date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) +    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str): +        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)      # Remove AM/PM + timezone      date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) @@ -759,6 +760,7 @@ def unified_strdate(date_str, day_first=True):      ]      if day_first:          format_expressions.extend([ +            '%d-%m-%Y',              '%d.%m.%Y',              '%d/%m/%Y',              '%d/%m/%y', @@ -766,6 +768,7 @@ def unified_strdate(date_str, day_first=True):          ])      else:          format_expressions.extend([ +            '%m-%d-%Y',              '%m.%d.%Y',              '%m/%d/%Y',              '%m/%d/%y', | 
