diff options
| author | Sergey M․ <dstftw@gmail.com> | 2015-04-04 16:45:41 +0600 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2015-04-04 16:45:41 +0600 | 
| commit | 7cf97daf77f6419f2b965a199a3fb1e63b8771b5 (patch) | |
| tree | eb6e49896451f809e5bbe0eb9c6b3f1b93e936fe | |
| parent | cce23e43a94bb73b7ed4815c2f517a682457d1c4 (diff) | |
[radiojavan] Simplify and extract upload date
| -rw-r--r-- | youtube_dl/extractor/radiojavan.py | 75 | 
1 file changed, 35 insertions, 40 deletions
```diff
diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dl/extractor/radiojavan.py
index de90f9270..73ab78d6d 100644
--- a/youtube_dl/extractor/radiojavan.py
+++ b/youtube_dl/extractor/radiojavan.py
@@ -1,12 +1,14 @@
-# coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import(
-    parse_duration,
-    str_to_int
+    unified_strdate,
+    str_to_int,
 )
 
+
 class RadioJavanIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
     _TEST = {
@@ -16,56 +18,49 @@ class RadioJavanIE(InfoExtractor):
             'id': 'chaartaar-ashoobam',
             'ext': 'mp4',
             'title': 'Chaartaar - Ashoobam',
-            'description': 'Chaartaar - Ashoobam',
             'thumbnail': 're:^https?://.*\.jpe?g$',
+            'upload_date': '20150215',
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
         }
     }
 
     def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        urls = list()
-        prefix = 'https://media.rdjavan.com/media/music_video/'
+        video_id = self._match_id(url)
 
-        video_url_480 = self._search_regex(
-            r'RJ\.video480p = \'([^\']+)\'', webpage, '480 video url', fatal= False)
-        video_url_720 = self._search_regex(
-            r'RJ\.video720p = \'([^\']+)\'', webpage, '720 video url', fatal= False)
-        video_url_1080 = self._search_regex(
-            r'RJ\.video1080p = \'([^\']+)\'', webpage, '1080 video url', fatal= False)
+        webpage = self._download_webpage(url, video_id)
 
-        if video_url_480:
-            urls.append({'url': prefix + video_url_480, 'format': '480p'})
-        if video_url_720:
-            urls.append({'url': prefix + video_url_720, 'format': '720p'})
-        if video_url_1080:
-            urls.append({'url': prefix + video_url_1080, 'format': '1080p'})
+        formats = [{
+            'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
+            'format_id': '%sp' % height,
+            'height': height,
+        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]
 
         title = self._og_search_title(webpage)
         thumbnail = self._og_search_thumbnail(webpage)
-        formats = [{
-            'url': url['url'],
-            'format': url['format']
-        } for url in urls]
 
-        likes = self._search_regex(
-            r'<span class="rating">([\d,]+)\s*likes</span>', webpage, 'Likes Count', fatal=False )
-        likes = likes.replace(',', '')
-        dislikes = self._search_regex(
-            r'<span class="rating">([\d,]+)\s*dislikes</span>', webpage, 'Dislikes Count', fatal=False )
-        dislikes = dislikes.replace(',', '')
+        upload_date = unified_strdate(self._search_regex(
+            r'class="date_added">Date added: ([^<]+)<',
+            webpage, 'upload date', fatal=False))
 
-        plays = self._search_regex(
-            r'views_publish[">\s]*<span[^>]+class="views">Plays: ([\d,]+)</span>', webpage, 'Play Count', fatal=False )
-        plays = plays.replace(',', '')
+        view_count = str_to_int(self._search_regex(
+            r'class="views">Plays: ([\d,]+)',
+            webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._search_regex(
+            r'class="rating">([\d,]+) likes',
+            webpage, 'like count', fatal=False))
+        dislike_count = str_to_int(self._search_regex(
+            r'class="rating">([\d,]+) dislikes',
+            webpage, 'dislike count', fatal=False))
 
         return {
-            'formats': formats,
-            'id': display_id,
+            'id': video_id,
             'title': title,
-            'description': title, # no description provided in RadioJavan
             'thumbnail': thumbnail,
-            'like_count': str_to_int(likes),
-            'dislike_count': str_to_int(dislikes),
-            'viewCount': str_to_int(plays)
-        }
\ No newline at end of file
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'formats': formats,
+        }
```
