diff options
Diffstat (limited to 'youtube_dl/extractor/rutube.py')
| -rw-r--r-- | youtube_dl/extractor/rutube.py | 118 | 
1 files changed, 92 insertions, 26 deletions
| diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index e3e9bc07f..4922dd764 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -1,58 +1,124 @@  # encoding: utf-8 +from __future__ import unicode_literals +  import re  import json +import itertools  from .common import InfoExtractor  from ..utils import ( -    compat_urlparse,      compat_str, +    unified_strdate,      ExtractorError,  )  class RutubeIE(InfoExtractor): -    _VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)' +    IE_NAME = 'rutube' +    IE_DESC = 'Rutube videos' +    _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'      _TEST = { -        u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', -        u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4', -        u'info_dict': { -            u'title': u'Раненный кенгуру забежал в аптеку', -            u'uploader': u'NTDRussian', -            u'uploader_id': u'29790', +        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', +        'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4', +        'info_dict': { +            'title': 'Раненный кенгуру забежал в аптеку', +            'description': 'http://www.ntdtv.ru ', +            'duration': 80, +            'uploader': 'NTDRussian', +            'uploader_id': '29790', +            'upload_date': '20131016',          }, -        u'params': { +        'params': {              # It requires ffmpeg (m3u8 download) -            u'skip_download': True, +            'skip_download': True,          },      } -    def _get_api_response(self, short_id, subpath): -        api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id) -        response_json = self._download_webpage(api_url, short_id, -            u'Downloading %s json' % subpath) -        return json.loads(response_json) -      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        long_id = mobj.group('long_id') -        webpage = self._download_webpage(url, long_id) -        og_video = self._og_search_video_url(webpage) -        short_id = compat_urlparse.urlparse(og_video).path[1:] -        options = self._get_api_response(short_id, 'options') -        trackinfo = self._get_api_response(short_id, 'trackinfo') +        video_id = mobj.group('id') +         +        api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id, +                                              video_id, 'Downloading video JSON') +        video = json.loads(api_response) +         +        api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id, +                                              video_id, 'Downloading trackinfo JSON') +        trackinfo = json.loads(api_response) +                  # Some videos don't have the author field          author = trackinfo.get('author') or {}          m3u8_url = trackinfo['video_balancer'].get('m3u8')          if m3u8_url is None: -            raise ExtractorError(u'Couldn\'t find m3u8 manifest url') +            raise ExtractorError('Couldn\'t find m3u8 manifest url')          return { -            'id': trackinfo['id'], -            'title': trackinfo['title'], +            'id': video['id'], +            'title': video['title'], +            'description': video['description'], +            'duration': video['duration'], +            'view_count': video['hits'],              'url': m3u8_url,              'ext': 'mp4', -            'thumbnail': options['thumbnail_url'], +            'thumbnail': video['thumbnail_url'],              'uploader': author.get('name'),              'uploader_id': compat_str(author['id']) if author else None, +            'upload_date': unified_strdate(video['created_ts']), +            'age_limit': 18 if video['is_adult'] else 0,          } + + +class RutubeChannelIE(InfoExtractor): +    IE_NAME = 'rutube:channel' +    IE_DESC = 'Rutube channels' +    _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)' + +    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' + +    def _extract_videos(self, channel_id, channel_title=None): +        entries = [] +        for pagenum in itertools.count(1): +            api_response = self._download_webpage( +                self._PAGE_TEMPLATE % (channel_id, pagenum), +                channel_id, 'Downloading page %s' % pagenum) +            page = json.loads(api_response) +            results = page['results'] +            if not results: +                break +            entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results) +            if not page['has_next']: +                break +        return self.playlist_result(entries, channel_id, channel_title) + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        channel_id = mobj.group('id') +        return self._extract_videos(channel_id) + + +class RutubeMovieIE(RutubeChannelIE): +    IE_NAME = 'rutube:movie' +    IE_DESC = 'Rutube movies' +    _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)' + +    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' +    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        movie_id = mobj.group('id') +        api_response = self._download_webpage( +            self._MOVIE_TEMPLATE % movie_id, movie_id, +            'Downloading movie JSON') +        movie = json.loads(api_response) +        movie_name = movie['name'] +        return self._extract_videos(movie_id, movie_name) + + +class RutubePersonIE(RutubeChannelIE): +    IE_NAME = 'rutube:person' +    IE_DESC = 'Rutube person videos' +    _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)' + +    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' | 
