diff options
| -rw-r--r-- | test/test_playlists.py | 9 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 7 | ||||
| -rw-r--r-- | youtube_dl/extractor/rutube.py | 113 | 
3 files changed, 102 insertions, 27 deletions
diff --git a/test/test_playlists.py b/test/test_playlists.py
index a8f4a49f4..b3ce6f71e 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -33,6 +33,7 @@ from youtube_dl.extractor import (
     ImdbListIE,
     KhanAcademyIE,
     EveryonesMixtapeIE,
+    RutubeChannelIE,
 )
 
 
@@ -219,6 +220,14 @@ class TestPlaylists(unittest.TestCase):
         self.assertEqual(result['id'], 'm7m0jJAbMQi')
         self.assertEqual(result['title'], 'Driving')
         self.assertEqual(len(result['entries']), 24)
+        
+    def test_rutube_channel(self):
+        dl = FakeYDL()
+        ie = RutubeChannelIE(dl)
+        result = ie.extract('http://rutube.ru/tags/video/1409')
+        self.assertIsPlaylist(result)
+        self.assertEqual(result['id'], '1409')
+        self.assertTrue(len(result['entries']) >= 34)
 
 
 if __name__ == '__main__':
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 1e8556124..e89b5cf9d 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -161,7 +161,12 @@ from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
-from .rutube import RutubeIE
+from .rutube import (
+    RutubeIE,
+    RutubeChannelIE,
+    RutubeMovieIE,
+    RutubePersonIE,
+)
 from .servingsys import ServingSysIE
 from .sina import SinaIE
 from .slashdot import SlashdotIE
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index e3e9bc07f..9a20facfd 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -1,58 +1,119 @@
 # encoding: utf-8
+from __future__ import unicode_literals
+
 import re
 import json
+import itertools
 
 from .common import InfoExtractor
 from ..utils import (
-    compat_urlparse,
     compat_str,
+    unified_strdate,
     ExtractorError,
 )
 
 
 class RutubeIE(InfoExtractor):
-    _VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'
+    IE_NAME = 'rutube'
+    IE_DESC = 'Rutube videos'    
+    _VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'
 
     _TEST = {
-        u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
-        u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4',
-        u'info_dict': {
-            u'title': u'Раненный кенгуру забежал в аптеку',
-            u'uploader': u'NTDRussian',
-            u'uploader_id': u'29790',
+        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
+        'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
+        'info_dict': {
+            'title': 'Раненный кенгуру забежал в аптеку',
+            'uploader': 'NTDRussian',
+            'uploader_id': '29790',
         },
-        u'params': {
+        'params': {
             # It requires ffmpeg (m3u8 download)
-            u'skip_download': True,
+            'skip_download': True,
         },
     }
 
-    def _get_api_response(self, short_id, subpath):
-        api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id)
-        response_json = self._download_webpage(api_url, short_id,
-            u'Downloading %s json' % subpath)
-        return json.loads(response_json)
-
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        long_id = mobj.group('long_id')
-        webpage = self._download_webpage(url, long_id)
-        og_video = self._og_search_video_url(webpage)
-        short_id = compat_urlparse.urlparse(og_video).path[1:]
-        options = self._get_api_response(short_id, 'options')
-        trackinfo = self._get_api_response(short_id, 'trackinfo')
+        video_id = mobj.group('id')
+        
+        api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
+                                              video_id, 'Downloading video JSON')
+        video = json.loads(api_response)
+        
+        api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
+                                              video_id, 'Downloading trackinfo JSON')
+        trackinfo = json.loads(api_response)
+        
         # Some videos don't have the author field
         author = trackinfo.get('author') or {}
         m3u8_url = trackinfo['video_balancer'].get('m3u8')
         if m3u8_url is None:
-            raise ExtractorError(u'Couldn\'t find m3u8 manifest url')
+            raise ExtractorError('Couldn\'t find m3u8 manifest url')
 
         return {
-            'id': trackinfo['id'],
-            'title': trackinfo['title'],
+            'id': video['id'],
+            'title': video['title'],
+            'description': video['description'],
+            'duration': video['duration'],
+            'view_count': video['hits'],
             'url': m3u8_url,
             'ext': 'mp4',
-            'thumbnail': options['thumbnail_url'],
+            'thumbnail': video['thumbnail_url'],
             'uploader': author.get('name'),
             'uploader_id': compat_str(author['id']) if author else None,
+            'upload_date': unified_strdate(video['created_ts']),
+            'age_limit': 18 if video['is_adult'] else 0,
         }
+
+
+class RutubeChannelIE(InfoExtractor):
+    IE_NAME = 'rutube:channel'
+    IE_DESC = 'Rutube channels'    
+    _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)'
+
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'
+
+    def _extract_videos(self, channel_id, channel_title=None):
+        entries = []
+        for pagenum in itertools.count(1):
+            api_response = self._download_webpage(self._PAGE_TEMPLATE % (channel_id, pagenum),
+                                                   channel_id, 'Downloading page %s' % pagenum)
+            page = json.loads(api_response)
+            results = page['results']
+            if len(results) == 0:
+                break;
+            entries.extend(self.url_result(result['video_url'], 'Rutube') for result in results)
+            if page['has_next'] is False:
+                break;
+        return self.playlist_result(entries, channel_id, channel_title)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        channel_id = mobj.group('id')
+        return self._extract_videos(channel_id)
+
+
+class RutubeMovieIE(RutubeChannelIE):
+    IE_NAME = 'rutube:movie'
+    IE_DESC = 'Rutube movies'    
+    _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)'
+
+    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        movie_id = mobj.group('id')
+        api_response = self._download_webpage(self._MOVIE_TEMPLATE % movie_id, movie_id,
+                                            'Downloading movie JSON')
+        movie = json.loads(api_response)
+        movie_name = movie['name']
+        return self._extract_videos(movie_id, movie_name)
+
+
+class RutubePersonIE(RutubeChannelIE):
+    IE_NAME = 'rutube:person'
+    IE_DESC = 'Rutube person videos'
+    _VALID_URL = r'http://rutube\.ru/video/person/(?P<id>\d+)'
+
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
\ No newline at end of file
