diff options
| author | dst <dstftw@gmail.com> | 2014-01-28 06:56:09 +0700 | 
|---|---|---|
| committer | dst <dstftw@gmail.com> | 2014-01-28 06:56:09 +0700 | 
| commit | 1547c8cc881b85f9f400f29c30b3a352eb679608 (patch) | |
| tree | b5b10aa751a52b1d73a534302f3638a195a1eed1 | |
| parent | b21a918984ef1eaf551ca78f0a278ed27e8a3f49 (diff) | |
[rutube] Add support for channels and movies
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/rutube.py | 74 | 
2 files changed, 69 insertions, 11 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 1e8556124..19ca5d6b4 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -161,7 +161,11 @@ from .ro220 import Ro220IE  from .rottentomatoes import RottenTomatoesIE  from .roxwel import RoxwelIE  from .rtlnow import RTLnowIE -from .rutube import RutubeIE +from .rutube import ( +    RutubeIE, +    RutubeChannelIE, +    RutubeMovieIE +)  from .servingsys import ServingSysIE  from .sina import SinaIE  from .slashdot import SlashdotIE diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py index e3e9bc07f..2001a83ef 100644 --- a/youtube_dl/extractor/rutube.py +++ b/youtube_dl/extractor/rutube.py @@ -1,6 +1,9 @@  # encoding: utf-8 +from __future__ import unicode_literals +  import re  import json +import itertools  from .common import InfoExtractor  from ..utils import ( @@ -11,26 +14,28 @@ from ..utils import (  class RutubeIE(InfoExtractor): +    IE_NAME = 'rutube' +    IE_DESC = 'Rutube videos'          _VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)'      _TEST = { -        u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', -        u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4', -        u'info_dict': { -            u'title': u'Раненный кенгуру забежал в аптеку', -            u'uploader': u'NTDRussian', -            u'uploader_id': u'29790', +        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', +        'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4', +        'info_dict': { +            'title': 'Раненный кенгуру забежал в аптеку', +            'uploader': 'NTDRussian', +            'uploader_id': '29790',          }, -        u'params': { +        'params': {              # It requires ffmpeg (m3u8 download) -            u'skip_download': True, +            'skip_download': True,          },      }      def _get_api_response(self, short_id, subpath):          api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id)          response_json = self._download_webpage(api_url, short_id, -            u'Downloading %s json' % subpath) +            'Downloading %s json' % subpath)          return json.loads(response_json)      def _real_extract(self, url): @@ -45,7 +50,7 @@ class RutubeIE(InfoExtractor):          author = trackinfo.get('author') or {}          m3u8_url = trackinfo['video_balancer'].get('m3u8')          if m3u8_url is None: -            raise ExtractorError(u'Couldn\'t find m3u8 manifest url') +            raise ExtractorError('Couldn\'t find m3u8 manifest url')          return {              'id': trackinfo['id'], @@ -56,3 +61,52 @@ class RutubeIE(InfoExtractor):              'uploader': author.get('name'),              'uploader_id': compat_str(author['id']) if author else None,          } + + +class RutubeChannelIE(InfoExtractor): +    IE_NAME = 'rutube:channel' +    IE_DESC = 'Rutube channels'     +    _VALID_URL = r'http://rutube\.ru/tags/video/(?P<id>\d+)' + +    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' + +    def _extract_videos(self, channel_id, channel_title=None): +        entries = [] +        for pagenum in itertools.count(1): +            response_json = self._download_webpage(self._PAGE_TEMPLATE % (channel_id, pagenum), +                                                   channel_id, 'Downloading page %s' % pagenum) +            page = json.loads(response_json) +            if 'detail' in page and page['detail'] == 'Not found': +                raise ExtractorError('Channel %s does not exist' % channel_id, expected=True) +            results = page['results'] +            if len(results) == 0: +                break; +            entries.extend(self.url_result(v['video_url'], 'Rutube') for v in results) +            if page['has_next'] is False: +                break; +        return self.playlist_result(entries, channel_id, channel_title) + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        channel_id = mobj.group('id') +        return self._extract_videos(channel_id) + + +class RutubeMovieIE(RutubeChannelIE): +    IE_NAME = 'rutube:movie' +    IE_DESC = 'Rutube movies'     +    _VALID_URL = r'http://rutube\.ru/metainfo/tv/(?P<id>\d+)' + +    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' +    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        movie_id = mobj.group('id') +        movie_json = self._download_webpage(self._MOVIE_TEMPLATE % movie_id, movie_id, +                                            'Downloading movie JSON') +        movie = json.loads(movie_json) +        if 'detail' in movie and movie['detail'] == 'Not found': +            raise ExtractorError('Movie %s does not exist' % movie_id, expected=True) +        movie_name = movie['name'] +        return self._extract_videos(movie_id, movie_name)
\ No newline at end of file | 
