diff options
Diffstat (limited to 'youtube_dl/extractor/rutube.py')
| -rw-r--r-- | youtube_dl/extractor/rutube.py | 84 | 
1 files changed, 84 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/rutube.py b/youtube_dl/extractor/rutube.py
index 889fa7628..a6b17c0ef 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dl/extractor/rutube.py
@@ -7,10 +7,14 @@ import itertools
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
     determine_ext,
     unified_strdate,
+    try_get,
+    int_or_none,
 )
 
 
@@ -42,8 +46,24 @@ class RutubeIE(InfoExtractor):
     }, {
         'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661',
         'only_matching': True,
+    }, {
+        'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252',
+        'only_matching': True,
     }]
 
+    @classmethod
+    def suitable(cls, url):
+        parts = compat_urllib_parse_urlparse(url)
+        params = compat_parse_qs(parts.query)
+
+        # see if URL without parameters is OK
+        res = super(RutubeIE, cls).suitable(url)
+
+        if params:  # we only allow pl_id parameter in the url
+            res = res and 'pl_id' in params and len(params) == 1
+
+        return res
+
     @staticmethod
     def _extract_urls(webpage):
         return [mobj.group('url') for mobj in re.finditer(
@@ -193,3 +213,67 @@ class RutubePersonIE(RutubeChannelIE):
     }]
 
     _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
+
+
+class RutubePlaylistIE(InfoExtractor):
+    IE_NAME = 'rutube:playlist'
+    IE_DESC = 'Rutube playlists'
+    _TESTS = [{
+        'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source',
+        'info_dict': {
+            'id': '4252',
+        },
+        'playlist_count': 25,
+    }]
+
+    _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?(?:.+)?pl_id=(?P<id>\d+)'
+    _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/source/%s/?page=%s'
+
+    @staticmethod
+    def suitable(url):
+        params = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        return params.get('pl_id') and int_or_none(params['pl_id'][0]) \
+            and params.get('pl_type')
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        return self._extract_playlist(playlist_id)
+
+    def _extract_playlist(self, playlist_id):
+        entries = []
+        for pagenum in itertools.count(1):
+            page_url = self._PAGE_TEMPLATE % (playlist_id, pagenum)
+
+            # download_json will sent an accept: application/xml header
+            page = self._download_json(page_url, playlist_id,
+                                       "Downloading metadata for page %s" % pagenum,
+                                       headers={'Accept': 'application/json'})
+
+            if not page['results']:
+                break
+
+            results = page['results']
+            for result in results:
+                entry = self.url_result(result.get('video_url'), 'Rutube')
+                category = try_get(result, lambda x: x['category']['name'])
+                entry.update({
+                    'id': result.get('id'),
+                    'uploader': try_get(result, lambda x: x['author']['name']),
+                    'uploader_id': try_get(result, lambda x: x['author']['id']),
+                    'upload_date': unified_strdate(result.get('created_ts')),
+                    'title': result.get('title'),
+                    'description': result.get('description'),
+                    'thumbnail': result.get('thumbnail_url'),
+                    'duration': int_or_none(result.get('duration')),
+                    'category': [category] if category else None,
+                    'age_limit': 18 if result.get('is_adult') else 0,
+                    'view_count': int_or_none(result.get('hits')),
+                    'is_live': result.get('is_livestream'),
+                    'webpage_url': result.get('video_url'),
+                })
+                entries.append(entry)
+
+            if page['has_next'] is False:
+                break
+
+        return self.playlist_result(entries, playlist_id, page['name']) |
