aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/vimple.py
blob: 92321d66e369626c0adfeda6cb4fae282a6f7abb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import int_or_none


class SprutoBaseIE(InfoExtractor):
    def _extract_spruto(self, spruto, video_id):
        playlist = spruto['playlist'][0]
        title = playlist['title']
        video_id = playlist.get('videoId') or video_id
        thumbnail = playlist.get('posterUrl') or playlist.get('thumbnailUrl')
        duration = int_or_none(playlist.get('duration'))

        formats = [{
            'url': f['url'],
        } for f in playlist['video']]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }


class VimpleIE(SprutoBaseIE):
    IE_DESC = 'Vimple - one-click video hosting'
    _VALID_URL = r'https?://(?:player\.vimple\.ru/iframe|vimple\.ru)/(?P<id>[\da-f-]{32,36})'
    _TESTS = [
        {
            'url': 'http://vimple.ru/c0f6b1687dcd4000a97ebe70068039cf',
            'md5': '2e750a330ed211d3fd41821c6ad9a279',
            'info_dict': {
                'id': 'c0f6b168-7dcd-4000-a97e-be70068039cf',
                'ext': 'mp4',
                'title': 'Sunset',
                'duration': 20,
                'thumbnail': 're:https?://.*?\.jpg',
            },
        }, {
            'url': 'http://player.vimple.ru/iframe/52e1beec-1314-4a83-aeac-c61562eadbf9',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://player.vimple.ru/iframe/%s' % video_id, video_id)

        spruto = self._parse_json(
            self._search_regex(
                r'sprutoData\s*:\s*({.+?}),\r\n', webpage, 'spruto data'),
            video_id)

        return self._extract_spruto(spruto, video_id)