diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-02-05 13:37:27 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-02-05 13:37:27 +0700 | 
| commit | 6ef3e65a7b244d5e432e764772177c7d48cab237 (patch) | |
| tree | 04e9dab315cc8af3a0e9a0f6eba475769d25a0e3 /youtube_dl/extractor/videopress.py | |
| parent | 6fd138bed892ac8ae1714d64f4a53d8ea7a1d5bb (diff) | |
[videopress] Add extractor
Diffstat (limited to 'youtube_dl/extractor/videopress.py')
| -rw-r--r-- | youtube_dl/extractor/videopress.py | 99 | 
1 files changed, 99 insertions, 0 deletions
diff --git a/youtube_dl/extractor/videopress.py b/youtube_dl/extractor/videopress.py new file mode 100644 index 000000000..049db25a5 --- /dev/null +++ b/youtube_dl/extractor/videopress.py @@ -0,0 +1,99 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import random +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( +    determine_ext, +    float_or_none, +    parse_age_limit, +    qualities, +    try_get, +    unified_timestamp, +    urljoin, +) + + +class VideoPressIE(InfoExtractor): +    _VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)' +    _TESTS = [{ +        'url': 'https://videopress.com/embed/kUJmAcSf', +        'md5': '706956a6c875873d51010921310e4bc6', +        'info_dict': { +            'id': 'kUJmAcSf', +            'ext': 'mp4', +            'title': 'VideoPress Demo', +            'thumbnail': r're:^https?://.*\.jpg', +            'duration': 634.6, +            'timestamp': 1434983935, +            'upload_date': '20150622', +            'age_limit': 0, +        }, +    }, { +        # 17+, requires birth_* params +        'url': 'https://videopress.com/embed/iH3gstfZ', +        'only_matching': True, +    }] + +    @staticmethod +    def _extract_urls(webpage): +        return re.findall( +            r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)', +            webpage) + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        video = self._download_json( +            'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id, +            video_id, query={ +                'birth_month': random.randint(1, 12), +                'birth_day': random.randint(1, 31), +                'birth_year': random.randint(1950, 1995), +            }) + +        title = video['title'] + +        def base_url(scheme): +            return try_get( +                video, lambda x: x['file_url_base'][scheme], compat_str) + +        base_url = base_url('https') or base_url('http') + +        QUALITIES = ('std', 'dvd', 'hd') +        quality = qualities(QUALITIES) + +        formats = [] +        for format_id, f in video['files'].items(): +            if not isinstance(f, dict): +                continue +            for ext, path in f.items(): +                if ext in ('mp4', 'ogg'): +                    formats.append({ +                        'url': urljoin(base_url, path), +                        'format_id': '%s-%s' % (format_id, ext), +                        'ext': determine_ext(path, ext), +                        'quality': quality(format_id), +                    }) +        original_url = try_get(video, lambda x: x['original'], compat_str) +        if original_url: +            formats.append({ +                'url': original_url, +                'format_id': 'original', +                'quality': len(QUALITIES), +            }) +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'description': video.get('description'), +            'thumbnail': video.get('poster'), +            'duration': float_or_none(video.get('duration'), 1000), +            'timestamp': unified_timestamp(video.get('upload_date')), +            'age_limit': parse_age_limit(video.get('rating')), +            'formats': formats, +        }  | 
