diff options
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 22 | ||||
| -rw-r--r-- | youtube_dl/extractor/videopress.py | 99 | 
3 files changed, 122 insertions, 0 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 32420937c..cf608faee 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1095,6 +1095,7 @@ from .videomore import (
     VideomoreSeasonIE,
 )
 from .videopremium import VideoPremiumIE
+from .videopress import VideoPressIE
 from .vidio import VidioIE
 from .vidme import (
     VidmeIE,
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index a23486620..4156cf27d 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -81,6 +81,7 @@ from .videa import VideaIE
 from .twentymin import TwentyMinutenIE
 from .ustream import UstreamIE
 from .openload import OpenloadIE
+from .videopress import VideoPressIE
 
 
 class GenericIE(InfoExtractor):
@@ -1473,6 +1474,21 @@ class GenericIE(InfoExtractor):
                 'skip_download': True,
             },
             'add_ie': [TwentyMinutenIE.ie_key()],
+        },
+        {
+            # VideoPress embed
+            'url': 'https://en.support.wordpress.com/videopress/',
+            'info_dict': {
+                'id': 'OcobLTqC',
+                'ext': 'm4v',
+                'title': 'IMG_5786',
+                'timestamp': 1435711927,
+                'upload_date': '20150701',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': [VideoPressIE.ie_key()],
         }
         # {
         #     # TODO: find another test
@@ -2438,6 +2454,12 @@ class GenericIE(InfoExtractor):
             return _playlist_from_matches(
                 openload_urls, ie=OpenloadIE.ie_key())
 
+        # Look for VideoPress embeds
+        videopress_urls = VideoPressIE._extract_urls(webpage)
+        if videopress_urls:
+            return _playlist_from_matches(
+                videopress_urls, ie=VideoPressIE.ie_key())
+
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
             webpage, video_id, default={}, expected_type='VideoObject')
diff --git a/youtube_dl/extractor/videopress.py b/youtube_dl/extractor/videopress.py
new file mode 100644
index 000000000..049db25a5
--- /dev/null
+++ b/youtube_dl/extractor/videopress.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    determine_ext,
+    float_or_none,
+    parse_age_limit,
+    qualities,
+    try_get,
+    unified_timestamp,
+    urljoin,
+)
+
+
+class VideoPressIE(InfoExtractor):
+    _VALID_URL = r'https?://videopress\.com/embed/(?P<id>[\da-zA-Z]+)'
+    _TESTS = [{
+        'url': 'https://videopress.com/embed/kUJmAcSf',
+        'md5': '706956a6c875873d51010921310e4bc6',
+        'info_dict': {
+            'id': 'kUJmAcSf',
+            'ext': 'mp4',
+            'title': 'VideoPress Demo',
+            'thumbnail': r're:^https?://.*\.jpg',
+            'duration': 634.6,
+            'timestamp': 1434983935,
+            'upload_date': '20150622',
+            'age_limit': 0,
+        },
+    }, {
+        # 17+, requires birth_* params
+        'url': 'https://videopress.com/embed/iH3gstfZ',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+src=["\']((?:https?://)?videopress\.com/embed/[\da-zA-Z]+)',
+            webpage)
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        video = self._download_json(
+            'https://public-api.wordpress.com/rest/v1.1/videos/%s' % video_id,
+            video_id, query={
+                'birth_month': random.randint(1, 12),
+                'birth_day': random.randint(1, 31),
+                'birth_year': random.randint(1950, 1995),
+            })
+
+        title = video['title']
+
+        def base_url(scheme):
+            return try_get(
+                video, lambda x: x['file_url_base'][scheme], compat_str)
+
+        base_url = base_url('https') or base_url('http')
+
+        QUALITIES = ('std', 'dvd', 'hd')
+        quality = qualities(QUALITIES)
+
+        formats = []
+        for format_id, f in video['files'].items():
+            if not isinstance(f, dict):
+                continue
+            for ext, path in f.items():
+                if ext in ('mp4', 'ogg'):
+                    formats.append({
+                        'url': urljoin(base_url, path),
+                        'format_id': '%s-%s' % (format_id, ext),
+                        'ext': determine_ext(path, ext),
+                        'quality': quality(format_id),
+                    })
+        original_url = try_get(video, lambda x: x['original'], compat_str)
+        if original_url:
+            formats.append({
+                'url': original_url,
+                'format_id': 'original',
+                'quality': len(QUALITIES),
+            })
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': video.get('description'),
+            'thumbnail': video.get('poster'),
+            'duration': float_or_none(video.get('duration'), 1000),
+            'timestamp': unified_timestamp(video.get('upload_date')),
+            'age_limit': parse_age_limit(video.get('rating')),
+            'formats': formats,
+        }
