diff options
| author | Sergey M․ <dstftw@gmail.com> | 2018-07-30 03:05:36 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2018-07-30 03:05:36 +0700 | 
| commit | 9d1b213845f35af4de40dd057754f8f285091bfa (patch) | |
| tree | 6bb2512e13966a4f1c5b4b66069f6204a393fd2b | |
| parent | 548482841867a16d3f68e18f78091e59f768a880 (diff) | |
[viqeo] Add extractor (closes #17066)
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 |
| -rw-r--r-- | youtube_dl/extractor/generic.py | 15 |
| -rw-r--r-- | youtube_dl/extractor/viqeo.py | 99 |

3 files changed, 115 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 29fab5b9a..c7a91a986 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1291,6 +1291,7 @@ from .viki import (      VikiIE,      VikiChannelIE,  ) +from .viqeo import ViqeoIE  from .viu import (      ViuIE,      ViuPlaylistIE, diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e5a8ffbe8..43218c3a4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -113,6 +113,7 @@ from .peertube import PeerTubeIE  from .indavideo import IndavideoEmbedIE  from .apa import APAIE  from .foxnews import FoxNewsIE +from .viqeo import ViqeoIE  class GenericIE(InfoExtractor): @@ -2060,6 +2061,15 @@ class GenericIE(InfoExtractor):              },              'skip': 'TODO: fix nested playlists processing in tests',          }, +        { +            # Viqeo embeds +            'url': 'https://viqeo.tv/', +            'info_dict': { +                'id': 'viqeo', +                'title': 'All-new video platform', +            }, +            'playlist_count': 6, +        },          # {          #     # TODO: find another test          #     # http://schema.org/VideoObject @@ -3094,6 +3104,11 @@ class GenericIE(InfoExtractor):              return self.playlist_from_matches(                  sharevideos_urls, video_id, video_title) +        viqeo_urls = ViqeoIE._extract_urls(webpage) +        if viqeo_urls: +            return self.playlist_from_matches( +                viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key()) +          # Look for HTML5 media          entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')          if entries: diff --git a/youtube_dl/extractor/viqeo.py b/youtube_dl/extractor/viqeo.py new file mode 100644 index 000000000..be7dfa814 --- /dev/null +++ b/youtube_dl/extractor/viqeo.py @@ -0,0 +1,99 @@ +# coding: utf-8 +from __future__ 
import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    str_or_none, +    url_or_none, +) + + +class ViqeoIE(InfoExtractor): +    _VALID_URL = r'''(?x) +                        (?: +                            viqeo:| +                            https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=| +                            https?://api\.viqeo\.tv/v\d+/data/startup?.*?\bvideo(?:%5B%5D|\[\])= +                        ) +                        (?P<id>[\da-f]+) +                    ''' +    _TESTS = [{ +        'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837', +        'md5': 'a169dd1a6426b350dca4296226f21e76', +        'info_dict': { +            'id': 'cde96f09d25f39bee837', +            'ext': 'mp4', +            'title': 'cde96f09d25f39bee837', +            'thumbnail': r're:^https?://.*\.jpg$', +            'duration': 76, +        }, +    }, { +        'url': 'viqeo:cde96f09d25f39bee837', +        'only_matching': True, +    }, { +        'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112', +        'only_matching': True, +    }] + +    @staticmethod +    def _extract_urls(webpage): +        return [ +            mobj.group('url') +            for mobj in re.finditer( +                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1', +                webpage)] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        webpage = self._download_webpage( +            'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id) + +        data = self._parse_json( +            self._search_regex( +                r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'), +            video_id) + +        formats = [] +        thumbnails = [] +        for media_file in data['mediaFiles']: +            if not isinstance(media_file, dict): +                continue +            media_url = 
url_or_none(media_file.get('url')) +            if not media_url or not media_url.startswith(('http', '//')): +                continue +            media_type = str_or_none(media_file.get('type')) +            if not media_type: +                continue +            media_kind = media_type.split('/')[0].lower() +            f = { +                'url': media_url, +                'width': int_or_none(media_file.get('width')), +                'height': int_or_none(media_file.get('height')), +            } +            format_id = str_or_none(media_file.get('quality')) +            if media_kind == 'image': +                f['id'] = format_id +                thumbnails.append(f) +            elif media_kind in ('video', 'audio'): +                is_audio = media_kind == 'audio' +                f.update({ +                    'format_id': 'audio' if is_audio else format_id, +                    'fps': int_or_none(media_file.get('fps')), +                    'vcodec': 'none' if is_audio else None, +                }) +                formats.append(f) +        self._sort_formats(formats) + +        duration = int_or_none(data.get('duration')) + +        return { +            'id': video_id, +            'title': video_id, +            'duration': duration, +            'thumbnails': thumbnails, +            'formats': formats, +        } | 
