diff options
Diffstat (limited to 'youtube_dl')
| -rw-r--r-- | youtube_dl/extractor/discoverygo.py | 98 | ||||
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | 
2 files changed, 99 insertions, 0 deletions
# diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py
# new file mode 100644 (contents of youtube_dl/extractor/discoverygo.py follow)
from __future__ import unicode_literals

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    extract_attributes,
    int_or_none,
    parse_age_limit,
    unescapeHTML,
)


class DiscoveryGoIE(InfoExtractor):
    """Extractor for video pages hosted on discoverygo.com."""

    _VALID_URL = r'https?://(?:www\.)?discoverygo\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/',
        'info_dict': {
            'id': '57a33c536b66d1cd0345eeb1',
            'ext': 'mp4',
            'title': 'Kiss First, Ask Questions Later!',
            'description': 'md5:fe923ba34050eae468bffae10831cb22',
            'duration': 2579,
            'series': 'Love at First Kiss',
            'season_number': 1,
            'episode_number': 1,
            'age_limit': 14,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single discoverygo.com video page.

        The page embeds a JSON blob in the ``data-video`` (or ``data-json``)
        attribute of the ``video-player-container`` <div>; everything returned
        here is read from that blob.

        ``video['name']`` and ``video['stream']`` are mandatory (a missing key
        raises KeyError); every other field is optional and falls back to
        None / empty.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        container = extract_attributes(
            self._search_regex(
                r'(<div[^>]+class=["\']video-player-container[^>]+>)',
                webpage, 'video container'))

        video = self._parse_json(
            unescapeHTML(container.get('data-video') or container.get('data-json')),
            display_id)

        def nested(key):
            # Optional sub-objects may be absent *or* present with a null /
            # non-dict value; normalise all of those to an empty dict so the
            # chained .get() below cannot raise AttributeError.
            value = video.get(key)
            return value if isinstance(value, dict) else {}

        title = video['name']

        stream = video['stream']
        formats = []

        # HLS stream lives under the plain key.
        hls_url = stream.get('streamUrl')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(
                hls_url, display_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        # The key is spelled out on purpose: building it with
        # 'streamUrl'.capitalize() yields 'Streamurl' (capitalize() lower-cases
        # the rest of the string), i.e. 'hdsStreamurl', which never matched.
        # NOTE(review): 'hdsStreamUrl' is the evidently intended camelCase key;
        # confirm against a live payload.
        hds_url = stream.get('hdsStreamUrl')
        if hds_url:
            formats.extend(self._extract_f4m_formats(
                hds_url, display_id, f4m_id='hds', fatal=False))

        self._sort_formats(formats)

        video_id = video.get('id') or display_id
        description = nested('description').get('detailed')
        duration = int_or_none(video.get('duration'))

        series = nested('show').get('name')
        season_number = int_or_none(nested('season').get('number'))
        episode_number = int_or_none(video.get('episodeNumber'))

        tags = video.get('tags')
        age_limit = parse_age_limit(nested('parental').get('rating'))

        subtitles = {}
        captions = stream.get('captions')
        if isinstance(captions, list):
            for caption in captions:
                if not isinstance(caption, dict):
                    continue
                subtitle_url = caption.get('fileUrl')
                # Only absolute http(s) URLs are usable as subtitle sources.
                if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
                        not subtitle_url.startswith('http')):
                    continue
                # Missing or null language falls back to English.
                lang = caption.get('fileLang') or 'en'
                subtitles.setdefault(lang, []).append({'url': subtitle_url})

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'duration': duration,
            'series': series,
            'season_number': season_number,
            'episode_number': episode_number,
            'tags': tags,
            'age_limit': age_limit,
            'formats': formats,
            'subtitles': subtitles,
        }

# diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 11b64eeaa..c2c4617ee 100644 ---
a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -221,6 +221,7 @@ from .dvtv import DVTVIE  from .dumpert import DumpertIE  from .defense import DefenseGouvFrIE  from .discovery import DiscoveryIE +from .discoverygo import DiscoveryGoIE  from .dispeak import DigitallySpeakingIE  from .dropbox import DropboxIE  from .dw import (  | 
