diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-04-21 19:36:33 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-04-21 19:36:33 +0800 | 
| commit | ec59d657e7d898cce8f3a1b6556a79fd9495fc9d (patch) | |
| tree | 36f7afa9329156e8719d0970c93f153cbffd023b /youtube_dl/extractor/dispeak.py | |
| parent | 99ef96f84c2c9fa1267d6edb836c8f1945295424 (diff) | |
[dispeak] Add new extractor
Both GDCVault and GPUTechConf use the services of DigitalSpeaking.
Diffstat (limited to 'youtube_dl/extractor/dispeak.py')
| -rw-r--r-- | youtube_dl/extractor/dispeak.py | 111 | 
1 files changed, 111 insertions, 0 deletions
| diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py new file mode 100644 index 000000000..6ebc3255a --- /dev/null +++ b/youtube_dl/extractor/dispeak.py @@ -0,0 +1,111 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    parse_duration, +    remove_end, +    xpath_element, +    xpath_text, +) + + +class DigitalSpeakingIE(InfoExtractor): +    _VALID_URL = r'http://evt.dispeak.com/([^/]+/)+xml/(?P<id>[^.]+).xml' + +    _TEST = { +        # From http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml +        'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml', +        'md5': 'a8efb6c31ed06ca8739294960b2dbabd', +        'info_dict': { +            'id': '840376_BQRC', +            'ext': 'mp4', +            'title': 'Tenacious Design and The Interface of \'Destiny\'', +        }, +    } + +    def _parse_mp4(self, metadata): +        video_formats = [] +        video_root = None + +        mp4_video = xpath_text(metadata, './mp4video', default=None) +        if mp4_video is not None: +            mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video) +            video_root = mobj.group('root') +        if video_root is None: +            http_host = xpath_text(metadata, 'httpHost', default=None) +            if http_host: +                video_root = 'http://%s/' % http_host +        if video_root is None: +            # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js +            # Works for GPUTechConf, too +            video_root = 'http://s3-2u.digitallyspeaking.com/' + +        formats = metadata.findall('./MBRVideos/MBRVideo') +        if not formats: +            return None +        for a_format in formats: +            stream_name = xpath_text(a_format, 'streamName', fatal=True) +            video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path') +            url = video_root + video_path +     
       vbr = xpath_text(a_format, 'bitrate') +            video_formats.append({ +                'url': url, +                'vbr': int_or_none(vbr), +            }) +        return video_formats + +    def _parse_flv(self, metadata): +        formats = [] +        akamai_url = xpath_text(metadata, './akamaiHost', fatal=True) +        audios = metadata.find('./audios') +        if audios is not None: +            for audio in audios: +                formats.append({ +                    'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, +                    'play_path': remove_end(audio.get('url'), '.flv'), +                    'ext': 'flv', +                    'vcodec': 'none', +                    'format_id': audio.get('code'), +                }) +        slide_video_path = xpath_text(metadata, './slideVideo', fatal=True) +        formats.append({ +            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, +            'play_path': remove_end(slide_video_path, '.flv'), +            'ext': 'flv', +            'format_note': 'slide deck video', +            'quality': -2, +            'preference': -2, +            'format_id': 'slides', +        }) +        speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True) +        formats.append({ +            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, +            'play_path': remove_end(speaker_video_path, '.flv'), +            'ext': 'flv', +            'format_note': 'speaker video', +            'quality': -1, +            'preference': -1, +            'format_id': 'speaker', +        }) +        return formats + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        xml_description = self._download_xml(url, video_id) +        metadata = xpath_element(xml_description, 'metadata') + +        video_formats = self._parse_mp4(metadata) +        if video_formats is None: +            video_formats = self._parse_flv(metadata) + +        return { +           
 'id': video_id, +            'formats': video_formats, +            'title': xpath_text(metadata, 'title', fatal=True), +            'duration': parse_duration(xpath_text(metadata, 'endTime')), +            'creator': xpath_text(metadata, 'speaker'), +        } | 
