diff options
| -rw-r--r-- | youtube_dl/extractor/dispeak.py | 111 | ||||
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/gdcvault.py | 74 | ||||
| -rw-r--r-- | youtube_dl/extractor/gputechconf.py | 36 | 
4 files changed, 123 insertions, 99 deletions
| diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dl/extractor/dispeak.py new file mode 100644 index 000000000..6ebc3255a --- /dev/null +++ b/youtube_dl/extractor/dispeak.py @@ -0,0 +1,111 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    int_or_none, +    parse_duration, +    remove_end, +    xpath_element, +    xpath_text, +) + + +class DigitalSpeakingIE(InfoExtractor): +    _VALID_URL = r'http://evt.dispeak.com/([^/]+/)+xml/(?P<id>[^.]+).xml' + +    _TEST = { +        # From http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml +        'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml', +        'md5': 'a8efb6c31ed06ca8739294960b2dbabd', +        'info_dict': { +            'id': '840376_BQRC', +            'ext': 'mp4', +            'title': 'Tenacious Design and The Interface of \'Destiny\'', +        }, +    } + +    def _parse_mp4(self, metadata): +        video_formats = [] +        video_root = None + +        mp4_video = xpath_text(metadata, './mp4video', default=None) +        if mp4_video is not None: +            mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video) +            video_root = mobj.group('root') +        if video_root is None: +            http_host = xpath_text(metadata, 'httpHost', default=None) +            if http_host: +                video_root = 'http://%s/' % http_host +        if video_root is None: +            # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js +            # Works for GPUTechConf, too +            video_root = 'http://s3-2u.digitallyspeaking.com/' + +        formats = metadata.findall('./MBRVideos/MBRVideo') +        if not formats: +            return None +        for a_format in formats: +            stream_name = xpath_text(a_format, 'streamName', fatal=True) +            video_path = re.match(r'mp4\:(?P<path>.*)', stream_name).group('path') +            url = video_root + video_path +            vbr = xpath_text(a_format, 'bitrate') +            video_formats.append({ +                'url': url, +                'vbr': int_or_none(vbr), +            }) +        return video_formats + +    def _parse_flv(self, metadata): +        formats = [] +        akamai_url = xpath_text(metadata, './akamaiHost', fatal=True) +        audios = metadata.find('./audios') +        if audios is not None: +            for audio in audios: +                formats.append({ +                    'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, +                    'play_path': remove_end(audio.get('url'), '.flv'), +                    'ext': 'flv', +                    'vcodec': 'none', +                    'format_id': audio.get('code'), +                }) +        slide_video_path = xpath_text(metadata, './slideVideo', fatal=True) +        formats.append({ +            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, +            'play_path': remove_end(slide_video_path, '.flv'), +            'ext': 'flv', +            'format_note': 'slide deck video', +            'quality': -2, +            'preference': -2, +            'format_id': 'slides', +        }) +        speaker_video_path = xpath_text(metadata, './speakerVideo', fatal=True) +        formats.append({ +            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, +            'play_path': remove_end(speaker_video_path, '.flv'), +            'ext': 'flv', +            'format_note': 'speaker video', +            'quality': -1, +            'preference': -1, +            'format_id': 'speaker', +        }) +        return formats + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        xml_description = self._download_xml(url, video_id) +        metadata = xpath_element(xml_description, 'metadata') + +        video_formats = self._parse_mp4(metadata) +        if video_formats is None: +            video_formats = self._parse_flv(metadata) + +        return { +            'id': video_id, +            'formats': video_formats, +            'title': xpath_text(metadata, 'title', fatal=True), +            'duration': parse_duration(xpath_text(metadata, 'endTime')), +            'creator': xpath_text(metadata, 'speaker'), +        } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 1f3172119..84bdf5e97 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -197,6 +197,7 @@ from .dump import DumpIE  from .dumpert import DumpertIE  from .defense import DefenseGouvFrIE  from .discovery import DiscoveryIE +from .dispeak import DigitalSpeakingIE  from .dropbox import DropboxIE  from .dw import (      DWIE, diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dl/extractor/gdcvault.py index 3ebcaf733..01e1ceec8 100644 --- a/youtube_dl/extractor/gdcvault.py +++ b/youtube_dl/extractor/gdcvault.py @@ -4,7 +4,6 @@ import re  from .common import InfoExtractor  from ..utils import ( -    remove_end,      HEADRequest,      sanitized_Request,      urlencode_postdata, @@ -64,66 +63,6 @@ class GDCVaultIE(InfoExtractor):          },      ] -    def _parse_mp4(self, xml_description): -        video_formats = [] -        video_root = None - -        mp4_video = xml_description.find('./metadata/mp4video') -        if mp4_video is not None: -            mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video.text) -            video_root = mobj.group('root') -        if video_root is None: -            # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js -            video_root = 'http://s3-2u.digitallyspeaking.com/' - -        formats = xml_description.findall('./metadata/MBRVideos/MBRVideo') -        if not formats: -            return None -        for format in formats: -            mobj = re.match(r'mp4\:(?P<path>.*)', format.find('streamName').text) -            url = video_root + mobj.group('path') -            vbr = format.find('bitrate').text -            video_formats.append({ -                'url': url, -                'vbr': int(vbr), -            }) -        return video_formats - -    def _parse_flv(self, xml_description): -        formats = [] -        akamai_url = xml_description.find('./metadata/akamaiHost').text -        audios = xml_description.find('./metadata/audios') -        if audios is not None: -            for audio in audios: -                formats.append({ -                    'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, -                    'play_path': remove_end(audio.get('url'), '.flv'), -                    'ext': 'flv', -                    'vcodec': 'none', -                    'format_id': audio.get('code'), -                }) -        slide_video_path = xml_description.find('./metadata/slideVideo').text -        formats.append({ -            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, -            'play_path': remove_end(slide_video_path, '.flv'), -            'ext': 'flv', -            'format_note': 'slide deck video', -            'quality': -2, -            'preference': -2, -            'format_id': 'slides', -        }) -        speaker_video_path = xml_description.find('./metadata/speakerVideo').text -        formats.append({ -            'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, -            'play_path': remove_end(speaker_video_path, '.flv'), -            'ext': 'flv', -            'format_note': 'speaker video', -            'quality': -1, -            'preference': -1, -            'format_id': 'speaker', -        }) -        return formats -      def _login(self, webpage_url, display_id):          (username, password) = self._get_login_info()          if username is None or password is None: @@ -199,17 +138,10 @@ class GDCVaultIE(InfoExtractor):                  r'<iframe src=".*?\?xmlURL=xml/(?P<xml_file>.+?\.xml).*?".*?</iframe>',                  start_page, 'xml filename') -        xml_description = self._download_xml( -            '%s/xml/%s' % (xml_root, xml_name), display_id) - -        video_title = xml_description.find('./metadata/title').text -        video_formats = self._parse_mp4(xml_description) -        if video_formats is None: -            video_formats = self._parse_flv(xml_description) -          return { +            '_type': 'url_transparent',              'id': video_id,              'display_id': display_id, -            'title': video_title, -            'formats': video_formats, +            'url': '%s/xml/%s' % (xml_root, xml_name), +            'ie': 'DigitalSpeaking',          } diff --git a/youtube_dl/extractor/gputechconf.py b/youtube_dl/extractor/gputechconf.py index 145b55bf3..359b348e4 100644 --- a/youtube_dl/extractor/gputechconf.py +++ b/youtube_dl/extractor/gputechconf.py @@ -2,12 +2,6 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..utils import ( -    xpath_element, -    xpath_text, -    int_or_none, -    parse_duration, -)  class GPUTechConfIE(InfoExtractor): @@ -27,29 +21,15 @@ class GPUTechConfIE(InfoExtractor):          video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) -        root_path = self._search_regex(r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', 'http://evt.dispeak.com/nvidia/events/gtc15/') -        xml_file_id = self._search_regex(r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id') - -        doc = self._download_xml('%sxml/%s.xml' % (root_path, xml_file_id), video_id) - -        metadata = xpath_element(doc, 'metadata') -        http_host = xpath_text(metadata, 'httpHost', 'http host', True) -        mbr_videos = xpath_element(metadata, 'MBRVideos') - -        formats = [] -        for mbr_video in mbr_videos.findall('MBRVideo'): -            stream_name = xpath_text(mbr_video, 'streamName') -            if stream_name: -                formats.append({ -                    'url': 'http://%s/%s' % (http_host, stream_name.replace('mp4:', '')), -                    'tbr': int_or_none(xpath_text(mbr_video, 'bitrate')), -                }) -        self._sort_formats(formats) +        root_path = self._search_regex( +            r'var\s+rootPath\s*=\s*"([^"]+)', webpage, 'root path', +            default='http://evt.dispeak.com/nvidia/events/gtc15/') +        xml_file_id = self._search_regex( +            r'var\s+xmlFileId\s*=\s*"([^"]+)', webpage, 'xml file id')          return { +            '_type': 'url_transparent',              'id': video_id, -            'title': xpath_text(metadata, 'title'), -            'duration': parse_duration(xpath_text(metadata, 'endTime')), -            'creator': xpath_text(metadata, 'speaker'), -            'formats': formats, +            'url': '%sxml/%s.xml' % (root_path, xml_file_id), +            'ie': 'DigitalSpeaking',          } | 
