1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
int_or_none,
parse_duration,
remove_end,
xpath_element,
xpath_text,
)
class DigitallySpeakingIE(InfoExtractor):
    """Extractor for Digitally Speaking XML presentation descriptors.

    Matches ``.xml`` descriptor URLs on ``evt.dispeak.com``,
    ``sevt.dispeak.com`` and ``events.digitallyspeaking.com``.  The
    descriptor's ``metadata`` element is turned into either HTTP MP4 formats
    (see ``_parse_mp4``) or, when no MP4 renditions are listed, RTMP FLV
    formats (see ``_parse_flv``).
    """

    _VALID_URL = r'https?://(?:s?evt\.dispeak|events\.digitallyspeaking)\.com/(?:[^/]+/)+xml/(?P<id>[^.]+)\.xml'

    _TESTS = [{
        # From http://gdcvault.com/play/1023460/Tenacious-Design-and-The-Interface
        'url': 'http://evt.dispeak.com/ubm/gdc/sf16/xml/840376_BQRC.xml',
        'md5': 'a8efb6c31ed06ca8739294960b2dbabd',
        'info_dict': {
            'id': '840376_BQRC',
            'ext': 'mp4',
            'title': 'Tenacious Design and The Interface of \'Destiny\'',
        },
    }, {
        # From http://www.gdcvault.com/play/1014631/Classic-Game-Postmortem-PAC
        'url': 'http://events.digitallyspeaking.com/gdc/sf11/xml/12396_1299111843500GMPX.xml',
        'only_matching': True,
    }, {
        # From http://www.gdcvault.com/play/1013700/Advanced-Material
        'url': 'http://sevt.dispeak.com/ubm/gdc/eur10/xml/11256_1282118587281VNIT.xml',
        'only_matching': True,
    }, {
        # From https://gdcvault.com/play/1016624, empty speakerVideo
        'url': 'https://sevt.dispeak.com/ubm/gdc/online12/xml/201210-822101_1349794556671DDDD.xml',
        'info_dict': {
            'id': '201210-822101_1349794556671DDDD',
            'ext': 'flv',
            'title': 'Pre-launch - Preparing to Take the Plunge',
        },
    }, {
        # From http://www.gdcvault.com/play/1014846/Conference-Keynote-Shigeru, empty slideVideo
        'url': 'http://events.digitallyspeaking.com/gdc/project25/xml/p25-miyamoto1999_1282467389849HSVB.xml',
        'only_matching': True,
    }]

    def _parse_mp4(self, metadata):
        """Build HTTP MP4 formats from the ``MBRVideos`` list of *metadata*.

        The base URL is taken, in order of preference, from the scheme and
        host of ``mp4video``, from ``httpHost``, or from a hard-coded default.

        Returns a list of format dicts, or ``None`` when *metadata* has no
        ``./MBRVideos/MBRVideo`` elements (the caller then falls back to
        FLV).  Entries whose ``streamName`` lacks the ``mp4:`` prefix are
        skipped, so the returned list may be empty.
        """
        video_formats = []
        video_root = None

        mp4_video = xpath_text(metadata, './mp4video', default=None)
        if mp4_video is not None:
            mobj = re.match(r'(?P<root>https?://.*?/).*', mp4_video)
            # A value that is not an absolute http(s) URL gives no root;
            # fall through to the other sources instead of crashing.
            if mobj:
                video_root = mobj.group('root')
        if video_root is None:
            http_host = xpath_text(metadata, 'httpHost', default=None)
            if http_host:
                video_root = 'http://%s/' % http_host
        if video_root is None:
            # Hard-coded in http://evt.dispeak.com/ubm/gdc/sf16/custom/player2.js
            # Works for GPUTechConf, too
            video_root = 'http://s3-2u.digitallyspeaking.com/'

        formats = metadata.findall('./MBRVideos/MBRVideo')
        if not formats:
            return None
        for a_format in formats:
            stream_name = xpath_text(a_format, 'streamName', fatal=True)
            path_mobj = re.match(r'mp4\:(?P<path>.*)', stream_name)
            if not path_mobj:
                # Without the "mp4:" prefix there is no path to append to
                # the HTTP root; skip this entry rather than abort.
                continue
            video_path = path_mobj.group('path')
            url = video_root + video_path
            bitrate = xpath_text(a_format, 'bitrate')
            tbr = int_or_none(bitrate)
            # The file name may carry a bitrate as "-<digits>.mp4"; it is
            # used as the video bitrate.
            vbr = int_or_none(self._search_regex(
                r'-(\d+)\.mp4', video_path, 'vbr', default=None))
            # Audio bitrate is whatever is left of the total, when both known.
            abr = tbr - vbr if tbr and vbr else None
            video_formats.append({
                'format_id': bitrate,
                'url': url,
                'tbr': tbr,
                'vbr': vbr,
                'abr': abr,
            })
        return video_formats

    def _parse_flv(self, metadata):
        """Build RTMP FLV formats from *metadata*.

        Produces one audio-only format per ``./audios/audio`` element, plus
        one format for each non-empty ``slideVideo`` and ``speakerVideo``.
        The speaker video is ranked above the slide video.  Requires an
        ``akamaiHost`` element (looked up with ``fatal=True``).
        """
        formats = []
        akamai_url = xpath_text(metadata, './akamaiHost', fatal=True)
        # Every format shares the same RTMP endpoint; only play_path differs.
        rtmp_url = 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url

        for audio in metadata.findall('./audios/audio'):
            formats.append({
                'url': rtmp_url,
                'play_path': remove_end(audio.get('url'), '.flv'),
                'ext': 'flv',
                'vcodec': 'none',
                'format_id': audio.get('code'),
            })

        for video_key, format_id, preference in (
                ('slide', 'slides', -2), ('speaker', 'speaker', -1)):
            video_path = xpath_text(metadata, './%sVideo' % video_key)
            if not video_path:
                # Either video may be absent or empty in the descriptor.
                continue
            formats.append({
                'url': rtmp_url,
                'play_path': remove_end(video_path, '.flv'),
                'ext': 'flv',
                'format_note': '%s video' % video_key,
                'quality': preference,
                'preference': preference,
                'format_id': format_id,
            })
        return formats

    def _real_extract(self, url):
        """Download the XML descriptor at *url* and return its info dict.

        MP4 formats are preferred; FLV formats are used only when the
        descriptor lists no MP4 renditions.
        """
        video_id = self._match_id(url)

        xml_description = self._download_xml(url, video_id)
        # fatal=True: a descriptor without <metadata> is unusable, so report
        # it here rather than fail later with an AttributeError on None.
        metadata = xpath_element(xml_description, 'metadata', fatal=True)

        video_formats = self._parse_mp4(metadata)
        if video_formats is None:
            video_formats = self._parse_flv(metadata)

        return {
            'id': video_id,
            'formats': video_formats,
            'title': xpath_text(metadata, 'title', fatal=True),
            'duration': parse_duration(xpath_text(metadata, 'endTime')),
            'creator': xpath_text(metadata, 'speaker'),
        }
|