aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2015-07-19 23:59:50 +0600
committerSergey M․ <dstftw@gmail.com>2015-07-19 23:59:50 +0600
commite37c92ec6d137cd2d09c5f13e9a261c4fae552dd (patch)
treee4f5a3c07c8d7a103be965203407531894c09ad1
parent1d18e26ecad636cb88ee9710807e9583eec98cd4 (diff)
downloadyoutube-dl-e37c92ec6d137cd2d09c5f13e9a261c4fae552dd.tar.xz
[ard] Extract all formats
-rw-r--r--youtube_dl/extractor/ard.py125
1 files changed, 91 insertions, 34 deletions
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 6a35ea463..55f940d57 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -35,6 +35,87 @@ class ARDMediathekIE(InfoExtractor):
'skip': 'Blocked outside of Germany',
}]
+ def _extract_media_info(self, media_info_url, webpage, video_id):
+ media_info = self._download_json(
+ media_info_url, video_id, 'Downloading media JSON')
+
+ formats = self._extract_formats(media_info, video_id)
+
+ if not formats:
+ if '"fsk"' in webpage:
+ raise ExtractorError(
+ 'This video is only available after 20:00', expected=True)
+ elif media_info.get('_geoblocked'):
+ raise ExtractorError('This video is not available due to geo restriction', expected=True)
+
+ self._sort_formats(formats)
+
+ duration = int_or_none(media_info.get('_duration'))
+ thumbnail = media_info.get('_previewImage')
+
+ subtitles = {}
+ subtitle_url = media_info.get('_subtitleUrl')
+ if subtitle_url:
+ subtitles['de'] = [{
+ 'ext': 'srt',
+ 'url': subtitle_url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _extract_formats(self, media_info, video_id):
+ type_ = media_info.get('_type')
+ media_array = media_info.get('_mediaArray', [])
+ formats = []
+ for num, media in enumerate(media_array):
+ for stream in media.get('_mediaStreamArray', []):
+ stream_urls = stream.get('_stream')
+ if not stream_urls:
+ continue
+ if not isinstance(stream_urls, list):
+ stream_urls = [stream_urls]
+ quality = stream.get('_quality')
+ server = stream.get('_server')
+ for stream_url in stream_urls:
+ ext = determine_ext(stream_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124',
+ video_id, preference=-1, f4m_id='hds'))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4', preference=1, m3u8_id='hls'))
+ else:
+ if server and server.startswith('rtmp'):
+ f = {
+ 'url': server,
+ 'play_path': stream_url,
+ 'format_id': 'a%s-rtmp-%s' % (num, quality),
+ }
+ elif stream_url.startswith('http'):
+ f = {
+ 'url': stream_url,
+ 'format_id': 'a%s-%s-%s' % (num, ext, quality)
+ }
+ else:
+ continue
+ m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
+ if m:
+ f.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ if type_ == 'audio':
+ f['vcodec'] = 'none'
+ formats.append(f)
+ return formats
+
def _real_extract(self, url):
# determine video id from url
m = re.match(self._VALID_URL, url)
@@ -92,46 +173,22 @@ class ARDMediathekIE(InfoExtractor):
'format_id': fid,
'url': furl,
})
+ self._sort_formats(formats)
+ info = {
+ 'formats': formats,
+ }
else: # request JSON file
- media_info = self._download_json(
- 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
- # The second element of the _mediaArray contains the standard http urls
- streams = media_info['_mediaArray'][1]['_mediaStreamArray']
- if not streams:
- if '"fsk"' in webpage:
- raise ExtractorError('This video is only available after 20:00')
-
- formats = []
- for s in streams:
- if type(s['_stream']) == list:
- for index, url in enumerate(s['_stream'][::-1]):
- quality = s['_quality'] + index
- formats.append({
- 'quality': quality,
- 'url': url,
- 'format_id': '%s-%s' % (determine_ext(url), quality)
- })
- continue
-
- format = {
- 'quality': s['_quality'],
- 'url': s['_stream'],
- }
-
- format['format_id'] = '%s-%s' % (
- determine_ext(format['url']), format['quality'])
+ info = self._extract_media_info(
+ 'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id)
- formats.append(format)
-
- self._sort_formats(formats)
-
- return {
+ info.update({
'id': video_id,
'title': title,
'description': description,
- 'formats': formats,
'thumbnail': thumbnail,
- }
+ })
+
+ return info
class ARDIE(InfoExtractor):