aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2014-06-03 21:56:49 +0200
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2014-06-03 21:56:49 +0200
commit6a3fa81ffba194c9fdcb30b61f72935d63f30a53 (patch)
tree4f77ccf313e421b2a9d20a0cf948f18d074ede5d
parent0d697950149871612a4132a68893884fbe88a513 (diff)
downloadyoutube-dl-6a3fa81ffba194c9fdcb30b61f72935d63f30a53.tar.xz
[ard] Fix format extraction (fixes #3006 and #3032)
-rw-r--r--youtube_dl/extractor/ard.py29
1 files changed, 11 insertions, 18 deletions
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index a87b32b22..c6d22c029 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -39,16 +39,18 @@ class ARDIE(InfoExtractor):
title = self._html_search_regex(
[r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
+ r'<meta name="dcterms.title" content="(.*?)"/>',
r'<h4 class="headline">(.*?)</h4>'],
webpage, 'title')
description = self._html_search_meta(
'dcterms.abstract', webpage, 'description')
thumbnail = self._og_search_thumbnail(webpage)
- streams = [
- mo.groupdict()
- for mo in re.finditer(
- r'mediaCollection\.addMediaStream\((?P<media_type>\d+), (?P<quality>\d+), "(?P<rtmp_url>[^"]*)", "(?P<video_url>[^"]*)", "[^"]*"\)', webpage)]
+
+ media_info = self._download_json(
+ 'http://www.ardmediathek.de/play/media/%s' % video_id, video_id)
+ # The second element of the _mediaArray contains the standard http urls
+ streams = media_info['_mediaArray'][1]['_mediaStreamArray']
if not streams:
if '"fsk"' in webpage:
raise ExtractorError('This video is only available after 20:00')
@@ -56,21 +58,12 @@ class ARDIE(InfoExtractor):
formats = []
for s in streams:
format = {
- 'quality': int(s['quality']),
+ 'quality': s['_quality'],
+ 'url': s['_stream'],
}
- if s.get('rtmp_url'):
- format['protocol'] = 'rtmp'
- format['url'] = s['rtmp_url']
- format['playpath'] = s['video_url']
- else:
- format['url'] = s['video_url']
-
- quality_name = self._search_regex(
- r'[,.]([a-zA-Z0-9_-]+),?\.mp4', format['url'],
- 'quality name', default='NA')
- format['format_id'] = '%s-%s-%s-%s' % (
- determine_ext(format['url']), quality_name, s['media_type'],
- s['quality'])
+
+ format['format_id'] = '%s-%s' % (
+ determine_ext(format['url']), format['quality'])
formats.append(format)