diff options
Diffstat (limited to 'youtube_dl/extractor/ard.py')
| -rw-r--r-- | youtube_dl/extractor/ard.py | 125 | 
1 files changed, 91 insertions, 34 deletions
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 6a35ea463..55f940d57 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -35,6 +35,87 @@ class ARDMediathekIE(InfoExtractor):          'skip': 'Blocked outside of Germany',      }] +    def _extract_media_info(self, media_info_url, webpage, video_id): +        media_info = self._download_json( +            media_info_url, video_id, 'Downloading media JSON') + +        formats = self._extract_formats(media_info, video_id) + +        if not formats: +            if '"fsk"' in webpage: +                raise ExtractorError( +                    'This video is only available after 20:00', expected=True) +            elif media_info.get('_geoblocked'): +                raise ExtractorError('This video is not available due to geo restriction', expected=True) + +        self._sort_formats(formats) + +        duration = int_or_none(media_info.get('_duration')) +        thumbnail = media_info.get('_previewImage') + +        subtitles = {} +        subtitle_url = media_info.get('_subtitleUrl') +        if subtitle_url: +            subtitles['de'] = [{ +                'ext': 'srt', +                'url': subtitle_url, +            }] + +        return { +            'id': video_id, +            'duration': duration, +            'thumbnail': thumbnail, +            'formats': formats, +            'subtitles': subtitles, +        } + +    def _extract_formats(self, media_info, video_id): +        type_ = media_info.get('_type') +        media_array = media_info.get('_mediaArray', []) +        formats = [] +        for num, media in enumerate(media_array): +            for stream in media.get('_mediaStreamArray', []): +                stream_urls = stream.get('_stream') +                if not stream_urls: +                    continue +                if not isinstance(stream_urls, list): +                    stream_urls = [stream_urls] +                quality = stream.get('_quality') +                server = stream.get('_server') +                for stream_url in stream_urls: +                    ext = determine_ext(stream_url) +                    if ext == 'f4m': +                        formats.extend(self._extract_f4m_formats( +                            stream_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', +                            video_id, preference=-1, f4m_id='hds')) +                    elif ext == 'm3u8': +                        formats.extend(self._extract_m3u8_formats( +                            stream_url, video_id, 'mp4', preference=1, m3u8_id='hls')) +                    else: +                        if server and server.startswith('rtmp'): +                            f = { +                                'url': server, +                                'play_path': stream_url, +                                'format_id': 'a%s-rtmp-%s' % (num, quality), +                            } +                        elif stream_url.startswith('http'): +                            f = { +                                'url': stream_url, +                                'format_id': 'a%s-%s-%s' % (num, ext, quality) +                            } +                        else: +                            continue +                        m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url) +                        if m: +                            f.update({ +                                'width': int(m.group('width')), +                                'height': int(m.group('height')), +                            }) +                        if type_ == 'audio': +                            f['vcodec'] = 'none' +                        formats.append(f) +        return formats +      def _real_extract(self, url):          # determine video id from url          m = re.match(self._VALID_URL, url) @@ -92,46 +173,22 @@ class ARDMediathekIE(InfoExtractor):                      'format_id': fid,                      'url': furl,                  }) +            self._sort_formats(formats) +            info = { +                'formats': formats, +            }          else:  # request JSON file -            media_info = self._download_json( -                'http://www.ardmediathek.de/play/media/%s' % video_id, video_id) -            # The second element of the _mediaArray contains the standard http urls -            streams = media_info['_mediaArray'][1]['_mediaStreamArray'] -            if not streams: -                if '"fsk"' in webpage: -                    raise ExtractorError('This video is only available after 20:00') - -            formats = [] -            for s in streams: -                if type(s['_stream']) == list: -                    for index, url in enumerate(s['_stream'][::-1]): -                        quality = s['_quality'] + index -                        formats.append({ -                            'quality': quality, -                            'url': url, -                            'format_id': '%s-%s' % (determine_ext(url), quality) -                        }) -                    continue - -                format = { -                    'quality': s['_quality'], -                    'url': s['_stream'], -                } - -                format['format_id'] = '%s-%s' % ( -                    determine_ext(format['url']), format['quality']) +            info = self._extract_media_info( +                'http://www.ardmediathek.de/play/media/%s' % video_id, webpage, video_id) -                formats.append(format) - -        self._sort_formats(formats) - -        return { +        info.update({              'id': video_id,              'title': title,              'description': description, -            'formats': formats,              'thumbnail': thumbnail, -        } +        }) + +        return info  class ARDIE(InfoExtractor):  | 
