diff options
Diffstat (limited to 'youtube_dl/extractor/telegraaf.py')
| -rw-r--r-- | youtube_dl/extractor/telegraaf.py | 58 | 
1 files changed, 51 insertions, 7 deletions
| diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py index 6f8333cfc..9092e9b85 100644 --- a/youtube_dl/extractor/telegraaf.py +++ b/youtube_dl/extractor/telegraaf.py @@ -2,14 +2,16 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..utils import remove_end +from ..utils import ( +    determine_ext, +    remove_end, +)  class TelegraafIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'      _TEST = {          'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html', -        'md5': '83245a9779bcc4a24454bfd53c65b6dc',          'info_dict': {              'id': '24353229',              'ext': 'mp4', @@ -18,18 +20,60 @@ class TelegraafIE(InfoExtractor):              'thumbnail': 're:^https?://.*\.jpg$',              'duration': 33,          }, +        'params': { +            # m3u8 download +            'skip_download': True, +        },      }      def _real_extract(self, url): -        playlist_id = self._match_id(url) +        video_id = self._match_id(url) -        webpage = self._download_webpage(url, playlist_id) +        webpage = self._download_webpage(url, video_id) +        player_url = self._html_search_regex( +            r'<iframe[^>]+src="([^"]+")', webpage, 'player URL') +        player_page = self._download_webpage( +            player_url, video_id, note='Download player webpage')          playlist_url = self._search_regex( -            r"iframe\.loadPlayer\('([^']+)'", webpage, 'player') +            r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL') +        playlist_data = self._download_json(playlist_url, video_id) + +        item = playlist_data['items'][0] +        formats = [] +        locations = item['locations'] +        for location in locations.get('adaptive', []): +            manifest_url = location['src'] +            ext = determine_ext(manifest_url) +            if ext == 'm3u8': +                formats.extend(self._extract_m3u8_formats( +                    manifest_url, video_id, ext='mp4', m3u8_id='hls')) +            elif ext == 'mpd': +                # TODO: Current DASH formats are broken - $Time$ pattern in +                # <SegmentTemplate> not implemented yet +                continue +            else: +                self.report_warning('Unknown adaptive format %s' % ext) +        for location in locations.get('progressive', []): +            formats.append({ +                'url': location['sources'][0]['src'], +                'width': location.get('width'), +                'height': location.get('height'), +                'format_id': 'http-%s' % location['label'], +            }) + +        self._sort_formats(formats) -        entries = self._extract_xspf_playlist(playlist_url, playlist_id)          title = remove_end(self._og_search_title(webpage), ' - VIDEO')          description = self._og_search_description(webpage) +        duration = item.get('duration') +        thumbnail = item.get('poster') -        return self.playlist_result(entries, playlist_id, title, description) +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'formats': formats, +            'duration': duration, +            'thumbnail': thumbnail, +        } | 
