diff options
| -rw-r--r-- | youtube_dl/extractor/rtbf.py | 62 | 
1 files changed, 45 insertions, 17 deletions
| diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dl/extractor/rtbf.py index e42b319a3..28cc5522d 100644 --- a/youtube_dl/extractor/rtbf.py +++ b/youtube_dl/extractor/rtbf.py @@ -4,12 +4,18 @@ from __future__ import unicode_literals  from .common import InfoExtractor  from ..utils import (      int_or_none, -    unescapeHTML, +    ExtractorError,  )  class RTBFIE(InfoExtractor): -    _VALID_URL = r'https?://(?:www\.)?rtbf\.be/(?:video/[^?]+\?.*\bid=|ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=)(?P<id>\d+)' +    _VALID_URL = r'''(?x) +        https?://(?:www\.)?rtbf\.be/ +        (?: +            video/[^?]+\?.*\bid=| +            ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=| +            auvio/[^/]+\?.*id= +        )(?P<id>\d+)'''      _TESTS = [{          'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',          'md5': '799f334ddf2c0a582ba80c44655be570', @@ -17,7 +23,11 @@ class RTBFIE(InfoExtractor):              'id': '1921274',              'ext': 'mp4',              'title': 'Les Diables au coeur (épisode 2)', +            'description': 'Football - Diables Rouges',              'duration': 3099, +            'upload_date': '20140425', +            'timestamp': 1398456336, +            'uploader': 'rtbfsport',          }      }, {          # geo restricted @@ -26,45 +36,63 @@ class RTBFIE(InfoExtractor):      }, {          'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',          'only_matching': True, +    }, { +        'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996', +        'only_matching': True,      }] - +    _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be' +    _PROVIDERS = { +        'YOUTUBE': 'Youtube', +        'DAILYMOTION': 'Dailymotion', +        'VIMEO': 'Vimeo', +    }      _QUALITIES = [ -        ('mobile', 'mobile'), -        ('web', 'SD'), -        ('url', 'MD'), +        ('mobile', 'SD'), +        ('web', 'MD'),          ('high', 'HD'),      ]      def _real_extract(self, url):          video_id = self._match_id(url) +        data = self._download_json( +            'http://www.rtbf.be/api/media/video?method=getVideoDetail&args[]=%s' % video_id, video_id) -        webpage = self._download_webpage( -            'http://www.rtbf.be/video/embed?id=%s' % video_id, video_id) +        error = data.get('error') +        if error: +            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) -        data = self._parse_json( -            unescapeHTML(self._search_regex( -                r'data-media="([^"]+)"', webpage, 'data video')), -            video_id) +        data = data['data'] + +        provider = data.get('provider') +        if provider in self._PROVIDERS: +            return self.url_result(data['url'], self._PROVIDERS[provider]) -        if data.get('provider').lower() == 'youtube': -            video_url = data.get('downloadUrl') or data.get('url') -            return self.url_result(video_url, 'Youtube')          formats = []          for key, format_id in self._QUALITIES: -            format_url = data['sources'].get(key) +            format_url = data.get(key + 'Url')              if format_url:                  formats.append({                      'format_id': format_id,                      'url': format_url,                  }) +        thumbnails = [] +        for thumbnail_id, thumbnail_url in data.get('thumbnail', {}).items(): +            if thumbnail_id != 'default': +                thumbnails.append({ +                    'url': self._IMAGE_HOST + thumbnail_url, +                    'id': thumbnail_id, +                }) +          return {              'id': video_id,              'formats': formats,              'title': data['title'],              'description': data.get('description') or data.get('subtitle'), -            'thumbnail': data.get('thumbnail'), +            'thumbnails': thumbnails,              'duration': data.get('duration') or data.get('realDuration'),              'timestamp': int_or_none(data.get('created')),              'view_count': int_or_none(data.get('viewCount')), +            'uploader': data.get('channel'), +            'tags': data.get('tags'),          } | 
