diff options
| -rw-r--r-- | youtube_dl/extractor/mediaset.py | 60 | 
1 file changed, 41 insertions, 19 deletions
diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index 9760eafd5..76a2ae125 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -10,6 +10,7 @@ from ..utils import (      parse_duration,      try_get,      unified_strdate, +    ExtractorError  ) @@ -43,6 +44,22 @@ class MediasetIE(InfoExtractor):          },          'expected_warnings': ['is not a supported codec'],      }, { +        'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html', +        'md5': '1276f966ac423d16ba255ce867de073e', +        'info_dict': { +            'id': '846685', +            'ext': 'mp4', +            'title': 'Puntata del 25 maggio', +            'description': 'md5:ee2e456e3eb1dba5e814596655bb5296', +            'thumbnail': r're:^https?://.*\.jpg$', +            'duration': 6565, +            'creator': 'mediaset', +            'upload_date': '20180525', +            'series': 'Matrix', +            'categories': ['infotainment'], +        }, +        'expected_warnings': ['is not a supported codec'], +    }, {          # clip          'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html',          'only_matching': True, @@ -70,18 +87,29 @@ class MediasetIE(InfoExtractor):      def _real_extract(self, url):          video_id = self._match_id(url) +        media_info = self._download_json( +            'https://www.video.mediaset.it/html/metainfo.sjson', +            video_id, 'Downloading media info', query={ +                'id': video_id +            })['video'] + +        media_id = try_get(media_info, lambda x: x['guid']) or video_id +          video_list = self._download_json( -            'http://cdnsel01.mediaset.net/GetCdn.aspx', +            'http://cdnsel01.mediaset.net/GetCdn2018.aspx',              video_id, 'Downloading video CDN JSON', query={ -                'streamid': video_id, +                
'streamid': media_id,                  'format': 'json',              })['videoList']          formats = []          for format_url in video_list:              if '.ism' in format_url: -                formats.extend(self._extract_ism_formats( -                    format_url, video_id, ism_id='mss', fatal=False)) +                try: +                    formats.extend(self._extract_ism_formats( +                        format_url, video_id, ism_id='mss', fatal=False)) +                except ExtractorError: +                    pass              else:                  formats.append({                      'url': format_url, @@ -89,30 +117,24 @@ class MediasetIE(InfoExtractor):                  })          self._sort_formats(formats) -        mediainfo = self._download_json( -            'http://plr.video.mediaset.it/html/metainfo.sjson', -            video_id, 'Downloading video info JSON', query={ -                'id': video_id, -            })['video'] - -        title = mediainfo['title'] +        title = media_info['title']          creator = try_get( -            mediainfo, lambda x: x['brand-info']['publisher'], compat_str) +            media_info, lambda x: x['brand-info']['publisher'], compat_str)          category = try_get( -            mediainfo, lambda x: x['brand-info']['category'], compat_str) +            media_info, lambda x: x['brand-info']['category'], compat_str)          categories = [category] if category else None          return {              'id': video_id,              'title': title, -            'description': mediainfo.get('short-description'), -            'thumbnail': mediainfo.get('thumbnail'), -            'duration': parse_duration(mediainfo.get('duration')), +            'description': media_info.get('short-description'), +            'thumbnail': media_info.get('thumbnail'), +            'duration': parse_duration(media_info.get('duration')),              'creator': creator, -            'upload_date': 
unified_strdate(mediainfo.get('production-date')), -            'webpage_url': mediainfo.get('url'), -            'series': mediainfo.get('brand-value'), +            'upload_date': unified_strdate(media_info.get('production-date')), +            'webpage_url': media_info.get('url'), +            'series': media_info.get('brand-value'),              'categories': categories,              'formats': formats,          }  | 
