diff options
| -rw-r--r-- | youtube_dl/extractor/mediaset.py | 155 | 
1 files changed, 74 insertions, 81 deletions
diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py index 9f2b60dcc..57f97409d 100644 --- a/youtube_dl/extractor/mediaset.py +++ b/youtube_dl/extractor/mediaset.py @@ -3,75 +3,75 @@ from __future__ import unicode_literals  import re -from .common import InfoExtractor -from ..compat import compat_str +from .theplatform import ThePlatformBaseIE  from ..utils import ( -    determine_ext, -    parse_duration, -    try_get, -    unified_strdate, +    ExtractorError, +    int_or_none, +    update_url_query,  ) -class MediasetIE(InfoExtractor): +class MediasetIE(ThePlatformBaseIE): +    _TP_TLD = 'eu'      _VALID_URL = r'''(?x)                      (?:                          mediaset:|                          https?:// -                            (?:www\.)?video\.mediaset\.it/ +                            (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/                              (?:                                  (?:video|on-demand)/(?:[^/]+/)+[^/]+_| -                                player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid= +                                player/index\.html\?.*?\bprogramGuid=                              ) -                    )(?P<id>[0-9]+) +                    )(?P<id>[0-9A-Z]{16})                      '''      _TESTS = [{          # full episode -        'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html', +        'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',          'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',          'info_dict': { -            'id': '661824', +            'id': 'FAFU000000661824',              'ext': 'mp4',              'title': 'Quarta puntata', -            'description': 'md5:7183696d6df570e3412a5ef74b27c5e2', +            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',              'thumbnail': r're:^https?://.*\.jpg$', -            'duration': 1414, -            'creator': 'mediaset', +            'duration': 1414.26,              'upload_date': '20161107',              'series': 'Hello Goodbye', -            'categories': ['reality'], +            'timestamp': 1478532900, +            'uploader': 'Rete 4', +            'uploader_id': 'R4',          }, -        'expected_warnings': ['is not a supported codec'],      }, { -        'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html', -        'md5': '1276f966ac423d16ba255ce867de073e', +        'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501', +        'md5': '288532f0ad18307705b01e581304cd7b',          'info_dict': { -            'id': '846685', +            'id': 'F309013801000501',              'ext': 'mp4',              'title': 'Puntata del 25 maggio', -            'description': 'md5:ee2e456e3eb1dba5e814596655bb5296', +            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',              'thumbnail': r're:^https?://.*\.jpg$', -            'duration': 6565, -            'creator': 'mediaset', -            'upload_date': '20180525', +            'duration': 6565.007, +            'upload_date': '20180526',              'series': 'Matrix', -            'categories': ['infotainment'], +            'timestamp': 1527326245, +            'uploader': 'Canale 5', +            'uploader_id': 'C5',          },          'expected_warnings': ['HTTP Error 403: Forbidden'],      }, {          # clip -        'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html', +        'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',          'only_matching': True,      }, {          # iframe simple -        'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true', +        'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',          'only_matching': True,      }, {          # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/) -        'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true', +        'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',          'only_matching': True,      }, { -        'url': 'mediaset:661824', +        'url': 'mediaset:FAFU000000665924',          'only_matching': True,      }] @@ -84,61 +84,54 @@ class MediasetIE(InfoExtractor):                  webpage)]      def _real_extract(self, url): -        video_id = self._match_id(url) - -        video = self._download_json( -            'https://www.video.mediaset.it/html/metainfo.sjson', -            video_id, 'Downloading media info', query={ -                'id': video_id -            })['video'] - -        title = video['title'] -        media_id = video.get('guid') or video_id - -        video_list = self._download_json( -            'http://cdnsel01.mediaset.net/GetCdn2018.aspx', -            video_id, 'Downloading video CDN JSON', query={ -                'streamid': media_id, -                'format': 'json', -            })['videoList'] +        guid = self._match_id(url) +        tp_path = 'PR1GhC/media/guid/2702976343/' + guid +        info = self._extract_theplatform_metadata(tp_path, guid)          formats = [] -        for format_url in video_list: -            ext = determine_ext(format_url) -            if ext == 'm3u8': -                formats.extend(self._extract_m3u8_formats( -                    format_url, video_id, 'mp4', entry_protocol='m3u8_native', -                    m3u8_id='hls', fatal=False)) -            elif ext == 'mpd': -                formats.extend(self._extract_mpd_formats( -                    format_url, video_id, mpd_id='dash', fatal=False)) -            elif ext == 'ism' or '.ism' in format_url: -                formats.extend(self._extract_ism_formats( -                    format_url, video_id, ism_id='mss', fatal=False)) -            else: -                formats.append({ -                    'url': format_url, -                    'format_id': determine_ext(format_url), -                }) +        subtitles = {} +        first_e = None +        for asset_type in ('SD', 'HD'): +            for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'): +                try: +                    tp_formats, tp_subtitles = self._extract_theplatform_smil( +                        update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), { +                            'mbr': 'true', +                            'formats': f, +                            'assetTypes': asset_type, +                        }), guid, 'Downloading %s %s SMIL data' % (f, asset_type)) +                except ExtractorError as e: +                    if not first_e: +                        first_e = e +                    break +                for tp_f in tp_formats: +                    tp_f['quality'] = 1 if asset_type == 'HD' else 0 +                formats.extend(tp_formats) +                subtitles = self._merge_subtitles(subtitles, tp_subtitles) +        if first_e and not formats: +            raise first_e          self._sort_formats(formats) -        creator = try_get( -            video, lambda x: x['brand-info']['publisher'], compat_str) -        category = try_get( -            video, lambda x: x['brand-info']['category'], compat_str) -        categories = [category] if category else None +        fields = [] +        for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))): +            fields.extend(templ % repl for repl in repls) +        feed_data = self._download_json( +            'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid, +            guid, fatal=False, query={'fields': ','.join(fields)}) +        if feed_data: +            publish_info = feed_data.get('mediasetprogram$publishInfo') or {} +            info.update({ +                'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')), +                'season_number': int_or_none(feed_data.get('tvSeasonNumber')), +                'series': feed_data.get('mediasetprogram$brandTitle'), +                'uploader': publish_info.get('description'), +                'uploader_id': publish_info.get('channel'), +                'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')), +            }) -        return { -            'id': video_id, -            'title': title, -            'description': video.get('short-description'), -            'thumbnail': video.get('thumbnail'), -            'duration': parse_duration(video.get('duration')), -            'creator': creator, -            'upload_date': unified_strdate(video.get('production-date')), -            'webpage_url': video.get('url'), -            'series': video.get('brand-value'), -            'season': video.get('season'), -            'categories': categories, +        info.update({ +            'id': guid,              'formats': formats, -        } +            'subtitles': subtitles, +        }) +        return info  | 
