diff options
Diffstat (limited to 'youtube_dl/extractor/wat.py')
| -rw-r--r-- | youtube_dl/extractor/wat.py | 152 | 
1 files changed, 46 insertions, 106 deletions
diff --git a/youtube_dl/extractor/wat.py b/youtube_dl/extractor/wat.py index 20fef1f04..b15e03768 100644 --- a/youtube_dl/extractor/wat.py +++ b/youtube_dl/extractor/wat.py @@ -1,15 +1,13 @@  # coding: utf-8  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..compat import compat_str  from ..utils import (      ExtractorError, -    unified_strdate, -    HEADRequest,      int_or_none, +    try_get, +    unified_strdate,  ) @@ -19,7 +17,6 @@ class WatIE(InfoExtractor):      _TESTS = [          {              'url': 'http://www.wat.tv/video/soupe-figues-l-orange-aux-epices-6z1uz_2hvf7_.html', -            'md5': '83d882d9de5c9d97f0bb2c6273cde56a',              'info_dict': {                  'id': '11713067',                  'ext': 'mp4', @@ -28,10 +25,16 @@ class WatIE(InfoExtractor):                  'upload_date': '20140819',                  'duration': 120,              }, +            'params': { +                # m3u8 download +                'skip_download': True, +            }, +            'expected_warnings': ['HTTP Error 404'], +            'skip': 'This content is no longer available',          },          {              'url': 'http://www.wat.tv/video/gregory-lemarchal-voix-ange-6z1v7_6ygkj_.html', -            'md5': '34bdfa5ca9fd3c7eb88601b635b0424c', +            'md5': 'b16574df2c3cd1a36ca0098f2a791925',              'info_dict': {                  'id': '11713075',                  'ext': 'mp4', @@ -39,17 +42,10 @@ class WatIE(InfoExtractor):                  'upload_date': '20140816',              },              'expected_warnings': ["Ce contenu n'est pas disponible pour l'instant."], +            'skip': 'This content is no longer available',          },      ] - -    _FORMATS = ( -        (200, 416, 234), -        (400, 480, 270), -        (600, 640, 360), -        (1200, 640, 360), -        (1800, 960, 540), -        (2500, 1280, 720), -    ) +    _GEO_BYPASS = False      def _real_extract(self, url):          video_id = self._match_id(url) @@ -57,110 +53,54 @@ class WatIE(InfoExtractor):          # 'contentv4' is used in the website, but it also returns the related          # videos, we don't need them +        # video_data = self._download_json( +        #     'http://www.wat.tv/interface/contentv4s/' + video_id, video_id)          video_data = self._download_json( -            'http://www.wat.tv/interface/contentv4s/' + video_id, video_id) +            'https://mediainfo.tf1.fr/mediainfocombo/' + video_id, +            video_id, query={'context': 'MYTF1', 'pver': '4001000'})          video_info = video_data['media']          error_desc = video_info.get('error_desc')          if error_desc: -            self.report_warning( -                '%s returned error: %s' % (self.IE_NAME, error_desc)) - -        chapters = video_info['chapters'] -        if chapters: -            first_chapter = chapters[0] +            if video_info.get('error_code') == 'GEOBLOCKED': +                self.raise_geo_restricted(error_desc, video_info.get('geoList')) +            raise ExtractorError(error_desc, expected=True) -            def video_id_for_chapter(chapter): -                return chapter['tc_start'].split('-')[0] - -            if video_id_for_chapter(first_chapter) != video_id: -                self.to_screen('Multipart video detected') -                entries = [self.url_result('wat:%s' % video_id_for_chapter(chapter)) for chapter in chapters] -                return self.playlist_result(entries, video_id, video_info['title']) -            # Otherwise we can continue and extract just one part, we have to use -            # the video id for getting the video url -        else: -            first_chapter = video_info - -        title = first_chapter['title'] - -        def extract_url(path_template, url_type): -            req_url = 'http://www.wat.tv/get/%s' % (path_template % video_id) -            head = self._request_webpage(HEADRequest(req_url), video_id, 'Extracting %s url' % url_type, fatal=False) -            if head: -                red_url = head.geturl() -                if req_url != red_url: -                    return red_url -            return None - -        def remove_bitrate_limit(manifest_url): -            return re.sub(r'(?:max|min)_bitrate=\d+&?', '', manifest_url) +        title = video_info['title']          formats = [] -        try: + +        def extract_formats(manifest_urls): +            for f, f_url in manifest_urls.items(): +                if not f_url: +                    continue +                if f in ('dash', 'mpd'): +                    formats.extend(self._extract_mpd_formats( +                        f_url.replace('://das-q1.tf1.fr/', '://das-q1-ssl.tf1.fr/'), +                        video_id, mpd_id='dash', fatal=False)) +                elif f == 'hls': +                    formats.extend(self._extract_m3u8_formats( +                        f_url, video_id, 'mp4', +                        'm3u8_native', m3u8_id='hls', fatal=False)) + +        delivery = video_data.get('delivery') or {} +        extract_formats({delivery.get('format'): delivery.get('url')}) +        if not formats: +            if delivery.get('drm'): +                raise ExtractorError('This video is DRM protected.', expected=True)              manifest_urls = self._download_json( -                'http://www.wat.tv/get/webhtml/' + video_id, video_id) -            m3u8_url = manifest_urls.get('hls') -            if m3u8_url: -                m3u8_url = remove_bitrate_limit(m3u8_url) -                m3u8_formats = self._extract_m3u8_formats( -                    m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) -                if m3u8_formats: -                    formats.extend(m3u8_formats) -                    formats.extend(self._extract_f4m_formats( -                        m3u8_url.replace('ios', 'web').replace('.m3u8', '.f4m'), -                        video_id, f4m_id='hds', fatal=False)) -                    http_url = extract_url('android5/%s.mp4', 'http') -                    if http_url: -                        for m3u8_format in m3u8_formats: -                            vbr, abr = m3u8_format.get('vbr'), m3u8_format.get('abr') -                            if not vbr or not abr: -                                continue -                            format_id = m3u8_format['format_id'].replace('hls', 'http') -                            fmt_url = re.sub(r'%s-\d+00-\d+' % video_id, '%s-%d00-%d' % (video_id, round(vbr / 100), round(abr)), http_url) -                            if self._is_valid_url(fmt_url, video_id, format_id): -                                f = m3u8_format.copy() -                                f.update({ -                                    'url': fmt_url, -                                    'format_id': format_id, -                                    'protocol': 'http', -                                }) -                                formats.append(f) -            mpd_url = manifest_urls.get('mpd') -            if mpd_url: -                formats.extend(self._extract_mpd_formats(remove_bitrate_limit( -                    mpd_url), video_id, mpd_id='dash', fatal=False)) -            self._sort_formats(formats) -        except ExtractorError: -            abr = 64 -            for vbr, width, height in self._FORMATS: -                tbr = vbr + abr -                format_id = 'http-%s' % tbr -                fmt_url = 'http://dnl.adv.tf1.fr/2/USP-0x0/%s/%s/%s/ssm/%s-%s-64k.mp4' % (video_id[-4:-2], video_id[-2:], video_id, video_id, vbr) -                if self._is_valid_url(fmt_url, video_id, format_id): -                    formats.append({ -                        'format_id': format_id, -                        'url': fmt_url, -                        'vbr': vbr, -                        'abr': abr, -                        'width': width, -                        'height': height, -                    }) +                'http://www.wat.tv/get/webhtml/' + video_id, video_id, fatal=False) +            if manifest_urls: +                extract_formats(manifest_urls) -        date_diffusion = first_chapter.get('date_diffusion') or video_data.get('configv4', {}).get('estatS4') -        upload_date = unified_strdate(date_diffusion) if date_diffusion else None -        duration = None -        files = video_info['files'] -        if files: -            duration = int_or_none(files[0].get('duration')) +        self._sort_formats(formats)          return {              'id': video_id,              'title': title, -            'thumbnail': first_chapter.get('preview'), -            'description': first_chapter.get('description'), -            'view_count': int_or_none(video_info.get('views')), -            'upload_date': upload_date, -            'duration': duration, +            'thumbnail': video_info.get('preview'), +            'upload_date': unified_strdate(try_get( +                video_data, lambda x: x['mediametrie']['chapters'][0]['estatS4'])), +            'duration': int_or_none(video_info.get('duration')),              'formats': formats,          }  | 
