diff options
| author | Remita Amine <remitamine@gmail.com> | 2019-02-03 12:10:41 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2019-02-03 12:10:41 +0100 | 
| commit | 07fbfef1c7e36b25dd7098be73ab76b87378a015 (patch) | |
| tree | fc0f6d114bb7b2b7ff987c5e7e15f1cff3ca6d6f | |
| parent | eecf788b90fa4d49567c714f5a613fdd2b6e2507 (diff) | |
[radiocanada] switch to the new media requests(closes #19115)
| -rw-r--r-- | youtube_dl/extractor/radiocanada.py | 133 | 
1 files changed, 39 insertions, 94 deletions
| diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 302f67d96..58e294892 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -5,14 +5,10 @@ import re  from .common import InfoExtractor  from ..utils import ( -    xpath_text, -    find_xpath_attr,      determine_ext, +    ExtractorError,      int_or_none,      unified_strdate, -    xpath_element, -    ExtractorError, -    determine_protocol,      unsmuggle_url,  ) @@ -61,107 +57,53 @@ class RadioCanadaIE(InfoExtractor):              'only_matching': True,          }      ] +    _GEO_COUNTRIES = ['CA'] + +    def _call_api(self, path, video_id, app_code, query): +        query.update({ +            'appCode': app_code, +            'idMedia': video_id, +            'output': 'json', +        }) +        return self._download_json( +            'https://services.radio-canada.ca/media/' + path, video_id, headers={ +                'Authorization': 'Client-Key 773aea60-0e80-41bb-9c7f-e6d7c3ad17fb' +            }, query=query)      def _real_extract(self, url):          url, smuggled_data = unsmuggle_url(url, {})          app_code, video_id = re.match(self._VALID_URL, url).groups() -        metadata = self._download_xml( -            'http://api.radio-canada.ca/metaMedia/v1/index.ashx', -            video_id, note='Downloading metadata XML', query={ -                'appCode': app_code, -                'idMedia': video_id, -            }) +        metas = self._call_api('meta/v1/index.ashx', video_id, app_code, {})['Metas']          def get_meta(name): -            el = find_xpath_attr(metadata, './/Meta', 'name', name) -            return el.text if el is not None else None +            for meta in metas: +                if meta.get('name') == name: +                    text = meta.get('text') +                    if text: +                        return text          # protectionType does not necessarily mean the video is DRM protected (see          # https://github.com/rg3/youtube-dl/pull/18609).          if get_meta('protectionType'):              self.report_warning('This video is probably DRM protected.') -        device_types = ['ipad'] -        if not smuggled_data: -            device_types.append('flash') -            device_types.append('android') - -        formats = [] -        error = None -        # TODO: extract f4m formats -        # f4m formats can be extracted using flashhd device_type but they produce unplayable file -        for device_type in device_types: -            validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' -            query = { -                'appCode': app_code, -                'idMedia': video_id, -                'connectionType': 'broadband', -                'multibitrate': 'true', -                'deviceType': device_type, -            } -            if smuggled_data: -                validation_url = 'https://services.radio-canada.ca/media/validation/v2/' -                query.update(smuggled_data) -            else: -                query.update({ -                    # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction -                    'paysJ391wsHjbOJwvCs26toz': 'CA', -                    'bypasslock': 'NZt5K62gRqfc', -                }) -            v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False) -            v_url = xpath_text(v_data, 'url') -            if not v_url: -                continue -            if v_url == 'null': -                error = xpath_text(v_data, 'message') -                continue -            ext = determine_ext(v_url) -            if ext == 'm3u8': -                formats.extend(self._extract_m3u8_formats( -                    v_url, video_id, 'mp4', m3u8_id='hls', fatal=False)) -            elif ext == 'f4m': -                formats.extend(self._extract_f4m_formats( -                    v_url, video_id, f4m_id='hds', fatal=False)) -            else: -                ext = determine_ext(v_url) -                bitrates = xpath_element(v_data, 'bitrates') -                for url_e in bitrates.findall('url'): -                    tbr = int_or_none(url_e.get('bitrate')) -                    if not tbr: -                        continue -                    f_url = re.sub(r'\d+\.%s' % ext, '%d.%s' % (tbr, ext), v_url) -                    protocol = determine_protocol({'url': f_url}) -                    f = { -                        'format_id': '%s-%d' % (protocol, tbr), -                        'url': f_url, -                        'ext': 'flv' if protocol == 'rtmp' else ext, -                        'protocol': protocol, -                        'width': int_or_none(url_e.get('width')), -                        'height': int_or_none(url_e.get('height')), -                        'tbr': tbr, -                    } -                    mobj = re.match(r'(?P<url>rtmp://[^/]+/[^/]+)/(?P<playpath>[^?]+)(?P<auth>\?.+)', f_url) -                    if mobj: -                        f.update({ -                            'url': mobj.group('url') + mobj.group('auth'), -                            'play_path': mobj.group('playpath'), -                        }) -                    formats.append(f) -                    if protocol == 'rtsp': -                        base_url = self._search_regex( -                            r'rtsp://([^?]+)', f_url, 'base url', default=None) -                        if base_url: -                            base_url = 'http://' + base_url -                            formats.extend(self._extract_m3u8_formats( -                                base_url + '/playlist.m3u8', video_id, 'mp4', -                                'm3u8_native', m3u8_id='hls', fatal=False)) -                            formats.extend(self._extract_f4m_formats( -                                base_url + '/manifest.f4m', video_id, -                                f4m_id='hds', fatal=False)) -        if not formats and error: +        query = { +            'connectionType': 'hd', +            'deviceType': 'ipad', +            'multibitrate': 'true', +        } +        if smuggled_data: +            query.update(smuggled_data) +        v_data = self._call_api('validation/v2/', video_id, app_code, query) +        v_url = v_data.get('url') +        if not v_url: +            error = v_data['message'] +            if error == "Le contenu sélectionné n'est pas disponible dans votre pays": +                raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)              raise ExtractorError(                  '%s said: %s' % (self.IE_NAME, error), expected=True) +        formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')          self._sort_formats(formats)          subtitles = {} @@ -189,8 +131,8 @@ class RadioCanadaIE(InfoExtractor):  class RadioCanadaAudioVideoIE(InfoExtractor):      'radiocanada:audiovideo' -    _VALID_URL = r'https?://ici\.radio-canada\.ca/audio-video/media-(?P<id>[0-9]+)' -    _TEST = { +    _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)' +    _TESTS = [{          'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',          'info_dict': {              'id': '7527184', @@ -203,7 +145,10 @@ class RadioCanadaAudioVideoIE(InfoExtractor):              # m3u8 download              'skip_download': True,          }, -    } +    }, { +        'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam', +        'only_matching': True, +    }]      def _real_extract(self, url):          return self.url_result('radiocanada:medianet:%s' % self._match_id(url)) | 
