diff options
| -rw-r--r-- | youtube_dl/extractor/radiocanada.py | 55 | ||||
| -rw-r--r-- | youtube_dl/extractor/toutv.py | 60 | 
2 files changed, 92 insertions, 23 deletions
diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dl/extractor/radiocanada.py index 8ec402646..6751270ee 100644 --- a/youtube_dl/extractor/radiocanada.py +++ b/youtube_dl/extractor/radiocanada.py @@ -13,6 +13,7 @@ from ..utils import (      xpath_element,      ExtractorError,      determine_protocol, +    unsmuggle_url,  ) @@ -35,28 +36,51 @@ class RadioCanadaIE(InfoExtractor):      }      def _real_extract(self, url): +        url, smuggled_data = unsmuggle_url(url, {})          app_code, video_id = re.match(self._VALID_URL, url).groups() -        device_types = ['ipad', 'android'] +        metadata = self._download_xml( +            'http://api.radio-canada.ca/metaMedia/v1/index.ashx', +            video_id, note='Downloading metadata XML', query={ +                'appCode': app_code, +                'idMedia': video_id, +            }) + +        def get_meta(name): +            el = find_xpath_attr(metadata, './/Meta', 'name', name) +            return el.text if el is not None else None + +        if get_meta('protectionType'): +            raise ExtractorError('This video is DRM protected.', expected=True) + +        device_types = ['ipad']          if app_code != 'toutv':              device_types.append('flash') +        if not smuggled_data: +            device_types.append('android')          formats = []          # TODO: extract f4m formats          # f4m formats can be extracted using flashhd device_type but they produce unplayable file          for device_type in device_types: -            v_data = self._download_xml( -                'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx', -                video_id, note='Downloading %s XML' % device_type, query={ -                    'appCode': app_code, -                    'idMedia': video_id, -                    'connectionType': 'broadband', -                    'multibitrate': 'true', -                    'deviceType': device_type, +            validation_url = 'http://api.radio-canada.ca/validationMedia/v1/Validation.ashx' +            query = { +                'appCode': app_code, +                'idMedia': video_id, +                'connectionType': 'broadband', +                'multibitrate': 'true', +                'deviceType': device_type, +            } +            if smuggled_data: +                validation_url = 'https://services.radio-canada.ca/media/validation/v2/' +                query.update(smuggled_data) +            else: +                query.update({                      # paysJ391wsHjbOJwvCs26toz and bypasslock are used to bypass geo-restriction                      'paysJ391wsHjbOJwvCs26toz': 'CA',                      'bypasslock': 'NZt5K62gRqfc', -                }, fatal=False) +                }) +            v_data = self._download_xml(validation_url, video_id, note='Downloading %s XML' % device_type, query=query, fatal=False)              v_url = xpath_text(v_data, 'url')              if not v_url:                  continue @@ -101,17 +125,6 @@ class RadioCanadaIE(InfoExtractor):                                  f4m_id='hds', fatal=False))          self._sort_formats(formats) -        metadata = self._download_xml( -            'http://api.radio-canada.ca/metaMedia/v1/index.ashx', -            video_id, note='Downloading metadata XML', query={ -                'appCode': app_code, -                'idMedia': video_id, -            }) - -        def get_meta(name): -            el = find_xpath_attr(metadata, './/Meta', 'name', name) -            return el.text if el is not None else None -          return {              'id': video_id,              'title': get_meta('Title'), diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 54c2d0aa6..d2d5c1171 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -2,12 +2,22 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( +    int_or_none, +    js_to_json, +    ExtractorError, +    urlencode_postdata, +    extract_attributes, +    smuggle_url, +)  class TouTvIE(InfoExtractor): +    _NETRC_MACHINE = 'toutv'      IE_NAME = 'tou.tv'      _VALID_URL = r'https?://ici\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+/S[0-9]+E[0-9]+)' +    _access_token = None +    _claims = None      _TEST = {          'url': 'http://ici.tou.tv/garfield-tout-court/S2015E17', @@ -22,18 +32,64 @@ class TouTvIE(InfoExtractor):              # m3u8 download              'skip_download': True,          }, +        'skip': '404 Not Found',      } +    def _real_initialize(self): +        email, password = self._get_login_info() +        if email is None: +            return +        state = 'http://ici.tou.tv//' +        webpage = self._download_webpage(state, None, 'Downloading homepage') +        toutvlogin = self._parse_json(self._search_regex( +            r'(?s)toutvlogin\s*=\s*({.+?});', webpage, 'toutvlogin'), None, js_to_json) +        authorize_url = toutvlogin['host'] + '/auth/oauth/v2/authorize' +        login_webpage = self._download_webpage( +            authorize_url, None, 'Downloading login page', query={ +                'client_id': toutvlogin['clientId'], +                'redirect_uri': 'https://ici.tou.tv/login/loginCallback', +                'response_type': 'token', +                'scope': 'media-drmt openid profile email id.write media-validation.read.privileged', +                'state': state, +            }) +        login_form = self._search_regex( +            r'(?s)(<form[^>]+id="Form-login".+?</form>)', login_webpage, 'login form') +        form_data = self._hidden_inputs(login_form) +        form_data.update({ +            'login-email': email, +            'login-password': password, +        }) +        post_url = extract_attributes(login_form).get('action') or authorize_url +        _, urlh = self._download_webpage_handle( +            post_url, None, 'Logging in', data=urlencode_postdata(form_data)) +        self._access_token = self._search_regex( +            r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', +            urlh.geturl(), 'access token') +        self._claims = self._download_json( +            'https://services.radio-canada.ca/media/validation/v2/getClaims', +            None, 'Extracting Claims', query={ +                'token': self._access_token, +                'access_token': self._access_token, +            })['claims'] +      def _real_extract(self, url):          path = self._match_id(url)          metadata = self._download_json('http://ici.tou.tv/presentation/%s' % path, path) +        if metadata.get('IsDrm'): +            raise ExtractorError('This video is DRM protected.', expected=True)          video_id = metadata['IdMedia']          details = metadata['Details']          title = details['OriginalTitle'] +        video_url = 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id) +        if self._access_token and self._claims: +            video_url = smuggle_url(video_url, { +                'access_token': self._access_token, +                'claims': self._claims, +            })          return {              '_type': 'url_transparent', -            'url': 'radiocanada:%s:%s' % (metadata.get('AppCode', 'toutv'), video_id), +            'url': video_url,              'id': video_id,              'title': title,              'thumbnail': details.get('ImageUrl'),  | 
