diff options
| -rw-r--r-- | youtube_dl/extractor/vrv.py | 97 | 
1 file changed, 57 insertions, 40 deletions
diff --git a/youtube_dl/extractor/vrv.py b/youtube_dl/extractor/vrv.py
index 483a3be3a..014513051 100644
--- a/youtube_dl/extractor/vrv.py
+++ b/youtube_dl/extractor/vrv.py
@@ -11,10 +11,12 @@ import time
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_HTTPError,
     compat_urllib_parse_urlencode,
     compat_urllib_parse,
 )
 from ..utils import (
+    ExtractorError,
     float_or_none,
     int_or_none,
 )
@@ -24,29 +26,41 @@ class VRVBaseIE(InfoExtractor):
     _API_DOMAIN = None
     _API_PARAMS = {}
     _CMS_SIGNING = {}
+    _TOKEN = None
+    _TOKEN_SECRET = ''
 
     def _call_api(self, path, video_id, note, data=None):
+        # https://tools.ietf.org/html/rfc5849#section-3
         base_url = self._API_DOMAIN + '/core/' + path
-        encoded_query = compat_urllib_parse_urlencode({
+        query = {
             'oauth_consumer_key': self._API_PARAMS['oAuthKey'],
             'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
             'oauth_signature_method': 'HMAC-SHA1',
             'oauth_timestamp': int(time.time()),
-            'oauth_version': '1.0',
-        })
+        }
+        if self._TOKEN:
+            query['oauth_token'] = self._TOKEN
+        encoded_query = compat_urllib_parse_urlencode(query)
         headers = self.geo_verification_headers()
         if data:
             data = json.dumps(data).encode()
             headers['Content-Type'] = 'application/json'
-        method = 'POST' if data else 'GET'
-        base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')])
+        base_string = '&'.join([
+            'POST' if data else 'GET',
+            compat_urllib_parse.quote(base_url, ''),
+            compat_urllib_parse.quote(encoded_query, '')])
         oauth_signature = base64.b64encode(hmac.new(
-            (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
+            (self._API_PARAMS['oAuthSecret'] + '&' + self._TOKEN_SECRET).encode('ascii'),
             base_string.encode(), hashlib.sha1).digest()).decode()
         encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
-        return self._download_json(
-            '?'.join([base_url, encoded_query]), video_id,
-            note='Downloading %s JSON metadata' % note, headers=headers, data=data)
+        try:
+            return self._download_json(
+                '?'.join([base_url, encoded_query]), video_id,
+                note='Downloading %s JSON metadata' % note, headers=headers, data=data)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                raise ExtractorError(json.loads(e.cause.read().decode())['message'], expected=True)
+            raise
 
     def _call_cms(self, path, video_id, note):
         if not self._CMS_SIGNING:
@@ -55,19 +69,22 @@ class VRVBaseIE(InfoExtractor):
             self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
             note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
 
-    def _set_api_params(self, webpage, video_id):
-        if not self._API_PARAMS:
-            self._API_PARAMS = self._parse_json(self._search_regex(
-                r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>',
-                webpage, 'api config'), video_id)['cxApiParams']
-            self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
-
     def _get_cms_resource(self, resource_key, video_id):
         return self._call_api(
             'cms_resource', video_id, 'resource path', data={
                 'resource_key': resource_key,
             })['__links__']['cms_resource']['href']
 
+    def _real_initialize(self):
+        webpage = self._download_webpage(
+            'https://vrv.co/', None, headers=self.geo_verification_headers())
+        self._API_PARAMS = self._parse_json(self._search_regex(
+            [
+                r'window\.__APP_CONFIG__\s*=\s*({.+?})(?:</script>|;)',
+                r'window\.__APP_CONFIG__\s*=\s*({.+})'
+            ], webpage, 'app config'), None)['cxApiParams']
+        self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
+
 
 class VRVIE(VRVBaseIE):
     IE_NAME = 'vrv'
@@ -86,6 +103,22 @@ class VRVIE(VRVBaseIE):
             'skip_download': True,
         },
     }]
+    _NETRC_MACHINE = 'vrv'
+
+    def _real_initialize(self):
+        super(VRVIE, self)._real_initialize()
+
+        email, password = self._get_login_info()
+        if email is None:
+            return
+
+        token_credentials = self._call_api(
+            'authenticate/by:credentials', None, 'Token Credentials', data={
+                'email': email,
+                'password': password,
+            })
+        self._TOKEN = token_credentials['oauth_token']
+        self._TOKEN_SECRET = token_credentials['oauth_token_secret']
 
     def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
         if not url or stream_format not in ('hls', 'dash'):
@@ -116,28 +149,16 @@ class VRVIE(VRVBaseIE):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage = self._download_webpage(
-            url, video_id,
-            headers=self.geo_verification_headers())
-        media_resource = self._parse_json(self._search_regex(
-            [
-                r'window\.__INITIAL_STATE__\s*=\s*({.+?})(?:</script>|;)',
-                r'window\.__INITIAL_STATE__\s*=\s*({.+})'
-            ], webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
-
-        video_data = media_resource.get('json')
-        if not video_data:
-            self._set_api_params(webpage, video_id)
-            episode_path = self._get_cms_resource(
-                'cms:/episodes/' + video_id, video_id)
-            video_data = self._call_cms(episode_path, video_id, 'video')
+
+        episode_path = self._get_cms_resource(
+            'cms:/episodes/' + video_id, video_id)
+        video_data = self._call_cms(episode_path, video_id, 'video')
         title = video_data['title']
 
-        streams_json = media_resource.get('streams', {}).get('json', {})
-        if not streams_json:
-            self._set_api_params(webpage, video_id)
-            streams_path = video_data['__links__']['streams']['href']
-            streams_json = self._call_cms(streams_path, video_id, 'streams')
+        streams_path = video_data['__links__'].get('streams', {}).get('href')
+        if not streams_path:
+            self.raise_login_required()
+        streams_json = self._call_cms(streams_path, video_id, 'streams')
         audio_locale = streams_json.get('audio_locale')
 
         formats = []
@@ -202,11 +223,7 @@ class VRVSeriesIE(VRVBaseIE):
 
     def _real_extract(self, url):
         series_id = self._match_id(url)
-        webpage = self._download_webpage(
-            url, series_id,
-            headers=self.geo_verification_headers())
 
-        self._set_api_params(webpage, series_id)
         seasons_path = self._get_cms_resource(
             'cms:/seasons?series_id=' + series_id, series_id)
         seasons_data = self._call_cms(seasons_path, series_id, 'seasons')
