diff options
Diffstat (limited to 'youtube_dl/extractor/canvas.py')
| -rw-r--r-- | youtube_dl/extractor/canvas.py | 142 | 
1 files changed, 140 insertions, 2 deletions
class VrtNUIE(GigyaBaseIE):
    """Extractor for VRT NU episode pages (https://www.vrt.be/vrtnu/...).

    Logs in through Gigya, requests the VRT token cookies, scrapes the
    episode metadata from the page and hands the media extraction over to
    CanvasIE through a ``url_transparent`` result.
    """

    IE_DESC = 'VrtNU.be'
    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
        'info_dict': {
            'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
            'ext': 'flv',
            'title': 'De zwarte weduwe',
            'description': 'md5:d90c21dced7db869a85db89a623998d4',
            'duration': 1457.04,
            'thumbnail': r're:^https?://.*\.jpg$',
            'season': '1',
            'season_number': 1,
            'episode_number': 1,
        },
        # 'skip': 'This video is only available for registered users'
    }]
    _NETRC_MACHINE = 'vrtnu'
    _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
    _CONTEXT_ID = 'R3595707040'

    # Number of times the token request is tried before giving up.
    _MAX_LOGIN_ATTEMPTS = 3

    def _real_initialize(self):
        """Log in once before any extraction takes place."""
        self._login()

    def _login(self):
        """Authenticate against Gigya and request the VRT token cookies.

        Calls ``raise_login_required()`` when no username is configured.
        The token request is retried on HTTP 401; any other failure, or a
        401 on the final attempt, is re-raised to the caller.
        """
        username, password = self._get_login_info()
        if username is None:
            self.raise_login_required()

        auth_data = {
            'APIKey': self._APIKEY,
            'targetEnv': 'jssdk',
            'loginID': username,
            'password': password,
            'authMode': 'cookie',
        }

        auth_info = self._gigya_login(auth_data)

        # The payload does not change between attempts, so build it once.
        token_request = json.dumps({
            'uid': auth_info['UID'],
            'uidsig': auth_info['UIDSignature'],
            'ts': auth_info['signatureTimestamp'],
            'email': auth_info['profile']['email'],
        }).encode('utf-8')

        # Sometimes authentication fails for no good reason, retry
        for attempt in range(1, self._MAX_LOGIN_ATTEMPTS + 1):
            try:
                # When requesting a token, no actual token is returned, but
                # the necessary cookies are set.
                self._request_webpage(
                    'https://token.vrt.be',
                    None, note='Requesting a token', errnote='Could not get a token',
                    headers={
                        'Content-Type': 'application/json',
                        'Referer': 'https://www.vrt.be/vrtnu/',
                    },
                    data=token_request)
            except ExtractorError as e:
                auth_failed = (
                    isinstance(e.cause, compat_HTTPError)
                    and e.cause.code == 401)
                # Do not swallow the error once the attempts are exhausted:
                # continuing without the token cookies only postpones the
                # failure to a less obvious place.
                if not auth_failed or attempt == self._MAX_LOGIN_ATTEMPTS:
                    raise
                self.report_warning('Authentication failed')
                self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
            else:
                return

    def _real_extract(self, url):
        """Scrape the episode page and delegate the media to CanvasIE.

        Returns either a ``url_result`` for the URL announced by the
        ``.mssecurevideo.json`` document, or a ``url_transparent`` info dict
        pointing at the mediazone asset. Raises ExtractorError when that
        document contains no video id.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        title = self._html_search_regex(
            r'(?ms)<h1 class="content__heading">(.+?)</h1>',
            webpage, 'title').strip()

        description = self._html_search_regex(
            r'(?ms)<div class="content__description">(.+?)</div>',
            webpage, 'description', default=None)

        season = self._html_search_regex(
            [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
                    <span>seizoen\ (.+?)</span>\s*
                </div>''',
             r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
            webpage, 'season', default=None)

        season_number = int_or_none(season)

        episode_number = int_or_none(self._html_search_regex(
            r'''(?xms)<div\ class="content__episode">\s*
                    <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
                </div>''',
            webpage, 'episode_number', default=None))

        # NOTE(review): parse_iso8601 presumably yields a UNIX timestamp,
        # while 'release_date' is usually a YYYYMMDD string in youtube-dl
        # info dicts -- confirm whether this should be 'timestamp' instead.
        release_date = parse_iso8601(self._html_search_regex(
            r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
            webpage, 'release_date', default=None))

        # If there's a ? or a # in the URL, remove them and everything after
        clean_url = url.split('?')[0].split('#')[0].strip('/')
        securevideo_url = clean_url + '.mssecurevideo.json'

        # Named video_info so that the json module is not shadowed.
        video_info = self._download_json(securevideo_url, display_id)

        # We are dealing with a '../<show>.relevant' URL
        redirect_url = video_info.get('url')
        if redirect_url:
            return self.url_result('https:' + redirect_url)

        # There is only one entry, but with an unknown key, so just get
        # the first one
        entries = list(video_info.values())
        first_entry = entries[0] if entries else None
        video_id = first_entry.get('videoid') if isinstance(first_entry, dict) else None
        if not video_id:
            raise ExtractorError(
                'Unable to find a video id in %s' % securevideo_url)

        return {
            '_type': 'url_transparent',
            'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
            'ie_key': CanvasIE.ie_key(),
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'season': season,
            'season_number': season_number,
            'episode_number': episode_number,
            'release_date': release_date,
        }
