diff options
| -rw-r--r-- | youtube_dl/extractor/canvas.py | 142 | ||||
| -rw-r--r-- | youtube_dl/extractor/extractors.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/gigya.py | 22 | ||||
| -rw-r--r-- | youtube_dl/extractor/medialaan.py | 17 | 
4 files changed, 167 insertions, 15 deletions
| diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py index 6899f8431..c7e9b8ff9 100644 --- a/youtube_dl/extractor/canvas.py +++ b/youtube_dl/extractor/canvas.py @@ -1,16 +1,24 @@  from __future__ import unicode_literals  import re +import json  from .common import InfoExtractor +from .gigya import GigyaBaseIE + + +from ..compat import compat_HTTPError  from ..utils import ( -    float_or_none, +    ExtractorError,      strip_or_none, +    float_or_none, +    int_or_none, +    parse_iso8601,  )  class CanvasIE(InfoExtractor): -    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet)/assets/(?P<id>m[dz]-ast-[^/?#&]+)' +    _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrtvideo)/assets/(?P<id>[^/?#&]+)'      _TESTS = [{          'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',          'md5': '90139b746a0a9bd7bb631283f6e2a64e', @@ -166,3 +174,133 @@ class CanvasEenIE(InfoExtractor):              'title': title,              'description': self._og_search_description(webpage),          } + + +class VrtNUIE(GigyaBaseIE): +    IE_DESC = 'VrtNU.be' +    _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)' +    _TESTS = [{ +        'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/', +        'info_dict': { +            'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de', +            'ext': 'flv', +            'title': 'De zwarte weduwe', +            'description': 'md5:d90c21dced7db869a85db89a623998d4', +            'duration': 1457.04, +            'thumbnail': r're:^https?://.*\.jpg$', +            'season': '1', +            'season_number': 1, +            'episode_number': 1, +        }, +        # 'skip': 'This video is only available for registered users' +    }] +    _NETRC_MACHINE = 'vrtnu' +    _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy' +    _CONTEXT_ID = 'R3595707040' + +    def _real_initialize(self): +        self._login() + +    def _login(self): +        username, password = self._get_login_info() +        if username is None: +            self.raise_login_required() + +        auth_data = { +            'APIKey': self._APIKEY, +            'targetEnv': 'jssdk', +            'loginID': username, +            'password': password, +            'authMode': 'cookie', +        } + +        auth_info = self._gigya_login(auth_data) + +        # Sometimes authentication fails for no good reason, retry +        login_attempt = 1 +        while login_attempt <= 3: +            try: +                # When requesting a token, no actual token is returned, but the +                # necessary cookies are set. +                self._request_webpage( +                    'https://token.vrt.be', +                    None, note='Requesting a token', errnote='Could not get a token', +                    headers={ +                        'Content-Type': 'application/json', +                        'Referer': 'https://www.vrt.be/vrtnu/', +                    }, +                    data=json.dumps({ +                        'uid': auth_info['UID'], +                        'uidsig': auth_info['UIDSignature'], +                        'ts': auth_info['signatureTimestamp'], +                        'email': auth_info['profile']['email'], +                    }).encode('utf-8')) +            except ExtractorError as e: +                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: +                    login_attempt += 1 +                    self.report_warning('Authentication failed') +                    self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again') +                else: +                    raise e +            else: +                break + +    def _real_extract(self, url): +        display_id = self._match_id(url) + +        webpage = self._download_webpage(url, display_id) + +        title = self._html_search_regex( +            r'(?ms)<h1 class="content__heading">(.+?)</h1>', +            webpage, 'title').strip() + +        description = self._html_search_regex( +            r'(?ms)<div class="content__description">(.+?)</div>', +            webpage, 'description', default=None) + +        season = self._html_search_regex( +            [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s* +                    <span>seizoen\ (.+?)</span>\s* +                </div>''', +             r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'], +            webpage, 'season', default=None) + +        season_number = int_or_none(season) + +        episode_number = int_or_none(self._html_search_regex( +            r'''(?xms)<div\ class="content__episode">\s* +                    <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span> +                </div>''', +            webpage, 'episode_number', default=None)) + +        release_date = parse_iso8601(self._html_search_regex( +            r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"', +            webpage, 'release_date', default=None)) + +        # If there's a ? or a # in the URL, remove them and everything after +        clean_url = url.split('?')[0].split('#')[0].strip('/') +        securevideo_url = clean_url + '.mssecurevideo.json' + +        json = self._download_json(securevideo_url, display_id) +        # We are dealing with a '../<show>.relevant' URL +        redirect_url = json.get('url') +        if redirect_url: +            return self.url_result('https:' + redirect_url) +        else: +            # There is only one entry, but with an unknown key, so just get +            # the first one +            video_id = list(json.values())[0].get('videoid') + +        return { +            '_type': 'url_transparent', +            'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id, +            'ie_key': CanvasIE.ie_key(), +            'id': video_id, +            'display_id': display_id, +            'title': title, +            'description': description, +            'season': season, +            'season_number': season_number, +            'episode_number': episode_number, +            'release_date': release_date, +        } diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 09b20a39a..42b312c42 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -150,6 +150,7 @@ from .canalc2 import Canalc2IE  from .canvas import (      CanvasIE,      CanvasEenIE, +    VrtNUIE,  )  from .carambatv import (      CarambaTVIE, diff --git a/youtube_dl/extractor/gigya.py b/youtube_dl/extractor/gigya.py new file mode 100644 index 000000000..412178492 --- /dev/null +++ b/youtube_dl/extractor/gigya.py @@ -0,0 +1,22 @@ +from __future__ import unicode_literals + +from .common import InfoExtractor + +from ..utils import ( +    ExtractorError, +    urlencode_postdata, +) + + +class GigyaBaseIE(InfoExtractor): +    def _gigya_login(self, auth_data): +        auth_info = self._download_json( +            'https://accounts.eu1.gigya.com/accounts.login', None, +            note='Logging in', errnote='Unable to log in', +            data=urlencode_postdata(auth_data)) + +        error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage') +        if error_message: +            raise ExtractorError( +                'Unable to login: %s' % error_message, expected=True) +        return auth_info diff --git a/youtube_dl/extractor/medialaan.py b/youtube_dl/extractor/medialaan.py index 4c32fbc2c..f8c30052f 100644 --- a/youtube_dl/extractor/medialaan.py +++ b/youtube_dl/extractor/medialaan.py @@ -2,19 +2,18 @@ from __future__ import unicode_literals  import re -from .common import InfoExtractor +from .gigya import GigyaBaseIE +  from ..compat import compat_str  from ..utils import ( -    ExtractorError,      int_or_none,      parse_duration,      try_get,      unified_timestamp, -    urlencode_postdata,  ) -class MedialaanIE(InfoExtractor): +class MedialaanIE(GigyaBaseIE):      _VALID_URL = r'''(?x)                      https?://                          (?:www\.|nieuws\.)? @@ -119,15 +118,7 @@ class MedialaanIE(InfoExtractor):              'password': password,          } -        auth_info = self._download_json( -            'https://accounts.eu1.gigya.com/accounts.login', None, -            note='Logging in', errnote='Unable to log in', -            data=urlencode_postdata(auth_data)) - -        error_message = auth_info.get('errorDetails') or auth_info.get('errorMessage') -        if error_message: -            raise ExtractorError( -                'Unable to login: %s' % error_message, expected=True) +        auth_info = self._gigya_login(auth_data)          self._uid = auth_info['UID']          self._uid_signature = auth_info['UIDSignature'] | 
