diff options
| -rw-r--r-- | youtube_dl/extractor/adobepass.py | 124 | ||||
| -rw-r--r-- | youtube_dl/extractor/aenetworks.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/nationalgeographic.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/syfy.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/theplatform.py | 98 | 
5 files changed, 134 insertions, 99 deletions
| diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dl/extractor/adobepass.py new file mode 100644 index 000000000..4e59302ab --- /dev/null +++ b/youtube_dl/extractor/adobepass.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import re +import time +import xml.etree.ElementTree as etree + +from .common import InfoExtractor +from ..utils import ( +    unescapeHTML, +    urlencode_postdata, +    unified_timestamp, +) + + +class AdobePass(InfoExtractor): +    _SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s' +    _USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0' + +    @staticmethod +    def _get_mvpd_resource(provider_id, title, guid, rating): +        channel = etree.Element('channel') +        channel_title = etree.SubElement(channel, 'title') +        channel_title.text = provider_id +        item = etree.SubElement(channel, 'item') +        resource_title = etree.SubElement(item, 'title') +        resource_title.text = title +        resource_guid = etree.SubElement(item, 'guid') +        resource_guid.text = guid +        resource_rating = etree.SubElement(item, 'media:rating') +        resource_rating.attrib = {'scheme': 'urn:v-chip'} +        resource_rating.text = rating +        return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>' + +    def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): +        def xml_text(xml_str, tag): +            return self._search_regex( +                '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag) + +        mvpd_headers = { +            'ap_42': 'anonymous', +            'ap_11': 'Linux i686', +            'ap_z': self._USER_AGENT, +            'User-Agent': self._USER_AGENT, +        } + +        guid = xml_text(resource, 'guid') +        requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} +        authn_token = requestor_info.get('authn_token') +        if authn_token: +            token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) +            if token_expires and token_expires <= int(time.time()): +                authn_token = None +                requestor_info = {} +        if not authn_token: +            # TODO add support for other TV Providers +            mso_id = 'DTV' +            username, password = self._get_netrc_login_info(mso_id) +            if not username or not password: +                return '' + +            def post_form(form_page, note, data={}): +                post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url') +                return self._download_webpage( +                    post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={ +                        'Content-Type': 'application/x-www-form-urlencoded', +                    }) + +            provider_redirect_page = self._download_webpage( +                self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, +                'Downloading Provider Redirect Page', query={ +                    'noflash': 'true', +                    'mso_id': mso_id, +                    'requestor_id': requestor_id, +                    'no_iframe': 'false', +                    'domain_name': 'adobe.com', +                    'redirect_url': url, +                }) +            provider_login_page = post_form( +                provider_redirect_page, 'Downloading Provider Login Page') +            mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { +                'username': username, +                'password': password, +            }) +            post_form(mvpd_confirm_page, 'Confirming Login') + +            session = self._download_webpage( +                self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, +                'Retrieving Session', data=urlencode_postdata({ +                    '_method': 'GET', +                    'requestor_id': requestor_id, +                }), headers=mvpd_headers) +            authn_token = unescapeHTML(xml_text(session, 'authnToken')) +            requestor_info['authn_token'] = authn_token +            self._downloader.cache.store('mvpd', requestor_id, requestor_info) + +        authz_token = requestor_info.get(guid) +        if not authz_token: +            authorize = self._download_webpage( +                self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id, +                'Retrieving Authorization Token', data=urlencode_postdata({ +                    'resource_id': resource, +                    'requestor_id': requestor_id, +                    'authentication_token': authn_token, +                    'mso_id': xml_text(authn_token, 'simpleTokenMsoID'), +                    'userMeta': '1', +                }), headers=mvpd_headers) +            authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) +            requestor_info[guid] = authz_token +            self._downloader.cache.store('mvpd', requestor_id, requestor_info) + +        mvpd_headers.update({ +            'ap_19': xml_text(authn_token, 'simpleSamlNameID'), +            'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'), +        }) + +        return self._download_webpage( +            self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize', +            video_id, 'Retrieving Media Token', data=urlencode_postdata({ +                'authz_token': authz_token, +                'requestor_id': requestor_id, +                'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'), +                'hashed_guid': 'false', +            }), headers=mvpd_headers) diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py index 8f53050c9..6adb6d824 100644 --- a/youtube_dl/extractor/aenetworks.py +++ b/youtube_dl/extractor/aenetworks.py @@ -109,7 +109,10 @@ class AENetworksIE(AENetworksBaseIE):          info = self._parse_theplatform_metadata(theplatform_metadata)          if theplatform_metadata.get('AETN$isBehindWall'):              requestor_id = self._DOMAIN_TO_REQUESTOR_ID[domain] -            resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>%s</title><item><title>%s</title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (requestor_id, theplatform_metadata['title'], theplatform_metadata['AETN$PPL_pplProgramId'], theplatform_metadata['ratings'][0]['rating']) +            resource = self._get_mvpd_resource( +                requestor_id, theplatform_metadata['title'], +                theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'), +                theplatform_metadata['ratings'][0]['rating'])              query['auth'] = self._extract_mvpd_auth(                  url, video_id, requestor_id, resource)          info.update(self._search_json_ld(webpage, video_id, fatal=False)) diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dl/extractor/nationalgeographic.py index 0027ff1b8..890e8d5bc 100644 --- a/youtube_dl/extractor/nationalgeographic.py +++ b/youtube_dl/extractor/nationalgeographic.py @@ -119,7 +119,7 @@ class NationalGeographicIE(ThePlatformIE):              auth_resource_id = self._search_regex(                  r"video_auth_resourceId\s*=\s*'([^']+)'",                  webpage, 'auth resource id') -            query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id) or '' +            query['auth'] = self._extract_mvpd_auth(url, display_id, 'natgeo', auth_resource_id)          return {              '_type': 'url_transparent', diff --git a/youtube_dl/extractor/syfy.py b/youtube_dl/extractor/syfy.py index 53723b66e..764287a64 100644 --- a/youtube_dl/extractor/syfy.py +++ b/youtube_dl/extractor/syfy.py @@ -40,7 +40,9 @@ class SyfyIE(ThePlatformIE):              'manifest': 'm3u',          }          if syfy_mpx.get('entitlement') == 'auth': -            resource = '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/"><channel><title>syfy</title><item><title><![CDATA[%s]]></title><guid>%s</guid><media:rating scheme="urn:v-chip">%s</media:rating></item></channel></rss>' % (title, video_id, syfy_mpx.get('mpxRating', 'TV-14')) +            resource = self._get_mvpd_resource( +                'syfy', title, video_id, +                syfy_mpx.get('mpxRating', 'TV-14'))              query['auth'] = self._extract_mvpd_auth(                  url, video_id, 'syfy', resource) diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index 9ca765a5f..108ddd3a9 100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -6,10 +6,10 @@ import time  import hmac  import binascii  import hashlib -import netrc  from .once import OnceIE +from .adobepass import AdobePass  from ..compat import (      compat_parse_qs,      compat_urllib_parse_urlparse, @@ -25,9 +25,6 @@ from ..utils import (      xpath_with_ns,      mimetype2ext,      find_xpath_attr, -    unescapeHTML, -    urlencode_postdata, -    unified_timestamp,  )  default_ns = 'http://www.w3.org/2005/SMIL21/Language' @@ -96,7 +93,7 @@ class ThePlatformBaseIE(OnceIE):          return self._parse_theplatform_metadata(info) -class ThePlatformIE(ThePlatformBaseIE): +class ThePlatformIE(ThePlatformBaseIE, AdobePass):      _VALID_URL = r'''(?x)          (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/             (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))? @@ -202,97 +199,6 @@ class ThePlatformIE(ThePlatformBaseIE):          sig = flags + expiration_date + checksum + str_to_hex(sig_secret)          return '%s&sig=%s' % (url, sig) -    def _extract_mvpd_auth(self, url, video_id, requestor_id, resource): -        def xml_text(xml_str, tag): -            return self._search_regex( -                '<%s>(.+?)</%s>' % (tag, tag), xml_str, tag) - -        mvpd_headers = { -            'ap_42': 'anonymous', -            'ap_11': 'Linux i686', -            'ap_z': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0', -            'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0', -        } - -        guid = xml_text(resource, 'guid') -        requestor_info = self._downloader.cache.load('mvpd', requestor_id) or {} -        authn_token = requestor_info.get('authn_token') -        if authn_token: -            token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(authn_token, 'simpleTokenExpires'))) -            if token_expires and token_expires <= int(time.time()): -                authn_token = None -                requestor_info = {} -        if not authn_token: -            # TODO add support for other TV Providers -            mso_id = 'DTV' -            username, password = self._get_netrc_login_info(mso_id) -            if not username or not password: -                return '' - -            def post_form(form_page, note, data={}): -                post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url') -                return self._download_webpage( -                    post_url, video_id, note, data=urlencode_postdata(data or self._hidden_inputs(form_page)), headers={ -                        'Content-Type': 'application/x-www-form-urlencoded', -                    }) - -            provider_redirect_page = self._download_webpage( -                self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id, -                'Downloading Provider Redirect Page', query={ -                    'noflash': 'true', -                    'mso_id': mso_id, -                    'requestor_id': requestor_id, -                    'no_iframe': 'false', -                    'domain_name': 'adobe.com', -                    'redirect_url': url, -                }) -            provider_login_page = post_form( -                provider_redirect_page, 'Downloading Provider Login Page') -            mvpd_confirm_page = post_form(provider_login_page, 'Logging in', { -                'username': username, -                'password': password, -            }) -            post_form(mvpd_confirm_page, 'Confirming Login') - -            session = self._download_webpage( -                self._SERVICE_PROVIDER_TEMPLATE % 'session', video_id, -                'Retrieving Session', data=urlencode_postdata({ -                    '_method': 'GET', -                    'requestor_id': requestor_id, -                }), headers=mvpd_headers) -            authn_token = unescapeHTML(xml_text(session, 'authnToken')) -            requestor_info['authn_token'] = authn_token -            self._downloader.cache.store('mvpd', requestor_id, requestor_info) - -        authz_token = requestor_info.get(guid) -        if not authz_token: -            authorize = self._download_webpage( -                self._SERVICE_PROVIDER_TEMPLATE % 'authorize', video_id, -                'Retrieving Authorization Token', data=urlencode_postdata({ -                    'resource_id': resource, -                    'requestor_id': requestor_id, -                    'authentication_token': authn_token, -                    'mso_id': xml_text(authn_token, 'simpleTokenMsoID'), -                    'userMeta': '1', -                }), headers=mvpd_headers) -            authz_token = unescapeHTML(xml_text(authorize, 'authzToken')) -            requestor_info[guid] = authz_token -            self._downloader.cache.store('mvpd', requestor_id, requestor_info) - -        mvpd_headers.update({ -            'ap_19': xml_text(authn_token, 'simpleSamlNameID'), -            'ap_23': xml_text(authn_token, 'simpleSamlSessionIndex'), -        }) - -        return self._download_webpage( -            self._SERVICE_PROVIDER_TEMPLATE % 'shortAuthorize', -            video_id, 'Retrieving Media Token', data=urlencode_postdata({ -                'authz_token': authz_token, -                'requestor_id': requestor_id, -                'session_guid': xml_text(authn_token, 'simpleTokenAuthenticationGuid'), -                'hashed_guid': 'false', -            }), headers=mvpd_headers) -      def _real_extract(self, url):          url, smuggled_data = unsmuggle_url(url, {}) | 
