about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2017-10-15 01:46:05 +0700
committer Sergey M․ <dstftw@gmail.com> 2017-10-15 01:57:43 +0700
commit b21ab85088345323d1e6d988b2cdce8e02fe6bdf (patch)
tree c8f5b413e7387ad7f2c03158a260a24cdc17946a
parent 210a2720bcdaf4f98561fefea021f42cae39462d (diff)
[scrippsnetworks:watch] Fix extraction (closes #14389)
-rw-r--r-- youtube_dl/extractor/scrippsnetworks.py 211
1 files changed, 167 insertions, 44 deletions
diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py
index 597d6f543..30bb31d69 100644
--- a/youtube_dl/extractor/scrippsnetworks.py
+++ b/youtube_dl/extractor/scrippsnetworks.py
@@ -1,60 +1,183 @@
# coding: utf-8
from __future__ import unicode_literals
-from .adobepass import AdobePassIE
+import datetime
+import json
+import hashlib
+import hmac
+import re
+
+from .common import InfoExtractor
+from .anvato import AnvatoIE
from ..utils import (
- int_or_none,
- smuggle_url,
- update_url_query,
+ urlencode_postdata,
+ xpath_text,
)
-class ScrippsNetworksWatchIE(AdobePassIE):
+class ScrippsNetworksWatchIE(InfoExtractor):
IE_NAME = 'scrippsnetworks:watch'
- _VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P<id>\d+)'
- _TEST = {
- 'url': 'http://watch.hgtv.com/player.HNT.html#0256538',
+ _VALID_URL = r'''(?x)
+ https?://
+ watch\.
+ (?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/
+ (?:
+ player\.[A-Z0-9]+\.html\#|
+ show/(?:[^/]+/){2}
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
'md5': '26545fd676d939954c6808274bdb905a',
'info_dict': {
- 'id': '0256538',
+ 'id': '4173834',
'ext': 'mp4',
- 'title': 'Seeking a Wow House',
- 'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.',
- 'uploader': 'SCNI',
- 'upload_date': '20170207',
- 'timestamp': 1486450493,
+ 'title': 'Best Ever Treehouses',
+ 'description': "We're searching for the most over the top treehouses.",
+ 'uploader': 'ANV',
+ 'upload_date': '20170922',
+ 'timestamp': 1506056400,
+ },
+ 'params': {
+ 'skip_download': True,
},
- 'skip': 'requires TV provider authentication',
+ 'add_ie': [AnvatoIE.ie_key()],
+ }, {
+ 'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
+ 'only_matching': True,
+ }]
+
+ _SNI_TABLE = {
+ 'hgtv': 'hgtv',
+ 'diynetwork': 'diy',
+ 'foodnetwork': 'food',
+ 'cookingchanneltv': 'cook',
+ 'travelchannel': 'trav',
+ 'geniuskitchen': 'geniuskitchen',
}
+ _SNI_HOST = 'web.api.video.snidigital.com'
+
+ _AWS_REGION = 'us-east-1'
+ _AWS_IDENTITY_ID_JSON = json.dumps({
+ 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION
+ })
+ _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
+ _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
+ _AWS_SERVICE = 'execute-api'
+ _AWS_REQUEST = 'aws4_request'
+ _AWS_SIGNED_HEADERS = ';'.join([
+ 'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key'])
+ _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET
+%(uri)s
+
+host:%(host)s
+x-amz-date:%(date)s
+x-amz-security-token:%(token)s
+x-api-key:%(key)s
+
+%(signed_headers)s
+%(payload_hash)s'''
def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- channel = self._parse_json(self._search_regex(
- r'"channels"\s*:\s*(\[.+\])',
- webpage, 'channels'), video_id)[0]
- video_data = next(v for v in channel['videos'] if v.get('nlvid') == video_id)
- title = video_data['title']
- release_url = video_data['releaseUrl']
- if video_data.get('restricted'):
- requestor_id = self._search_regex(
- r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id')
- resource = self._get_mvpd_resource(
- requestor_id, title, video_id,
- video_data.get('ratings', [{}])[0].get('rating'))
- auth = self._extract_mvpd_auth(
- url, video_id, requestor_id, resource)
- release_url = update_url_query(release_url, {'auth': auth})
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'title': title,
- 'url': smuggle_url(release_url, {'force_smil_url': True}),
- 'description': video_data.get('description'),
- 'thumbnail': video_data.get('thumbnailUrl'),
- 'series': video_data.get('showTitle'),
- 'season_number': int_or_none(video_data.get('season')),
- 'episode_number': int_or_none(video_data.get('episodeNumber')),
- 'ie_key': 'ThePlatform',
+ mobj = re.match(self._VALID_URL, url)
+ site_id, video_id = mobj.group('site', 'id')
+
+ def aws_hash(s):
+ return hashlib.sha256(s.encode('utf-8')).hexdigest()
+
+ token = self._download_json(
+ 'https://cognito-identity.us-east-1.amazonaws.com/', video_id,
+ data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'),
+ headers={
+ 'Accept': '*/*',
+ 'Content-Type': 'application/x-amz-json-1.1',
+ 'Referer': url,
+ 'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON),
+ 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
+ 'X-Amz-User-Agent': self._AWS_USER_AGENT,
+ })['Token']
+
+ sts = self._download_xml(
+ 'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
+ 'Action': 'AssumeRoleWithWebIdentity',
+ 'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
+ 'RoleSessionName': 'web-identity',
+ 'Version': '2011-06-15',
+ 'WebIdentityToken': token,
+ }), headers={
+ 'Referer': url,
+ 'X-Amz-User-Agent': self._AWS_USER_AGENT,
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
+ })
+
+ def get(key):
+ return xpath_text(
+ sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
+ fatal=True)
+
+ access_key_id = get('AccessKeyId')
+ secret_access_key = get('SecretAccessKey')
+ session_token = get('SessionToken')
+
+ # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
+ uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id)
+ datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+ date = datetime_now[:8]
+ canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % {
+ 'uri': uri,
+ 'host': self._SNI_HOST,
+ 'date': datetime_now,
+ 'token': session_token,
+ 'key': self._AWS_API_KEY,
+ 'signed_headers': self._AWS_SIGNED_HEADERS,
+ 'payload_hash': aws_hash(''),
}
+
+ # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
+ credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST])
+ string_to_sign = '\n'.join([
+ 'AWS4-HMAC-SHA256', datetime_now, credential_string,
+ aws_hash(canonical_string)])
+
+ # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+ def aws_hmac(key, msg):
+ return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
+
+ def aws_hmac_digest(key, msg):
+ return aws_hmac(key, msg).digest()
+
+ def aws_hmac_hexdigest(key, msg):
+ return aws_hmac(key, msg).hexdigest()
+
+ k_secret = 'AWS4' + secret_access_key
+ k_date = aws_hmac_digest(k_secret.encode('utf-8'), date)
+ k_region = aws_hmac_digest(k_date, self._AWS_REGION)
+ k_service = aws_hmac_digest(k_region, self._AWS_SERVICE)
+ k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST)
+
+ signature = aws_hmac_hexdigest(k_signing, string_to_sign)
+
+ auth_header = ', '.join([
+ 'AWS4-HMAC-SHA256 Credential=%s' % '/'.join(
+ [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]),
+ 'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS,
+ 'Signature=%s' % signature,
+ ])
+
+ mcp_id = self._download_json(
+ 'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={
+ 'Accept': '*/*',
+ 'Referer': url,
+ 'Authorization': auth_header,
+ 'X-Amz-Date': datetime_now,
+ 'X-Amz-Security-Token': session_token,
+ 'X-Api-Key': self._AWS_API_KEY,
+ })['results'][0]['mcpId']
+
+ return self.url_result(
+ 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
+ AnvatoIE.ie_key(), video_id=mcp_id)