aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author Remita Amine <remitamine@gmail.com> 2017-12-19 02:00:13 +0100
committer Remita Amine <remitamine@gmail.com> 2017-12-19 02:00:38 +0100
commit 78466fcab519d1b92fd9846bc8073885308a7e22 (patch)
tree de2d76db0d87cf9fae568e25e8b1d0160c01cc4b /youtube_dl/extractor
parent 3961c6cb9d3a1c30fe31db774b0809095952f1bd (diff)
[shahid] add support for show pages(closes #7401)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/aws.py 78
-rw-r--r-- youtube_dl/extractor/extractors.py 5
-rw-r--r-- youtube_dl/extractor/scrippsnetworks.py 103
-rw-r--r-- youtube_dl/extractor/shahid.py 164
4 files changed, 219 insertions, 131 deletions
diff --git a/youtube_dl/extractor/aws.py b/youtube_dl/extractor/aws.py
new file mode 100644
index 000000000..670abce0c
--- /dev/null
+++ b/youtube_dl/extractor/aws.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import datetime
+import hashlib
+import hmac
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_urlencode
+
+
+class AWSIE(InfoExtractor):
+    """Base extractor for endpoints that need AWS Signature Version 4 signed GET requests.
+
+    Subclasses must provide ``_AWS_PROXY_HOST`` and ``_AWS_API_KEY``; both are
+    read by ``_aws_execute_api`` but are not defined on this class.
+    """
+    _AWS_ALGORITHM = 'AWS4-HMAC-SHA256'
+    _AWS_REGION = 'us-east-1'
+
+    def _aws_execute_api(self, aws_dict, video_id, query=None):
+        """Send a SigV4-signed GET request and return the decoded JSON response.
+
+        aws_dict -- dict with 'uri' (request path), 'access_key' and
+                    'secret_key'; an optional 'session_token' is sent as
+                    X-Amz-Security-Token and becomes part of the signature
+        video_id -- identifier passed through to _download_json
+        query    -- optional mapping of query string parameters
+        """
+        query = query or {}
+        amz_date = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
+        # The credential scope only uses the YYYYMMDD part of the timestamp.
+        date = amz_date[:8]
+        headers = {
+            'Accept': 'application/json',
+            'Host': self._AWS_PROXY_HOST,
+            'X-Amz-Date': amz_date,
+        }
+        session_token = aws_dict.get('session_token')
+        if session_token:
+            headers['X-Amz-Security-Token'] = session_token
+        headers['X-Api-Key'] = self._AWS_API_KEY
+
+        def aws_hash(s):
+            return hashlib.sha256(s.encode('utf-8')).hexdigest()
+
+        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
+        # Query parameters are sorted by name so the canonical query string
+        # does not depend on dict iteration order.
+        query_items = query.items() if hasattr(query, 'items') else query
+        canonical_querystring = compat_urllib_parse_urlencode(sorted(query_items))
+        # SigV4 requires the canonical and signed header lists to be sorted by
+        # lower-cased header name; plain dict iteration order is not guaranteed
+        # on every supported Python version, so sort explicitly.
+        sorted_headers = sorted(
+            (name.lower(), value) for name, value in headers.items())
+        canonical_headers = ''.join(
+            '%s:%s\n' % header for header in sorted_headers)
+        signed_headers = ';'.join(name for name, _ in sorted_headers)
+        canonical_request = '\n'.join([
+            'GET',
+            aws_dict['uri'],
+            canonical_querystring,
+            canonical_headers,
+            signed_headers,
+            # GET requests carry no body, so the payload hash is that of ''.
+            aws_hash('')
+        ])
+
+        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
+        credential_scope_list = [date, self._AWS_REGION, 'execute-api', 'aws4_request']
+        credential_scope = '/'.join(credential_scope_list)
+        string_to_sign = '\n'.join([self._AWS_ALGORITHM, amz_date, credential_scope, aws_hash(canonical_request)])
+
+        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
+        def aws_hmac(key, msg):
+            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
+
+        def aws_hmac_digest(key, msg):
+            return aws_hmac(key, msg).digest()
+
+        def aws_hmac_hexdigest(key, msg):
+            return aws_hmac(key, msg).hexdigest()
+
+        # The signing key is derived by chaining HMACs over each element of
+        # the credential scope, seeded with 'AWS4' + secret key.
+        k_signing = ('AWS4' + aws_dict['secret_key']).encode('utf-8')
+        for value in credential_scope_list:
+            k_signing = aws_hmac_digest(k_signing, value)
+
+        signature = aws_hmac_hexdigest(k_signing, string_to_sign)
+
+        # Task 4: http://docs.aws.amazon.com/general/latest/gr/sigv4-add-signature-to-request.html
+        headers['Authorization'] = ', '.join([
+            '%s Credential=%s/%s' % (self._AWS_ALGORITHM, aws_dict['access_key'], credential_scope),
+            'SignedHeaders=%s' % signed_headers,
+            'Signature=%s' % signature,
+        ])
+
+        return self._download_json(
+            'https://%s%s%s' % (self._AWS_PROXY_HOST, aws_dict['uri'], '?' + canonical_querystring if canonical_querystring else ''),
+            video_id, headers=headers)
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 407245513..513074801 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -927,7 +927,10 @@ from .sendtonews import SendtoNewsIE
from .servingsys import ServingSysIE
from .servus import ServusIE
from .sexu import SexuIE
-from .shahid import ShahidIE
+from .shahid import (
+ ShahidIE,
+ ShahidShowIE,
+)
from .shared import (
SharedIE,
VivoIE,
diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py
index b446a02ba..4023aeef8 100644
--- a/youtube_dl/extractor/scrippsnetworks.py
+++ b/youtube_dl/extractor/scrippsnetworks.py
@@ -1,13 +1,11 @@
# coding: utf-8
from __future__ import unicode_literals
-import datetime
import json
import hashlib
-import hmac
import re
-from .common import InfoExtractor
+from .aws import AWSIE
from .anvato import AnvatoIE
from ..utils import (
smuggle_url,
@@ -16,7 +14,7 @@ from ..utils import (
)
-class ScrippsNetworksWatchIE(InfoExtractor):
+class ScrippsNetworksWatchIE(AWSIE):
IE_NAME = 'scrippsnetworks:watch'
_VALID_URL = r'''(?x)
https?://
@@ -64,44 +62,27 @@ class ScrippsNetworksWatchIE(InfoExtractor):
'travelchannel': 'trav',
'geniuskitchen': 'genius',
}
- _SNI_HOST = 'web.api.video.snidigital.com'
- _AWS_REGION = 'us-east-1'
- _AWS_IDENTITY_ID_JSON = json.dumps({
- 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION
- })
- _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
_AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
- _AWS_SERVICE = 'execute-api'
- _AWS_REQUEST = 'aws4_request'
- _AWS_SIGNED_HEADERS = ';'.join([
- 'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key'])
- _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET
-%(uri)s
-
-host:%(host)s
-x-amz-date:%(date)s
-x-amz-security-token:%(token)s
-x-api-key:%(key)s
+ _AWS_PROXY_HOST = 'web.api.video.snidigital.com'
-%(signed_headers)s
-%(payload_hash)s'''
+ _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
site_id, video_id = mobj.group('site', 'id')
- def aws_hash(s):
- return hashlib.sha256(s.encode('utf-8')).hexdigest()
-
+ aws_identity_id_json = json.dumps({
+ 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
+ }).encode('utf-8')
token = self._download_json(
- 'https://cognito-identity.us-east-1.amazonaws.com/', video_id,
- data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'),
+ 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
+ data=aws_identity_id_json,
headers={
'Accept': '*/*',
'Content-Type': 'application/x-amz-json-1.1',
'Referer': url,
- 'X-Amz-Content-Sha256': aws_hash(self._AWS_IDENTITY_ID_JSON),
+ 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
'X-Amz-User-Agent': self._AWS_USER_AGENT,
})['Token']
@@ -124,64 +105,12 @@ x-api-key:%(key)s
sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
fatal=True)
- access_key_id = get('AccessKeyId')
- secret_access_key = get('SecretAccessKey')
- session_token = get('SessionToken')
-
- # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
- uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id)
- datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
- date = datetime_now[:8]
- canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % {
- 'uri': uri,
- 'host': self._SNI_HOST,
- 'date': datetime_now,
- 'token': session_token,
- 'key': self._AWS_API_KEY,
- 'signed_headers': self._AWS_SIGNED_HEADERS,
- 'payload_hash': aws_hash(''),
- }
-
- # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
- credential_string = '/'.join([date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST])
- string_to_sign = '\n'.join([
- 'AWS4-HMAC-SHA256', datetime_now, credential_string,
- aws_hash(canonical_string)])
-
- # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
- def aws_hmac(key, msg):
- return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)
-
- def aws_hmac_digest(key, msg):
- return aws_hmac(key, msg).digest()
-
- def aws_hmac_hexdigest(key, msg):
- return aws_hmac(key, msg).hexdigest()
-
- k_secret = 'AWS4' + secret_access_key
- k_date = aws_hmac_digest(k_secret.encode('utf-8'), date)
- k_region = aws_hmac_digest(k_date, self._AWS_REGION)
- k_service = aws_hmac_digest(k_region, self._AWS_SERVICE)
- k_signing = aws_hmac_digest(k_service, self._AWS_REQUEST)
-
- signature = aws_hmac_hexdigest(k_signing, string_to_sign)
-
- auth_header = ', '.join([
- 'AWS4-HMAC-SHA256 Credential=%s' % '/'.join(
- [access_key_id, date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST]),
- 'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS,
- 'Signature=%s' % signature,
- ])
-
- mcp_id = self._download_json(
- 'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={
- 'Accept': '*/*',
- 'Referer': url,
- 'Authorization': auth_header,
- 'X-Amz-Date': datetime_now,
- 'X-Amz-Security-Token': session_token,
- 'X-Api-Key': self._AWS_API_KEY,
- })['results'][0]['mcpId']
+ mcp_id = self._aws_execute_api({
+ 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
+ 'access_key': get('AccessKeyId'),
+ 'secret_key': get('SecretAccessKey'),
+ 'session_token': get('SessionToken'),
+ }, video_id)['results'][0]['mcpId']
return self.url_result(
smuggle_url(
diff --git a/youtube_dl/extractor/shahid.py b/youtube_dl/extractor/shahid.py
index 374f7faf9..5c2a6206b 100644
--- a/youtube_dl/extractor/shahid.py
+++ b/youtube_dl/extractor/shahid.py
@@ -1,22 +1,53 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
import json
+import math
+import re
-from .common import InfoExtractor
+from .aws import AWSIE
from ..compat import compat_HTTPError
from ..utils import (
+ clean_html,
ExtractorError,
+ InAdvancePagedList,
int_or_none,
parse_iso8601,
str_or_none,
urlencode_postdata,
- clean_html,
)
-class ShahidIE(InfoExtractor):
+class ShahidBaseIE(AWSIE):
+    """Shared base for the Shahid extractors: signed API calls and API error reporting."""
+    _AWS_PROXY_HOST = 'api2.shahid.net'
+    _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'
+
+    def _handle_error(self, e):
+        """Raise an expected ExtractorError if the HTTP error body lists user messages.
+
+        The body of ``e.cause`` is parsed as JSON (non-fatally); when its
+        'faults' entries carry 'userMessage' values, they are cleaned, joined
+        with ', ' and raised. Otherwise the method returns without raising.
+        """
+        fail_data = self._parse_json(
+            e.cause.read().decode('utf-8'), None, fatal=False)
+        if not fail_data:
+            return
+        messages = []
+        for fault in fail_data.get('faults', []):
+            if fault.get('userMessage'):
+                messages.append(clean_html(fault['userMessage']))
+        faults_message = ', '.join(messages)
+        if faults_message:
+            raise ExtractorError(faults_message, expected=True)
+
+    def _call_api(self, path, video_id, request=None):
+        """Call '/proxy/v2/<path>' through the signed AWS helper and return its JSON.
+
+        A truthy ``request`` is JSON-encoded into the 'request' query
+        parameter. HTTP errors are first offered to ``_handle_error`` and
+        re-raised unchanged if it does not raise itself.
+        """
+        query = {'request': json.dumps(request)} if request else {}
+        credentials = {
+            'uri': '/proxy/v2/' + path,
+            'access_key': 'AKIAI6X4TYCIXM2B7MUQ',
+            'secret_key': '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',
+        }
+        try:
+            return self._aws_execute_api(credentials, video_id, query)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError):
+                self._handle_error(e)
+            raise
+
+
+class ShahidIE(ShahidBaseIE):
_NETRC_MACHINE = 'shahid'
_VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
_TESTS = [{
@@ -41,34 +72,25 @@ class ShahidIE(InfoExtractor):
'only_matching': True
}]
- def _api2_request(self, *args, **kwargs):
- try:
- return self._download_json(*args, **kwargs)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError):
- fail_data = self._parse_json(
- e.cause.read().decode('utf-8'), None, fatal=False)
- if fail_data:
- faults = fail_data.get('faults', [])
- faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
- if faults_message:
- raise ExtractorError(faults_message, expected=True)
- raise
-
def _real_initialize(self):
email, password = self._get_login_info()
if email is None:
return
- user_data = self._api2_request(
- 'https://shahid.mbc.net/wd/service/users/login',
- None, 'Logging in', data=json.dumps({
- 'email': email,
- 'password': password,
- 'basic': 'false',
- }).encode('utf-8'), headers={
- 'Content-Type': 'application/json; charset=UTF-8',
- })['user']
+ try:
+ user_data = self._download_json(
+ 'https://shahid.mbc.net/wd/service/users/login',
+ None, 'Logging in', data=json.dumps({
+ 'email': email,
+ 'password': password,
+ 'basic': 'false',
+ }).encode('utf-8'), headers={
+ 'Content-Type': 'application/json; charset=UTF-8',
+ })['user']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError):
+ self._handle_error(e)
+ raise
self._download_webpage(
'https://shahid.mbc.net/populateContext',
@@ -81,25 +103,13 @@ class ShahidIE(InfoExtractor):
'sessionId': user_data['sessionId'],
}))
- def _get_api_data(self, response):
- data = response.get('data', {})
-
- error = data.get('error')
- if error:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
- expected=True)
-
- return data
-
def _real_extract(self, url):
page_type, video_id = re.match(self._VALID_URL, url).groups()
if page_type == 'clip':
page_type = 'episode'
- playout = self._api2_request(
- 'https://api2.shahid.net/proxy/v2/playout/url/' + video_id,
- video_id, 'Downloading player JSON')['playout']
+ playout = self._call_api(
+ 'playout/url/' + video_id, video_id)['playout']
if playout.get('drm'):
raise ExtractorError('This video is DRM protected.', expected=True)
@@ -107,13 +117,27 @@ class ShahidIE(InfoExtractor):
formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
self._sort_formats(formats)
- video = self._get_api_data(self._download_json(
+ # video = self._call_api(
+ # 'product/id', video_id, {
+ # 'id': video_id,
+ # 'productType': 'ASSET',
+ # 'productSubType': page_type.upper()
+ # })['productModel']
+
+ response = self._download_json(
'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
video_id, 'Downloading video JSON', query={
'apiKey': 'sh@hid0nlin3',
'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
- }))[page_type]
+ })
+ data = response.get('data', {})
+ error = data.get('error')
+ if error:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
+ expected=True)
+ video = data[page_type]
title = video['title']
categories = [
category['name']
@@ -135,3 +159,57 @@ class ShahidIE(InfoExtractor):
'episode_id': video_id,
'formats': formats,
}
+
+
+class ShahidShowIE(ShahidBaseIE):
+    """Extractor for Shahid show/series pages, returned as a paged playlist.
+
+    Each playlist entry is a URL result handed to the 'Shahid' extractor.
+    """
+    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
+        'info_dict': {
+            'id': '79187',
+            'title': 'رامز قرش البحر',
+            'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff',
+        },
+        'playlist_mincount': 32,
+    }, {
+        'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861',
+        'only_matching': True
+    }]
+    _PAGE_SIZE = 30
+
+    def _real_extract(self, url):
+        """Return a playlist result whose entries are fetched page by page."""
+        show_id = self._match_id(url)
+
+        product = self._call_api(
+            'playableAsset', show_id, {'showId': show_id})['productModel']
+        playlist = product['playlist']
+        playlist_id = playlist['id']
+        show = product.get('show', {})
+
+        def page_func(page_num):
+            # Fetch one page of the playlist and yield a URL result per product.
+            page = self._call_api(
+                'product/playlist', show_id, {
+                    'playListId': playlist_id,
+                    'pageNumber': page_num,
+                    # Must match the page size given to InAdvancePagedList below.
+                    'pageSize': self._PAGE_SIZE,
+                    'sorts': [{
+                        'order': 'DESC',
+                        'type': 'SORTDATE'
+                    }],
+                })
+            for entry in page.get('productList', {}).get('products', []):
+                # A missing or null 'productUrl' must not break the whole page.
+                product_url = (entry.get('productUrl') or {}).get('url')
+                if not product_url:
+                    continue
+                yield self.url_result(
+                    product_url, 'Shahid',
+                    str_or_none(entry.get('id')),
+                    entry.get('title'))
+
+        # Force float division: without it Python 2 floors the quotient
+        # before ceil() and the last, partially filled page is never fetched.
+        page_count = int(math.ceil(float(playlist['count']) / self._PAGE_SIZE))
+
+        entries = InAdvancePagedList(page_func, page_count, self._PAGE_SIZE)
+
+        return self.playlist_result(
+            entries, show_id, show.get('title'), show.get('description'))