about summary refs log tree commit diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author Remita Amine <remitamine@gmail.com> 2018-05-30 13:21:07 +0100
committer Remita Amine <remitamine@gmail.com> 2018-05-30 13:21:07 +0100
commit e0d42dd4b270d06a953822c091afefd946bd93f2 (patch)
tree cb4becca6b092335404b00454346c367cc4b4b42 /youtube_dl/extractor
parent a07879d6b2edc474b0595a29932726fa7aa14b3a (diff)
[teamcoco] Fix extraction for full episodes (closes #16573)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/tbs.py | 61
-rw-r--r-- youtube_dl/extractor/teamcoco.py | 102
-rw-r--r-- youtube_dl/extractor/turner.py | 47
3 files changed, 122 insertions, 88 deletions
diff --git a/youtube_dl/extractor/tbs.py b/youtube_dl/extractor/tbs.py
index edc31729d..784f8ed66 100644
--- a/youtube_dl/extractor/tbs.py
+++ b/youtube_dl/extractor/tbs.py
@@ -4,6 +4,10 @@ from __future__ import unicode_literals
import re
from .turner import TurnerBaseIE
+from ..compat import (
+ compat_urllib_parse_urlparse,
+ compat_parse_qs,
+)
from ..utils import (
float_or_none,
int_or_none,
@@ -38,48 +42,22 @@ class TBSIE(TurnerBaseIE):
def _real_extract(self, url):
site, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id)
- video_data = self._parse_json(self._search_regex(
+ drupal_settings = self._parse_json(self._search_regex(
r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
- webpage, 'drupal setting'), display_id)['turner_playlist'][0]
+ webpage, 'drupal setting'), display_id)
+ video_data = drupal_settings['turner_playlist'][0]
media_id = video_data['mediaID']
title = video_data['title']
+ tokenizer_query = compat_parse_qs(compat_urllib_parse_urlparse(
+ drupal_settings['ngtv_token_url']).query)
- streams_data = self._download_json(
- 'http://medium.ngtv.io/media/%s/tv' % media_id,
- media_id)['media']['tv']
- duration = None
- chapters = []
- formats = []
- for supported_type in ('unprotected', 'bulkaes'):
- stream_data = streams_data.get(supported_type, {})
- m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
- if not m3u8_url:
- continue
- if stream_data.get('playlistProtection') == 'spe':
- m3u8_url = self._add_akamai_spe_token(
- 'http://token.vgtf.net/token/token_spe',
- m3u8_url, media_id, {
- 'url': url,
- 'site_name': site[:3].upper(),
- 'auth_required': video_data.get('authRequired') == '1',
- })
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
-
- duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
-
- if not chapters:
- for chapter in stream_data.get('contentSegments', []):
- start_time = float_or_none(chapter.get('start'))
- duration = float_or_none(chapter.get('duration'))
- if start_time is None or duration is None:
- continue
- chapters.append({
- 'start_time': start_time,
- 'end_time': start_time + duration,
- })
- self._sort_formats(formats)
+ info = self._extract_ngtv_info(
+ media_id, tokenizer_query, {
+ 'url': url,
+ 'site_name': site[:3].upper(),
+ 'auth_required': video_data.get('authRequired') == '1',
+ })
thumbnails = []
for image_id, image in video_data.get('images', {}).items():
@@ -98,15 +76,14 @@ class TBSIE(TurnerBaseIE):
})
thumbnails.append(i)
- return {
+ info.update({
'id': media_id,
'title': title,
'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
- 'duration': duration,
+ 'duration': float_or_none(video_data.get('duration')) or info.get('duration'),
'timestamp': int_or_none(video_data.get('created')),
'season_number': int_or_none(video_data.get('season')),
'episode_number': int_or_none(video_data.get('episode')),
- 'cahpters': chapters,
'thumbnails': thumbnails,
- 'formats': formats,
- }
+ })
+ return info
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
index 63fd4fe1c..73469cc5d 100644
--- a/youtube_dl/extractor/teamcoco.py
+++ b/youtube_dl/extractor/teamcoco.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import json
-from .common import InfoExtractor
+from .turner import TurnerBaseIE
from ..utils import (
determine_ext,
ExtractorError,
@@ -15,7 +15,7 @@ from ..utils import (
)
-class TeamcocoIE(InfoExtractor):
+class TeamcocoIE(TurnerBaseIE):
_VALID_URL = r'https?://teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
_TESTS = [
{
@@ -110,6 +110,8 @@ class TeamcocoIE(InfoExtractor):
name
}
duration
+ turnerMediaId
+ turnerMediaAuthToken
}
}
... on NotFoundSlug {
@@ -123,53 +125,65 @@ class TeamcocoIE(InfoExtractor):
record = response['record']
video_id = record['id']
- video_sources = self._graphql_call('''{
- %s(id: "%s") {
- src
- }
-}''', 'RecordVideoSource', video_id) or {}
-
- formats = []
- get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
- for format_id, src in video_sources.get('src', {}).items():
- if not isinstance(src, dict):
- continue
- src_url = src.get('src')
- if not src_url:
- continue
- ext = determine_ext(src_url, mimetype2ext(src.get('type')))
- if format_id == 'hls' or ext == 'm3u8':
- # compat_urllib_parse.urljoin does not work here
- if src_url.startswith('/'):
- src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
- formats.extend(self._extract_m3u8_formats(
- src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
- else:
- if src_url.startswith('/mp4:protected/'):
- # TODO Correct extraction for these files
- continue
- tbr = int_or_none(self._search_regex(
- r'(\d+)k\.mp4', src_url, 'tbr', default=None))
-
- formats.append({
- 'url': src_url,
- 'ext': ext,
- 'tbr': tbr,
- 'format_id': format_id,
- 'quality': get_quality(format_id),
- })
- if not formats:
- formats = self._extract_m3u8_formats(
- record['file']['url'], video_id, 'mp4', fatal=False)
- self._sort_formats(formats)
-
- return {
+ info = {
'id': video_id,
'display_id': display_id,
- 'formats': formats,
'title': record['title'],
'thumbnail': record.get('thumb', {}).get('preview'),
'description': record.get('teaser'),
'duration': parse_duration(record.get('duration')),
'timestamp': parse_iso8601(record.get('publishOn')),
}
+
+ media_id = record.get('turnerMediaId')
+ if media_id:
+ self._initialize_geo_bypass({
+ 'countries': ['US'],
+ })
+ info.update(self._extract_ngtv_info(media_id, {
+ 'accessToken': record['turnerMediaAuthToken'],
+ 'accessTokenType': 'jws',
+ }))
+ else:
+ video_sources = self._graphql_call('''{
+ %s(id: "%s") {
+ src
+ }
+}''', 'RecordVideoSource', video_id) or {}
+
+ formats = []
+ get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
+ for format_id, src in video_sources.get('src', {}).items():
+ if not isinstance(src, dict):
+ continue
+ src_url = src.get('src')
+ if not src_url:
+ continue
+ ext = determine_ext(src_url, mimetype2ext(src.get('type')))
+ if format_id == 'hls' or ext == 'm3u8':
+ # compat_urllib_parse.urljoin does not work here
+ if src_url.startswith('/'):
+ src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
+ formats.extend(self._extract_m3u8_formats(
+ src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+ else:
+ if src_url.startswith('/mp4:protected/'):
+ # TODO Correct extraction for these files
+ continue
+ tbr = int_or_none(self._search_regex(
+ r'(\d+)k\.mp4', src_url, 'tbr', default=None))
+
+ formats.append({
+ 'url': src_url,
+ 'ext': ext,
+ 'tbr': tbr,
+ 'format_id': format_id,
+ 'quality': get_quality(format_id),
+ })
+ if not formats:
+ formats = self._extract_m3u8_formats(
+ record['file']['url'], video_id, 'mp4', fatal=False)
+ self._sort_formats(formats)
+ info['formats'] = formats
+
+ return info
diff --git a/youtube_dl/extractor/turner.py b/youtube_dl/extractor/turner.py
index e73b64aeb..2b7b0d6e1 100644
--- a/youtube_dl/extractor/turner.py
+++ b/youtube_dl/extractor/turner.py
@@ -9,6 +9,7 @@ from ..utils import (
xpath_text,
int_or_none,
determine_ext,
+ float_or_none,
parse_duration,
xpath_attr,
update_url_query,
@@ -23,14 +24,17 @@ class TurnerBaseIE(AdobePassIE):
def _extract_timestamp(self, video_data):
return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
- def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data):
+ def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
if not token:
query = {
'path': secure_path,
- 'videoId': content_id,
}
+ if custom_tokenizer_query:
+ query.update(custom_tokenizer_query)
+ else:
+ query['videoId'] = content_id
if ap_data.get('auth_required'):
query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
auth = self._download_xml(
@@ -188,3 +192,42 @@ class TurnerBaseIE(AdobePassIE):
'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
'is_live': is_live,
}
+
+ def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
+ streams_data = self._download_json(
+ 'http://medium.ngtv.io/media/%s/tv' % media_id,
+ media_id)['media']['tv']
+ duration = None
+ chapters = []
+ formats = []
+ for supported_type in ('unprotected', 'bulkaes'):
+ stream_data = streams_data.get(supported_type, {})
+ m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
+ if not m3u8_url:
+ continue
+ if stream_data.get('playlistProtection') == 'spe':
+ m3u8_url = self._add_akamai_spe_token(
+ 'http://token.ngtv.io/token/token_spe',
+ m3u8_url, media_id, ap_data or {}, tokenizer_query)
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
+
+ duration = float_or_none(stream_data.get('totalRuntime'))
+
+ if not chapters:
+ for chapter in stream_data.get('contentSegments', []):
+ start_time = float_or_none(chapter.get('start'))
+ chapter_duration = float_or_none(chapter.get('duration'))
+ if start_time is None or chapter_duration is None:
+ continue
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': start_time + chapter_duration,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'formats': formats,
+ 'chapters': chapters,
+ 'duration': duration,
+ }