aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/theplatform.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/theplatform.py')
-rw-r--r--youtube_dl/extractor/theplatform.py60
1 files changed, 48 insertions, 12 deletions
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 9a424b1c6..adfe11e31 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -32,14 +32,24 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
class ThePlatformBaseIE(OnceIE):
+ _TP_TLD = 'com'
+
def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
meta = self._download_xml(
smil_url, video_id, note=note, query={'format': 'SMIL'},
headers=self.geo_verification_headers())
error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
- if error_element is not None and error_element.attrib['src'].startswith(
- 'http://link.theplatform.com/s/errorFiles/Unavailable.'):
- raise ExtractorError(error_element.attrib['abstract'], expected=True)
+ if error_element is not None:
+ exception = find_xpath_attr(
+ error_element, _x('.//smil:param'), 'name', 'exception')
+ if exception is not None:
+ if exception.get('value') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(error_element.attrib['abstract'])
+ elif error_element.attrib['src'].startswith(
+ 'http://link.theplatform.%s/s/errorFiles/Unavailable.'
+ % self._TP_TLD):
+ raise ExtractorError(
+ error_element.attrib['abstract'], expected=True)
smil_formats = self._parse_smil_formats(
meta, smil_url, video_id, namespace=default_ns,
@@ -66,7 +76,7 @@ class ThePlatformBaseIE(OnceIE):
return formats, subtitles
def _download_theplatform_metadata(self, path, video_id):
- info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
+ info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path)
return self._download_json(info_url, video_id)
def _parse_theplatform_metadata(self, info):
@@ -80,14 +90,33 @@ class ThePlatformBaseIE(OnceIE):
'url': src,
})
+ duration = info.get('duration')
+ tp_chapters = info.get('chapters', [])
+ chapters = []
+ if tp_chapters:
+ def _add_chapter(start_time, end_time):
+ start_time = float_or_none(start_time, 1000)
+ end_time = float_or_none(end_time, 1000)
+ if start_time is None or end_time is None:
+ return
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ })
+
+ for chapter in tp_chapters[:-1]:
+ _add_chapter(chapter.get('startTime'), chapter.get('endTime'))
+ _add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
+
return {
'title': info['title'],
'subtitles': subtitles,
'description': info['description'],
'thumbnail': info['defaultThumbnailUrl'],
- 'duration': int_or_none(info.get('duration'), 1000),
+ 'duration': float_or_none(duration, 1000),
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
'uploader': info.get('billingCode'),
+ 'chapters': chapters,
}
def _extract_theplatform_metadata(self, path, video_id):
@@ -179,8 +208,8 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
if m:
return [m.group('url')]
- # Are whitesapces ignored in URLs?
- # https://github.com/rg3/youtube-dl/issues/12044
+ # Are whitespaces ignored in URLs?
+ # https://github.com/ytdl-org/youtube-dl/issues/12044
matches = re.findall(
r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
if matches:
@@ -197,7 +226,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
def hex_to_bytes(hex):
return binascii.a2b_hex(hex.encode('ascii'))
- relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
+ relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
@@ -205,6 +234,9 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
mobj = re.match(self._VALID_URL, url)
provider_id = mobj.group('provider_id')
@@ -242,7 +274,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
if smuggled_data.get('force_smil_url', False):
smil_url = url
- # Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
+ # Explicitly specified SMIL (see https://github.com/ytdl-org/youtube-dl/issues/7385)
elif '/guid/' in url:
headers = {}
source_url = smuggled_data.get('source_url')
@@ -289,7 +321,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
class ThePlatformFeedIE(ThePlatformBaseIE):
_URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
- _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))'
+ _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[^&]+))'
_TESTS = [{
# From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
@@ -306,12 +338,15 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
'uploader': 'NBCU-NEWS',
},
+ }, {
+ 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=nn_netcast_180306.Copy.01',
+ 'only_matching': True,
}]
def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
entry = self._download_json(real_url, video_id)['entries'][0]
- main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
+ main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else entry.get('plmedia$publicUrl')
formats = []
subtitles = {}
@@ -324,7 +359,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
if first_video_id is None:
first_video_id = cur_video_id
duration = float_or_none(item.get('plfile$duration'))
- for asset_type in item['plfile$assetTypes']:
+ file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes']
+ for asset_type in file_asset_types:
if asset_type in asset_types:
continue
asset_types.append(asset_type)