about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2018-01-20 00:33:45 +0700
committer: Sergey M․ <dstftw@gmail.com> 2018-01-20 00:34:10 +0700
commit: 7d5406216ae061d11815a8e8a97ad6617eeaab38 (patch)
tree: 0b6b614c68f1888c29798d095ee6b5fd89bf5cc1
parent: 2a3683c3780698dfa1d1175734fca97d1cd6c215 (diff)
[springboardplatform] Add extractor
-rw-r--r-- youtube_dl/extractor/extractors.py | 1
-rw-r--r-- youtube_dl/extractor/generic.py | 22
-rw-r--r-- youtube_dl/extractor/springboardplatform.py | 125
3 files changed, 148 insertions, 0 deletions
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
index 9d3582dfa..f5bac6fd9 100644
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -989,6 +989,7 @@ from .stitcher import StitcherIE
from .sport5 import Sport5IE
from .sportbox import SportBoxEmbedIE
from .sportdeutschland import SportDeutschlandIE
+from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE
from .srgssr import (
SRGSSRIE,
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 9b0cd004f..1d9da8115 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -101,6 +101,7 @@ from .vzaar import VzaarIE
from .channel9 import Channel9IE
from .vshare import VShareIE
from .mediasite import MediasiteIE
+from .springboardplatform import SpringboardPlatformIE
class GenericIE(InfoExtractor):
@@ -1938,6 +1939,21 @@ class GenericIE(InfoExtractor):
'timestamp': 1474354800,
'upload_date': '20160920',
}
+ },
+ {
+ 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
+ 'info_dict': {
+ 'id': '1731611',
+ 'ext': 'mp4',
+ 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
+ 'description': 'md5:eb5f23826a027ba95277d105f248b825',
+ 'timestamp': 1516100691,
+ 'upload_date': '20180116',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [SpringboardPlatformIE.ie_key()],
}
# {
# # TODO: find another test
@@ -2906,6 +2922,12 @@ class GenericIE(InfoExtractor):
for mediasite_url in mediasite_urls]
return self.playlist_result(entries, video_id, video_title)
+ springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
+ if springboardplatform_urls:
+ return self.playlist_from_matches(
+ springboardplatform_urls, video_id, video_title,
+ ie=SpringboardPlatformIE.ie_key())
+
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():
diff --git a/youtube_dl/extractor/springboardplatform.py b/youtube_dl/extractor/springboardplatform.py
new file mode 100644
index 000000000..07d99b579
--- /dev/null
+++ b/youtube_dl/extractor/springboardplatform.py
@@ -0,0 +1,125 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ xpath_attr,
+ xpath_text,
+ xpath_element,
+ unescapeHTML,
+ unified_timestamp,
+)
+
+
+class SpringboardPlatformIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ cms\.springboardplatform\.com/
+ (?:
+ (?:previews|embed_iframe)/(?P<index>\d+)/video/(?P<id>\d+)|
+ xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+)
+ )
+ '''
+ _TESTS = [{
+ 'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1',
+ 'md5': '5c3cb7b5c55740d482561099e920f192',
+ 'info_dict': {
+ 'id': '981017',
+ 'ext': 'mp4',
+ 'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
+ 'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1409132328,
+ 'upload_date': '20140827',
+ 'duration': 193,
+ },
+ }, {
+ 'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('id_2')
+ index = mobj.group('index') or mobj.group('index_2')
+
+ video = self._download_xml(
+ 'http://cms.springboardplatform.com/xml_feeds_advanced/index/%s/rss3/%s'
+ % (index, video_id), video_id)
+
+ item = xpath_element(video, './/item', 'item', fatal=True)
+
+ content = xpath_element(
+ item, './{http://search.yahoo.com/mrss/}content', 'content',
+ fatal=True)
+ title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True))
+
+ video_url = content.attrib['url']
+
+ if 'error_video.mp4' in video_url:
+ raise ExtractorError(
+ 'Video %s no longer exists' % video_id, expected=True)
+
+ duration = int_or_none(content.get('duration'))
+ tbr = int_or_none(content.get('bitrate'))
+ filesize = int_or_none(content.get('fileSize'))
+ width = int_or_none(content.get('width'))
+ height = int_or_none(content.get('height'))
+
+ description = unescapeHTML(xpath_text(
+ item, './description', 'description'))
+ thumbnail = xpath_attr(
+ item, './{http://search.yahoo.com/mrss/}thumbnail', 'url',
+ 'thumbnail')
+
+ timestamp = unified_timestamp(xpath_text(
+ item, './{http://cms.springboardplatform.com/namespaces.html}created',
+ 'timestamp'))
+
+ formats = [{
+ 'url': video_url,
+ 'format_id': 'http',
+ 'tbr': tbr,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ }]
+
+ m3u8_format = formats[0].copy()
+ m3u8_format.update({
+ 'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8',
+ 'ext': 'mp4',
+ 'format_id': 'hls',
+ 'protocol': 'm3u8_native',
+ })
+ formats.append(m3u8_format)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ }