aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/theplatform.py
diff options
context:
space:
mode:
authorYen Chi Hsuan <yan12125@gmail.com>2015-08-20 01:38:39 +0800
committerYen Chi Hsuan <yan12125@gmail.com>2015-08-20 01:38:39 +0800
commit05fe2594e4589b4e714a423550172eeec3949a70 (patch)
tree2b1710e31dcf81cd491253c4a847027a359778ca /youtube_dl/extractor/theplatform.py
parent26e1c3514f4af1ed60cd1114a653fe49e1fa8d11 (diff)
[theplatform] Support URLs with 'guid='
Diffstat (limited to 'youtube_dl/extractor/theplatform.py')
-rw-r--r--youtube_dl/extractor/theplatform.py36
1 files changed, 36 insertions, 0 deletions
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index f02e0f58d..883bf491c 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -9,6 +9,10 @@ import hashlib
from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
from ..utils import (
determine_ext,
ExtractorError,
@@ -120,6 +124,20 @@ class ThePlatformIE(ThePlatformBaseIE):
}, {
'url': 'http://player.theplatform.com/p/NnzsPC/widget/select/media/4Y0TlYUr_ZT7',
'only_matching': True,
+ }, {
+ 'url': 'http://player.theplatform.com/p/2E2eJC/nbcNewsOffsite?guid=tdy_or_siri_150701',
+ 'md5': '734f3790fb5fc4903da391beeebc4836',
+ 'info_dict': {
+ 'id': 'tdy_or_siri_150701',
+ 'ext': 'mp4',
+ 'title': 'iPhone Siri’s sassy response to a math question has people talking',
+ 'description': 'md5:a565d1deadd5086f3331d57298ec6333',
+ 'duration': 83.0,
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ 'timestamp': 1435752600,
+ 'upload_date': '20150701',
+ 'categories': ['Today/Shows/Orange Room', 'Today/Sections/Money', 'Today/Topics/Tech', "Today/Topics/Editor's picks"],
+ },
}]
@staticmethod
@@ -154,6 +172,24 @@ class ThePlatformIE(ThePlatformBaseIE):
path += '/media'
path += '/' + video_id
+ qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+ if 'guid' in qs_dict:
+ webpage = self._download_webpage(url, video_id)
+ scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
+ feed_id = None
+ # The feed id is usually located in the last script.
+ # There seems to be no pattern in the filename of the relevant script,
+ # so try the scripts one by one, starting from the last.
+ for script in reversed(scripts):
+ feed_script = self._download_webpage(script, video_id, 'Downloading feed script')
+ feed_id = self._search_regex(r'defaultFeedId\s*:\s*"([^"]+)"', feed_script, 'default feed id', default=None)
+ if feed_id is not None:
+ break
+ if feed_id is None:
+ raise ExtractorError('Unable to find feed id')
+ return self.url_result('http://feed.theplatform.com/f/%s/%s?byGuid=%s' % (
+ provider_id, feed_id, qs_dict['guid'][0]))
+
if smuggled_data.get('force_smil_url', False):
smil_url = url
elif mobj.group('config'):