diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-11-06 16:40:24 +0100 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-11-06 16:58:03 +0100 |
commit | eeb165e674e07aaa798f69e15f16faa01bc8feaa (patch) | |
tree | 196b2cf1da4e10b7e54f8c405ed47f4d1ae02ddc /youtube_dl/extractor | |
parent | 9ee2b5f6f2a6e07ab901e47ede59eb5382ebb6ef (diff) |
[brightcove] Add the extraction of the url from generic
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/brightcove.py | 16 | ||||
-rw-r--r-- | youtube_dl/extractor/generic.py | 20 |
2 files changed, 32 insertions, 4 deletions
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 0d9b87a34..b3c3dc0fd 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -9,6 +9,7 @@ from ..utils import ( compat_urllib_parse, find_xpath_attr, compat_urlparse, + compat_str, ExtractorError, ) @@ -71,6 +72,19 @@ class BrightcoveIE(InfoExtractor): data = compat_urllib_parse.urlencode(params) return cls._FEDERATED_URL_TEMPLATE % data + @classmethod + def _extract_brightcove_url(cls, webpage): + """Try to extract the brightcove url from the webpage, returns None + if it can't be found + """ + m_brightcove = re.search( + r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', + webpage, re.DOTALL) + if m_brightcove is not None: + return cls._build_brighcove_url(m_brightcove.group()) + else: + return None + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) query_str = mobj.group('query') @@ -109,7 +123,7 @@ class BrightcoveIE(InfoExtractor): def _extract_video_info(self, video_info): info = { - 'id': video_info['id'], + 'id': compat_str(video_info['id']), 'title': video_info['displayName'], 'description': video_info.get('shortDescription'), 'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 76d369273..04b7212f4 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -54,6 +54,21 @@ class GenericIE(InfoExtractor): }, u'skip': u'There is a limit of 200 free downloads / month for the test song', }, + # embedded brightcove video + { + u'add_ie': ['Brightcove'], + u'url': u'http://www.scientificamerican.com/article.cfm?id=soap-bubble-physics', + u'info_dict': { + u'id': u'2365799484001', + u'ext': u'mp4', + u'title': u'Bubble Simulation', + u'description': u'A visualization from a new computer model of foam behavior.', + u'uploader': u'Scientific American', + }, + 
u'params': { + u'skip_download': True, + }, + }, ] def report_download_webpage(self, video_id): @@ -146,10 +161,9 @@ class GenericIE(InfoExtractor): self.report_extraction(video_id) # Look for BrightCove: - m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL) - if m_brightcove is not None: + bc_url = BrightcoveIE._extract_brightcove_url(webpage) + if bc_url is not None: self.to_screen(u'Brightcove video detected.') - bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group()) return self.url_result(bc_url, 'Brightcove') # Look for embedded Vimeo player |