[brightcove] Add the extraction of the url from generic

author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-11-06 16:40:24 +0100
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-11-06 16:58:03 +0100
commit: eeb165e674e07aaa798f69e15f16faa01bc8feaa (patch)
tree: 196b2cf1da4e10b7e54f8c405ed47f4d1ae02ddc /youtube_dl
parent: 9ee2b5f6f2a6e07ab901e47ede59eb5382ebb6ef (diff)
2 files changed, 32 insertions, 4 deletions
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 0d9b87a34..b3c3dc0fd 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -9,6 +9,7 @@ from ..utils import (
     compat_urllib_parse,
     find_xpath_attr,
     compat_urlparse,
+    compat_str,
 
     ExtractorError,
 )
@@ -71,6 +72,19 @@ class BrightcoveIE(InfoExtractor):
         data = compat_urllib_parse.urlencode(params)
         return cls._FEDERATED_URL_TEMPLATE % data
 
+    @classmethod
+    def _extract_brightcove_url(cls, webpage):
+        """Try to extract the brightcove url from the webpage, returns None
+        if it can't be found
+        """
+        m_brightcove = re.search(
+            r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>',
+            webpage, re.DOTALL)
+        if m_brightcove is not None:
+            return cls._build_brighcove_url(m_brightcove.group())
+        else:
+            return None
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         query_str = mobj.group('query')
@@ -109,7 +123,7 @@ class BrightcoveIE(InfoExtractor):
 
     def _extract_video_info(self, video_info):
         info = {
-            'id': video_info['id'],
+            'id': compat_str(video_info['id']),
             'title': video_info['displayName'],
             'description': video_info.get('shortDescription'),
             'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 76d369273..04b7212f4 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -54,6 +54,21 @@ class GenericIE(InfoExtractor):
             },
             u'skip': u'There is a limit of 200 free downloads / month for the test song',
         },
+        # embedded brightcove video
+        {
+            u'add_ie': ['Brightcove'],
+            u'url': u'http://www.scientificamerican.com/article.cfm?id=soap-bubble-physics',
+            u'info_dict': {
+                u'id': u'2365799484001',
+                u'ext': u'mp4',
+                u'title': u'Bubble Simulation',
+                u'description': u'A visualization from a new computer model of foam behavior.',
+                u'uploader': u'Scientific American',
+            },
+            u'params': {
+                u'skip_download': True,
+            },
+        },
     ]
 
     def report_download_webpage(self, video_id):
@@ -146,10 +161,9 @@ class GenericIE(InfoExtractor):
 
         self.report_extraction(video_id)
         # Look for BrightCove:
-        m_brightcove = re.search(r'<object[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
-        if m_brightcove is not None:
+        bc_url = BrightcoveIE._extract_brightcove_url(webpage)
+        if bc_url is not None:
             self.to_screen(u'Brightcove video detected.')
-            bc_url = BrightcoveIE._build_brighcove_url(m_brightcove.group())
             return self.url_result(bc_url, 'Brightcove')
 
         # Look for embedded Vimeo player
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2013-11-06 16:40:24 +0100
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2013-11-06 16:58:03 +0100
commit	eeb165e674e07aaa798f69e15f16faa01bc8feaa (patch)
tree	196b2cf1da4e10b7e54f8c405ed47f4d1ae02ddc /youtube_dl
parent	9ee2b5f6f2a6e07ab901e47ede59eb5382ebb6ef (diff)