about summary refs log tree commit diff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2014-02-03 15:19:40 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2014-02-03 15:19:40 +0100
commit 99877772d08285b1b2743427ddd20440f4f4ded2 (patch)
tree bc05d503c7ec012f00185b9eb45e9cdc360ad3ae
parent b0268cb6ce16f54ef23901c860cba6be1e16cf37 (diff)
download youtube-dl-99877772d08285b1b2743427ddd20440f4f4ded2.tar.xz
[generic] Add support for multiple brightcove URLs (Fixes #2283)
-rw-r--r-- test/test_playlists.py 11
-rw-r--r-- youtube_dl/extractor/brightcove.py 19
-rw-r--r-- youtube_dl/extractor/generic.py 18
3 files changed, 36 insertions, 12 deletions
diff --git a/test/test_playlists.py b/test/test_playlists.py
index b3ce6f71e..fda2e0112 100644
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@@ -34,6 +34,7 @@ from youtube_dl.extractor import (
KhanAcademyIE,
EveryonesMixtapeIE,
RutubeChannelIE,
+ GenericIE,
)
@@ -229,6 +230,16 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '1409')
self.assertTrue(len(result['entries']) >= 34)
+ def test_multiple_brightcove_videos(self):
+ # https://github.com/rg3/youtube-dl/issues/2283
+ dl = FakeYDL()
+ ie = GenericIE(dl)
+ result = ie.extract('http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html')
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['id'], 'always-never-nuclear-command-and-control')
+ self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
+ self.assertEqual(len(result['entries']), 3)
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 9ccf923a6..031fe385d 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
@classmethod
def _extract_brightcove_url(cls, webpage):
- """Try to extract the brightcove url from the wepbage, returns None
+ """Try to extract the brightcove url from the webpage, returns None
if it can't be found
"""
+ urls = cls._extract_brightcove_urls(webpage)
+ return urls[0] if urls else None
+
+ @classmethod
+ def _extract_brightcove_urls(cls, webpage):
+ """Return a list of all Brightcove URLs from the webpage """
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m:
- return url_m.group(1)
+ return [url_m.group(1)]
- m_brightcove = re.search(
+ matches = re.findall(
r'''(?sx)<object
(?:
- [^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 |
+ [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''',
webpage)
- if m_brightcove is not None:
- return cls._build_brighcove_url(m_brightcove.group())
- else:
- return None
+ return [cls._build_brighcove_url(m) for m in matches]
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 082da9c77..5bcc78bf7 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -234,11 +234,21 @@ class GenericIE(InfoExtractor):
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
# Look for BrightCove:
- bc_url = BrightcoveIE._extract_brightcove_url(webpage)
- if bc_url is not None:
+ bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
+ if bc_urls:
self.to_screen('Brightcove video detected.')
- surl = smuggle_url(bc_url, {'Referer': url})
- return self.url_result(surl, 'Brightcove')
+ entries = [{
+ '_type': 'url',
+ 'url': smuggle_url(bc_url, {'Referer': url}),
+ 'ie_key': 'Brightcove'
+ } for bc_url in bc_urls]
+
+ return {
+ '_type': 'playlist',
+ 'title': video_title,
+ 'id': video_id,
+ 'entries': entries,
+ }
# Look for embedded (iframe) Vimeo player
mobj = re.search(