diff options
Diffstat (limited to 'youtube_dl/extractor/brightcove.py')
-rw-r--r-- | youtube_dl/extractor/brightcove.py | 43 |
1 files changed, 28 insertions, 15 deletions
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index ad22cbafd..003152c4e 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -6,24 +6,26 @@ import json import xml.etree.ElementTree from .common import InfoExtractor -from ..utils import ( - compat_urllib_parse, - find_xpath_attr, - fix_xml_ampersands, - compat_urlparse, +from ..compat import ( + compat_parse_qs, compat_str, + compat_urllib_parse, + compat_urllib_parse_urlparse, compat_urllib_request, - compat_parse_qs, - + compat_urlparse, +) +from ..utils import ( determine_ext, ExtractorError, - unsmuggle_url, + find_xpath_attr, + fix_xml_ampersands, unescapeHTML, + unsmuggle_url, ) class BrightcoveIE(InfoExtractor): - _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)' + _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)' _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' _TESTS = [ @@ -110,6 +112,8 @@ class BrightcoveIE(InfoExtractor): lambda m: m.group(1) + '/>', object_str) # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 object_str = object_str.replace('<--', '<!--') + # remove namespace to simplify extraction + object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str) object_str = fix_xml_ampersands(object_str) object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8')) @@ -218,7 +222,7 @@ class BrightcoveIE(InfoExtractor): webpage = self._download_webpage(req, video_id) error_msg = self._html_search_regex( - r"<h1>We're sorry.</h1>\s*<p>(.*?)</p>", webpage, + r"<h1>We're sorry.</h1>([\s\n]*<p>.*?</p>)+", webpage, 'error message', default=None) if error_msg is not None: raise ExtractorError( @@ -260,12 +264,21 @@ class BrightcoveIE(InfoExtractor): formats = [] for rend in renditions: url = rend['defaultURL'] + if not url: + continue + ext = None if rend['remote']: - # This type of renditions are served through akamaihd.net, - # but they don't use f4m manifests - url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' - ext = 'flv' - else: + url_comp = compat_urllib_parse_urlparse(url) + if url_comp.path.endswith('.m3u8'): + formats.extend( + self._extract_m3u8_formats(url, info['id'], 'mp4')) + continue + elif 'akamaihd.net' in url_comp.netloc: + # This type of renditions are served through + # akamaihd.net, but they don't use f4m manifests + url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' + ext = 'flv' + if ext is None: ext = determine_ext(url) size = rend.get('size') formats.append({ |