diff options
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/aljazeera.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/brightcove.py | 156 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 33 | ||||
| -rw-r--r-- | youtube_dl/extractor/nowness.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/safari.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/space.py | 8 | ||||
| -rw-r--r-- | youtube_dl/extractor/tlc.py | 6 | 
8 files changed, 200 insertions, 24 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 06d25ef40..64ce3210b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -60,7 +60,10 @@ from .bloomberg import BloombergIE  from .bpb import BpbIE  from .br import BRIE  from .breakcom import BreakIE -from .brightcove import BrightcoveIE +from .brightcove import ( +    BrightcoveLegacyIE, +    BrightcoveNewIE, +)  from .buzzfeed import BuzzFeedIE  from .byutv import BYUtvIE  from .c56 import C56IE diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dl/extractor/aljazeera.py index 184a14a4f..5b2c0dc9a 100644 --- a/youtube_dl/extractor/aljazeera.py +++ b/youtube_dl/extractor/aljazeera.py @@ -15,7 +15,7 @@ class AlJazeeraIE(InfoExtractor):              'description': 'As a birth attendant advocating for family planning, Remy is on the frontline of Tondo\'s battle with overcrowding.',              'uploader': 'Al Jazeera English',          }, -        'add_ie': ['Brightcove'], +        'add_ie': ['BrightcoveLegacy'],          'skip': 'Not accessible from Travis CI server',      } @@ -32,5 +32,5 @@ class AlJazeeraIE(InfoExtractor):                  'playerKey=AQ~~%2CAAAAmtVJIFk~%2CTVGOQ5ZTwJbeMWnq5d_H4MOM57xfzApc'                  '&%40videoPlayer={0}'.format(brightcove_id)              ), -            'ie_key': 'Brightcove', +            'ie_key': 'BrightcoveLegacy',          } diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index 1686cdde1..6b184157c 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -20,12 +20,17 @@ from ..utils import (      ExtractorError,      find_xpath_attr,      fix_xml_ampersands, +    float_or_none, +    js_to_json, +    int_or_none, +    parse_iso8601,      unescapeHTML,      unsmuggle_url,  ) -class BrightcoveIE(InfoExtractor): +class BrightcoveLegacyIE(InfoExtractor): +    IE_NAME = 'brightcove:legacy'      _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'      _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' @@ -346,3 +351,152 @@ class BrightcoveIE(InfoExtractor):          if 'url' not in info and not info.get('formats'):              raise ExtractorError('Unable to extract video url for %s' % info['id'])          return info + + +class BrightcoveNewIE(InfoExtractor): +    IE_NAME = 'brightcove:new' +    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)' +    _TEST = { +        'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001', +        'md5': 'c8100925723840d4b0d243f7025703be', +        'info_dict': { +            'id': '4463358922001', +            'ext': 'mp4', +            'title': 'Meet the man behind Popcorn Time', +            'description': 'md5:eac376a4fe366edc70279bfb681aea16', +            'timestamp': 1441391203, +            'upload_date': '20150904', +            'duration': 165.768, +            'uploader_id': '929656772001', +        } +    } + +    @staticmethod +    def _extract_urls(webpage): +        # Reference: +        # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe +        # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript) +        # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html + +        entries = [] + +        # Look for iframe embeds [1] +        for _, url in re.findall( +                r'<iframe[^>]+src=(["\'])((?:https?:)//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage): +            entries.append(url) +        # Look for embed_in_page embeds [2] +        # According to examples from [3] it's unclear whether video id may be optional +        # and what to do when it is +        for video_id, account_id, player_id, embed in re.findall( +                r'''(?sx) +                    <video[^>]+ +                        data-video-id=["\'](\d+)["\'][^>]*>.*? +                    </video>.*? +                    <script[^>]+ +                        src=["\'](?:https?:)?//players\.brightcove\.net/ +                        (\d+)/([\da-f-]+)_([^/]+)/index\.min\.js +                ''', webpage): +            entries.append( +                'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' +                % (account_id, player_id, embed, video_id)) +        return entries + +    def _real_extract(self, url): +        account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() + +        webpage = self._download_webpage( +            'http://players.brightcove.net/%s/%s_%s/index.min.js' +            % (account_id, player_id, embed), video_id) + +        policy_key = None + +        catalog = self._search_regex( +            r'catalog\(({.+?})\);', webpage, 'catalog', default=None) +        if catalog: +            catalog = self._parse_json( +                js_to_json(catalog), video_id, fatal=False) +            if catalog: +                policy_key = catalog.get('policyKey') + +        if not policy_key: +            policy_key = self._search_regex( +                r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', +                webpage, 'policy key', group='pk') + +        req = compat_urllib_request.Request( +            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s' +            % (account_id, video_id), +            headers={'Accept': 'application/json;pk=%s' % policy_key}) +        json_data = self._download_json(req, video_id) + +        title = json_data['name'] + +        formats = [] +        for source in json_data.get('sources', []): +            source_type = source.get('type') +            src = source.get('src') +            if source_type == 'application/x-mpegURL': +                if not src: +                    continue +                m3u8_formats = self._extract_m3u8_formats( +                    src, video_id, 'mp4', entry_protocol='m3u8_native', +                    m3u8_id='hls', fatal=False) +                if m3u8_formats: +                    formats.extend(m3u8_formats) +            else: +                streaming_src = source.get('streaming_src') +                stream_name, app_name = source.get('stream_name'), source.get('app_name') +                if not src and not streaming_src and (not stream_name or not app_name): +                    continue +                tbr = float_or_none(source.get('avg_bitrate'), 1000) +                height = int_or_none(source.get('height')) +                f = { +                    'tbr': tbr, +                    'width': int_or_none(source.get('width')), +                    'height': height, +                    'filesize': int_or_none(source.get('size')), +                    'container': source.get('container'), +                    'vcodec': source.get('codec'), +                    'ext': source.get('container').lower(), +                } + +                def build_format_id(kind): +                    format_id = kind +                    if tbr: +                        format_id += '-%dk' % int(tbr) +                    if height: +                        format_id += '-%dp' % height +                    return format_id + +                if src or streaming_src: +                    f.update({ +                        'url': src or streaming_src, +                        'format_id': build_format_id('http' if src else 'http-streaming'), +                        'preference': 2 if src else 1, +                    }) +                else: +                    f.update({ +                        'url': app_name, +                        'play_path': stream_name, +                        'format_id': build_format_id('rtmp'), +                    }) +                formats.append(f) +        self._sort_formats(formats) + +        description = json_data.get('description') +        thumbnail = json_data.get('thumbnail') +        timestamp = parse_iso8601(json_data.get('published_at')) +        duration = float_or_none(json_data.get('duration'), 1000) +        tags = json_data.get('tags', []) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'duration': duration, +            'timestamp': timestamp, +            'uploader_id': account_id, +            'formats': formats, +            'tags': tags, +        } diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d0b486d2a..51516a38a 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -30,7 +30,10 @@ from ..utils import (      url_basename,      xpath_text,  ) -from .brightcove import BrightcoveIE +from .brightcove import ( +    BrightcoveLegacyIE, +    BrightcoveNewIE, +)  from .nbc import NBCSportsVPlayerIE  from .ooyala import OoyalaIE  from .rutv import RUTVIE @@ -275,7 +278,7 @@ class GenericIE(InfoExtractor):          # it also tests brightcove videos that need to set the 'Referer' in the          # http requests          { -            'add_ie': ['Brightcove'], +            'add_ie': ['BrightcoveLegacy'],              'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',              'info_dict': {                  'id': '2765128793001', @@ -299,7 +302,7 @@ class GenericIE(InfoExtractor):                  'uploader': 'thestar.com',                  'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',              }, -            'add_ie': ['Brightcove'], +            'add_ie': ['BrightcoveLegacy'],          },          {              'url': 'http://www.championat.com/video/football/v/87/87499.html', @@ -314,7 +317,7 @@ class GenericIE(InfoExtractor):          },          {              # https://github.com/rg3/youtube-dl/issues/3541 -            'add_ie': ['Brightcove'], +            'add_ie': ['BrightcoveLegacy'],              'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',              'info_dict': {                  'id': '3866516442001', @@ -1031,6 +1034,17 @@ class GenericIE(InfoExtractor):                  'ext': 'mp4',                  'title': 'cinemasnob',              }, +        }, +        # BrightcoveInPageEmbed embed +        { +            'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/', +            'info_dict': { +                'id': '4238694884001', +                'ext': 'flv', +                'title': 'Tabletop: Dread, Last Thoughts', +                'description': 'Tabletop: Dread, Last Thoughts', +                'duration': 51690, +            },          }      ] @@ -1290,14 +1304,14 @@ class GenericIE(InfoExtractor):              return self.playlist_result(                  urlrs, playlist_id=video_id, playlist_title=video_title) -        # Look for BrightCove: -        bc_urls = BrightcoveIE._extract_brightcove_urls(webpage) +        # Look for Brightcove Legacy Studio embeds +        bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)          if bc_urls:              self.to_screen('Brightcove video detected.')              entries = [{                  '_type': 'url',                  'url': smuggle_url(bc_url, {'Referer': url}), -                'ie_key': 'Brightcove' +                'ie_key': 'BrightcoveLegacy'              } for bc_url in bc_urls]              return { @@ -1307,6 +1321,11 @@ class GenericIE(InfoExtractor):                  'entries': entries,              } +        # Look for Brightcove New Studio embeds +        bc_urls = BrightcoveNewIE._extract_urls(webpage) +        if bc_urls: +            return _playlist_from_matches(bc_urls, ie='BrightcoveNew') +          # Look for embedded rtl.nl player          matches = re.findall(              r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py index b97f62fdb..0fba55833 100644 --- a/youtube_dl/extractor/nowness.py +++ b/youtube_dl/extractor/nowness.py @@ -1,7 +1,7 @@  # encoding: utf-8  from __future__ import unicode_literals -from .brightcove import BrightcoveIE +from .brightcove import BrightcoveLegacyIE  from .common import InfoExtractor  from ..utils import ExtractorError  from ..compat import ( @@ -22,10 +22,10 @@ class NownessBaseIE(InfoExtractor):                              'http://www.nowness.com/iframe?id=%s' % video_id, video_id,                              note='Downloading player JavaScript',                              errnote='Unable to download player JavaScript') -                        bc_url = BrightcoveIE._extract_brightcove_url(player_code) +                        bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)                          if bc_url is None:                              raise ExtractorError('Could not find player definition') -                        return self.url_result(bc_url, 'Brightcove') +                        return self.url_result(bc_url, 'BrightcoveLegacy')                      elif source == 'vimeo':                          return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')                      elif source == 'youtube': diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index a602af692..e9e33d0a3 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -4,7 +4,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from .brightcove import BrightcoveIE +from .brightcove import BrightcoveLegacyIE  from ..compat import (      compat_urllib_parse, @@ -112,11 +112,11 @@ class SafariIE(SafariBaseIE):              '%s/%s/chapter-content/%s.html' % (self._API_BASE, course_id, part),              part) -        bc_url = BrightcoveIE._extract_brightcove_url(webpage) +        bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)          if not bc_url:              raise ExtractorError('Could not extract Brightcove URL from %s' % url, expected=True) -        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'Brightcove') +        return self.url_result(smuggle_url(bc_url, {'Referer': url}), 'BrightcoveLegacy')  class SafariCourseIE(SafariBaseIE): diff --git a/youtube_dl/extractor/space.py b/youtube_dl/extractor/space.py index c2d0d36a6..ebb5d6ec0 100644 --- a/youtube_dl/extractor/space.py +++ b/youtube_dl/extractor/space.py @@ -3,14 +3,14 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from .brightcove import BrightcoveIE +from .brightcove import BrightcoveLegacyIE  from ..utils import RegexNotFoundError, ExtractorError  class SpaceIE(InfoExtractor):      _VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html'      _TEST = { -        'add_ie': ['Brightcove'], +        'add_ie': ['BrightcoveLegacy'],          'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html',          'info_dict': {              'id': '2780937028001', @@ -31,8 +31,8 @@ class SpaceIE(InfoExtractor):              brightcove_url = self._og_search_video_url(webpage)          except RegexNotFoundError:              # Other videos works fine with the info from the object -            brightcove_url = BrightcoveIE._extract_brightcove_url(webpage) +            brightcove_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)          if brightcove_url is None:              raise ExtractorError(                  'The webpage does not contain a video', expected=True) -        return self.url_result(brightcove_url, BrightcoveIE.ie_key()) +        return self.url_result(brightcove_url, BrightcoveLegacyIE.ie_key()) diff --git a/youtube_dl/extractor/tlc.py b/youtube_dl/extractor/tlc.py index 13263614c..d6d038a8d 100644 --- a/youtube_dl/extractor/tlc.py +++ b/youtube_dl/extractor/tlc.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals  import re  from .common import InfoExtractor -from .brightcove import BrightcoveIE +from .brightcove import BrightcoveLegacyIE  from .discovery import DiscoveryIE  from ..compat import compat_urlparse @@ -66,6 +66,6 @@ class TlcDeIE(InfoExtractor):          return {              '_type': 'url', -            'url': BrightcoveIE._extract_brightcove_url(iframe), -            'ie': BrightcoveIE.ie_key(), +            'url': BrightcoveLegacyIE._extract_brightcove_url(iframe), +            'ie': BrightcoveLegacyIE.ie_key(),          }  | 
