diff options
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-02-25 23:57:54 +0100 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-02-25 23:57:54 +0100 | 
| commit | 020cf5ebfd86d039b542e37b1a70e81afe2e034c (patch) | |
| tree | 18fa3783ce4dc30b01344fdee759485052da87d3 | |
| parent | d0a72674c6eab914be41b637ac11627485111313 (diff) | |
[nbc] Add an extractor for the main nbc.com site
Some of the videos are encrypted; the f4m downloader doesn’t support them.
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 5 | ||||
| -rw-r--r-- | youtube_dl/extractor/nbc.py | 24 | ||||
| -rw-r--r-- | youtube_dl/extractor/theplatform.py | 14 | 
3 files changed, 38 insertions, 5 deletions
| diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 8eff3df41..c276fad0c 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -152,7 +152,10 @@ from .myspass import MySpassIE  from .myvideo import MyVideoIE  from .naver import NaverIE  from .nba import NBAIE -from .nbc import NBCNewsIE +from .nbc import ( +    NBCIE, +    NBCNewsIE, +)  from .ndr import NDRIE  from .ndtv import NDTVIE  from .newgrounds import NewgroundsIE diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py index ff750de3f..1a63ab56a 100644 --- a/youtube_dl/extractor/nbc.py +++ b/youtube_dl/extractor/nbc.py @@ -6,6 +6,30 @@ from .common import InfoExtractor  from ..utils import find_xpath_attr, compat_str +class NBCIE(InfoExtractor): +    _VALID_URL = r'http://www\.nbc\.com/[^/]+/video/[^/]+/(?P<id>n?\d+)' + +    _TEST = { +        'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', +        'md5': '54d0fbc33e0b853a65d7b4de5c06d64e', +        'info_dict': { +            'id': 'u1RInQZRN7QJ', +            'ext': 'flv', +            'title': 'I Am a Firefighter', +            'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', +        }, +    } + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') +        webpage = self._download_webpage(url, video_id) +        theplatform_url = self._search_regex('class="video-player video-player-full" data-mpx-url="(.*?)"', webpage, 'theplatform url') +        if theplatform_url.startswith('//'): +            theplatform_url = 'http:' + theplatform_url +        return self.url_result(theplatform_url) + +  class NBCNewsIE(InfoExtractor):      _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)' diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py index d60702325..91f2453eb 
100644 --- a/youtube_dl/extractor/theplatform.py +++ b/youtube_dl/extractor/theplatform.py @@ -13,7 +13,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language  class ThePlatformIE(InfoExtractor):      _VALID_URL = r'''(?x)          (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/ -           (?P<config>[^/\?]+/(?:swf|config)/select/)? +           (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?           |theplatform:)(?P<id>[^/\?&]+)'''      _TEST = { @@ -54,10 +54,15 @@ class ThePlatformIE(InfoExtractor):          f4m_node = body.find(_x('smil:seq/smil:video'))          if f4m_node is not None: +            f4m_url = f4m_node.attrib['src'] +            if 'manifest.f4m?' not in f4m_url: +                f4m_url += '?' +            # the parameters are from syfy.com, other sites may use others, +            # they also work for nbc.com +            f4m_url += '&g=UXWGVKRWHFSP&hdcore=3.0.3'              formats = [{                  'ext': 'flv', -                # the parameters are from syfy.com, other sites may use others -                'url': f4m_node.attrib['src'] + '?g=UXWGVKRWHFSP&hdcore=3.0.3', +                'url': f4m_url,              }]          else:              base_url = head.find(_x('smil:meta')).attrib['base'] @@ -95,9 +100,10 @@ class ThePlatformIE(InfoExtractor):          if mobj.group('config'):              config_url = url+ '&form=json'              config_url = config_url.replace('swf/', 'config/') +            config_url = config_url.replace('onsite/', 'onsite/config/')              config_json = self._download_webpage(config_url, video_id, u'Downloading config')              config = json.loads(config_json) -            smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4' +            smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m'          else:              smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?'                  
'format=smil&mbr=true'.format(video_id)) | 
