diff options
| author | Naglis Jonaitis <njonaitis@gmail.com> | 2014-09-23 00:28:19 +0300 | 
|---|---|---|
| committer | Naglis Jonaitis <njonaitis@gmail.com> | 2014-09-23 00:28:19 +0300 | 
| commit | 632e5684ce797eb8a7372eb25dd4ce299f2e66de (patch) | |
| tree | d9bbf5fdd7ee6bf0bb3ba9f6947312d41dfeea28 | |
| parent | 094d42fe443c8f7ad5bd9049d63317195ab8fd3a (diff) | |
[nfl] Add new extractor. (Closes #3815)
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/nfl.py | 103 | 
2 files changed, 104 insertions, 0 deletions
```diff
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 244d22297..1f1fc0eb2 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -240,6 +240,7 @@ from .ndtv import NDTVIE
 from .newgrounds import NewgroundsIE
 from .newstube import NewstubeIE
 from .nfb import NFBIE
+from .nfl import NFLIE
 from .nhl import NHLIE, NHLVideocenterIE
 from .niconico import NiconicoIE
 from .ninegag import NineGagIE
diff --git a/youtube_dl/extractor/nfl.py b/youtube_dl/extractor/nfl.py
new file mode 100644
index 000000000..f53596f5e
--- /dev/null
+++ b/youtube_dl/extractor/nfl.py
@@ -0,0 +1,103 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    remove_end,
+)
+
+
+class NFLIE(InfoExtractor):
+    IE_NAME = 'nfl.com'
+    _VALID_URL = r'(?x)https?://(?:www\.)?nfl\.com/(?:videos/(?:.+)/|.*?\#video=)(?P<id>\d..[0-9]+)'
+    _PLAYER_CONFIG_URL = 'http://www.nfl.com/static/content/static/config/video/config.json'
+    _TEST = {
+        'url': 'http://www.nfl.com/videos/nfl-game-highlights/0ap3000000398478/Week-3-Redskins-vs-Eagles-highlights',
+        'skip_download': True,  # md5 sum fluctuates
+        'info_dict': {
+            'id': '0ap3000000398478',
+            'ext': 'mp4',
+            'title': 'Week 3: Washington Redskins vs. Philadelphia Eagles highlights',
+            'description': 'md5:56323bfb0ac4ee5ab24bd05fdf3bf478',
+            'upload_date': '20140921',
+            'timestamp': 1411337580,
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        config = self._download_json(self._PLAYER_CONFIG_URL, video_id,
+                                     note='Downloading player config')
+        url_template = 'http://nfl.com{contentURLTemplate:s}'.format(**config)
+        video_data = self._download_json(url_template.format(id=video_id), video_id)
+
+        cdns = config.get('cdns')
+        if not cdns:
+            raise ExtractorError('Failed to get CDN data', expected=True)
+
+        formats = []
+        streams = video_data.get('cdnData', {}).get('bitrateInfo', [])
+        for name, cdn in cdns.items():
+            # LimeLight streams don't seem to work
+            if cdn.get('name') == 'LIMELIGHT':
+                continue
+
+            protocol = cdn.get('protocol')
+            host = remove_end(cdn.get('host', ''), '/')
+            if not (protocol and host):
+                continue
+
+            path_prefix = cdn.get('pathprefix', '')
+            if path_prefix and not path_prefix.endswith('/'):
+                path_prefix = '%s/' % path_prefix
+
+            get_url = lambda p: '{protocol:s}://{host:s}/{prefix:s}{path:}'.format(
+                protocol=protocol,
+                host=host,
+                prefix=path_prefix,
+                path=p,
+            )
+
+            if protocol == 'rtmp':
+                preference = -2
+            elif 'prog' in name.lower():
+                preference = -1
+            else:
+                preference = 0
+
+            for stream in streams:
+                path = stream.get('path')
+                if not path:
+                    continue
+
+                formats.append({
+                    'url': get_url(path),
+                    'vbr': int_or_none(stream.get('rate', 0), 1000),
+                    'preference': preference,
+                    'format_note': name,
+                })
+
+        self._sort_formats(formats)
+
+        thumbnail = None
+        for q in ('xl', 'l', 'm', 's', 'xs'):
+            thumbnail = video_data.get('imagePaths', {}).get(q)
+            if thumbnail:
+                break
+
+        return {
+            'id': video_id,
+            'title': video_data.get('storyHeadline'),
+            'formats': formats,
+            'description': video_data.get('caption'),
+            'duration': video_data.get('duration'),
+            'thumbnail': thumbnail,
+            'timestamp': int_or_none(video_data.get('posted'), 1000),
+        }
```
