diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-11-08 00:29:12 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-11-08 00:29:12 +0700 | 
| commit | ebc7ab1e231483f189290608425a23590cae6af9 (patch) | |
| tree | 8bb913c300fb6b28373d9799579cc4ca483f00cb | |
| parent | 97726317ac8e905dc72e75c7c2a823280c51af00 (diff) | |
[espn] Fix extraction (closes #11041)
| -rw-r--r-- | youtube_dl/extractor/espn.py | 126 | 
1 files changed, 94 insertions, 32 deletions
| diff --git a/youtube_dl/extractor/espn.py b/youtube_dl/extractor/espn.py index 6d10f8e68..8795e0ddf 100644 --- a/youtube_dl/extractor/espn.py +++ b/youtube_dl/extractor/espn.py @@ -1,38 +1,117 @@  from __future__ import unicode_literals  from .common import InfoExtractor -from ..utils import remove_end +from ..compat import compat_str +from ..utils import ( +    determine_ext, +    int_or_none, +    unified_timestamp, +)  class ESPNIE(InfoExtractor): -    _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)' +    _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'      _TESTS = [{          'url': 'http://espn.go.com/video/clip?id=10365079', -        'md5': '60e5d097a523e767d06479335d1bdc58',          'info_dict': { -            'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG', +            'id': '10365079',              'ext': 'mp4',              'title': '30 for 30 Shorts: Judging Jewell', -            'description': None, +            'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f', +            'timestamp': 1390936111, +            'upload_date': '20140128',          },          'params': {              'skip_download': True,          }, -        'add_ie': ['OoyalaExternal'],      }, {          # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season          'url': 'http://espn.go.com/video/clip?id=2743663', -        'md5': 'f4ac89b59afc7e2d7dbb049523df6768',          'info_dict': { -            'id': '50NDFkeTqRHB0nXBOK-RGdSG5YQPuxHg', +            'id': '2743663',              'ext': 'mp4',              'title': 'Must-See Moments: Best of the MLS season', +            'description': 'md5:4c2d7232beaea572632bec41004f0aeb', +            'timestamp': 1449446454, +            'upload_date': '20151207',          },          'params': {              'skip_download': True,          }, -        'add_ie': ['OoyalaExternal'], +        'expected_warnings': ['Unable to download f4m manifest'],      }, { +        'url': 'http://www.espn.com/video/clip?id=10365079', +        'only_matching': True, +    }, { +        'url': 'http://www.espn.com/video/clip/_/id/17989860', +        'only_matching': True, +    }] + +    def _real_extract(self, url): +        video_id = self._match_id(url) + +        clip = self._download_json( +            'http://api-app.espn.com/v1/video/clips/%s' % video_id, +            video_id)['videos'][0] + +        title = clip['headline'] + +        format_urls = set() +        formats = [] + +        def traverse_source(source, base_source_id=None): +            for source_id, source in source.items(): +                if isinstance(source, compat_str): +                    extract_source(source, base_source_id) +                elif isinstance(source, dict): +                    traverse_source( +                        source, +                        '%s-%s' % (base_source_id, source_id) +                        if base_source_id else source_id) + +        def extract_source(source_url, source_id=None): +            if source_url in format_urls: +                return +            format_urls.add(source_url) +            ext = determine_ext(source_url) +            if ext == 'smil': +                formats.extend(self._extract_smil_formats( +                    source_url, video_id, fatal=False)) +            elif ext == 'f4m': +                formats.extend(self._extract_f4m_formats( +                    source_url, video_id, f4m_id=source_id, fatal=False)) +            elif ext == 'm3u8': +                formats.extend(self._extract_m3u8_formats( +                    source_url, video_id, 'mp4', entry_protocol='m3u8_native', +                    m3u8_id=source_id, fatal=False)) +            else: +                formats.append({ +                    'url': source_url, +                    'format_id': source_id, +                }) + +        traverse_source(clip['links']['source']) +        self._sort_formats(formats) + +        description = clip.get('caption') or clip.get('description') +        thumbnail = clip.get('thumbnail') +        duration = int_or_none(clip.get('duration')) +        timestamp = unified_timestamp(clip.get('originalPublishDate')) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'thumbnail': thumbnail, +            'timestamp': timestamp, +            'duration': duration, +            'formats': formats, +        } + + +class ESPNArticleIE(InfoExtractor): +    _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)' +    _TESTS = [{          'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',          'only_matching': True,      }, { @@ -47,11 +126,12 @@ class ESPNIE(InfoExtractor):      }, {          'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',          'only_matching': True, -    }, { -        'url': 'http://www.espn.com/video/clip?id=10365079', -        'only_matching': True,      }] +    @classmethod +    def suitable(cls, url): +        return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url) +      def _real_extract(self, url):          video_id = self._match_id(url) @@ -61,23 +141,5 @@ class ESPNIE(InfoExtractor):              r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',              webpage, 'video id', group='id') -        cms = 'espn' -        if 'data-source="intl"' in webpage: -            cms = 'intl' -        player_url = 'https://espn.go.com/video/iframe/twitter/?id=%s&cms=%s' % (video_id, cms) -        player = self._download_webpage( -            player_url, video_id) - -        pcode = self._search_regex( -            r'["\']pcode=([^"\']+)["\']', player, 'pcode') - -        title = remove_end( -            self._og_search_title(webpage), -            '- ESPN Video').strip() - -        return { -            '_type': 'url_transparent', -            'url': 'ooyalaexternal:%s:%s:%s' % (cms, video_id, pcode), -            'ie_key': 'OoyalaExternal', -            'title': title, -        } +        return self.url_result( +            'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key()) | 
