diff options
| -rw-r--r-- | youtube_dl/extractor/ted.py | 103 | 
1 files changed, 57 insertions, 46 deletions
| diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index dc9c5ce8e..212ac80ab 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -7,8 +7,10 @@ from .common import InfoExtractor  from ..compat import compat_str  from ..utils import ( +    float_or_none,      int_or_none,      try_get, +    url_or_none,  ) @@ -30,7 +32,7 @@ class TEDIE(InfoExtractor):          '''      _TESTS = [{          'url': 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html', -        'md5': '0de43ac406aa3e4ea74b66c9c7789b13', +        'md5': 'b0ce2b05ca215042124fbc9e3886493a',          'info_dict': {              'id': '102',              'ext': 'mp4', @@ -42,24 +44,30 @@ class TEDIE(InfoExtractor):              'uploader': 'Dan Dennett',              'width': 853,              'duration': 1308, -        } +            'view_count': int, +            'comment_count': int, +            'tags': list, +        }, +        'params': { +            'skip_download': True, +        },      }, { -        'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms', -        'md5': 'b899ac15e345fb39534d913f7606082b', +        # missing HTTP bitrates +        'url': 'https://www.ted.com/talks/vishal_sikka_the_beauty_and_power_of_algorithms',          'info_dict': { -            'id': 'tSVI8ta_P4w', +            'id': '6069',              'ext': 'mp4', -            'title': 'Vishal Sikka: The beauty and power of algorithms', +            'title': 'The beauty and power of algorithms',              'thumbnail': r're:^https?://.+\.jpg', -            'description': 'md5:6261fdfe3e02f4f579cbbfc00aff73f4', -            'upload_date': '20140122', -            'uploader_id': 'TEDInstitute', -            'uploader': 'TED Institute', +            'description': 'md5:734e352710fb00d840ab87ae31aaf688', +            'uploader': 'Vishal Sikka', +        }, +        'params': { +            'skip_download': True,          }, -        'add_ie': ['Youtube'],      }, {          'url': 'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best', -        'md5': '71b3ab2f4233012dce09d515c9c39ce2', +        'md5': 'e6b9617c01a7970ceac8bb2c92c346c0',          'info_dict': {              'id': '1972',              'ext': 'mp4', @@ -68,6 +76,9 @@ class TEDIE(InfoExtractor):              'description': 'md5:5174aed4d0f16021b704120360f72b92',              'duration': 1128,          }, +        'params': { +            'skip_download': True, +        },      }, {          'url': 'http://www.ted.com/playlists/who_are_the_hackers',          'info_dict': { @@ -92,22 +103,6 @@ class TEDIE(InfoExtractor):              'skip_download': True,          },      }, { -        # YouTube video -        'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond', -        'add_ie': ['Youtube'], -        'info_dict': { -            'id': 'aFBIPO-P7LM', -            'ext': 'mp4', -            'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville', -            'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1', -            'uploader': 'TEDx Talks', -            'uploader_id': 'TEDxTalks', -            'upload_date': '20111216', -        }, -        'params': { -            'skip_download': True, -        }, -    }, {          # no nativeDownloads          'url': 'https://www.ted.com/talks/tom_thum_the_orchestra_in_my_mouth',          'info_dict': { @@ -116,6 +111,9 @@ class TEDIE(InfoExtractor):              'title': 'The orchestra in my mouth',              'description': 'md5:5d1d78650e2f8dfcbb8ebee2951ac29a',              'uploader': 'Tom Thum', +            'view_count': int, +            'comment_count': int, +            'tags': list,          },          'params': {              'skip_download': True, @@ -174,24 +172,11 @@ class TEDIE(InfoExtractor):          info = self._extract_info(webpage) -        talk_info = try_get( -            info, lambda x: x['__INITIAL_DATA__']['talks'][0], -            dict) or info['talks'][0] +        data = try_get(info, lambda x: x['__INITIAL_DATA__'], dict) or info +        talk_info = data['talks'][0]          title = talk_info['title'].strip() -        external = talk_info.get('external') -        if external: -            service = external['service'] -            self.to_screen('Found video from %s' % service) -            ext_url = None -            if service.lower() == 'youtube': -                ext_url = external.get('code') -            return { -                '_type': 'url', -                'url': ext_url or external['uri'], -            } -          native_downloads = try_get(              talk_info,              (lambda x: x['downloads']['nativeDownloads'], @@ -211,10 +196,24 @@ class TEDIE(InfoExtractor):          player_talk = talk_info['player_talks'][0] +        external = player_talk.get('external') +        if isinstance(external, dict): +            service = external.get('service') +            if isinstance(service, compat_str): +                ext_url = None +                if service.lower() == 'youtube': +                    ext_url = external.get('code') +                return { +                    '_type': 'url', +                    'url': ext_url or external['uri'], +                } +          resources_ = player_talk.get('resources') or talk_info.get('resources')          http_url = None          for format_id, resources in resources_.items(): +            if not isinstance(resources, dict): +                continue              if format_id == 'h264':                  for resource in resources:                      h264_url = resource.get('file') @@ -243,8 +242,12 @@ class TEDIE(InfoExtractor):                          'tbr': int_or_none(resource.get('bitrate')),                      })              elif format_id == 'hls': +                stream_url = url_or_none(resources.get('stream')) +                if not stream_url: +                    continue                  formats.extend(self._extract_m3u8_formats( -                    resources.get('stream'), video_name, 'mp4', m3u8_id=format_id, fatal=False)) +                    stream_url, video_name, 'mp4', m3u8_id=format_id, +                    fatal=False))          m3u8_formats = list(filter(              lambda f: f.get('protocol') == 'm3u8' and f.get('vcodec') != 'none', @@ -254,9 +257,13 @@ class TEDIE(InfoExtractor):                  bitrate = self._search_regex(r'(\d+k)', m3u8_format['url'], 'bitrate', default=None)                  if not bitrate:                      continue +                bitrate_url = re.sub(r'\d+k', bitrate, http_url) +                if not self._is_valid_url( +                        bitrate_url, video_name, '%s bitrate' % bitrate): +                    continue                  f = m3u8_format.copy()                  f.update({ -                    'url': re.sub(r'\d+k', bitrate, http_url), +                    'url': bitrate_url,                      'format_id': m3u8_format['format_id'].replace('hls', 'http'),                      'protocol': 'http',                  }) @@ -282,7 +289,11 @@ class TEDIE(InfoExtractor):              'description': self._og_search_description(webpage),              'subtitles': self._get_subtitles(video_id, talk_info),              'formats': formats, -            'duration': talk_info.get('duration'), +            'duration': float_or_none(talk_info.get('duration')), +            'view_count': int_or_none(data.get('viewed_count')), +            'comment_count': int_or_none( +                try_get(data, lambda x: x['comments']['count'])), +            'tags': try_get(talk_info, lambda x: x['tags'], list),          }      def _get_subtitles(self, video_id, talk_info): | 
