diff options
| -rw-r--r-- | youtube_dl/extractor/ted.py | 49 | 
1 files changed, 34 insertions, 15 deletions
| diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 3f3c681ae..f27d0e313 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -6,7 +6,10 @@ import re  from .common import InfoExtractor  from ..compat import compat_str -from ..utils import int_or_none +from ..utils import ( +    int_or_none, +    try_get, +)  class TEDIE(InfoExtractor): @@ -113,8 +116,9 @@ class TEDIE(InfoExtractor):      }      def _extract_info(self, webpage): -        info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>', -                                       webpage, 'info json') +        info_json = self._search_regex( +            r'(?s)q\(\s*"\w+.init"\s*,\s*({.+})\)\s*</script>', +            webpage, 'info json')          return json.loads(info_json)      def _real_extract(self, url): @@ -136,11 +140,16 @@ class TEDIE(InfoExtractor):          webpage = self._download_webpage(url, name,                                           'Downloading playlist webpage')          info = self._extract_info(webpage) -        playlist_info = info['playlist'] + +        playlist_info = try_get( +            info, lambda x: x['__INITIAL_DATA__']['playlist'], +            dict) or info['playlist']          playlist_entries = [              self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key()) -            for talk in info['talks'] +            for talk in try_get( +                info, lambda x: x['__INITIAL_DATA__']['talks'], +                dict) or info['talks']          ]          return self.playlist_result(              playlist_entries, @@ -149,9 +158,14 @@ class TEDIE(InfoExtractor):      def _talk_info(self, url, video_name):          webpage = self._download_webpage(url, video_name) -        self.report_extraction(video_name) -        talk_info = self._extract_info(webpage)['talks'][0] +        info = self._extract_info(webpage) + +        talk_info = try_get( +            info, lambda x: x['__INITIAL_DATA__']['talks'][0], +            dict) or info['talks'][0] + +        title = talk_info['title'].strip()          external = talk_info.get('external')          if external: @@ -165,19 +179,27 @@ class TEDIE(InfoExtractor):                  'url': ext_url or external['uri'],              } +        native_downloads = try_get( +            talk_info, lambda x: x['downloads']['nativeDownloads'], +            dict) or talk_info['nativeDownloads'] +          formats = [{              'url': format_url,              'format_id': format_id,              'format': format_id, -        } for (format_id, format_url) in talk_info['nativeDownloads'].items() if format_url is not None] +        } for (format_id, format_url) in native_downloads.items() if format_url is not None]          if formats:              for f in formats:                  finfo = self._NATIVE_FORMATS.get(f['format_id'])                  if finfo:                      f.update(finfo) +        player_talk = talk_info['player_talks'][0] + +        resources_ = player_talk.get('resources') or talk_info.get('resources') +          http_url = None -        for format_id, resources in talk_info['resources'].items(): +        for format_id, resources in resources_.items():              if format_id == 'h264':                  for resource in resources:                      h264_url = resource.get('file') @@ -237,14 +259,11 @@ class TEDIE(InfoExtractor):          video_id = compat_str(talk_info['id']) -        thumbnail = talk_info['thumb'] -        if not thumbnail.startswith('http'): -            thumbnail = 'http://' + thumbnail          return {              'id': video_id, -            'title': talk_info['title'].strip(), -            'uploader': talk_info['speaker'], -            'thumbnail': thumbnail, +            'title': title, +            'uploader': player_talk.get('speaker') or talk_info.get('speaker'), +            'thumbnail': player_talk.get('thumb') or talk_info.get('thumb'),              'description': self._og_search_description(webpage),              'subtitles': self._get_subtitles(video_id, talk_info),              'formats': formats, | 
