diff options
Diffstat (limited to 'youtube_dl/extractor/ted.py')
| -rw-r--r-- | youtube_dl/extractor/ted.py | 43 | 
1 file changed, 35 insertions, 8 deletions
| diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index d5e28efad..944177426 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -5,7 +5,7 @@ import re  from .subtitles import SubtitlesInfoExtractor -from ..utils import ( +from ..compat import (      compat_str,  ) @@ -33,11 +33,12 @@ class TEDIE(SubtitlesInfoExtractor):              'ext': 'mp4',              'title': 'The illusion of consciousness',              'description': ('Philosopher Dan Dennett makes a compelling ' -                'argument that not only don\'t we understand our own ' -                'consciousness, but that half the time our brains are ' -                'actively fooling us.'), +                            'argument that not only don\'t we understand our own ' +                            'consciousness, but that half the time our brains are ' +                            'actively fooling us.'),              'uploader': 'Dan Dennett',              'width': 854, +            'duration': 1308,          }      }, {          'url': 'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms', @@ -57,6 +58,7 @@ class TEDIE(SubtitlesInfoExtractor):              'title': 'Be passionate. Be courageous. 
Be your best.',              'uploader': 'Gabby Giffords and Mark Kelly',              'description': 'md5:5174aed4d0f16021b704120360f72b92', +            'duration': 1128,          },      }, {          'url': 'http://www.ted.com/playlists/who_are_the_hackers', @@ -65,6 +67,22 @@ class TEDIE(SubtitlesInfoExtractor):              'title': 'Who are the hackers?',          },          'playlist_mincount': 6, +    }, { +        # contains a youtube video +        'url': 'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything', +        'add_ie': ['Youtube'], +        'info_dict': { +            'id': '_ZG8HBuDjgc', +            'ext': 'mp4', +            'title': 'Douglas Adams: Parrots the Universe and Everything', +            'description': 'md5:01ad1e199c49ac640cb1196c0e9016af', +            'uploader': 'University of California Television (UCTV)', +            'uploader_id': 'UCtelevision', +            'upload_date': '20080522', +        }, +        'params': { +            'skip_download': True, +        },      }]      _NATIVE_FORMATS = { @@ -75,7 +93,7 @@ class TEDIE(SubtitlesInfoExtractor):      def _extract_info(self, webpage):          info_json = self._search_regex(r'q\("\w+.init",({.+})\)</script>', -            webpage, 'info json') +                                       webpage, 'info json')          return json.loads(info_json)      def _real_extract(self, url): @@ -95,7 +113,7 @@ class TEDIE(SubtitlesInfoExtractor):          '''Returns the videos of the playlist'''          webpage = self._download_webpage(url, name, -            'Downloading playlist webpage') +                                         'Downloading playlist webpage')          info = self._extract_info(webpage)          playlist_info = info['playlist'] @@ -114,6 +132,13 @@ class TEDIE(SubtitlesInfoExtractor):          talk_info = self._extract_info(webpage)['talks'][0] +        if talk_info.get('external') is not None: +            self.to_screen('Found video 
from %s' % talk_info['external']['service']) +            return { +                '_type': 'url', +                'url': talk_info['external']['uri'], +            } +          formats = [{              'url': format_url,              'format_id': format_id, @@ -155,6 +180,7 @@ class TEDIE(SubtitlesInfoExtractor):              'description': self._og_search_description(webpage),              'subtitles': video_subtitles,              'formats': formats, +            'duration': talk_info.get('duration'),          }      def _get_available_subtitles(self, video_id, talk_info): @@ -173,8 +199,9 @@ class TEDIE(SubtitlesInfoExtractor):          webpage = self._download_webpage(url, name)          config_json = self._html_search_regex( -            r"data-config='([^']+)", webpage, 'config') -        config = json.loads(config_json) +            r'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>', +            webpage, 'config') +        config = json.loads(config_json)['config']          video_url = config['video']['url']          thumbnail = config.get('image', {}).get('url') | 
