diff options
| author | biwubo <biwubo> | 2019-05-09 18:11:27 +0000 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2019-06-08 03:07:02 +0700 | 
| commit | c2ee6fa66ac082a74e645e605c346d0abe95afe8 (patch) | |
| tree | 475132bbd5e67204c4be2ac407d3a5524cd62637 /youtube_dl/extractor/ted.py | |
| parent | 4831ef7fe41cf4dfca5957c61635fb5a547ad9ad (diff) | |
[ted] Fix playlist extraction (closes #20844)
Diffstat (limited to 'youtube_dl/extractor/ted.py')
| -rw-r--r-- | youtube_dl/extractor/ted.py | 30 | 
1 file changed, 16 insertions, 14 deletions
```diff
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 645942dfd..17dc41a39 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -5,8 +5,12 @@ import re
 
 from .common import InfoExtractor
 
-from ..compat import compat_str
+from ..compat import (
+    compat_str,
+    compat_urlparse
+)
 from ..utils import (
+    extract_attributes,
     float_or_none,
     int_or_none,
     try_get,
@@ -20,7 +24,7 @@ class TEDIE(InfoExtractor):
         (?P<proto>https?://)
         (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
         (
-            (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist
+            (?P<type_playlist>playlists(?:/(?P<playlist_id>\d+))?) # We have a playlist
             |
             ((?P<type_talk>talks)) # We have a simple talk
             |
@@ -84,6 +88,7 @@ class TEDIE(InfoExtractor):
         'info_dict': {
             'id': '10',
             'title': 'Who are the hackers?',
+            'description': 'md5:49a0dbe8fb76d81a0e64b4a80af7f15a'
         },
         'playlist_mincount': 6,
     }, {
@@ -150,22 +155,19 @@ class TEDIE(InfoExtractor):
         webpage = self._download_webpage(url, name,
                                          'Downloading playlist webpage')
-        info = self._extract_info(webpage)
 
-        playlist_info = try_get(
-            info, lambda x: x['__INITIAL_DATA__']['playlist'],
-            dict) or info['playlist']
+        playlist_entries = []
+        for entry in re.findall(r'(?s)<[^>]+data-ga-context="playlist"[^>]*>', webpage):
+            attrs = extract_attributes(entry)
+            entry_url = compat_urlparse.urljoin(url, attrs['href'])
+            playlist_entries.append(self.url_result(entry_url, self.ie_key()))
 
-        playlist_entries = [
-            self.url_result('http://www.ted.com/talks/' + talk['slug'], self.ie_key())
-            for talk in try_get(
-                info, lambda x: x['__INITIAL_DATA__']['talks'],
-                dict) or info['talks']
-        ]
+        final_url = self._og_search_url(webpage)
 
         return self.playlist_result(
             playlist_entries,
-            playlist_id=compat_str(playlist_info['id']),
-            playlist_title=playlist_info['title'])
+            playlist_id=re.match(self._VALID_URL, final_url, re.VERBOSE).group('playlist_id'),
+            playlist_title=self._og_search_title(webpage),
+            playlist_description=self._og_search_description(webpage))
 
     def _talk_info(self, url, video_name):
         webpage = self._download_webpage(url, video_name)
```
