diff options
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/common.py | 20 | ||||
-rw-r--r-- | youtube_dl/extractor/ted.py | 18 |
2 files changed, 28 insertions, 10 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index c784eedb9..161c623eb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -151,8 +151,14 @@ class InfoExtractor(object): If not explicitly set, calculated from timestamp. uploader_id: Nickname or id of the video uploader. location: Physical location where the video was filmed. - subtitles: The subtitle file contents as a dictionary in the format - {language: subtitles}. + subtitles: The available subtitles as a dictionary in the format + {language: subformats}. "subformats" is a list sorted from + lower to higher preference, each element is a dictionary + with the "ext" entry and one of: + * "data": The subtitles file contents + * "url": A url pointing to the subtitles file + Note: YoutubeDL.extract_info will get the requested + format and replace the "subformats" list with it. duration: Length of the video in seconds, as an integer. view_count: How many users have watched the video on the platform. like_count: Number of positive ratings of the video @@ -993,6 +999,16 @@ class InfoExtractor(object): any_restricted = any_restricted or is_restricted return not any_restricted + def extract_subtitles(self, *args, **kwargs): + subtitles = {} + list_subtitles = self._downloader.params.get('listsubtitles') + if self._downloader.params.get('writesubtitles', False) or list_subtitles: + subtitles.update(self._get_subtitles(*args, **kwargs)) + return subtitles + + def _get_subtitles(self, *args, **kwargs): + raise NotImplementedError("This method must be implemented by subclasses") + class SearchInfoExtractor(InfoExtractor): """ diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py index 10b3b706a..1809eaae4 100644 --- a/youtube_dl/extractor/ted.py +++ b/youtube_dl/extractor/ted.py @@ -3,14 +3,14 @@ from __future__ import unicode_literals import json import re -from .subtitles import SubtitlesInfoExtractor +from .common import InfoExtractor from ..compat import ( compat_str, ) -class TEDIE(SubtitlesInfoExtractor): +class TEDIE(InfoExtractor): _VALID_URL = r'''(?x) (?P<proto>https?://) (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/ @@ -165,9 +165,6 @@ class TEDIE(SubtitlesInfoExtractor): video_id = compat_str(talk_info['id']) # subtitles video_subtitles = self.extract_subtitles(video_id, talk_info) - if self._downloader.params.get('listsubtitles', False): - self._list_available_subtitles(video_id, talk_info) - return thumbnail = talk_info['thumb'] if not thumbnail.startswith('http'): @@ -183,13 +180,18 @@ class TEDIE(SubtitlesInfoExtractor): 'duration': talk_info.get('duration'), } - def _get_available_subtitles(self, video_id, talk_info): + def _get_subtitles(self, video_id, talk_info): languages = [lang['languageCode'] for lang in talk_info.get('languages', [])] if languages: sub_lang_list = {} for l in languages: - url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) - sub_lang_list[l] = url + sub_lang_list[l] = [ + { + 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext), + 'ext': ext, + } + for ext in ['ted', 'srt'] + ] return sub_lang_list else: self._downloader.report_warning('video doesn\'t have subtitles') |