about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/common.py 20
-rw-r--r-- youtube_dl/extractor/ted.py 18
2 files changed, 28 insertions, 10 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index c784eedb9..161c623eb 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -151,8 +151,14 @@ class InfoExtractor(object):
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
location: Physical location where the video was filmed.
- subtitles: The subtitle file contents as a dictionary in the format
- {language: subtitles}.
+ subtitles: The available subtitles as a dictionary in the format
+ {language: subformats}. "subformats" is a list sorted from
+ lower to higher preference, each element is a dictionary
+ with the "ext" entry and one of:
+ * "data": The subtitles file contents
+ * "url": A url pointing to the subtitles file
+ Note: YoutubeDL.extract_info will get the requested
+ format and replace the "subformats" list with it.
duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
@@ -993,6 +999,16 @@ class InfoExtractor(object):
any_restricted = any_restricted or is_restricted
return not any_restricted
+ def extract_subtitles(self, *args, **kwargs):
+ subtitles = {}
+ list_subtitles = self._downloader.params.get('listsubtitles')
+ if self._downloader.params.get('writesubtitles', False) or list_subtitles:
+ subtitles.update(self._get_subtitles(*args, **kwargs))
+ return subtitles
+
+ def _get_subtitles(self, *args, **kwargs):
+ raise NotImplementedError("This method must be implemented by subclasses")
+
class SearchInfoExtractor(InfoExtractor):
"""
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 10b3b706a..1809eaae4 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -3,14 +3,14 @@ from __future__ import unicode_literals
import json
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_str,
)
-class TEDIE(SubtitlesInfoExtractor):
+class TEDIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?P<proto>https?://)
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
@@ -165,9 +165,6 @@ class TEDIE(SubtitlesInfoExtractor):
video_id = compat_str(talk_info['id'])
# subtitles
video_subtitles = self.extract_subtitles(video_id, talk_info)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, talk_info)
- return
thumbnail = talk_info['thumb']
if not thumbnail.startswith('http'):
@@ -183,13 +180,18 @@ class TEDIE(SubtitlesInfoExtractor):
'duration': talk_info.get('duration'),
}
- def _get_available_subtitles(self, video_id, talk_info):
+ def _get_subtitles(self, video_id, talk_info):
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
if languages:
sub_lang_list = {}
for l in languages:
- url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
- sub_lang_list[l] = url
+ sub_lang_list[l] = [
+ {
+ 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
+ 'ext': ext,
+ }
+ for ext in ['ted', 'srt']
+ ]
return sub_lang_list
else:
self._downloader.report_warning('video doesn\'t have subtitles')