aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/test_subtitles.py24
-rwxr-xr-xyoutube_dl/YoutubeDL.py85
-rw-r--r--youtube_dl/__init__.py1
-rw-r--r--youtube_dl/extractor/common.py20
-rw-r--r--youtube_dl/extractor/ted.py18
-rw-r--r--youtube_dl/options.py4
-rw-r--r--youtube_dl/postprocessor/ffmpeg.py6
7 files changed, 121 insertions, 37 deletions
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index bcc69a778..fbc9eaf4d 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -27,15 +27,23 @@ class BaseTestSubtitles(unittest.TestCase):
def setUp(self):
self.DL = FakeYDL()
- self.ie = self.IE(self.DL)
+ self.ie = self.IE()
+ self.DL.add_info_extractor(self.ie)
def getInfoDict(self):
- info_dict = self.ie.extract(self.url)
+ info_dict = self.DL.extract_info(self.url, download=False)
return info_dict
def getSubtitles(self):
info_dict = self.getInfoDict()
- return info_dict['subtitles']
+ subtitles = info_dict['subtitles']
+ if not subtitles:
+ return subtitles
+ for sub_info in subtitles.values():
+ if sub_info.get('data') is None:
+ uf = self.DL.urlopen(sub_info['url'])
+ sub_info['data'] = uf.read().decode('utf-8')
+ return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
class TestYoutubeSubtitles(BaseTestSubtitles):
@@ -176,7 +184,7 @@ class TestTedSubtitles(BaseTestSubtitles):
def test_no_writesubtitles(self):
subtitles = self.getSubtitles()
- self.assertEqual(subtitles, None)
+ self.assertFalse(subtitles)
def test_subtitles(self):
self.DL.params['writesubtitles'] = True
@@ -196,18 +204,10 @@ class TestTedSubtitles(BaseTestSubtitles):
self.assertTrue(len(subtitles.keys()) >= 28)
def test_list_subtitles(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
self.DL.params['listsubtitles'] = True
info_dict = self.getInfoDict()
self.assertEqual(info_dict, None)
- def test_automatic_captions(self):
- self.DL.expect_warning('Automatic Captions not supported by this server')
- self.DL.params['writeautomaticsub'] = True
- self.DL.params['subtitleslang'] = ['en']
- subtitles = self.getSubtitles()
- self.assertTrue(len(subtitles.keys()) == 0)
-
def test_multiple_langs(self):
self.DL.params['writesubtitles'] = True
langs = ['es', 'fr', 'de']
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 13d18e25e..e665e3d53 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -154,7 +154,7 @@ class YoutubeDL(object):
allsubtitles: Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
- subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
+ subtitlesformat: The format code for subtitles
subtitleslangs: List of languages of the subtitles to download
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
@@ -1019,6 +1019,11 @@ class YoutubeDL(object):
info_dict['timestamp'])
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+ if self.params.get('listsubtitles', False):
+ self.list_subtitles(info_dict['id'], info_dict.get('subtitles'))
+ return
+ info_dict['subtitles'] = self.process_subtitles(info_dict['id'], info_dict.get('subtitles'))
+
# This extractors handle format selection themselves
if info_dict['extractor'] in ['Youku']:
if download:
@@ -1147,6 +1152,53 @@ class YoutubeDL(object):
info_dict.update(formats_to_download[-1])
return info_dict
+ def process_subtitles(self, video_id, available_subs):
+ """Select the requested subtitles and their format"""
+ if not available_subs:
+ return available_subs
+
+ if self.params.get('allsubtitles', False):
+ requested_langs = available_subs.keys()
+ else:
+ if self.params.get('subtitleslangs', False):
+ requested_langs = self.params.get('subtitleslangs')
+ elif 'en' in available_subs:
+ requested_langs = ['en']
+ else:
+ requested_langs = [list(available_subs.keys())[0]]
+
+ formats_query = self.params.get('subtitlesformat', 'best')
+ formats_preference = formats_query.split('/') if formats_query else []
+ subs = {}
+ for lang in requested_langs:
+ formats = available_subs.get(lang)
+ if formats is None:
+ self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+ continue
+ if isinstance(formats, compat_str):
+ # TODO: convert all IE with subtitles support to the new format
+ # and remove this
+ subs[lang] = {
+ 'ext': formats_preference[0],
+ 'data': formats,
+ }
+ continue
+ for ext in formats_preference:
+ if ext == 'best':
+ f = formats[-1]
+ break
+ matches = list(filter(lambda f: f['ext'] == ext, formats))
+ if matches:
+ f = matches[-1]
+ break
+ else:
+ f = formats[-1]
+ self.report_warning(
+ 'No subtitle format found matching "%s" for language %s, '
+ 'using %s' % (formats_query, lang, f['ext']))
+ subs[lang] = f
+ return subs
+
def process_info(self, info_dict):
"""Process a single resolved IE result."""
@@ -1253,11 +1305,18 @@ class YoutubeDL(object):
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['subtitles']
- sub_format = self.params.get('subtitlesformat', 'srt')
- for sub_lang in subtitles.keys():
- sub = subtitles[sub_lang]
- if sub is None:
- continue
+ for sub_lang, sub_info in subtitles.items():
+ sub_format = sub_info['ext']
+ if sub_info.get('data') is not None:
+ sub_data = sub_info['data']
+ else:
+ try:
+ uf = self.urlopen(sub_info['url'])
+ sub_data = uf.read().decode('utf-8')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, compat_str(err)))
+ continue
try:
sub_filename = subtitles_filename(filename, sub_lang, sub_format)
if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
@@ -1265,7 +1324,7 @@ class YoutubeDL(object):
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
- subfile.write(sub)
+ subfile.write(sub_data)
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
@@ -1586,6 +1645,18 @@ class YoutubeDL(object):
['ID', 'width', 'height', 'URL'],
[[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
+ def list_subtitles(self, video_id, subtitles):
+ if not subtitles:
+ self.to_screen('%s has no subtitles' % video_id)
+ return
+ header_line = 'Language formats'
+ sub_lines = [
+ '%-12s%s' % (lang, ', '.join(f['ext'] for f in reversed(formats)))
+ for lang, formats in subtitles.items()]
+ self.to_screen(
+ 'Available subtitles for %s:\n%s\n%s' %
+ (video_id, header_line, '\n'.join(sub_lines)))
+
def urlopen(self, req):
""" Start an HTTP download """
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index ed22f169f..5f2585003 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -226,7 +226,6 @@ def _real_main(argv=None):
if opts.embedsubtitles:
postprocessors.append({
'key': 'FFmpegEmbedSubtitle',
- 'subtitlesformat': opts.subtitlesformat,
})
if opts.xattrs:
postprocessors.append({'key': 'XAttrMetadata'})
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index c784eedb9..161c623eb 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -151,8 +151,14 @@ class InfoExtractor(object):
If not explicitly set, calculated from timestamp.
uploader_id: Nickname or id of the video uploader.
location: Physical location where the video was filmed.
- subtitles: The subtitle file contents as a dictionary in the format
- {language: subtitles}.
+ subtitles: The available subtitles as a dictionary in the format
+ {language: subformats}. "subformats" is a list sorted from
+ lower to higher preference, each element is a dictionary
+ with the "ext" entry and one of:
+ * "data": The subtitles file contents
+ * "url": A url pointing to the subtitles file
+ Note: YoutubeDL.extract_info will get the requested
+ format and replace the "subformats" list with it.
duration: Length of the video in seconds, as an integer.
view_count: How many users have watched the video on the platform.
like_count: Number of positive ratings of the video
@@ -993,6 +999,16 @@ class InfoExtractor(object):
any_restricted = any_restricted or is_restricted
return not any_restricted
+ def extract_subtitles(self, *args, **kwargs):
+ subtitles = {}
+ list_subtitles = self._downloader.params.get('listsubtitles')
+ if self._downloader.params.get('writesubtitles', False) or list_subtitles:
+ subtitles.update(self._get_subtitles(*args, **kwargs))
+ return subtitles
+
+ def _get_subtitles(self, *args, **kwargs):
+ raise NotImplementedError("This method must be implemented by subclasses")
+
class SearchInfoExtractor(InfoExtractor):
"""
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 10b3b706a..1809eaae4 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -3,14 +3,14 @@ from __future__ import unicode_literals
import json
import re
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
from ..compat import (
compat_str,
)
-class TEDIE(SubtitlesInfoExtractor):
+class TEDIE(InfoExtractor):
_VALID_URL = r'''(?x)
(?P<proto>https?://)
(?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
@@ -165,9 +165,6 @@ class TEDIE(SubtitlesInfoExtractor):
video_id = compat_str(talk_info['id'])
# subtitles
video_subtitles = self.extract_subtitles(video_id, talk_info)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, talk_info)
- return
thumbnail = talk_info['thumb']
if not thumbnail.startswith('http'):
@@ -183,13 +180,18 @@ class TEDIE(SubtitlesInfoExtractor):
'duration': talk_info.get('duration'),
}
- def _get_available_subtitles(self, video_id, talk_info):
+ def _get_subtitles(self, video_id, talk_info):
languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
if languages:
sub_lang_list = {}
for l in languages:
- url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
- sub_lang_list[l] = url
+ sub_lang_list[l] = [
+ {
+ 'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
+ 'ext': ext,
+ }
+ for ext in ['ted', 'srt']
+ ]
return sub_lang_list
else:
self._downloader.report_warning('video doesn\'t have subtitles')
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 873432bee..4fcf8c83d 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -387,8 +387,8 @@ def parseOpts(overrideArguments=None):
help='lists all available subtitles for the video')
subtitles.add_option(
'--sub-format',
- action='store', dest='subtitlesformat', metavar='FORMAT', default='srt',
- help='subtitle format (default=srt) ([sbv/vtt] youtube only)')
+ action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
+ help='subtitle format, accepts formats preference, for example: "ass/srt/best"')
subtitles.add_option(
'--sub-lang', '--sub-langs', '--srt-lang',
action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 5238ce534..d1bbfbfe3 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -453,10 +453,6 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
'zu': 'zul',
}
- def __init__(self, downloader=None, subtitlesformat='srt'):
- super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
- self._subformat = subtitlesformat
-
@classmethod
def _conver_lang_code(cls, code):
"""Convert language code from ISO 639-1 to ISO 639-2/T"""
@@ -472,7 +468,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
sub_langs = [key for key in information['subtitles']]
filename = information['filepath']
- input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
+ input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in information['subtitles'].items()]
opts = [
'-map', '0',