6 files changed, 109 insertions, 25 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 13d18e25e..e665e3d53 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -154,7 +154,7 @@ class YoutubeDL(object):
     allsubtitles:      Downloads all the subtitles of the video
                        (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
-    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
+    subtitlesformat:   The format code for subtitles
     subtitleslangs:    List of languages of the subtitles to download
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
@@ -1019,6 +1019,11 @@ class YoutubeDL(object):
                 info_dict['timestamp'])
             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 
+        if self.params.get('listsubtitles', False):
+            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'))
+            return
+        info_dict['subtitles'] = self.process_subtitles(info_dict['id'], info_dict.get('subtitles'))
+
         # This extractors handle format selection themselves
         if info_dict['extractor'] in ['Youku']:
             if download:
@@ -1147,6 +1152,53 @@ class YoutubeDL(object):
         info_dict.update(formats_to_download[-1])
         return info_dict
 
+    def process_subtitles(self, video_id, available_subs):
+        """Select the requested subtitles and their format"""
+        if not available_subs:
+            return available_subs
+
+        if self.params.get('allsubtitles', False):
+            requested_langs = available_subs.keys()
+        else:
+            if self.params.get('subtitleslangs', False):
+                requested_langs = self.params.get('subtitleslangs')
+            elif 'en' in available_subs:
+                requested_langs = ['en']
+            else:
+                requested_langs = [list(available_subs.keys())[0]]
+
+        formats_query = self.params.get('subtitlesformat', 'best')
+        formats_preference = formats_query.split('/') if formats_query else []
+        subs = {}
+        for lang in requested_langs:
+            formats = available_subs.get(lang)
+            if formats is None:
+                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+                continue
+            if isinstance(formats, compat_str):
+                # TODO: convert all IE with subtitles support to the new format
+                # and remove this
+                subs[lang] = {
+                    'ext': formats_preference[0],
+                    'data': formats,
+                }
+                continue
+            for ext in formats_preference:
+                if ext == 'best':
+                    f = formats[-1]
+                    break
+                matches = list(filter(lambda f: f['ext'] == ext, formats))
+                if matches:
+                    f = matches[-1]
+                    break
+            else:
+                f = formats[-1]
+                self.report_warning(
+                    'No subtitle format found matching "%s" for language %s, '
+                    'using %s' % (formats_query, lang, f['ext']))
+            subs[lang] = f
+        return subs
+
     def process_info(self, info_dict):
         """Process a single resolved IE result."""
 
@@ -1253,11 +1305,18 @@ class YoutubeDL(object):
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
             subtitles = info_dict['subtitles']
-            sub_format = self.params.get('subtitlesformat', 'srt')
-            for sub_lang in subtitles.keys():
-                sub = subtitles[sub_lang]
-                if sub is None:
-                    continue
+            for sub_lang, sub_info in subtitles.items():
+                sub_format = sub_info['ext']
+                if sub_info.get('data') is not None:
+                    sub_data = sub_info['data']
+                else:
+                    try:
+                        uf = self.urlopen(sub_info['url'])
+                        sub_data = uf.read().decode('utf-8')
+                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                        self.report_warning('Unable to download subtitle for "%s": %s' %
+                                            (sub_lang, compat_str(err)))
+                        continue
                 try:
                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
@@ -1265,7 +1324,7 @@ class YoutubeDL(object):
                     else:
                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                            subfile.write(sub)
+                            subfile.write(sub_data)
                 except (OSError, IOError):
                     self.report_error('Cannot write subtitles file ' + sub_filename)
                     return
@@ -1586,6 +1645,18 @@ class YoutubeDL(object):
             ['ID', 'width', 'height', 'URL'],
             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
 
+    def list_subtitles(self, video_id, subtitles):
+        if not subtitles:
+            self.to_screen('%s has no subtitles' % video_id)
+            return
+        header_line = 'Language    formats'
+        sub_lines = [
+            '%-12s%s' % (lang, ', '.join(f['ext'] for f in reversed(formats)))
+            for lang, formats in subtitles.items()]
+        self.to_screen(
+            'Available subtitles for %s:\n%s\n%s' %
+            (video_id, header_line, '\n'.join(sub_lines)))
+
     def urlopen(self, req):
         """ Start an HTTP download """
 
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index ed22f169f..5f2585003 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -226,7 +226,6 @@ def _real_main(argv=None):
     if opts.embedsubtitles:
         postprocessors.append({
             'key': 'FFmpegEmbedSubtitle',
-            'subtitlesformat': opts.subtitlesformat,
         })
     if opts.xattrs:
         postprocessors.append({'key': 'XAttrMetadata'})
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index c784eedb9..161c623eb 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -151,8 +151,14 @@ class InfoExtractor(object):
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
     location:       Physical location where the video was filmed.
-    subtitles:      The subtitle file contents as a dictionary in the format
-                    {language: subtitles}.
+    subtitles:      The available subtitles as a dictionary in the format
+                    {language: subformats}. "subformats" is a list sorted from
+                    lower to higher preference, each element is a dictionary
+                    with the "ext" entry and one of:
+                        * "data": The subtitles file contents
+                        * "url": A url pointing to the subtitles file
+                    Note: YoutubeDL.extract_info will get the requested
+                    format and replace the "subformats" list with it.
     duration:       Length of the video in seconds, as an integer.
     view_count:     How many users have watched the video on the platform.
     like_count:     Number of positive ratings of the video
@@ -993,6 +999,16 @@ class InfoExtractor(object):
             any_restricted = any_restricted or is_restricted
         return not any_restricted
 
+    def extract_subtitles(self, *args, **kwargs):
+        subtitles = {}
+        list_subtitles = self._downloader.params.get('listsubtitles')
+        if self._downloader.params.get('writesubtitles', False) or list_subtitles:
+            subtitles.update(self._get_subtitles(*args, **kwargs))
+        return subtitles
+
+    def _get_subtitles(self, *args, **kwargs):
+        raise NotImplementedError("This method must be implemented by subclasses")
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 10b3b706a..1809eaae4 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -3,14 +3,14 @@ from __future__ import unicode_literals
 import json
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 
 from ..compat import (
     compat_str,
 )
 
 
-class TEDIE(SubtitlesInfoExtractor):
+class TEDIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         (?P<proto>https?://)
         (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
@@ -165,9 +165,6 @@ class TEDIE(SubtitlesInfoExtractor):
         video_id = compat_str(talk_info['id'])
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, talk_info)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, talk_info)
-            return
 
         thumbnail = talk_info['thumb']
         if not thumbnail.startswith('http'):
@@ -183,13 +180,18 @@ class TEDIE(SubtitlesInfoExtractor):
             'duration': talk_info.get('duration'),
         }
 
-    def _get_available_subtitles(self, video_id, talk_info):
+    def _get_subtitles(self, video_id, talk_info):
         languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
         if languages:
             sub_lang_list = {}
             for l in languages:
-                url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
-                sub_lang_list[l] = url
+                sub_lang_list[l] = [
+                    {
+                        'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
+                        'ext': ext,
+                    }
+                    for ext in ['ted', 'srt']
+                ]
             return sub_lang_list
         else:
             self._downloader.report_warning('video doesn\'t have subtitles')
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 873432bee..4fcf8c83d 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -387,8 +387,8 @@ def parseOpts(overrideArguments=None):
         help='lists all available subtitles for the video')
     subtitles.add_option(
         '--sub-format',
-        action='store', dest='subtitlesformat', metavar='FORMAT', default='srt',
-        help='subtitle format (default=srt) ([sbv/vtt] youtube only)')
+        action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
+        help='subtitle format, accepts formats preference, for example: "ass/srt/best"')
     subtitles.add_option(
         '--sub-lang', '--sub-langs', '--srt-lang',
         action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 5238ce534..d1bbfbfe3 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -453,10 +453,6 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         'zu': 'zul',
     }
 
-    def __init__(self, downloader=None, subtitlesformat='srt'):
-        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
-        self._subformat = subtitlesformat
-
     @classmethod
     def _conver_lang_code(cls, code):
         """Convert language code from ISO 639-1 to ISO 639-2/T"""
@@ -472,7 +468,7 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
 
         sub_langs = [key for key in information['subtitles']]
         filename = information['filepath']
-        input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
+        input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in information['subtitles'].items()]
 
         opts = [
             '-map', '0',