Merge branch 'subtitles-rework'

(Closes PR #4964)
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-02-23 17:13:03 +0100
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-02-23 17:13:03 +0100
commit: bfc993cc9183d5f001e30267551bcdf9f0a98be9 (patch)
tree: 9408dc1e760394afdf8cbf6f48157ec22d7f3a74 /youtube_dl
parent: 4432db35d9ddd0e6777df6c596d8637514ba0b56 (diff)
parent: b531cfc019576b682f930bd269f68eb87cfd5abf (diff)
25 files changed, 347 insertions, 388 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index ca7c3f5c6..76fc394bc 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -154,7 +154,7 @@ class YoutubeDL(object):
     allsubtitles:      Downloads all the subtitles of the video
                        (requires writesubtitles or writeautomaticsub)
     listsubtitles:     Lists all available subtitles for the video
-    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
+    subtitlesformat:   The format code for subtitles
     subtitleslangs:    List of languages of the subtitles to download
     keepvideo:         Keep the video file after post-processing
     daterange:         A DateRange object, download only if the upload_date is in the range.
@@ -1008,6 +1008,15 @@ class YoutubeDL(object):
                 info_dict['timestamp'])
             info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 
+        if self.params.get('listsubtitles', False):
+            if 'automatic_captions' in info_dict:
+                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+            return
+        info_dict['requested_subtitles'] = self.process_subtitles(
+            info_dict['id'], info_dict.get('subtitles'),
+            info_dict.get('automatic_captions'))
+
         # This extractors handle format selection themselves
         if info_dict['extractor'] in ['Youku']:
             if download:
@@ -1136,6 +1145,55 @@ class YoutubeDL(object):
         info_dict.update(formats_to_download[-1])
         return info_dict
 
+    def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+        """Select the requested subtitles and their format"""
+        available_subs = {}
+        if normal_subtitles and self.params.get('writesubtitles'):
+            available_subs.update(normal_subtitles)
+        if automatic_captions and self.params.get('writeautomaticsub'):
+            for lang, cap_info in automatic_captions.items():
+                if lang not in available_subs:
+                    available_subs[lang] = cap_info
+
+        if (not self.params.get('writesubtitles') and not
+                self.params.get('writeautomaticsub') or not
+                available_subs):
+            return None
+
+        if self.params.get('allsubtitles', False):
+            requested_langs = available_subs.keys()
+        else:
+            if self.params.get('subtitleslangs', False):
+                requested_langs = self.params.get('subtitleslangs')
+            elif 'en' in available_subs:
+                requested_langs = ['en']
+            else:
+                requested_langs = [list(available_subs.keys())[0]]
+
+        formats_query = self.params.get('subtitlesformat', 'best')
+        formats_preference = formats_query.split('/') if formats_query else []
+        subs = {}
+        for lang in requested_langs:
+            formats = available_subs.get(lang)
+            if formats is None:
+                self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+                continue
+            for ext in formats_preference:
+                if ext == 'best':
+                    f = formats[-1]
+                    break
+                matches = list(filter(lambda f: f['ext'] == ext, formats))
+                if matches:
+                    f = matches[-1]
+                    break
+            else:
+                f = formats[-1]
+                self.report_warning(
+                    'No subtitle format found matching "%s" for language %s, '
+                    'using %s' % (formats_query, lang, f['ext']))
+            subs[lang] = f
+        return subs
+
     def process_info(self, info_dict):
         """Process a single resolved IE result."""
 
@@ -1238,15 +1296,22 @@ class YoutubeDL(object):
         subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                        self.params.get('writeautomaticsub')])
 
-        if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
+        if subtitles_are_requested and info_dict.get('requested_subtitles'):
             # subtitles download errors are already managed as troubles in relevant IE
             # that way it will silently go on when used with unsupporting IE
-            subtitles = info_dict['subtitles']
-            sub_format = self.params.get('subtitlesformat', 'srt')
-            for sub_lang in subtitles.keys():
-                sub = subtitles[sub_lang]
-                if sub is None:
-                    continue
+            subtitles = info_dict['requested_subtitles']
+            for sub_lang, sub_info in subtitles.items():
+                sub_format = sub_info['ext']
+                if sub_info.get('data') is not None:
+                    sub_data = sub_info['data']
+                else:
+                    try:
+                        uf = self.urlopen(sub_info['url'])
+                        sub_data = uf.read().decode('utf-8')
+                    except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                        self.report_warning('Unable to download subtitle for "%s": %s' %
+                                            (sub_lang, compat_str(err)))
+                        continue
                 try:
                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
@@ -1254,7 +1319,7 @@ class YoutubeDL(object):
                     else:
                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
-                            subfile.write(sub)
+                            subfile.write(sub_data)
                 except (OSError, IOError):
                     self.report_error('Cannot write subtitles file ' + sub_filename)
                     return
@@ -1564,6 +1629,17 @@ class YoutubeDL(object):
             ['ID', 'width', 'height', 'URL'],
             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
 
+    def list_subtitles(self, video_id, subtitles, name='subtitles'):
+        if not subtitles:
+            self.to_screen('%s has no %s' % (video_id, name))
+            return
+        self.to_screen(
+            'Available %s for %s:' % (name, video_id))
+        self.to_screen(render_table(
+            ['Language', 'formats'],
+            [[lang, ', '.join(f['ext'] for f in reversed(formats))]
+                for lang, formats in subtitles.items()]))
+
     def urlopen(self, req):
         """ Start an HTTP download """
 
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 25ab3fdfe..5ce201800 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -226,7 +226,6 @@ def _real_main(argv=None):
     if opts.embedsubtitles:
         postprocessors.append({
             'key': 'FFmpegEmbedSubtitle',
-            'subtitlesformat': opts.subtitlesformat,
         })
     if opts.xattrs:
         postprocessors.append({'key': 'XAttrMetadata'})
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
index f016368fa..7669e0e3d 100644
--- a/youtube_dl/extractor/atresplayer.py
+++ b/youtube_dl/extractor/atresplayer.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 import time
 import hmac
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_urllib_parse,
@@ -17,7 +17,7 @@ from ..utils import (
 )
 
 
-class AtresPlayerIE(SubtitlesInfoExtractor):
+class AtresPlayerIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
     _TESTS = [
         {
@@ -144,13 +144,12 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
         thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
 
         subtitles = {}
-        subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
-        if subtitle:
-            subtitles['es'] = subtitle
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+        subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
+        if subtitle_url:
+            subtitles['es'] = [{
+                'ext': 'srt',
+                'url': subtitle_url,
+            }]
 
         return {
             'id': video_id,
@@ -159,5 +158,5 @@ class AtresPlayerIE(SubtitlesInfoExtractor):
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
-            'subtitles': self.extract_subtitles(video_id, subtitles),
+            'subtitles': subtitles,
         }
diff --git a/youtube_dl/extractor/bbccouk.py b/youtube_dl/extractor/bbccouk.py
index f23e39545..abc34a576 100644
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -2,12 +2,12 @@ from __future__ import unicode_literals
 
 import xml.etree.ElementTree
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..utils import ExtractorError
 from ..compat import compat_HTTPError
 
 
-class BBCCoUkIE(SubtitlesInfoExtractor):
+class BBCCoUkIE(InfoExtractor):
     IE_NAME = 'bbc.co.uk'
     IE_DESC = 'BBC iPlayer'
     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
@@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             formats.extend(conn_formats)
         return formats
 
-    def _extract_captions(self, media, programme_id):
+    def _get_subtitles(self, media, programme_id):
         subtitles = {}
         for connection in self._extract_connections(media):
             captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
             lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
             ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
             srt = ''
+
+            def _extract_text(p):
+                # Prefer the paragraph's own text; otherwise join its <span> children
+                stripped_text = (p.text or '').strip()
+                if stripped_text:
+                    return stripped_text
+                return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span') if span.text)
             for pos, p in enumerate(ps):
-                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
-                                                          p.text.strip() if p.text is not None else '')
-            subtitles[lang] = srt
+                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
+            subtitles[lang] = [
+                {
+                    'url': connection.get('href'),
+                    'ext': 'ttml',
+                },
+                {
+                    'data': srt,
+                    'ext': 'srt',
+                },
+            ]
         return subtitles
 
     def _download_media_selector(self, programme_id):
@@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
             elif kind == 'video':
                 formats.extend(self._extract_video(media, programme_id))
             elif kind == 'captions':
-                subtitles = self._extract_captions(media, programme_id)
+                subtitles = self.extract_subtitles(media, programme_id)
 
         return formats, subtitles
 
@@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
         else:
             programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
 
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(programme_id, subtitles)
-            return
-
         self._sort_formats(formats)
 
         return {
diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py
index 436cc5155..8c7ba4b91 100644
--- a/youtube_dl/extractor/bliptv.py
+++ b/youtube_dl/extractor/bliptv.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
 
 from ..compat import (
     compat_str,
@@ -18,7 +17,7 @@ from ..utils import (
 )
 
 
-class BlipTVIE(SubtitlesInfoExtractor):
+class BlipTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))'
 
     _TESTS = [
@@ -143,7 +142,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
         categories = [category.text for category in item.findall('category')]
 
         formats = []
-        subtitles = {}
+        subtitles_urls = {}
 
         media_group = item.find(media('group'))
         for media_content in media_group.findall(media('content')):
@@ -161,7 +160,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
                 }
                 lang = role.rpartition('-')[-1].strip().lower()
                 langcode = LANGS.get(lang, lang)
-                subtitles[langcode] = url
+                subtitles_urls[langcode] = url
             elif media_type.startswith('video/'):
                 formats.append({
                     'url': real_url,
@@ -175,11 +174,7 @@ class BlipTVIE(SubtitlesInfoExtractor):
                 })
         self._sort_formats(formats)
 
-        # subtitles
-        video_subtitles = self.extract_subtitles(video_id, subtitles)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+        subtitles = self.extract_subtitles(video_id, subtitles_urls)
 
         return {
             'id': video_id,
@@ -192,15 +187,22 @@ class BlipTVIE(SubtitlesInfoExtractor):
             'thumbnail': thumbnail,
             'categories': categories,
             'formats': formats,
-            'subtitles': video_subtitles,
+            'subtitles': subtitles,
         }
 
-    def _download_subtitle_url(self, sub_lang, url):
-        # For some weird reason, blip.tv serves a video instead of subtitles
-        # when we request with a common UA
-        req = compat_urllib_request.Request(url)
-        req.add_header('User-Agent', 'youtube-dl')
-        return self._download_webpage(req, None, note=False)
+    def _get_subtitles(self, video_id, subtitles_urls):
+        subtitles = {}
+        for lang, url in subtitles_urls.items():
+            # For some weird reason, blip.tv serves a video instead of subtitles
+            # when we request with a common UA
+            req = compat_urllib_request.Request(url)
+            req.add_header('User-Agent', 'youtube-dl')
+            subtitles[lang] = [{
+                # The extension is 'srt' but it's actually an 'ass' file
+                'ext': 'ass',
+                'data': self._download_webpage(req, None, note=False),
+            }]
+        return subtitles
 
 
 class BlipTVUserIE(InfoExtractor):
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
index f70e090bb..65f6be623 100644
--- a/youtube_dl/extractor/ceskatelevize.py
+++ b/youtube_dl/extractor/ceskatelevize.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_request,
     compat_urllib_parse,
@@ -15,7 +15,7 @@ from ..utils import (
 )
 
 
-class CeskaTelevizeIE(SubtitlesInfoExtractor):
+class CeskaTelevizeIE(InfoExtractor):
     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)'
 
     _TESTS = [
@@ -107,13 +107,7 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
         subtitles = {}
         subs = item.get('subtitles')
         if subs:
-            subtitles['cs'] = subs[0]['url']
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
-        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles))
+            subtitles = self.extract_subtitles(episode_id, subs)
 
         return {
             'id': episode_id,
@@ -125,11 +119,20 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
             'subtitles': subtitles,
         }
 
+    def _get_subtitles(self, episode_id, subs):
+        original_subtitles = self._download_webpage(
+            subs[0]['url'], episode_id, 'Downloading subtitles')
+        srt_subs = self._fix_subtitles(original_subtitles)
+        return {
+            'cs': [{
+                'ext': 'srt',
+                'data': srt_subs,
+            }]
+        }
+
     @staticmethod
     def _fix_subtitles(subtitles):
         """ Convert millisecond-based subtitles to SRT """
-        if subtitles is None:
-            return subtitles  # subtitles not requested
 
         def _msectotimecode(msec):
             """ Helper utility to convert milliseconds to timecode """
@@ -149,7 +152,4 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor):
                 else:
                     yield line
 
-        fixed_subtitles = {}
-        for k, v in subtitles.items():
-            fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v))
-        return fixed_subtitles
+        return "\r\n".join(_fix_subtitle(subtitles))
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 79f6d199b..87fce9cd8 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -150,8 +150,14 @@ class InfoExtractor(object):
                     If not explicitly set, calculated from timestamp.
     uploader_id:    Nickname or id of the video uploader.
     location:       Physical location where the video was filmed.
-    subtitles:      The subtitle file contents as a dictionary in the format
-                    {language: subtitles}.
+    subtitles:      The available subtitles as a dictionary in the format
+                    {language: subformats}. "subformats" is a list sorted from
+                    lower to higher preference, each element is a dictionary
+                    with the "ext" entry and one of:
+                        * "data": The subtitles file contents
+                        * "url": A url pointing to the subtitles file
+    automatic_captions: Like 'subtitles', used by the YoutubeIE for
+                    automatically generated captions
     duration:       Length of the video in seconds, as an integer.
     view_count:     How many users have watched the video on the platform.
     like_count:     Number of positive ratings of the video
@@ -1011,6 +1017,24 @@ class InfoExtractor(object):
             any_restricted = any_restricted or is_restricted
         return not any_restricted
 
+    def extract_subtitles(self, *args, **kwargs):
+        if (self._downloader.params.get('writesubtitles', False) or
+                self._downloader.params.get('listsubtitles')):
+            return self._get_subtitles(*args, **kwargs)
+        return {}
+
+    def _get_subtitles(self, *args, **kwargs):
+        raise NotImplementedError("This method must be implemented by subclasses")
+
+    def extract_automatic_captions(self, *args, **kwargs):
+        if (self._downloader.params.get('writeautomaticsub', False) or
+                self._downloader.params.get('listsubtitles')):
+            return self._get_automatic_captions(*args, **kwargs)
+        return {}
+
+    def _get_automatic_captions(self, *args, **kwargs):
+        raise NotImplementedError("This method must be implemented by subclasses")
+
 
 class SearchInfoExtractor(InfoExtractor):
     """
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 1680f532f..f1da7d09b 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -9,7 +9,7 @@ import xml.etree.ElementTree
 
 from hashlib import sha1
 from math import pow, sqrt, floor
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
@@ -25,10 +25,9 @@ from ..aes import (
     aes_cbc_decrypt,
     inc,
 )
-from .common import InfoExtractor
 
 
-class CrunchyrollIE(SubtitlesInfoExtractor):
+class CrunchyrollIE(InfoExtractor):
     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)'
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
@@ -187,6 +186,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
         return output
 
+    def _get_subtitles(self, video_id, webpage):
+        subtitles = {}
+        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
+            sub_page = self._download_webpage(
+                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
+                video_id, note='Downloading subtitles for ' + sub_name)
+            id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
+            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
+            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
+            if not id or not iv or not data:
+                continue
+            id = int(id)
+            iv = base64.b64decode(iv)
+            data = base64.b64decode(data)
+
+            subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
+            if not lang_code:
+                continue
+            sub_root = xml.etree.ElementTree.fromstring(subtitle)
+            subtitles[lang_code] = [
+                {
+                    'ext': 'srt',
+                    'data': self._convert_subtitles_to_srt(sub_root),
+                },
+                {
+                    'ext': 'ass',
+                    'data': self._convert_subtitles_to_ass(sub_root),
+                },
+            ]
+        return subtitles
+
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
         video_id = mobj.group('video_id')
@@ -249,34 +280,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 'format_id': video_format,
             })
 
-        subtitles = {}
-        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
-        for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage):
-            sub_page = self._download_webpage(
-                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
-                video_id, note='Downloading subtitles for ' + sub_name)
-            id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
-            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
-            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
-            if not id or not iv or not data:
-                continue
-            id = int(id)
-            iv = base64.b64decode(iv)
-            data = base64.b64decode(data)
-
-            subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
-            lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
-            if not lang_code:
-                continue
-            sub_root = xml.etree.ElementTree.fromstring(subtitle)
-            if sub_format == 'ass':
-                subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root)
-            else:
-                subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root)
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+        subtitles = self.extract_subtitles(video_id, webpage)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index b2dbf4a92..42b20a46d 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -6,7 +6,6 @@ import json
 import itertools
 
 from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
 
 from ..compat import (
     compat_str,
@@ -31,7 +30,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor):
         return request
 
 
-class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
+class DailymotionIE(DailymotionBaseInfoExtractor):
     """Information Extractor for Dailymotion"""
 
     _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)'
@@ -143,9 +142,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, webpage)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, webpage)
-            return
 
         view_count = str_to_int(self._search_regex(
             r'video_views_count[^>]+>\s+([\d\.,]+)',
@@ -169,7 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
             'view_count': view_count,
         }
 
-    def _get_available_subtitles(self, video_id, webpage):
+    def _get_subtitles(self, video_id, webpage):
         try:
             sub_list = self._download_webpage(
                 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
@@ -179,7 +175,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor):
             return {}
         info = json.loads(sub_list)
         if (info['total'] > 0):
-            sub_lang_list = dict((l['language'], l['url']) for l in info['list'])
+            sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
             return sub_lang_list
         self._downloader.report_warning('video doesn\'t have subtitles')
         return {}
diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
index d5df18d7c..8257e35a4 100644
--- a/youtube_dl/extractor/drtv.py
+++ b/youtube_dl/extractor/drtv.py
@@ -1,11 +1,10 @@
 from __future__ import unicode_literals
 
-from .subtitles import SubtitlesInfoExtractor
-from .common import ExtractorError
+from .common import InfoExtractor, ExtractorError
 from ..utils import parse_iso8601
 
 
-class DRTVIE(SubtitlesInfoExtractor):
+class DRTVIE(InfoExtractor):
     _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
 
     _TEST = {
@@ -76,7 +75,7 @@ class DRTVIE(SubtitlesInfoExtractor):
                     }
                     for subs in subtitles_list:
                         lang = subs['Language']
-                        subtitles[LANGS.get(lang, lang)] = subs['Uri']
+                        subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}]
 
         if not formats and restricted_to_denmark:
             raise ExtractorError(
@@ -84,10 +83,6 @@ class DRTVIE(SubtitlesInfoExtractor):
 
         self._sort_formats(formats)
 
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
         return {
             'id': video_id,
             'title': title,
@@ -96,5 +91,5 @@ class DRTVIE(SubtitlesInfoExtractor):
             'timestamp': timestamp,
             'duration': duration,
             'formats': formats,
-            'subtitles': self.extract_subtitles(video_id, subtitles),
+            'subtitles': subtitles,
         }
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py
index 762cefa34..109055e72 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dl/extractor/lynda.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 import re
 import json
 
-from .subtitles import SubtitlesInfoExtractor
 from .common import InfoExtractor
 from ..compat import (
     compat_str,
@@ -16,7 +15,7 @@ from ..utils import (
 )
 
 
-class LyndaIE(SubtitlesInfoExtractor):
+class LyndaIE(InfoExtractor):
     IE_NAME = 'lynda'
     IE_DESC = 'lynda.com videos'
     _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'
@@ -88,11 +87,7 @@ class LyndaIE(SubtitlesInfoExtractor):
         self._check_formats(formats, video_id)
         self._sort_formats(formats)
 
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, page)
-            return
-
-        subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page))
+        subtitles = self.extract_subtitles(video_id, page)
 
         return {
             'id': video_id,
@@ -144,38 +139,31 @@ class LyndaIE(SubtitlesInfoExtractor):
         if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None:
             raise ExtractorError('Unable to log in')
 
-    def _fix_subtitles(self, subtitles):
-        if subtitles is None:
-            return subtitles  # subtitles not requested
-
-        fixed_subtitles = {}
-        for k, v in subtitles.items():
-            subs = json.loads(v)
-            if len(subs) == 0:
+    def _fix_subtitles(self, subs):
+        srt = ''
+        for pos in range(0, len(subs) - 1):
+            seq_current = subs[pos]
+            m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
+            if m_current is None:
                 continue
-            srt = ''
-            for pos in range(0, len(subs) - 1):
-                seq_current = subs[pos]
-                m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode'])
-                if m_current is None:
-                    continue
-                seq_next = subs[pos + 1]
-                m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
-                if m_next is None:
-                    continue
-                appear_time = m_current.group('timecode')
-                disappear_time = m_next.group('timecode')
-                text = seq_current['Caption']
-                srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text)
-            if srt:
-                fixed_subtitles[k] = srt
-        return fixed_subtitles
-
-    def _get_available_subtitles(self, video_id, webpage):
+            seq_next = subs[pos + 1]
+            m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode'])
+            if m_next is None:
+                continue
+            appear_time = m_current.group('timecode')
+            disappear_time = m_next.group('timecode')
+            text = seq_current['Caption']
+            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), appear_time, disappear_time, text)
+        # Each cue above ends with a blank line; None means no cue could be parsed
+        return srt or None
+
+    def _get_subtitles(self, video_id, webpage):
         url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
-        sub = self._download_webpage(url, None, False)
-        sub_json = json.loads(sub)
-        return {'en': url} if len(sub_json) > 0 else {}
+        subs = self._download_json(url, None, False)
+        # _fix_subtitles returns None when no cue parses; an entry with
+        # 'data': None and no 'url' would break the downloader
+        fixed_subs = self._fix_subtitles(subs) if subs else None
+        return {'en': [{'ext': 'srt', 'data': fixed_subs}]} if fixed_subs else {}
 
 
 class LyndaCourseIE(InfoExtractor):
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py
index 3c61a850f..d7ab6a9ae 100644
--- a/youtube_dl/extractor/mit.py
+++ b/youtube_dl/extractor/mit.py
@@ -5,9 +5,6 @@ import json
 
 from .common import InfoExtractor
 from .youtube import YoutubeIE
-from ..compat import (
-    compat_urlparse,
-)
 from ..utils import (
     clean_html,
     ExtractorError,
@@ -108,7 +105,6 @@ class OCWMITIE(InfoExtractor):
                 'upload_date': '20121109',
                 'uploader_id': 'MIT',
                 'uploader': 'MIT OpenCourseWare',
-                # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt'
             }
         },
         {
@@ -121,7 +117,6 @@ class OCWMITIE(InfoExtractor):
                 'uploader_id': 'MIT',
                 'uploader': 'MIT OpenCourseWare',
                 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
-                # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT'
             }
         }
     ]
@@ -140,7 +135,6 @@ class OCWMITIE(InfoExtractor):
             metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
             metadata = re.split(r', ?', metadata)
             yt = metadata[1]
-            subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7])
         else:
             # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
             embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
@@ -148,7 +142,6 @@ class OCWMITIE(InfoExtractor):
                 metadata = re.sub(r'[\'"]', '', embed_media.group(1))
                 metadata = re.split(r', ?', metadata)
                 yt = metadata[1]
-                subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5])
             else:
                 raise ExtractorError('Unable to find embedded YouTube video.')
         video_id = YoutubeIE.extract_id(yt)
@@ -159,7 +152,5 @@ class OCWMITIE(InfoExtractor):
             'title': title,
             'description': description,
             'url': yt,
-            'url_transparent'
-            'subtitles': subs,
             'ie_key': 'Youtube',
         }
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
index bc7f49ebb..c11de1cb6 100644
--- a/youtube_dl/extractor/mtv.py
+++ b/youtube_dl/extractor/mtv.py
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
     compat_urllib_request,
@@ -23,7 +23,7 @@ def _media_xml_tag(tag):
     return '{http://search.yahoo.com/mrss/}%s' % tag
 
 
-class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
+class MTVServicesInfoExtractor(InfoExtractor):
     _MOBILE_TEMPLATE = None
 
     @staticmethod
@@ -95,25 +95,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
 
     def _extract_subtitles(self, mdoc, mtvn_id):
         subtitles = {}
-        FORMATS = {
-            'scc': 'cea-608',
-            'eia-608': 'cea-608',
-            'xml': 'ttml',
-        }
-        subtitles_format = FORMATS.get(
-            self._downloader.params.get('subtitlesformat'), 'ttml')
         for transcript in mdoc.findall('.//transcript'):
             if transcript.get('kind') != 'captions':
                 continue
             lang = transcript.get('srclang')
-            for typographic in transcript.findall('./typographic'):
-                captions_format = typographic.get('format')
-                if captions_format == subtitles_format:
-                    subtitles[lang] = compat_str(typographic.get('src'))
-                    break
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(mtvn_id, subtitles)
-        return self.extract_subtitles(mtvn_id, subtitles)
+            subtitles[lang] = [{
+                'url': compat_str(typographic.get('src')),
+                'ext': typographic.get('format')
+            } for typographic in transcript.findall('./typographic')]
+        return subtitles
 
     def _get_video_info(self, itemdoc):
         uri = itemdoc.find('guid').text
@@ -196,8 +186,6 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):
                 webpage, 'mgid')
 
         videos_info = self._get_videos_info(mgid)
-        if self._downloader.params.get('listsubtitles', False):
-            return
         return videos_info
 
 
diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py
index c075618e8..9c01eb0af 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dl/extractor/npo.py
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-from .subtitles import SubtitlesInfoExtractor
 from .common import InfoExtractor
 from ..utils import (
     fix_xml_ampersands,
@@ -12,7 +11,7 @@ from ..utils import (
 )
 
 
-class NPOBaseIE(SubtitlesInfoExtractor):
+class NPOBaseIE(InfoExtractor):
     def _get_token(self, video_id):
         token_page = self._download_webpage(
             'http://ida.omroep.nl/npoplayer/i.js',
@@ -164,13 +163,10 @@ class NPOIE(NPOBaseIE):
 
         subtitles = {}
         if metadata.get('tt888') == 'ja':
-            subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
-        subtitles = self.extract_subtitles(video_id, subtitles)
+            subtitles['nl'] = [{
+                'ext': 'vtt',
+                'url': 'http://e.omroep.nl/tt888/%s' % video_id,
+            }]
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
index f6de26022..46f493cfc 100644
--- a/youtube_dl/extractor/nrk.py
+++ b/youtube_dl/extractor/nrk.py
@@ -10,7 +10,6 @@ from ..utils import (
     parse_duration,
     unified_strdate,
 )
-from .subtitles import SubtitlesInfoExtractor
 
 
 class NRKIE(InfoExtractor):
@@ -73,7 +72,7 @@ class NRKIE(InfoExtractor):
         }
 
 
-class NRKTVIE(SubtitlesInfoExtractor):
+class NRKTVIE(InfoExtractor):
     _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
 
     _TESTS = [
@@ -156,7 +155,7 @@ class NRKTVIE(SubtitlesInfoExtractor):
         if self._downloader.params.get('verbose', False):
             self.to_screen('[debug] %s' % txt)
 
-    def _extract_captions(self, subtitlesurl, video_id, baseurl):
+    def _get_subtitles(self, subtitlesurl, video_id, baseurl):
         url = "%s%s" % (baseurl, subtitlesurl)
         self._debug_print('%s: Subtitle url: %s' % (video_id, url))
         captions = self._download_xml(url, video_id, 'Downloading subtitles')
@@ -170,7 +169,10 @@ class NRKTVIE(SubtitlesInfoExtractor):
             endtime = self._seconds2str(begin + duration)
             text = '\n'.join(p.itertext())
             srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
-        return {lang: srt}
+        return {lang: [
+            {'ext': 'ttml', 'url': url},
+            {'ext': 'srt', 'data': srt},
+        ]}
 
     def _extract_f4m(self, manifest_url, video_id):
         return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
@@ -243,10 +245,7 @@ class NRKTVIE(SubtitlesInfoExtractor):
             webpage, 'subtitle URL', default=None)
         subtitles = None
         if subtitles_url:
-            subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+            subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl)
 
         return {
             'id': video_id,
diff --git a/youtube_dl/extractor/rai.py b/youtube_dl/extractor/rai.py
index aa26b7e0b..144e33982 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dl/extractor/rai.py
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
 
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse,
 )
@@ -12,7 +12,7 @@ from ..utils import (
 )
 
 
-class RaiIE(SubtitlesInfoExtractor):
+class RaiIE(InfoExtractor):
     _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)'
     _TESTS = [
         {
@@ -89,15 +89,7 @@ class RaiIE(SubtitlesInfoExtractor):
                 'ext': 'mp4',
             })
 
-        if self._downloader.params.get('listsubtitles', False):
-            page = self._download_webpage(url, video_id)
-            self._list_available_subtitles(video_id, page)
-            return
-
-        subtitles = {}
-        if self._have_to_download_any_subtitles:
-            page = self._download_webpage(url, video_id)
-            subtitles = self.extract_subtitles(video_id, page)
+        subtitles = self.extract_subtitles(video_id, url)
 
         return {
             'id': video_id,
@@ -111,7 +103,8 @@ class RaiIE(SubtitlesInfoExtractor):
             'subtitles': subtitles,
         }
 
-    def _get_available_subtitles(self, video_id, webpage):
+    def _get_subtitles(self, video_id, url):
+        webpage = self._download_webpage(url, video_id)
         subtitles = {}
         m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage)
         if m:
@@ -120,5 +113,8 @@ class RaiIE(SubtitlesInfoExtractor):
             SRT_EXT = '.srt'
             if captions.endswith(STL_EXT):
                 captions = captions[:-len(STL_EXT)] + SRT_EXT
-            subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions)
+            subtitles['it'] = [{
+                'ext': 'srt',
+                'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions),
+            }]
         return subtitles
diff --git a/youtube_dl/extractor/subtitles.py b/youtube_dl/extractor/subtitles.py
deleted file mode 100644
index 59a51268d..000000000
--- a/youtube_dl/extractor/subtitles.py
+++ /dev/null
@@ -1,99 +0,0 @@
-from __future__ import unicode_literals
-from .common import InfoExtractor
-
-from ..compat import compat_str
-from ..utils import (
-    ExtractorError,
-)
-
-
-class SubtitlesInfoExtractor(InfoExtractor):
-    @property
-    def _have_to_download_any_subtitles(self):
-        return any([self._downloader.params.get('writesubtitles', False),
-                    self._downloader.params.get('writeautomaticsub')])
-
-    def _list_available_subtitles(self, video_id, webpage):
-        """ outputs the available subtitles for the video """
-        sub_lang_list = self._get_available_subtitles(video_id, webpage)
-        auto_captions_list = self._get_available_automatic_caption(video_id, webpage)
-        sub_lang = ",".join(list(sub_lang_list.keys()))
-        self.to_screen('%s: Available subtitles for video: %s' %
-                       (video_id, sub_lang))
-        auto_lang = ",".join(auto_captions_list.keys())
-        self.to_screen('%s: Available automatic captions for video: %s' %
-                       (video_id, auto_lang))
-
-    def extract_subtitles(self, video_id, webpage):
-        """
-        returns {sub_lang: sub} ,{} if subtitles not found or None if the
-        subtitles aren't requested.
-        """
-        if not self._have_to_download_any_subtitles:
-            return None
-        available_subs_list = {}
-        if self._downloader.params.get('writeautomaticsub', False):
-            available_subs_list.update(self._get_available_automatic_caption(video_id, webpage))
-        if self._downloader.params.get('writesubtitles', False):
-            available_subs_list.update(self._get_available_subtitles(video_id, webpage))
-
-        if not available_subs_list:  # error, it didn't get the available subtitles
-            return {}
-        if self._downloader.params.get('allsubtitles', False):
-            sub_lang_list = available_subs_list
-        else:
-            if self._downloader.params.get('subtitleslangs', False):
-                requested_langs = self._downloader.params.get('subtitleslangs')
-            elif 'en' in available_subs_list:
-                requested_langs = ['en']
-            else:
-                requested_langs = [list(available_subs_list.keys())[0]]
-
-            sub_lang_list = {}
-            for sub_lang in requested_langs:
-                if sub_lang not in available_subs_list:
-                    self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang)
-                    continue
-                sub_lang_list[sub_lang] = available_subs_list[sub_lang]
-
-        subtitles = {}
-        for sub_lang, url in sub_lang_list.items():
-            subtitle = self._request_subtitle_url(sub_lang, url)
-            if subtitle:
-                subtitles[sub_lang] = subtitle
-        return subtitles
-
-    def _download_subtitle_url(self, sub_lang, url):
-        return self._download_webpage(url, None, note=False)
-
-    def _request_subtitle_url(self, sub_lang, url):
-        """ makes the http request for the subtitle """
-        try:
-            sub = self._download_subtitle_url(sub_lang, url)
-        except ExtractorError as err:
-            self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err)))
-            return
-        if not sub:
-            self._downloader.report_warning('Did not fetch video subtitles')
-            return
-        return sub
-
-    def _get_available_subtitles(self, video_id, webpage):
-        """
-        returns {sub_lang: url} or {} if not available
-        Must be redefined by the subclasses
-        """
-
-        # By default, allow implementations to simply pass in the result
-        assert isinstance(webpage, dict), \
-            '_get_available_subtitles not implemented'
-        return webpage
-
-    def _get_available_automatic_caption(self, video_id, webpage):
-        """
-        returns {sub_lang: url} or {} if not available
-        Must be redefined by the subclasses that support automatic captions,
-        otherwise it will return {}
-        """
-        self._downloader.report_warning('Automatic Captions not supported by this server')
-        return {}
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 59678399d..4cec06f8b 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -3,14 +3,14 @@ from __future__ import unicode_literals
 import json
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 
 from ..compat import (
     compat_str,
 )
 
 
-class TEDIE(SubtitlesInfoExtractor):
+class TEDIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         (?P<proto>https?://)
         (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
@@ -184,11 +184,6 @@ class TEDIE(SubtitlesInfoExtractor):
         self._sort_formats(formats)
 
         video_id = compat_str(talk_info['id'])
-        # subtitles
-        video_subtitles = self.extract_subtitles(video_id, talk_info)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, talk_info)
-            return
 
         thumbnail = talk_info['thumb']
         if not thumbnail.startswith('http'):
@@ -199,21 +194,25 @@ class TEDIE(SubtitlesInfoExtractor):
             'uploader': talk_info['speaker'],
             'thumbnail': thumbnail,
             'description': self._og_search_description(webpage),
-            'subtitles': video_subtitles,
+            'subtitles': self._get_subtitles(video_id, talk_info),
             'formats': formats,
             'duration': talk_info.get('duration'),
         }
 
-    def _get_available_subtitles(self, video_id, talk_info):
+    def _get_subtitles(self, video_id, talk_info):
         languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
         if languages:
             sub_lang_list = {}
             for l in languages:
-                url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
-                sub_lang_list[l] = url
+                sub_lang_list[l] = [
+                    {
+                        'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
+                        'ext': ext,
+                    }
+                    for ext in ['ted', 'srt']
+                ]
             return sub_lang_list
         else:
-            self._downloader.report_warning('video doesn\'t have subtitles')
             return {}
 
     def _watch_info(self, url, name):
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index f7b34bd26..feac666f7 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -8,7 +8,7 @@ import binascii
 import hashlib
 
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..compat import (
     compat_str,
 )
@@ -22,7 +22,7 @@ from ..utils import (
 _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'})
 
 
-class ThePlatformIE(SubtitlesInfoExtractor):
+class ThePlatformIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
@@ -106,15 +106,11 @@ class ThePlatformIE(SubtitlesInfoExtractor):
         captions = info.get('captions')
         if isinstance(captions, list):
             for caption in captions:
-                lang, src = caption.get('lang'), caption.get('src')
-                if lang and src:
-                    subtitles[lang] = src
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
-        subtitles = self.extract_subtitles(video_id, subtitles)
+                lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
+                subtitles[lang] = [{
+                    'ext': 'srt' if mime == 'text/srt' else 'ttml',
+                    'url': src,
+                }]
 
         head = meta.find(_x('smil:head'))
         body = meta.find(_x('smil:body'))
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
index 944901e14..6816dacb6 100644
--- a/youtube_dl/extractor/viki.py
+++ b/youtube_dl/extractor/viki.py
@@ -2,16 +2,17 @@ from __future__ import unicode_literals
 
 import re
 
+from ..compat import compat_urlparse
 from ..utils import (
     ExtractorError,
     unescapeHTML,
     unified_strdate,
     US_RATINGS,
 )
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 
 
-class VikiIE(SubtitlesInfoExtractor):
+class VikiIE(InfoExtractor):
     IE_NAME = 'viki'
 
     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
@@ -69,9 +70,6 @@ class VikiIE(SubtitlesInfoExtractor):
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, info_webpage)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, info_webpage)
-            return
 
         return {
             'id': video_id,
@@ -85,12 +83,15 @@ class VikiIE(SubtitlesInfoExtractor):
             'upload_date': upload_date,
         }
 
-    def _get_available_subtitles(self, video_id, info_webpage):
+    def _get_subtitles(self, video_id, info_webpage):
         res = {}
-        for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
+        for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
             sturl = unescapeHTML(sturl_html)
             m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
             if not m:
                 continue
-            res[m.group('lang')] = sturl
+            res[m.group('lang')] = [{
+                'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
+                'ext': 'vtt',
+            }]
         return res
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 4cd2f73d9..8f540f578 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -7,7 +7,6 @@ import itertools
 import hashlib
 
 from .common import InfoExtractor
-from .subtitles import SubtitlesInfoExtractor
 from ..compat import (
     compat_HTTPError,
     compat_urllib_parse,
@@ -53,7 +52,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
         self._download_webpage(login_request, None, False, 'Wrong login info')
 
 
-class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
+class VimeoIE(VimeoBaseInfoExtractor):
     """Information extractor for vimeo.com."""
 
     # _VALID_URL matches Vimeo URLs
@@ -378,12 +377,10 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
         text_tracks = config['request'].get('text_tracks')
         if text_tracks:
             for tt in text_tracks:
-                subtitles[tt['lang']] = 'http://vimeo.com' + tt['url']
-
-        video_subtitles = self.extract_subtitles(video_id, subtitles)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+                subtitles[tt['lang']] = [{
+                    'ext': 'vtt',
+                    'url': 'http://vimeo.com' + tt['url'],
+                }]
 
         return {
             'id': video_id,
@@ -399,7 +396,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor):
             'view_count': view_count,
             'like_count': like_count,
             'comment_count': comment_count,
-            'subtitles': video_subtitles,
+            'subtitles': subtitles,
         }
 
 
diff --git a/youtube_dl/extractor/walla.py b/youtube_dl/extractor/walla.py
index 672bda7a7..24efbd6e6 100644
--- a/youtube_dl/extractor/walla.py
+++ b/youtube_dl/extractor/walla.py
@@ -3,14 +3,14 @@ from __future__ import unicode_literals
 
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 from ..utils import (
     xpath_text,
     int_or_none,
 )
 
 
-class WallaIE(SubtitlesInfoExtractor):
+class WallaIE(InfoExtractor):
     _VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
     _TEST = {
         'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
@@ -52,13 +52,10 @@ class WallaIE(SubtitlesInfoExtractor):
         subtitles = {}
         for subtitle in item.findall('./subtitles/subtitle'):
             lang = xpath_text(subtitle, './title')
-            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = xpath_text(subtitle, './src')
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
-
-        subtitles = self.extract_subtitles(video_id, subtitles)
+            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
+                'ext': 'srt',
+                'url': xpath_text(subtitle, './src'),
+            }]
 
         formats = []
         for quality in item.findall('./qualities/quality'):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 3d3d43491..22db896b1 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -11,7 +11,6 @@ import time
 import traceback
 
 from .common import InfoExtractor, SearchInfoExtractor
-from .subtitles import SubtitlesInfoExtractor
 from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
 from ..compat import (
@@ -185,7 +184,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             return
 
 
-class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
+class YoutubeIE(YoutubeBaseInfoExtractor):
     IE_DESC = 'YouTube.com'
     _VALID_URL = r"""(?x)^
                      (
@@ -648,7 +647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             raise ExtractorError(
                 'Signature extraction failed: ' + tb, cause=e)
 
-    def _get_available_subtitles(self, video_id, webpage):
+    def _get_subtitles(self, video_id, webpage):
         try:
             subs_doc = self._download_xml(
                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
@@ -662,23 +661,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             lang = track.attrib['lang_code']
             if lang in sub_lang_list:
                 continue
-            params = compat_urllib_parse.urlencode({
-                'lang': lang,
-                'v': video_id,
-                'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
-                'name': track.attrib['name'].encode('utf-8'),
-            })
-            url = 'https://www.youtube.com/api/timedtext?' + params
-            sub_lang_list[lang] = url
+            sub_formats = []
+            for ext in ['sbv', 'vtt', 'srt']:
+                params = compat_urllib_parse.urlencode({
+                    'lang': lang,
+                    'v': video_id,
+                    'fmt': ext,
+                    'name': track.attrib['name'].encode('utf-8'),
+                })
+                sub_formats.append({
+                    'url': 'https://www.youtube.com/api/timedtext?' + params,
+                    'ext': ext,
+                })
+            sub_lang_list[lang] = sub_formats
         if not sub_lang_list:
             self._downloader.report_warning('video doesn\'t have subtitles')
             return {}
         return sub_lang_list
 
-    def _get_available_automatic_caption(self, video_id, webpage):
+    def _get_automatic_captions(self, video_id, webpage):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
-        sub_format = self._downloader.params.get('subtitlesformat', 'srt')
         self.to_screen('%s: Looking for automatic captions' % video_id)
         mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
@@ -708,14 +711,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             sub_lang_list = {}
             for lang_node in caption_list.findall('target'):
                 sub_lang = lang_node.attrib['lang_code']
-                params = compat_urllib_parse.urlencode({
-                    'lang': original_lang,
-                    'tlang': sub_lang,
-                    'fmt': sub_format,
-                    'ts': timestamp,
-                    'kind': caption_kind,
-                })
-                sub_lang_list[sub_lang] = caption_url + '&' + params
+                sub_formats = []
+                for ext in ['sbv', 'vtt', 'srt']:
+                    params = compat_urllib_parse.urlencode({
+                        'lang': original_lang,
+                        'tlang': sub_lang,
+                        'fmt': ext,
+                        'ts': timestamp,
+                        'kind': caption_kind,
+                    })
+                    sub_formats.append({
+                        'url': caption_url + '&' + params,
+                        'ext': ext,
+                    })
+                sub_lang_list[sub_lang] = sub_formats
             return sub_lang_list
         # An extractor error can be raise by the download process if there are
         # no automatic captions but there are subtitles
@@ -970,10 +979,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, video_webpage)
-
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, video_webpage)
-            return
+        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
 
         if 'length_seconds' not in video_info:
             self._downloader.report_warning('unable to extract video duration')
@@ -1122,6 +1128,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             'description': video_description,
             'categories': video_categories,
             'subtitles': video_subtitles,
+            'automatic_captions': automatic_captions,
             'duration': video_duration,
             'age_limit': 18 if age_gate else 0,
             'annotations': video_annotations,
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
index 5f678f76b..5c2d153b1 100644
--- a/youtube_dl/options.py
+++ b/youtube_dl/options.py
@@ -387,8 +387,8 @@ def parseOpts(overrideArguments=None):
         help='lists all available subtitles for the video')
     subtitles.add_option(
         '--sub-format',
-        action='store', dest='subtitlesformat', metavar='FORMAT', default='srt',
-        help='subtitle format (default=srt) ([sbv/vtt] youtube only)')
+        action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
+        help='subtitle format, accepts formats preference, for example: "ass/srt/best"')
     subtitles.add_option(
         '--sub-lang', '--sub-langs', '--srt-lang',
         action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
index 3f2e6cf1d..398fe050e 100644
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ b/youtube_dl/postprocessor/ffmpeg.py
@@ -496,10 +496,6 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         'zu': 'zul',
     }
 
-    def __init__(self, downloader=None, subtitlesformat='srt'):
-        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
-        self._subformat = subtitlesformat
-
     @classmethod
     def _conver_lang_code(cls, code):
         """Convert language code from ISO 639-1 to ISO 639-2/T"""
@@ -509,13 +505,14 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
         if information['ext'] != 'mp4':
             self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files')
             return True, information
-        if not information.get('subtitles'):
+        subtitles = information.get('requested_subtitles')
+        if not subtitles:
             self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
             return True, information
 
-        sub_langs = [key for key in information['subtitles']]
+        sub_langs = list(subtitles.keys())
         filename = information['filepath']
-        input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
+        input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()]
 
         opts = [
             '-map', '0',
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2015-02-23 17:13:03 +0100
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2015-02-23 17:13:03 +0100
commit	bfc993cc9183d5f001e30267551bcdf9f0a98be9 (patch)
tree	9408dc1e760394afdf8cbf6f48157ec22d7f3a74 /youtube_dl
parent	4432db35d9ddd0e6777df6c596d8637514ba0b56 (diff)
parent	b531cfc019576b682f930bd269f68eb87cfd5abf (diff)