From a504ced097e703a9bc6c18b6e31bcafb4783ed80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Sun, 15 Feb 2015 18:03:41 +0100
Subject: Improve subtitles support

For each language, the extractor builds a sorted list of the available formats (as is done for video formats); YoutubeDL then selects one of them according to the '--sub-format' option, which now accepts a list of format preferences (for example 'ass/srt/best').
For each format, the 'url' field can be set so that the contents are downloaded only if needed; if the contents need to be processed (as in crunchyroll), the 'data' field can be used instead.

The reasons for this change are:
* We weren't checking that the format given with '--sub-format' was available; checking it in each extractor would be repetitive.
* It makes it easy to support giving a format preference.
* The subtitles were automatically downloaded in the extractor, but I think that if you use, for example, the '--dump-json' option, you want to finish as fast as possible.

Currently only the ted extractor has been updated, but the old system still works.
---
 youtube_dl/extractor/ted.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'youtube_dl/extractor/ted.py')
diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 10b3b706a..1809eaae4 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -3,14 +3,14 @@ from __future__ import unicode_literals
 import json
 import re
 
-from .subtitles import SubtitlesInfoExtractor
+from .common import InfoExtractor
 
 from ..compat import (
     compat_str,
 )
 
 
-class TEDIE(SubtitlesInfoExtractor):
+class TEDIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         (?P<proto>https?://)
         (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/
@@ -165,9 +165,6 @@ class TEDIE(SubtitlesInfoExtractor):
         video_id = compat_str(talk_info['id'])
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, talk_info)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, talk_info)
-            return
 
         thumbnail = talk_info['thumb']
         if not thumbnail.startswith('http'):
@@ -183,13 +180,18 @@ class TEDIE(SubtitlesInfoExtractor):
             'duration': talk_info.get('duration'),
         }
 
-    def _get_available_subtitles(self, video_id, talk_info):
+    def _get_subtitles(self, video_id, talk_info):
         languages = [lang['languageCode'] for lang in talk_info.get('languages', [])]
         if languages:
             sub_lang_list = {}
             for l in languages:
-                url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l)
-                sub_lang_list[l] = url
+                sub_lang_list[l] = [
+                    {
+                        'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext),
+                        'ext': ext,
+                    }
+                    for ext in ['ted', 'srt']
+                ]
             return sub_lang_list
         else:
             self._downloader.report_warning('video doesn\'t have subtitles')
-- 
cgit v1.2.3


From 03091e372f7033fa52c7961b1a99cd3790c0f60f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Sat, 21 Feb 2015 22:33:11 +0100
Subject: [ted] Always extract the subtitles

The required info is already in the webpage
---
 youtube_dl/extractor/ted.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'youtube_dl/extractor/ted.py')

diff --git a/youtube_dl/extractor/ted.py b/youtube_dl/extractor/ted.py
index 1809eaae4..0c38c8f89 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dl/extractor/ted.py
@@ -163,8 +163,6 @@ class TEDIE(InfoExtractor):
         self._sort_formats(formats)
 
         video_id = compat_str(talk_info['id'])
-        # subtitles
-        video_subtitles = self.extract_subtitles(video_id, talk_info)
 
         thumbnail = talk_info['thumb']
         if not thumbnail.startswith('http'):
@@ -175,7 +173,7 @@ class TEDIE(InfoExtractor):
             'uploader': talk_info['speaker'],
             'thumbnail': thumbnail,
             'description': self._og_search_description(webpage),
-            'subtitles': video_subtitles,
+            'subtitles': self._get_subtitles(video_id, talk_info),
             'formats': formats,
             'duration': talk_info.get('duration'),
         }
@@ -194,7 +192,6 @@ class TEDIE(InfoExtractor):
                 ]
             return sub_lang_list
         else:
-            self._downloader.report_warning('video doesn\'t have subtitles')
             return {}
 
     def _watch_info(self, url, name):
-- 
cgit v1.2.3