Improve subtitles support

For each language the extractor builds a sorted list of the available formats (as is done for video formats), then YoutubeDL selects one of them using the '--sub-format' option, which now accepts a list of format preferences (for example 'ass/srt/best').
For each format the 'url' field can be set so that we only download the contents if needed; if the contents need to be processed (as in crunchyroll), the 'data' field can be used instead.

The reasons for this change are:
* We weren't checking that the format given with '--sub-format' was available, and checking it in each extractor would be repetitive.
* It makes it easy to support giving a format preference.
* The subtitles were automatically downloaded in the extractor, but I think that if you use, for example, the '--dump-json' option you want to finish as fast as possible.

Currently only the ted extractor has been updated, but the old system still works.
author: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-02-15 18:03:41 +0100
committer: Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2015-02-16 21:51:03 +0100
commit: a504ced097e703a9bc6c18b6e31bcafb4783ed80 (patch)
tree: 1d520371df47be5f2c62aaee78dbced5d6b05d08 /test/test_subtitles.py
parent: 8fb474fb17a64ff2aa9f6315ebbc99ae7938c4e1 (diff)
1 files changed, 12 insertions, 12 deletions
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index bcc69a778..fbc9eaf4d 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -27,15 +27,23 @@ class BaseTestSubtitles(unittest.TestCase):
 
     def setUp(self):
         self.DL = FakeYDL()
-        self.ie = self.IE(self.DL)
+        self.ie = self.IE()
+        self.DL.add_info_extractor(self.ie)
 
     def getInfoDict(self):
-        info_dict = self.ie.extract(self.url)
+        info_dict = self.DL.extract_info(self.url, download=False)
         return info_dict
 
     def getSubtitles(self):
         info_dict = self.getInfoDict()
-        return info_dict['subtitles']
+        subtitles = info_dict['subtitles']
+        if not subtitles:
+            return subtitles
+        for sub_info in subtitles.values():
+            if sub_info.get('data') is None:
+                uf = self.DL.urlopen(sub_info['url'])
+                sub_info['data'] = uf.read().decode('utf-8')
+        return dict((l, sub_info['data']) for l, sub_info in subtitles.items())
 
 
 class TestYoutubeSubtitles(BaseTestSubtitles):
@@ -176,7 +184,7 @@ class TestTedSubtitles(BaseTestSubtitles):
 
     def test_no_writesubtitles(self):
         subtitles = self.getSubtitles()
-        self.assertEqual(subtitles, None)
+        self.assertFalse(subtitles)
 
     def test_subtitles(self):
         self.DL.params['writesubtitles'] = True
@@ -196,18 +204,10 @@ class TestTedSubtitles(BaseTestSubtitles):
         self.assertTrue(len(subtitles.keys()) >= 28)
 
     def test_list_subtitles(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
         self.DL.params['listsubtitles'] = True
         info_dict = self.getInfoDict()
         self.assertEqual(info_dict, None)
 
-    def test_automatic_captions(self):
-        self.DL.expect_warning('Automatic Captions not supported by this server')
-        self.DL.params['writeautomaticsub'] = True
-        self.DL.params['subtitleslang'] = ['en']
-        subtitles = self.getSubtitles()
-        self.assertTrue(len(subtitles.keys()) == 0)
-
     def test_multiple_langs(self):
         self.DL.params['writesubtitles'] = True
         langs = ['es', 'fr', 'de']
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2015-02-15 18:03:41 +0100
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>	2015-02-16 21:51:03 +0100
commit	a504ced097e703a9bc6c18b6e31bcafb4783ed80 (patch)
tree	1d520371df47be5f2c62aaee78dbced5d6b05d08 /test/test_subtitles.py
parent	8fb474fb17a64ff2aa9f6315ebbc99ae7938c4e1 (diff)