diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-06-01 14:18:27 -0700 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-06-01 14:18:27 -0700 |
commit | 418f734a5877bb2d5d9cabe2ee158d076a1ef2c7 (patch) | |
tree | bdc09e1095eba774c92c81c4a5a6efee0d97f330 | |
parent | 1b2b22ed9f641eef34c05afb4230f2ff0aa57e0f (diff) | |
parent | dc1c355b7214657d0649cef3ab0854d07eff6997 (diff) |
Merge pull request #854 from rg3/youtube_automatic_captions
YoutubeIE: fallback to automatic captions when subtitles aren't found
-rw-r--r-- | test/test_youtube_subtitles.py | 12 | ||||
-rwxr-xr-x | youtube_dl/InfoExtractors.py | 37 |
2 files changed, 47 insertions, 2 deletions
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index a123e6d72..c80c90cbe 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -28,7 +28,9 @@ compat_urllib_request.install_opener(opener) class FakeDownloader(FileDownloader): def __init__(self): self.result = [] - self.params = parameters + # Different instances of the downloader can't share the same dictionary + # some test set the "sublang" parameter, which would break the md5 checks. + self.params = dict(parameters) def to_screen(self, s): print(s) def trouble(self, s, tb=None): @@ -96,6 +98,14 @@ class TestYoutubeSubtitles(unittest.TestCase): IE = YoutubeIE(DL) info_dict = IE.extract('QRS8MkLhQmM') self.assertEqual(info_dict, None) + def test_youtube_automatic_captions(self): + DL = FakeDownloader() + DL.params['writesubtitles'] = True + DL.params['subtitleslang'] = 'it' + IE = YoutubeIE(DL) + info_dict = IE.extract('8YoUxe5ncPo') + sub = info_dict[0]['subtitles'][0] + self.assertTrue(sub[2] is not None) if __name__ == '__main__': unittest.main() diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 7a882b4ae..9fbe6d627 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -376,6 +376,34 @@ class YoutubeIE(InfoExtractor): return (u'Did not fetch video subtitles', None, None) return (None, sub_lang, sub) + def _request_automatic_caption(self, video_id, webpage): + """We need the webpage for getting the captions url, pass it as an + argument to speed up the process.""" + sub_lang = self._downloader.params.get('subtitleslang') + sub_format = self._downloader.params.get('subtitlesformat') + self.to_screen(u'%s: Looking for automatic captions' % video_id) + mobj = re.search(r';ytplayer.config = ({.*?});', webpage) + err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang + if mobj is None: + return [(err_msg, None, None)] + player_config = json.loads(mobj.group(1)) + try: + args = player_config[u'args'] + caption_url = args[u'ttsurl'] + timestamp = args[u'timestamp'] + params = compat_urllib_parse.urlencode({ + 'lang': 'en', + 'tlang': sub_lang, + 'fmt': sub_format, + 'ts': timestamp, + 'kind': 'asr', + }) + subtitles_url = caption_url + '&' + params + sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions') + return [(None, sub_lang, sub)] + except KeyError: + return [(err_msg, None, None)] + def _extract_subtitle(self, video_id): """ Return a list with a tuple: @@ -623,7 +651,14 @@ class YoutubeIE(InfoExtractor): if video_subtitles: (sub_error, sub_lang, sub) = video_subtitles[0] if sub_error: - self._downloader.report_error(sub_error) + # We try with the automatic captions + video_subtitles = self._request_automatic_caption(video_id, video_webpage) + (sub_error_auto, sub_lang, sub) = video_subtitles[0] + if sub is not None: + pass + else: + # We report the original error + self._downloader.report_error(sub_error) if self._downloader.params.get('allsubtitles', False): video_subtitles = self._extract_all_subtitles(video_id) |