diff options
| author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-06-01 14:18:27 -0700 | 
|---|---|---|
| committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2013-06-01 14:18:27 -0700 | 
| commit | 418f734a5877bb2d5d9cabe2ee158d076a1ef2c7 (patch) | |
| tree | bdc09e1095eba774c92c81c4a5a6efee0d97f330 | |
| parent | 1b2b22ed9f641eef34c05afb4230f2ff0aa57e0f (diff) | |
| parent | dc1c355b7214657d0649cef3ab0854d07eff6997 (diff) | |
Merge pull request #854 from rg3/youtube_automatic_captions
YoutubeIE: fallback to automatic captions when subtitles aren't found
| -rw-r--r-- | test/test_youtube_subtitles.py | 12 |
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 37 |
2 files changed, 47 insertions, 2 deletions
```diff
diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py
index a123e6d72..c80c90cbe 100644
--- a/test/test_youtube_subtitles.py
+++ b/test/test_youtube_subtitles.py
@@ -28,7 +28,9 @@ compat_urllib_request.install_opener(opener)
 class FakeDownloader(FileDownloader):
     def __init__(self):
         self.result = []
-        self.params = parameters
+        # Different instances of the downloader can't share the same dictionary
+        # some test set the "sublang" parameter, which would break the md5 checks.
+        self.params = dict(parameters)
     def to_screen(self, s):
         print(s)
     def trouble(self, s, tb=None):
@@ -96,6 +98,14 @@ class TestYoutubeSubtitles(unittest.TestCase):
         IE = YoutubeIE(DL)
         info_dict = IE.extract('QRS8MkLhQmM')
         self.assertEqual(info_dict, None)
+    def test_youtube_automatic_captions(self):
+        DL = FakeDownloader()
+        DL.params['writesubtitles'] = True
+        DL.params['subtitleslang'] = 'it'
+        IE = YoutubeIE(DL)
+        info_dict = IE.extract('8YoUxe5ncPo')
+        sub = info_dict[0]['subtitles'][0]
+        self.assertTrue(sub[2] is not None)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 7a882b4ae..9fbe6d627 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -376,6 +376,34 @@ class YoutubeIE(InfoExtractor):
             return (u'Did not fetch video subtitles', None, None)
         return (None, sub_lang, sub)
 
+    def _request_automatic_caption(self, video_id, webpage):
+        """We need the webpage for getting the captions url, pass it as an
+           argument to speed up the process."""
+        sub_lang = self._downloader.params.get('subtitleslang')
+        sub_format = self._downloader.params.get('subtitlesformat')
+        self.to_screen(u'%s: Looking for automatic captions' % video_id)
+        mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
+        err_msg = u'Couldn\'t find automatic captions for "%s"' % sub_lang
+        if mobj is None:
+            return [(err_msg, None, None)]
+        player_config = json.loads(mobj.group(1))
+        try:
+            args = player_config[u'args']
+            caption_url = args[u'ttsurl']
+            timestamp = args[u'timestamp']
+            params = compat_urllib_parse.urlencode({
+                'lang': 'en',
+                'tlang': sub_lang,
+                'fmt': sub_format,
+                'ts': timestamp,
+                'kind': 'asr',
+            })
+            subtitles_url = caption_url + '&' + params
+            sub = self._download_webpage(subtitles_url, video_id, u'Downloading automatic captions')
+            return [(None, sub_lang, sub)]
+        except KeyError:
+            return [(err_msg, None, None)]
+
     def _extract_subtitle(self, video_id):
         """
         Return a list with a tuple:
@@ -623,7 +651,14 @@ class YoutubeIE(InfoExtractor):
             if video_subtitles:
                 (sub_error, sub_lang, sub) = video_subtitles[0]
                 if sub_error:
-                    self._downloader.report_error(sub_error)
+                    # We try with the automatic captions
+                    video_subtitles = self._request_automatic_caption(video_id, video_webpage)
+                    (sub_error_auto, sub_lang, sub) = video_subtitles[0]
+                    if sub is not None:
+                        pass
+                    else:
+                        # We report the original error
+                        self._downloader.report_error(sub_error)
 
         if self._downloader.params.get('allsubtitles', False):
             video_subtitles = self._extract_all_subtitles(video_id)
```
