diff options
| -rw-r--r-- | test/parameters.json | 6 | ||||
| -rw-r--r-- | test/test_youtube_subtitles.py | 31 | ||||
| -rw-r--r-- | youtube_dl/FileDownloader.py | 28 | ||||
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 73 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 4 | 
5 files changed, 107 insertions, 35 deletions
| diff --git a/test/parameters.json b/test/parameters.json index 8215d25c5..0d4bd644c 100644 --- a/test/parameters.json +++ b/test/parameters.json @@ -36,5 +36,7 @@      "verbose": true,       "writedescription": false,       "writeinfojson": true,  -    "writesubtitles": false -}
\ No newline at end of file +    "writesubtitles": false, +    "onlysubtitles": false, +    "allsubtitles": false +} diff --git a/test/test_youtube_subtitles.py b/test/test_youtube_subtitles.py index 77c275b75..3b5a53fca 100644 --- a/test/test_youtube_subtitles.py +++ b/test/test_youtube_subtitles.py @@ -38,27 +38,48 @@ class FakeDownloader(object):  md5 = lambda s: hashlib.md5(s.encode('utf-8')).hexdigest()  class TestYoutubeSubtitles(unittest.TestCase): +    def setUp(self): +        DL = FakeDownloader() +        DL.params['allsubtitles'] = False +        DL.params['writesubtitles'] = False +         +    def test_youtube_no_subtitles(self): +        DL = FakeDownloader() +        DL.params['writesubtitles'] = False +        IE = YoutubeIE(DL) +        info_dict = IE.extract('QRS8MkLhQmM') +        subtitles = info_dict[0]['subtitles'] +        self.assertEqual(subtitles, None)      def test_youtube_subtitles(self):          DL = FakeDownloader()          DL.params['writesubtitles'] = True          IE = YoutubeIE(DL)          info_dict = IE.extract('QRS8MkLhQmM') -        self.assertEqual(md5(info_dict[0]['subtitles']), '4cd9278a35ba2305f47354ee13472260') - +        sub = info_dict[0]['subtitles'][0] +        self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260')      def test_youtube_subtitles_it(self):          DL = FakeDownloader()          DL.params['writesubtitles'] = True          DL.params['subtitleslang'] = 'it'          IE = YoutubeIE(DL)          info_dict = IE.extract('QRS8MkLhQmM') -        self.assertEqual(md5(info_dict[0]['subtitles']), '164a51f16f260476a05b50fe4c2f161d') - +        sub = info_dict[0]['subtitles'][0] +        self.assertEqual(md5(sub[2]), '164a51f16f260476a05b50fe4c2f161d')      def test_youtube_onlysubtitles(self):          DL = FakeDownloader() +        DL.params['writesubtitles'] = True          DL.params['onlysubtitles'] = True          IE = YoutubeIE(DL)          info_dict = IE.extract('QRS8MkLhQmM') -        self.assertEqual(md5(info_dict[0]['subtitles']), '4cd9278a35ba2305f47354ee13472260') +        sub = info_dict[0]['subtitles'][0] +        self.assertEqual(md5(sub[2]), '4cd9278a35ba2305f47354ee13472260') +    def test_youtube_allsubtitles(self): +        DL = FakeDownloader() +        DL.params['allsubtitles'] = True +        IE = YoutubeIE(DL) +        info_dict = IE.extract('QRS8MkLhQmM') +        subtitles = info_dict[0]['subtitles'] +        self.assertEqual(len(subtitles), 12)  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 487c9dadb..e496b8a8d 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -80,6 +80,7 @@ class FileDownloader(object):      writeinfojson:     Write the video description to a .info.json file      writesubtitles:    Write the video subtitles to a .srt file      onlysubtitles:     Downloads only the subtitles of the video +    allsubtitles:      Downloads all the subtitles of the video      subtitleslang:     Language of the subtitles to download      test:              Download only first bytes to test the downloader.      keepvideo:         Keep the video file after post-processing @@ -442,18 +443,33 @@ class FileDownloader(object):          if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:              # subtitles download errors are already managed as troubles in relevant IE              # that way it will silently go on when used with unsupporting IE +            subtitle = info_dict['subtitles'][0] +            (srt_error, srt_lang, srt) = subtitle              try: -                srtfn = filename.rsplit('.', 1)[0] + u'.srt' -                if self.params.get('subtitleslang', False): -                    srtfn = filename.rsplit('.', 1)[0] + u'.' + self.params['subtitleslang'] + u'.srt' +                srtfn = filename.rsplit('.', 1)[0] + u'.' + srt_lang + u'.srt'                  self.report_writesubtitles(srtfn)                  with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile: -                    srtfile.write(info_dict['subtitles']) -                if self.params.get('onlysubtitles', False): -                    return  +                    srtfile.write(srt)              except (OSError, IOError):                  self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)                  return +            if self.params.get('onlysubtitles', False): +                return  + +        if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: +            subtitles = info_dict['subtitles'] +            for subtitle in subtitles: +                (srt_error, srt_lang, srt) = subtitle +                try: +                    srtfn = filename.rsplit('.', 1)[0] + u'.' + srt_lang + u'.srt' +                    self.report_writesubtitles(srtfn) +                    with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile: +                            srtfile.write(srt) +                except (OSError, IOError): +                    self.trouble(u'ERROR: Cannot write subtitles file ' + descfn) +                    return +            if self.params.get('onlysubtitles', False): +                return           if self.params.get('writeinfojson', False):              infofn = filename + u'.info.json' diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 51b263383..a220de80a 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -216,6 +216,10 @@ class YoutubeIE(InfoExtractor):          """Report attempt to download video info webpage."""          self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles' % video_id) +    def report_video_subtitles_request(self, video_id, lang): +        """Report attempt to download video info webpage.""" +        self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles for lang: %s' % (video_id,lang)) +      def report_information_extraction(self, video_id):          """Report attempt to extract video information."""          self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id) @@ -228,9 +232,7 @@ class YoutubeIE(InfoExtractor):          """Indicate the download will use the RTMP protocol."""          self._downloader.to_screen(u'[youtube] RTMP download detected') - -    def _extract_subtitles(self, video_id): -        self.report_video_subtitles_download(video_id) +    def _get_available_subtitles(self, video_id):          request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)          try:              srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8') @@ -240,19 +242,15 @@ class YoutubeIE(InfoExtractor):          srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)          if not srt_lang_list:              return (u'WARNING: video has no closed captions', None) -        if self._downloader.params.get('subtitleslang', False): -            srt_lang = self._downloader.params.get('subtitleslang') -        elif 'en' in srt_lang_list: -            srt_lang = 'en' -        else: -            srt_lang = list(srt_lang_list.keys())[0] -        if not srt_lang in srt_lang_list: -            return (u'WARNING: no closed captions found in the specified language "%s"' % srt_lang, None) +        return srt_lang_list + +    def _request_subtitle(self, str_lang, str_name, video_id, format = 'srt'): +        self.report_video_subtitles_request(video_id, str_lang)          params = compat_urllib_parse.urlencode({ -            'lang': srt_lang, -            'name': srt_lang_list[srt_lang].encode('utf-8'), +            'lang': str_lang, +            'name': str_name,              'v': video_id, -            'fmt': 'srt', +            'fmt': format,          })          url = 'http://www.youtube.com/api/timedtext?' + params          try: @@ -261,7 +259,32 @@ class YoutubeIE(InfoExtractor):              return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)          if not srt:              return (u'WARNING: Did not fetch video subtitles', None) -        return (None, srt) +        return (None, str_lang, srt) + +    def _extract_subtitle(self, video_id): +        self.report_video_subtitles_download(video_id) +        srt_lang_list = self._get_available_subtitles(video_id) + +        if self._downloader.params.get('subtitleslang', False): +            srt_lang = self._downloader.params.get('subtitleslang') +        elif 'en' in srt_lang_list: +            srt_lang = 'en' +        else: +            srt_lang = list(srt_lang_list.keys())[0] +        if not srt_lang in srt_lang_list: +            return (u'WARNING: no closed captions found in the specified language "%s"' % srt_lang, None) + +        sub = self._request_subtitle(srt_lang, srt_lang_list[srt_lang].encode('utf-8'), video_id) +        return [sub] + +    def _extract_all_subtitles(self, video_id): +        self.report_video_subtitles_download(video_id) +        srt_lang_list = self._get_available_subtitles(video_id) +        subs = [] +        for srt_lang in srt_lang_list: +            sub = self._request_subtitle(srt_lang, srt_lang_list[srt_lang].encode('utf-8'), video_id) +            subs.append(sub) +        return subs      def _print_formats(self, formats):          print('Available formats:') @@ -484,14 +507,20 @@ class YoutubeIE(InfoExtractor):          # closed captions          video_subtitles = None -        if self._downloader.params.get('subtitleslang', False): -            self._downloader.params['writesubtitles'] = True -        if self._downloader.params.get('onlysubtitles', False): -            self._downloader.params['writesubtitles'] = True +          if self._downloader.params.get('writesubtitles', False): -            (srt_error, video_subtitles) = self._extract_subtitles(video_id) -            if srt_error: -                self._downloader.trouble(srt_error) +            video_subtitles = self._extract_subtitle(video_id) +            if video_subtitles: +                (srt_error, srt_lang, srt) = video_subtitles[0] +                if srt_error: +                    self._downloader.trouble(srt_error) + +        if self._downloader.params.get('allsubtitles', False): +            video_subtitles = self._extract_all_subtitles(video_id) +            for video_subtitle in video_subtitles: +                (srt_error, srt_lang, srt) = video_subtitle +                if srt_error: +                    self._downloader.trouble(srt_error)          if 'length_seconds' not in video_info:              self._downloader.trouble(u'WARNING: unable to extract video duration') diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ababeac87..20a22a4d1 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -179,6 +179,9 @@ def parseOpts():      video_format.add_option('--only-srt',              action='store_true', dest='onlysubtitles',              help='downloads only the subtitles of the video (currently youtube only)', default=False) +    video_format.add_option('--all-srt', +            action='store_true', dest='allsubtitles', +            help='downloads all the available subtitles of the video (currently youtube only)', default=False)      video_format.add_option('--srt-lang',              action='store', dest='subtitleslang', metavar='LANG',              help='language of the closed captions to download (optional) use IETF language tags like \'en\'') @@ -454,6 +457,7 @@ def _real_main():          'writeinfojson': opts.writeinfojson,          'writesubtitles': opts.writesubtitles,          'onlysubtitles': opts.onlysubtitles, +        'allsubtitles': opts.allsubtitles,          'subtitleslang': opts.subtitleslang,          'matchtitle': decodeOption(opts.matchtitle),          'rejecttitle': decodeOption(opts.rejecttitle), | 
