diff options
author | Ismael Mejia <iemejia@gmail.com> | 2013-02-22 02:52:55 +0100 |
---|---|---|
committer | Filippo Valsorda <filippo.valsorda@gmail.com> | 2013-03-20 08:41:53 +0100 |
commit | ae608b8076497d70e2a95e5e939c1fb31e2dde53 (patch) | |
tree | b61cdb2a029df436b4fcfd11a5db108bcd69d5d6 /youtube_dl/InfoExtractors.py | |
parent | cdb130b09a16865b81fd34d19b74fa634d45cad7 (diff) |
Added new option '--all-srt' to download all the subtitles of a video.
Only works for YouTube for the moment.
Diffstat (limited to 'youtube_dl/InfoExtractors.py')
-rwxr-xr-x | youtube_dl/InfoExtractors.py | 73 |
1 file changed, 51 insertions, 22 deletions
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 51b263383..a220de80a 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -216,6 +216,10 @@ class YoutubeIE(InfoExtractor): """Report attempt to download video info webpage.""" self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles' % video_id) + def report_video_subtitles_request(self, video_id, lang): + """Report attempt to download video info webpage.""" + self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles for lang: %s' % (video_id,lang)) + def report_information_extraction(self, video_id): """Report attempt to extract video information.""" self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id) @@ -228,9 +232,7 @@ class YoutubeIE(InfoExtractor): """Indicate the download will use the RTMP protocol.""" self._downloader.to_screen(u'[youtube] RTMP download detected') - - def _extract_subtitles(self, video_id): - self.report_video_subtitles_download(video_id) + def _get_available_subtitles(self, video_id): request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) try: srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8') @@ -240,19 +242,15 @@ class YoutubeIE(InfoExtractor): srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) if not srt_lang_list: return (u'WARNING: video has no closed captions', None) - if self._downloader.params.get('subtitleslang', False): - srt_lang = self._downloader.params.get('subtitleslang') - elif 'en' in srt_lang_list: - srt_lang = 'en' - else: - srt_lang = list(srt_lang_list.keys())[0] - if not srt_lang in srt_lang_list: - return (u'WARNING: no closed captions found in the specified language "%s"' % srt_lang, None) + return srt_lang_list + + def _request_subtitle(self, str_lang, str_name, video_id, format = 'srt'): + self.report_video_subtitles_request(video_id, str_lang) params = 
compat_urllib_parse.urlencode({ - 'lang': srt_lang, - 'name': srt_lang_list[srt_lang].encode('utf-8'), + 'lang': str_lang, + 'name': str_name, 'v': video_id, - 'fmt': 'srt', + 'fmt': format, }) url = 'http://www.youtube.com/api/timedtext?' + params try: @@ -261,7 +259,32 @@ class YoutubeIE(InfoExtractor): return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None) if not srt: return (u'WARNING: Did not fetch video subtitles', None) - return (None, srt) + return (None, str_lang, srt) + + def _extract_subtitle(self, video_id): + self.report_video_subtitles_download(video_id) + srt_lang_list = self._get_available_subtitles(video_id) + + if self._downloader.params.get('subtitleslang', False): + srt_lang = self._downloader.params.get('subtitleslang') + elif 'en' in srt_lang_list: + srt_lang = 'en' + else: + srt_lang = list(srt_lang_list.keys())[0] + if not srt_lang in srt_lang_list: + return (u'WARNING: no closed captions found in the specified language "%s"' % srt_lang, None) + + sub = self._request_subtitle(srt_lang, srt_lang_list[srt_lang].encode('utf-8'), video_id) + return [sub] + + def _extract_all_subtitles(self, video_id): + self.report_video_subtitles_download(video_id) + srt_lang_list = self._get_available_subtitles(video_id) + subs = [] + for srt_lang in srt_lang_list: + sub = self._request_subtitle(srt_lang, srt_lang_list[srt_lang].encode('utf-8'), video_id) + subs.append(sub) + return subs def _print_formats(self, formats): print('Available formats:') @@ -484,14 +507,20 @@ class YoutubeIE(InfoExtractor): # closed captions video_subtitles = None - if self._downloader.params.get('subtitleslang', False): - self._downloader.params['writesubtitles'] = True - if self._downloader.params.get('onlysubtitles', False): - self._downloader.params['writesubtitles'] = True + if self._downloader.params.get('writesubtitles', False): - (srt_error, video_subtitles) = self._extract_subtitles(video_id) - if srt_error: - 
self._downloader.trouble(srt_error) + video_subtitles = self._extract_subtitle(video_id) + if video_subtitles: + (srt_error, srt_lang, srt) = video_subtitles[0] + if srt_error: + self._downloader.trouble(srt_error) + + if self._downloader.params.get('allsubtitles', False): + video_subtitles = self._extract_all_subtitles(video_id) + for video_subtitle in video_subtitles: + (srt_error, srt_lang, srt) = video_subtitle + if srt_error: + self._downloader.trouble(srt_error) if 'length_seconds' not in video_info: self._downloader.trouble(u'WARNING: unable to extract video duration') |