diff options
author | Remita Amine <remitamine@gmail.com> | 2020-12-24 16:05:03 +0100 |
---|---|---|
committer | Remita Amine <remitamine@gmail.com> | 2020-12-24 16:05:03 +0100 |
commit | 4ef1fc970727a1bbdd9f20bea54c37deb8794fb0 (patch) | |
tree | f165bbee4b3d0cbab3345a98f4e6475722ccc07e | |
parent | f9e6aa1dcfa5e5731f04b407ac00ef24009891a3 (diff) |
[youtube] fix automatic captions extraction(closes #27162)(closes #27388)
-rw-r--r-- | youtube_dl/extractor/youtube.py | 32 |
1 file changed, 14 insertions, 18 deletions
```diff
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 915f9b485..19df6b412 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1322,17 +1322,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             return self._parse_json(
                 uppercase_escape(config), video_id, fatal=False)
 
-    def _get_automatic_captions(self, video_id, webpage):
+    def _get_automatic_captions(self, video_id, player_response, player_config):
         """We need the webpage for getting the captions url, pass it as an
            argument to speed up the process."""
         self.to_screen('%s: Looking for automatic captions' % video_id)
-        player_config = self._get_ytplayer_config(video_id, webpage)
         err_msg = 'Couldn\'t find automatic captions for %s' % video_id
-        if not player_config:
+        if not (player_response or player_config):
             self._downloader.report_warning(err_msg)
             return {}
         try:
-            args = player_config['args']
+            args = player_config.get('args') if player_config else {}
             caption_url = args.get('ttsurl')
             if caption_url:
                 timestamp = args['timestamp']
@@ -1391,19 +1390,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 return captions
 
             # New captions format as of 22.06.2017
-            player_response = args.get('player_response')
-            if player_response and isinstance(player_response, compat_str):
-                player_response = self._parse_json(
-                    player_response, video_id, fatal=False)
-                if player_response:
-                    renderer = player_response['captions']['playerCaptionsTracklistRenderer']
-                    base_url = renderer['captionTracks'][0]['baseUrl']
-                    sub_lang_list = []
-                    for lang in renderer['translationLanguages']:
-                        lang_code = lang.get('languageCode')
-                        if lang_code:
-                            sub_lang_list.append(lang_code)
-                    return make_captions(base_url, sub_lang_list)
+            if player_response:
+                renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+                base_url = renderer['captionTracks'][0]['baseUrl']
+                sub_lang_list = []
+                for lang in renderer['translationLanguages']:
+                    lang_code = lang.get('languageCode')
+                    if lang_code:
+                        sub_lang_list.append(lang_code)
+                return make_captions(base_url, sub_lang_list)
 
             # Some videos don't provide ttsurl but rather caption_tracks and
             # caption_translation_languages (e.g. 20LmZk1hakA)
@@ -1652,6 +1647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         # Get video info
         video_info = {}
         embed_webpage = None
+        ytplayer_config = None
         if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None:
             age_gate = True
 
@@ -2276,7 +2272,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
         # subtitles
         video_subtitles = self.extract_subtitles(video_id, video_webpage)
-        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
+        automatic_captions = self.extract_automatic_captions(video_id, player_response, ytplayer_config)
 
         video_duration = try_get(
             video_info, lambda x: int_or_none(x['length_seconds'][0]))
```