diff options
| -rw-r--r-- | test/test_subtitles.py | 5 | ||||
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 24 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 12 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 57 | 
4 files changed, 61 insertions, 37 deletions
| diff --git a/test/test_subtitles.py b/test/test_subtitles.py index 84ae0e714..91cebce28 100644 --- a/test/test_subtitles.py +++ b/test/test_subtitles.py @@ -50,11 +50,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles):      url = 'QRS8MkLhQmM'      IE = YoutubeIE -    def test_youtube_no_writesubtitles(self): -        self.DL.params['writesubtitles'] = False -        subtitles = self.getSubtitles() -        self.assertEqual(subtitles, None) -      def test_youtube_subtitles(self):          self.DL.params['writesubtitles'] = True          subtitles = self.getSubtitles() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 8545dc9e9..a47f8f5de 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -1020,9 +1020,13 @@ class YoutubeDL(object):              info_dict['upload_date'] = upload_date.strftime('%Y%m%d')          if self.params.get('listsubtitles', False): -            self.list_subtitles(info_dict['id'], info_dict.get('subtitles')) +            if 'automatic_captions' in info_dict: +                self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions') +            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')              return -        info_dict['requested_subtitles'] = self.process_subtitles(info_dict['id'], info_dict.get('subtitles')) +        info_dict['requested_subtitles'] = self.process_subtitles( +            info_dict['id'], info_dict.get('subtitles'), +            info_dict.get('automatic_captions'))          # This extractors handle format selection themselves          if info_dict['extractor'] in ['Youku']: @@ -1152,8 +1156,14 @@ class YoutubeDL(object):          info_dict.update(formats_to_download[-1])          return info_dict -    def process_subtitles(self, video_id, available_subs): +    def process_subtitles(self, video_id, available_subs, available_autocaps):          """Select the requested subtitles and their format""" +        if available_autocaps and self.params.get('writeautomaticsub'): +            available_subs = available_subs.copy() +            for lang, cap_info in available_autocaps.items(): +                if lang not in available_subs: +                    available_subs[lang] = cap_info +          if not available_subs:              return available_subs @@ -1645,17 +1655,17 @@ class YoutubeDL(object):              ['ID', 'width', 'height', 'URL'],              [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) -    def list_subtitles(self, video_id, subtitles): +    def list_subtitles(self, video_id, subtitles, name='subtitles'):          if not subtitles: -            self.to_screen('%s has no subtitles' % video_id) +            self.to_screen('%s has no %s' % (video_id, name))              return          header_line = 'Language    formats'          sub_lines = [              '%-12s%s' % (lang, ', '.join(f['ext'] for f in reversed(formats)))              for lang, formats in subtitles.items()]          self.to_screen( -            'Available subtitles for %s:\n%s\n%s' % -            (video_id, header_line, '\n'.join(sub_lines))) +            'Available %s for %s:\n%s\n%s' % +            (name, video_id, header_line, '\n'.join(sub_lines)))      def urlopen(self, req):          """ Start an HTTP download """ diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index d149e0f92..fe7d8dbc9 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -157,6 +157,8 @@ class InfoExtractor(object):                      with the "ext" entry and one of:                          * "data": The subtitles file contents                          * "url": A url pointing to the subtitles file +    automatic_captions: Like 'subtitles', used by the YoutubeIE for +                    automatically generated captions      duration:       Length of the video in seconds, as an integer.      view_count:     How many users have watched the video on the platform.      like_count:     Number of positive ratings of the video @@ -1007,6 +1009,16 @@ class InfoExtractor(object):      def _get_subtitles(self, *args, **kwargs):          raise NotImplementedError("This method must be implemented by subclasses") +    def extract_automatic_captions(self, *args, **kwargs): +        automatic_captions = {} +        list_subtitles = self._downloader.params.get('listsubtitles') +        if self._downloader.params.get('writeautomaticsub', False) or list_subtitles: +            automatic_captions.update(self._get_automatic_captions(*args, **kwargs)) +        return automatic_captions + +    def _get_automatic_captions(self, *args, **kwargs): +        raise NotImplementedError("This method must be implemented by subclasses") +  class SearchInfoExtractor(InfoExtractor):      """ diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 35ef4c303..1b2dbf276 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -11,7 +11,6 @@ import time  import traceback  from .common import InfoExtractor, SearchInfoExtractor -from .subtitles import SubtitlesInfoExtractor  from ..jsinterp import JSInterpreter  from ..swfinterp import SWFInterpreter  from ..compat import ( @@ -185,7 +184,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):              return -class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): +class YoutubeIE(YoutubeBaseInfoExtractor):      IE_DESC = 'YouTube.com'      _VALID_URL = r"""(?x)^                       ( @@ -644,7 +643,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              raise ExtractorError(                  'Signature extraction failed: ' + tb, cause=e) -    def _get_available_subtitles(self, video_id, webpage): +    def _get_subtitles(self, video_id, webpage):          try:              subs_doc = self._download_xml(                  'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, @@ -658,23 +657,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              lang = track.attrib['lang_code']              if lang in sub_lang_list:                  continue -            params = compat_urllib_parse.urlencode({ -                'lang': lang, -                'v': video_id, -                'fmt': self._downloader.params.get('subtitlesformat', 'srt'), -                'name': track.attrib['name'].encode('utf-8'), -            }) -            url = 'https://www.youtube.com/api/timedtext?' + params -            sub_lang_list[lang] = url +            sub_formats = [] +            for ext in ['sbv', 'vtt', 'srt']: +                params = compat_urllib_parse.urlencode({ +                    'lang': lang, +                    'v': video_id, +                    'fmt': ext, +                    'name': track.attrib['name'].encode('utf-8'), +                }) +                sub_formats.append({ +                    'url': 'https://www.youtube.com/api/timedtext?' + params, +                    'ext': ext, +                }) +            sub_lang_list[lang] = sub_formats          if not sub_lang_list:              self._downloader.report_warning('video doesn\'t have subtitles')              return {}          return sub_lang_list -    def _get_available_automatic_caption(self, video_id, webpage): +    def _get_automatic_captions(self, video_id, webpage):          """We need the webpage for getting the captions url, pass it as an             argument to speed up the process.""" -        sub_format = self._downloader.params.get('subtitlesformat', 'srt')          self.to_screen('%s: Looking for automatic captions' % video_id)          mobj = re.search(r';ytplayer.config = ({.*?});', webpage)          err_msg = 'Couldn\'t find automatic captions for %s' % video_id @@ -704,14 +707,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              sub_lang_list = {}              for lang_node in caption_list.findall('target'):                  sub_lang = lang_node.attrib['lang_code'] -                params = compat_urllib_parse.urlencode({ -                    'lang': original_lang, -                    'tlang': sub_lang, -                    'fmt': sub_format, -                    'ts': timestamp, -                    'kind': caption_kind, -                }) -                sub_lang_list[sub_lang] = caption_url + '&' + params +                sub_formats = [] +                for ext in ['sbv', 'vtt', 'srt']: +                    params = compat_urllib_parse.urlencode({ +                        'lang': original_lang, +                        'tlang': sub_lang, +                        'fmt': ext, +                        'ts': timestamp, +                        'kind': caption_kind, +                    }) +                    sub_formats.append({ +                        'url': caption_url + '&' + params, +                        'ext': ext, +                    }) +                sub_lang_list[sub_lang] = sub_formats              return sub_lang_list          # An extractor error can be raise by the download process if there are          # no automatic captions but there are subtitles @@ -966,10 +975,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          # subtitles          video_subtitles = self.extract_subtitles(video_id, video_webpage) - -        if self._downloader.params.get('listsubtitles', False): -            self._list_available_subtitles(video_id, video_webpage) -            return +        automatic_captions = self.extract_automatic_captions(video_id, video_webpage)          if 'length_seconds' not in video_info:              self._downloader.report_warning('unable to extract video duration') @@ -1118,6 +1124,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):              'description': video_description,              'categories': video_categories,              'subtitles': video_subtitles, +            'automatic_captions': automatic_captions,              'duration': video_duration,              'age_limit': 18 if age_gate else 0,              'annotations': video_annotations, | 
