diff options
| author | Filippo Valsorda <filippo.valsorda@gmail.com> | 2012-12-20 11:26:38 +0100 | 
|---|---|---|
| committer | Filippo Valsorda <filippo.valsorda@gmail.com> | 2012-12-20 14:20:24 +0100 | 
| commit | 056d857571158264aefb8d9f7d47c0dad768be63 (patch) | |
| tree | ab5aabb5ee64f84167695e39cff328e3289dd21b | |
| parent | 69a388319909456f4f666e500f5648589ada10b8 (diff) | |
refactor YouTube subtitles code, it was ugly (my bad)
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 60 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 8 | 
2 files changed, 31 insertions, 37 deletions
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index cf5b51bd8..aa4a6500b 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -219,6 +219,34 @@ class YoutubeIE(InfoExtractor):              srt += caption + '\n\n'          return srt +    def _extract_subtitles(self, video_id): +        self.report_video_subtitles_download(video_id) +        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) +        try: +            srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8') +        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: +            return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None) +        srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list) +        srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) +        if not srt_lang_list: +            return (u'WARNING: video has no closed captions', None) +        if self._downloader.params.get('subtitleslang', False): +            srt_lang = self._downloader.params.get('subtitleslang') +        elif 'en' in srt_lang_list: +            srt_lang = 'en' +        else: +            srt_lang = srt_lang_list.keys()[0] +        if not srt_lang in srt_lang_list: +            return (u'WARNING: no closed captions found in the specified language', None) +        request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id)) +        try: +            srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8') +        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: +            return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None) +        if not srt_xml: +            return (u'WARNING: unable to download video subtitles', None) +        return (None, self._closed_captions_xml_to_srt(srt_xml)) +      def _print_formats(self, formats):          print('Available formats:')          for x in formats: @@ -395,35 +423,9 @@ class YoutubeIE(InfoExtractor):          # closed captions          video_subtitles = None          if self._downloader.params.get('writesubtitles', False): -            try: -                self.report_video_subtitles_download(video_id) -                request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) -                try: -                    srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8') -                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: -                    raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) -                srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list) -                srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) -                if not srt_lang_list: -                    raise Trouble(u'WARNING: video has no closed captions') -                if self._downloader.params.get('subtitleslang', False): -                    srt_lang = self._downloader.params.get('subtitleslang') -                elif 'en' in srt_lang_list: -                    srt_lang = 'en' -                else: -                    srt_lang = srt_lang_list.keys()[0] -                if not srt_lang in srt_lang_list: -                    raise Trouble(u'WARNING: no closed captions found in the specified language') -                request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id)) -                try: -                    srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8') -                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: -                    raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) -                if not srt_xml: -                    raise Trouble(u'WARNING: unable to download video subtitles') -                video_subtitles = self._closed_captions_xml_to_srt(srt_xml) -            except Trouble as trouble: -                self._downloader.trouble(str(trouble)) +            (srt_error, video_subtitles) = self._extract_subtitles(video_id) +            if srt_error: +                self._downloader.trouble(srt_error)          if 'length_seconds' not in video_info:              self._downloader.trouble(u'WARNING: unable to extract video duration') diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a5196b0ae..c18c9beed 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -465,14 +465,6 @@ class ContentTooShortError(Exception):          self.downloaded = downloaded          self.expected = expected - -class Trouble(Exception): -    """Trouble helper exception - -    This is an exception to be handled with -    FileDownloader.trouble -    """ -  class YoutubeDLHandler(compat_urllib_request.HTTPHandler):      """Handler for HTTP requests and responses.  | 
