diff options
Diffstat (limited to 'youtube_dl/YoutubeDL.py')
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 468 | 
1 files changed, 311 insertions, 157 deletions
| diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 702a6ad50..50425b8d7 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -21,6 +21,7 @@ import subprocess  import socket  import sys  import time +import tokenize  import traceback  if os.name == 'nt': @@ -34,22 +35,24 @@ from .compat import (      compat_http_client,      compat_kwargs,      compat_str, +    compat_tokenize_tokenize,      compat_urllib_error,      compat_urllib_request, +    compat_urllib_request_DataHandler,  )  from .utils import ( -    escape_url,      ContentTooShortError,      date_from_str,      DateRange,      DEFAULT_OUTTMPL,      determine_ext,      DownloadError, +    encode_compat_str,      encodeFilename, +    error_to_compat_str,      ExtractorError,      format_bytes,      formatSeconds, -    HEADRequest,      locked_file,      make_HTTPS_handler,      MaxDownloadsReached, @@ -63,6 +66,7 @@ from .utils import (      SameFileError,      sanitize_filename,      sanitize_path, +    sanitized_Request,      std_headers,      subtitles_filename,      UnavailableVideoError, @@ -70,6 +74,7 @@ from .utils import (      version_tuple,      write_json_file,      write_string, +    YoutubeDLCookieProcessor,      YoutubeDLHandler,      prepend_extension,      replace_extension, @@ -155,7 +160,7 @@ class YoutubeDL(object):      writethumbnail:    Write the thumbnail image to a file      write_all_thumbnails:  Write all thumbnail formats to files      writesubtitles:    Write the video subtitles to a file -    writeautomaticsub: Write the automatic subtitles to a file +    writeautomaticsub: Write the automatically generated subtitles to a file      allsubtitles:      Downloads all the subtitles of the video                         (requires writesubtitles or writeautomaticsub)      listsubtitles:     Lists all available subtitles for the video @@ -285,7 +290,11 @@ class YoutubeDL(object):          self._num_downloads = 0          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]          self._err_file = sys.stderr -        self.params = params +        self.params = { +            # Default parameters +            'nocheckcertificate': False, +        } +        self.params.update(params)          self.cache = Cache(self)          if params.get('bidi_workaround', False): @@ -488,7 +497,7 @@ class YoutubeDL(object):                      tb = ''                      if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:                          tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) -                    tb += compat_str(traceback.format_exc()) +                    tb += encode_compat_str(traceback.format_exc())                  else:                      tb_data = traceback.format_list(traceback.extract_stack())                      tb = ''.join(tb_data) @@ -567,7 +576,7 @@ class YoutubeDL(object):                                   if v is not None)              template_dict = collections.defaultdict(lambda: 'NA', template_dict) -            outtmpl = sanitize_path(self.params.get('outtmpl', DEFAULT_OUTTMPL)) +            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)              tmpl = compat_expanduser(outtmpl)              filename = tmpl % template_dict              # Temporary fix for #4787 @@ -575,7 +584,7 @@ class YoutubeDL(object):              # to workaround encoding issues with subprocess on python2 @ Windows              if sys.version_info < (3, 0) and sys.platform == 'win32':                  filename = encodeFilename(filename, True).decode(preferredencoding()) -            return filename +            return sanitize_path(filename)          except ValueError as err:              self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')              return None @@ -667,14 +676,14 @@ class YoutubeDL(object):                      return self.process_ie_result(ie_result, download, extra_info)                  else:                      return ie_result -            except ExtractorError as de:  # An error we somewhat expected -                self.report_error(compat_str(de), de.format_traceback()) +            except ExtractorError as e:  # An error we somewhat expected +                self.report_error(compat_str(e), e.format_traceback())                  break              except MaxDownloadsReached:                  raise              except Exception as e:                  if self.params.get('ignoreerrors', False): -                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc())) +                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))                      break                  else:                      raise @@ -828,6 +837,7 @@ class YoutubeDL(object):                                                        extra_info=extra)                  playlist_results.append(entry_result)              ie_result['entries'] = playlist_results +            self.to_screen('[download] Finished downloading playlist: %s' % playlist)              return ie_result          elif result_type == 'compat_list':              self.report_warning( @@ -853,8 +863,8 @@ class YoutubeDL(object):          else:              raise Exception('Invalid result type: %s' % result_type) -    def _apply_format_filter(self, format_spec, available_formats): -        " Returns a tuple of the remaining format_spec and filtered formats " +    def _build_format_filter(self, filter_spec): +        " Returns a function to filter the formats according to the filter_spec "          OPERATORS = {              '<': operator.lt, @@ -864,13 +874,13 @@ class YoutubeDL(object):              '=': operator.eq,              '!=': operator.ne,          } -        operator_rex = re.compile(r'''(?x)\s*\[ +        operator_rex = re.compile(r'''(?x)\s*              (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)              \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*              (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?) -            \]$ +            $              ''' % '|'.join(map(re.escape, OPERATORS.keys()))) -        m = operator_rex.search(format_spec) +        m = operator_rex.search(filter_spec)          if m:              try:                  comparison_value = int(m.group('value')) @@ -881,7 +891,7 @@ class YoutubeDL(object):                  if comparison_value is None:                      raise ValueError(                          'Invalid value %r in format specification %r' % ( -                            m.group('value'), format_spec)) +                            m.group('value'), filter_spec))              op = OPERATORS[m.group('op')]          if not m: @@ -889,85 +899,289 @@ class YoutubeDL(object):                  '=': operator.eq,                  '!=': operator.ne,              } -            str_operator_rex = re.compile(r'''(?x)\s*\[ +            str_operator_rex = re.compile(r'''(?x)                  \s*(?P<key>ext|acodec|vcodec|container|protocol)                  \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?                  \s*(?P<value>[a-zA-Z0-9_-]+) -                \s*\]$ +                \s*$                  ''' % '|'.join(map(re.escape, STR_OPERATORS.keys()))) -            m = str_operator_rex.search(format_spec) +            m = str_operator_rex.search(filter_spec)              if m:                  comparison_value = m.group('value')                  op = STR_OPERATORS[m.group('op')]          if not m: -            raise ValueError('Invalid format specification %r' % format_spec) +            raise ValueError('Invalid filter specification %r' % filter_spec)          def _filter(f):              actual_value = f.get(m.group('key'))              if actual_value is None:                  return m.group('none_inclusive')              return op(actual_value, comparison_value) -        new_formats = [f for f in available_formats if _filter(f)] +        return _filter + +    def build_format_selector(self, format_spec): +        def syntax_error(note, start): +            message = ( +                'Invalid format specification: ' +                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1])) +            return SyntaxError(message) + +        PICKFIRST = 'PICKFIRST' +        MERGE = 'MERGE' +        SINGLE = 'SINGLE' +        GROUP = 'GROUP' +        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters']) + +        def _parse_filter(tokens): +            filter_parts = [] +            for type, string, start, _, _ in tokens: +                if type == tokenize.OP and string == ']': +                    return ''.join(filter_parts) +                else: +                    filter_parts.append(string) + +        def _remove_unused_ops(tokens): +            # Remove operators that we don't use and join them with the surrounding strings +            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' +            ALLOWED_OPS = ('/', '+', ',', '(', ')') +            last_string, last_start, last_end, last_line = None, None, None, None +            for type, string, start, end, line in tokens: +                if type == tokenize.OP and string == '[': +                    if last_string: +                        yield tokenize.NAME, last_string, last_start, last_end, last_line +                        last_string = None +                    yield type, string, start, end, line +                    # everything inside brackets will be handled by _parse_filter +                    for type, string, start, end, line in tokens: +                        yield type, string, start, end, line +                        if type == tokenize.OP and string == ']': +                            break +                elif type == tokenize.OP and string in ALLOWED_OPS: +                    if last_string: +                        yield tokenize.NAME, last_string, last_start, last_end, last_line +                        last_string = None +                    yield type, string, start, end, line +                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: +                    if not last_string: +                        last_string = string +                        last_start = start +                        last_end = end +                    else: +                        last_string += string +            if last_string: +                yield tokenize.NAME, last_string, last_start, last_end, last_line + +        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): +            selectors = [] +            current_selector = None +            for type, string, start, _, _ in tokens: +                # ENCODING is only defined in python 3.x +                if type == getattr(tokenize, 'ENCODING', None): +                    continue +                elif type in [tokenize.NAME, tokenize.NUMBER]: +                    current_selector = FormatSelector(SINGLE, string, []) +                elif type == tokenize.OP: +                    if string == ')': +                        if not inside_group: +                            # ')' will be handled by the parentheses group +                            tokens.restore_last_token() +                        break +                    elif inside_merge and string in ['/', ',']: +                        tokens.restore_last_token() +                        break +                    elif inside_choice and string == ',': +                        tokens.restore_last_token() +                        break +                    elif string == ',': +                        if not current_selector: +                            raise syntax_error('"," must follow a format selector', start) +                        selectors.append(current_selector) +                        current_selector = None +                    elif string == '/': +                        if not current_selector: +                            raise syntax_error('"/" must follow a format selector', start) +                        first_choice = current_selector +                        second_choice = _parse_format_selection(tokens, inside_choice=True) +                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), []) +                    elif string == '[': +                        if not current_selector: +                            current_selector = FormatSelector(SINGLE, 'best', []) +                        format_filter = _parse_filter(tokens) +                        current_selector.filters.append(format_filter) +                    elif string == '(': +                        if current_selector: +                            raise syntax_error('Unexpected "("', start) +                        group = _parse_format_selection(tokens, inside_group=True) +                        current_selector = FormatSelector(GROUP, group, []) +                    elif string == '+': +                        video_selector = current_selector +                        audio_selector = _parse_format_selection(tokens, inside_merge=True) +                        if not video_selector or not audio_selector: +                            raise syntax_error('"+" must be between two format selectors', start) +                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), []) +                    else: +                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start) +                elif type == tokenize.ENDMARKER: +                    break +            if current_selector: +                selectors.append(current_selector) +            return selectors + +        def _build_selector_function(selector): +            if isinstance(selector, list): +                fs = [_build_selector_function(s) for s in selector] + +                def selector_function(formats): +                    for f in fs: +                        for format in f(formats): +                            yield format +                return selector_function +            elif selector.type == GROUP: +                selector_function = _build_selector_function(selector.selector) +            elif selector.type == PICKFIRST: +                fs = [_build_selector_function(s) for s in selector.selector] + +                def selector_function(formats): +                    for f in fs: +                        picked_formats = list(f(formats)) +                        if picked_formats: +                            return picked_formats +                    return [] +            elif selector.type == SINGLE: +                format_spec = selector.selector + +                def selector_function(formats): +                    formats = list(formats) +                    if not formats: +                        return +                    if format_spec == 'all': +                        for f in formats: +                            yield f +                    elif format_spec in ['best', 'worst', None]: +                        format_idx = 0 if format_spec == 'worst' else -1 +                        audiovideo_formats = [ +                            f for f in formats +                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] +                        if audiovideo_formats: +                            yield audiovideo_formats[format_idx] +                        # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format +                        elif (all(f.get('acodec') != 'none' for f in formats) or +                              all(f.get('vcodec') != 'none' for f in formats)): +                            yield formats[format_idx] +                    elif format_spec == 'bestaudio': +                        audio_formats = [ +                            f for f in formats +                            if f.get('vcodec') == 'none'] +                        if audio_formats: +                            yield audio_formats[-1] +                    elif format_spec == 'worstaudio': +                        audio_formats = [ +                            f for f in formats +                            if f.get('vcodec') == 'none'] +                        if audio_formats: +                            yield audio_formats[0] +                    elif format_spec == 'bestvideo': +                        video_formats = [ +                            f for f in formats +                            if f.get('acodec') == 'none'] +                        if video_formats: +                            yield video_formats[-1] +                    elif format_spec == 'worstvideo': +                        video_formats = [ +                            f for f in formats +                            if f.get('acodec') == 'none'] +                        if video_formats: +                            yield video_formats[0] +                    else: +                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] +                        if format_spec in extensions: +                            filter_f = lambda f: f['ext'] == format_spec +                        else: +                            filter_f = lambda f: f['format_id'] == format_spec +                        matches = list(filter(filter_f, formats)) +                        if matches: +                            yield matches[-1] +            elif selector.type == MERGE: +                def _merge(formats_info): +                    format_1, format_2 = [f['format_id'] for f in formats_info] +                    # The first format must contain the video and the +                    # second the audio +                    if formats_info[0].get('vcodec') == 'none': +                        self.report_error('The first format must ' +                                          'contain the video, try using ' +                                          '"-f %s+%s"' % (format_2, format_1)) +                        return +                    # Formats must be opposite (video+audio) +                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none': +                        self.report_error( +                            'Both formats %s and %s are video-only, you must specify "-f video+audio"' +                            % (format_1, format_2)) +                        return +                    output_ext = ( +                        formats_info[0]['ext'] +                        if self.params.get('merge_output_format') is None +                        else self.params['merge_output_format']) +                    return { +                        'requested_formats': formats_info, +                        'format': '%s+%s' % (formats_info[0].get('format'), +                                             formats_info[1].get('format')), +                        'format_id': '%s+%s' % (formats_info[0].get('format_id'), +                                                formats_info[1].get('format_id')), +                        'width': formats_info[0].get('width'), +                        'height': formats_info[0].get('height'), +                        'resolution': formats_info[0].get('resolution'), +                        'fps': formats_info[0].get('fps'), +                        'vcodec': formats_info[0].get('vcodec'), +                        'vbr': formats_info[0].get('vbr'), +                        'stretched_ratio': formats_info[0].get('stretched_ratio'), +                        'acodec': formats_info[1].get('acodec'), +                        'abr': formats_info[1].get('abr'), +                        'ext': output_ext, +                    } +                video_selector, audio_selector = map(_build_selector_function, selector.selector) -        new_format_spec = format_spec[:-len(m.group(0))] -        if not new_format_spec: -            new_format_spec = 'best' +                def selector_function(formats): +                    formats = list(formats) +                    for pair in itertools.product(video_selector(formats), audio_selector(formats)): +                        yield _merge(pair) -        return (new_format_spec, new_formats) +            filters = [self._build_format_filter(f) for f in selector.filters] -    def select_format(self, format_spec, available_formats): -        while format_spec.endswith(']'): -            format_spec, available_formats = self._apply_format_filter( -                format_spec, available_formats) -        if not available_formats: -            return None +            def final_selector(formats): +                for _filter in filters: +                    formats = list(filter(_filter, formats)) +                return selector_function(formats) +            return final_selector -        if format_spec in ['best', 'worst', None]: -            format_idx = 0 if format_spec == 'worst' else -1 -            audiovideo_formats = [ -                f for f in available_formats -                if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] -            if audiovideo_formats: -                return audiovideo_formats[format_idx] -            # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format -            elif (all(f.get('acodec') != 'none' for f in available_formats) or -                  all(f.get('vcodec') != 'none' for f in available_formats)): -                return available_formats[format_idx] -        elif format_spec == 'bestaudio': -            audio_formats = [ -                f for f in available_formats -                if f.get('vcodec') == 'none'] -            if audio_formats: -                return audio_formats[-1] -        elif format_spec == 'worstaudio': -            audio_formats = [ -                f for f in available_formats -                if f.get('vcodec') == 'none'] -            if audio_formats: -                return audio_formats[0] -        elif format_spec == 'bestvideo': -            video_formats = [ -                f for f in available_formats -                if f.get('acodec') == 'none'] -            if video_formats: -                return video_formats[-1] -        elif format_spec == 'worstvideo': -            video_formats = [ -                f for f in available_formats -                if f.get('acodec') == 'none'] -            if video_formats: -                return video_formats[0] -        else: -            extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] -            if format_spec in extensions: -                filter_f = lambda f: f['ext'] == format_spec -            else: -                filter_f = lambda f: f['format_id'] == format_spec -            matches = list(filter(filter_f, available_formats)) -            if matches: -                return matches[-1] -        return None +        stream = io.BytesIO(format_spec.encode('utf-8')) +        try: +            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) +        except tokenize.TokenError: +            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) + +        class TokenIterator(object): +            def __init__(self, tokens): +                self.tokens = tokens +                self.counter = 0 + +            def __iter__(self): +                return self + +            def __next__(self): +                if self.counter >= len(self.tokens): +                    raise StopIteration() +                value = self.tokens[self.counter] +                self.counter += 1 +                return value + +            next = __next__ + +            def restore_last_token(self): +                self.counter -= 1 + +        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens))) +        return _build_selector_function(parsed_selector)      def _calc_headers(self, info_dict):          res = std_headers.copy() @@ -983,7 +1197,7 @@ class YoutubeDL(object):          return res      def _calc_cookies(self, info_dict): -        pr = compat_urllib_request.Request(info_dict['url']) +        pr = sanitized_Request(info_dict['url'])          self.cookiejar.add_cookie_header(pr)          return pr.get_header('Cookie') @@ -1030,13 +1244,20 @@ class YoutubeDL(object):              except (ValueError, OverflowError, OSError):                  pass +        subtitles = info_dict.get('subtitles') +        if subtitles: +            for _, subtitle in subtitles.items(): +                for subtitle_format in subtitle: +                    if 'ext' not in subtitle_format: +                        subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() +          if self.params.get('listsubtitles', False):              if 'automatic_captions' in info_dict:                  self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions') -            self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles') +            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')              return          info_dict['requested_subtitles'] = self.process_subtitles( -            info_dict['id'], info_dict.get('subtitles'), +            info_dict['id'], subtitles,              info_dict.get('automatic_captions'))          # We now pick which formats have to be downloaded @@ -1111,56 +1332,8 @@ class YoutubeDL(object):                      req_format_list.append('bestvideo+bestaudio')              req_format_list.append('best')              req_format = '/'.join(req_format_list) -        formats_to_download = [] -        if req_format == 'all': -            formats_to_download = formats -        else: -            for rfstr in req_format.split(','): -                # We can accept formats requested in the format: 34/5/best, we pick -                # the first that is available, starting from left -                req_formats = rfstr.split('/') -                for rf in req_formats: -                    if re.match(r'.+?\+.+?', rf) is not None: -                        # Two formats have been requested like '137+139' -                        format_1, format_2 = rf.split('+') -                        formats_info = (self.select_format(format_1, formats), -                                        self.select_format(format_2, formats)) -                        if all(formats_info): -                            # The first format must contain the video and the -                            # second the audio -                            if formats_info[0].get('vcodec') == 'none': -                                self.report_error('The first format must ' -                                                  'contain the video, try using ' -                                                  '"-f %s+%s"' % (format_2, format_1)) -                                return -                            output_ext = ( -                                formats_info[0]['ext'] -                                if self.params.get('merge_output_format') is None -                                else self.params['merge_output_format']) -                            selected_format = { -                                'requested_formats': formats_info, -                                'format': '%s+%s' % (formats_info[0].get('format'), -                                                     formats_info[1].get('format')), -                                'format_id': '%s+%s' % (formats_info[0].get('format_id'), -                                                        formats_info[1].get('format_id')), -                                'width': formats_info[0].get('width'), -                                'height': formats_info[0].get('height'), -                                'resolution': formats_info[0].get('resolution'), -                                'fps': formats_info[0].get('fps'), -                                'vcodec': formats_info[0].get('vcodec'), -                                'vbr': formats_info[0].get('vbr'), -                                'stretched_ratio': formats_info[0].get('stretched_ratio'), -                                'acodec': formats_info[1].get('acodec'), -                                'abr': formats_info[1].get('abr'), -                                'ext': output_ext, -                            } -                        else: -                            selected_format = None -                    else: -                        selected_format = self.select_format(rf, formats) -                    if selected_format is not None: -                        formats_to_download.append(selected_format) -                        break +        format_selector = self.build_format_selector(req_format) +        formats_to_download = list(format_selector(formats))          if not formats_to_download:              raise ExtractorError('requested format not available',                                   expected=True) @@ -1288,7 +1461,7 @@ class YoutubeDL(object):              if dn and not os.path.exists(dn):                  os.makedirs(dn)          except (OSError, IOError) as err: -            self.report_error('unable to create directory ' + compat_str(err)) +            self.report_error('unable to create directory ' + error_to_compat_str(err))              return          if self.params.get('writedescription', False): @@ -1339,7 +1512,7 @@ class YoutubeDL(object):                              sub_info['url'], info_dict['id'], note=False)                      except ExtractorError as err:                          self.report_warning('Unable to download subtitle for "%s": %s' % -                                            (sub_lang, compat_str(err.cause))) +                                            (sub_lang, error_to_compat_str(err.cause)))                          continue                  try:                      sub_filename = subtitles_filename(filename, sub_lang, sub_format) @@ -1708,27 +1881,8 @@ class YoutubeDL(object):      def urlopen(self, req):          """ Start an HTTP download """ - -        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not -        # always respected by websites, some tend to give out URLs with non percent-encoded -        # non-ASCII characters (see telemb.py, ard.py [#3412]) -        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) -        # To work around aforementioned issue we will replace request's original URL with -        # percent-encoded one -        req_is_string = isinstance(req, compat_basestring) -        url = req if req_is_string else req.get_full_url() -        url_escaped = escape_url(url) - -        # Substitute URL if any change after escaping -        if url != url_escaped: -            if req_is_string: -                req = url_escaped -            else: -                req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request -                req = req_type( -                    url_escaped, data=req.data, headers=req.headers, -                    origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) - +        if isinstance(req, compat_basestring): +            req = sanitized_Request(req)          return self._opener.open(req, timeout=self._socket_timeout)      def print_debug_header(self): @@ -1811,8 +1965,7 @@ class YoutubeDL(object):              if os.access(opts_cookiefile, os.R_OK):                  self.cookiejar.load() -        cookie_processor = compat_urllib_request.HTTPCookieProcessor( -            self.cookiejar) +        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)          if opts_proxy is not None:              if opts_proxy == '':                  proxies = {} @@ -1828,8 +1981,9 @@ class YoutubeDL(object):          debuglevel = 1 if self.params.get('debug_printtraffic') else 0          https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)          ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) +        data_handler = compat_urllib_request_DataHandler()          opener = compat_urllib_request.build_opener( -            proxy_handler, https_handler, cookie_processor, ydlh) +            proxy_handler, https_handler, cookie_processor, ydlh, data_handler)          # Delete the default user-agent header, which would otherwise apply in          # cases where our custom HTTP handler doesn't come into play @@ -1881,10 +2035,10 @@ class YoutubeDL(object):                                 (info_dict['extractor'], info_dict['id'], thumb_display_id))                  try:                      uf = self.urlopen(t['url']) -                    with open(thumb_filename, 'wb') as thumbf: +                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:                          shutil.copyfileobj(uf, thumbf)                      self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %                                     (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:                      self.report_warning('Unable to download thumbnail "%s": %s' % -                                        (t['url'], compat_str(err))) +                                        (t['url'], error_to_compat_str(err))) | 
