diff options
| author | Sergey M․ <dstftw@gmail.com> | 2017-06-08 22:53:14 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2017-06-08 22:53:14 +0700 | 
| commit | c9969434183c562eb9935aa20f147f234aa61e53 (patch) | |
| tree | 204b14703f0858524eb5f2d95198e133030be3da | |
| parent | 76e6378358a618a20051e9f9fd38e43af3169683 (diff) | |
[YoutubeDL] Sanitize more fields (#13313)
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 52 | 
1 file changed, 37 insertions, 15 deletions
```diff
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 7efa0c948..c05103bb6 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -58,6 +58,7 @@ from .utils import (
     format_bytes,
     formatSeconds,
     GeoRestrictedError,
+    int_or_none,
     ISO3166Utils,
     locked_file,
     make_HTTPS_handler,
@@ -302,6 +303,17 @@ class YoutubeDL(object):
                         postprocessor.
     """
 
+    _NUMERIC_FIELDS = set((
+        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+        'timestamp', 'upload_year', 'upload_month', 'upload_day',
+        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+        'average_rating', 'comment_count', 'age_limit',
+        'start_time', 'end_time',
+        'chapter_number', 'season_number', 'episode_number',
+        'track_number', 'disc_number', 'release_year',
+        'playlist_index',
+    ))
+
     params = None
     _ies = []
     _pps = []
@@ -639,22 +651,11 @@ class YoutubeDL(object):
                     r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                     outtmpl)
 
-            NUMERIC_FIELDS = set((
-                'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
-                'timestamp', 'upload_year', 'upload_month', 'upload_day',
-                'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
-                'average_rating', 'comment_count', 'age_limit',
-                'start_time', 'end_time',
-                'chapter_number', 'season_number', 'episode_number',
-                'track_number', 'disc_number', 'release_year',
-                'playlist_index',
-            ))
-
             # Missing numeric fields used together with integer presentation types
             # in format specification will break the argument substitution since
             # string 'NA' is returned for missing fields. We will patch output
             # template for missing fields to meet string presentation type.
-            for numeric_field in NUMERIC_FIELDS:
+            for numeric_field in self._NUMERIC_FIELDS:
                 if numeric_field not in template_dict:
                     # As of [1] format syntax is:
                     #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
@@ -1345,9 +1346,28 @@ class YoutubeDL(object):
         if 'title' not in info_dict:
             raise ExtractorError('Missing "title" field in extractor result')
 
-        if not isinstance(info_dict['id'], compat_str):
-            self.report_warning('"id" field is not a string - forcing string conversion')
-            info_dict['id'] = compat_str(info_dict['id'])
+        def report_force_conversion(field, field_not, conversion):
+            self.report_warning(
+                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+                % (field, field_not, conversion))
+
+        def sanitize_string_field(info, string_field):
+            field = info.get(string_field)
+            if field is None or isinstance(field, compat_str):
+                return
+            report_force_conversion(string_field, 'a string', 'string')
+            info[string_field] = compat_str(field)
+
+        def sanitize_numeric_fields(info):
+            for numeric_field in self._NUMERIC_FIELDS:
+                field = info.get(numeric_field)
+                if field is None or isinstance(field, compat_numeric_types):
+                    continue
+                report_force_conversion(numeric_field, 'numeric', 'int')
+                info[numeric_field] = int_or_none(field)
+
+        sanitize_string_field(info_dict, 'id')
+        sanitize_numeric_fields(info_dict)
 
         if 'playlist' not in info_dict:
             # It isn't part of a playlist
@@ -1435,6 +1455,8 @@ class YoutubeDL(object):
             if 'url' not in format:
                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 
+            sanitize_string_field(format, 'format_id')
+            sanitize_numeric_fields(format)
             format['url'] = sanitize_url(format['url'])
 
             if format.get('format_id') is None:
```
