about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2017-06-08 22:53:14 +0700
committer Sergey M․ <dstftw@gmail.com> 2017-06-08 22:53:14 +0700
commit c9969434183c562eb9935aa20f147f234aa61e53 (patch)
tree 204b14703f0858524eb5f2d95198e133030be3da
parent 76e6378358a618a20051e9f9fd38e43af3169683 (diff)
[YoutubeDL] Sanitize more fields (#13313)
-rwxr-xr-x youtube_dl/YoutubeDL.py 52
1 files changed, 37 insertions, 15 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 7efa0c948..c05103bb6 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -58,6 +58,7 @@ from .utils import (
format_bytes,
formatSeconds,
GeoRestrictedError,
+ int_or_none,
ISO3166Utils,
locked_file,
make_HTTPS_handler,
@@ -302,6 +303,17 @@ class YoutubeDL(object):
postprocessor.
"""
+ _NUMERIC_FIELDS = set((
+ 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+ 'timestamp', 'upload_year', 'upload_month', 'upload_day',
+ 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+ 'average_rating', 'comment_count', 'age_limit',
+ 'start_time', 'end_time',
+ 'chapter_number', 'season_number', 'episode_number',
+ 'track_number', 'disc_number', 'release_year',
+ 'playlist_index',
+ ))
+
params = None
_ies = []
_pps = []
@@ -639,22 +651,11 @@ class YoutubeDL(object):
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
outtmpl)
- NUMERIC_FIELDS = set((
- 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
- 'timestamp', 'upload_year', 'upload_month', 'upload_day',
- 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
- 'average_rating', 'comment_count', 'age_limit',
- 'start_time', 'end_time',
- 'chapter_number', 'season_number', 'episode_number',
- 'track_number', 'disc_number', 'release_year',
- 'playlist_index',
- ))
-
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
# string 'NA' is returned for missing fields. We will patch output
# template for missing fields to meet string presentation type.
- for numeric_field in NUMERIC_FIELDS:
+ for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict:
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
@@ -1345,9 +1346,28 @@ class YoutubeDL(object):
if 'title' not in info_dict:
raise ExtractorError('Missing "title" field in extractor result')
- if not isinstance(info_dict['id'], compat_str):
- self.report_warning('"id" field is not a string - forcing string conversion')
- info_dict['id'] = compat_str(info_dict['id'])
+ def report_force_conversion(field, field_not, conversion):
+ self.report_warning(
+ '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+ % (field, field_not, conversion))
+
+ def sanitize_string_field(info, string_field):
+ field = info.get(string_field)
+ if field is None or isinstance(field, compat_str):
+ return
+ report_force_conversion(string_field, 'a string', 'string')
+ info[string_field] = compat_str(field)
+
+ def sanitize_numeric_fields(info):
+ for numeric_field in self._NUMERIC_FIELDS:
+ field = info.get(numeric_field)
+ if field is None or isinstance(field, compat_numeric_types):
+ continue
+ report_force_conversion(numeric_field, 'numeric', 'int')
+ info[numeric_field] = int_or_none(field)
+
+ sanitize_string_field(info_dict, 'id')
+ sanitize_numeric_fields(info_dict)
if 'playlist' not in info_dict:
# It isn't part of a playlist
@@ -1435,6 +1455,8 @@ class YoutubeDL(object):
if 'url' not in format:
raise ExtractorError('Missing "url" key in result (index %d)' % i)
+ sanitize_string_field(format, 'format_id')
+ sanitize_numeric_fields(format)
format['url'] = sanitize_url(format['url'])
if format.get('format_id') is None: