diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-06-23 21:18:33 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-06-23 21:18:33 +0700 |
commit | 73af5cc817ff19d21cb432c5a4e9e37dd35a353d (patch) | |
tree | 5ae82ae340aff31a8d9292390e1949cd6b01eba6 /youtube_dl | |
parent | b5f523ed62f6c84fe0c58274f1751e66c58282d8 (diff) |
[YoutubeDL] Skip malformed formats for better extraction robustness
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x | youtube_dl/YoutubeDL.py | 16 |
1 files changed, 12 insertions, 4 deletions
```diff
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index c05103bb6..b3a6d4d3b 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -1448,17 +1448,25 @@ class YoutubeDL(object):
         if not formats:
             raise ExtractorError('No video formats found!')
 
+        def is_wellformed(f):
+            url = f.get('url')
+            valid_url = url and isinstance(url, compat_str)
+            if not valid_url:
+                self.report_warning(
+                    '"url" field is missing or empty - skipping format, '
+                    'there is an error in extractor')
+            return valid_url
+
+        # Filter out malformed formats for better extraction robustness
+        formats = list(filter(is_wellformed, formats))
+
         formats_dict = {}
 
         # We check that all the formats have the format and format_id fields
         for i, format in enumerate(formats):
-            if 'url' not in format:
-                raise ExtractorError('Missing "url" key in result (index %d)' % i)
-
             sanitize_string_field(format, 'format_id')
             sanitize_numeric_fields(format)
             format['url'] = sanitize_url(format['url'])
-
             if format.get('format_id') is None:
                 format['format_id'] = compat_str(i)
             else:
```