aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/YoutubeDL.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/YoutubeDL.py')
-rwxr-xr-xyoutube_dl/YoutubeDL.py29
1 files changed, 27 insertions, 2 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 3b2be3159..09d2b18f2 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -46,6 +46,7 @@ from .utils import (
DateRange,
DEFAULT_OUTTMPL,
determine_ext,
+ determine_protocol,
DownloadError,
encode_compat_str,
encodeFilename,
@@ -898,6 +899,9 @@ class YoutubeDL(object):
STR_OPERATORS = {
'=': operator.eq,
'!=': operator.ne,
+ '^=': lambda attr, value: attr.startswith(value),
+ '$=': lambda attr, value: attr.endswith(value),
+ '*=': lambda attr, value: value in attr,
}
str_operator_rex = re.compile(r'''(?x)
\s*(?P<key>ext|acodec|vcodec|container|protocol)
@@ -1244,6 +1248,12 @@ class YoutubeDL(object):
except (ValueError, OverflowError, OSError):
pass
+ # Auto generate title fields corresponding to the *_number fields when missing
+ # in order to always have clean titles. This is very common for TV series.
+ for field in ('chapter', 'season', 'episode'):
+ if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+ info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
subtitles = info_dict.get('subtitles')
if subtitles:
for _, subtitle in subtitles.items():
@@ -1300,6 +1310,10 @@ class YoutubeDL(object):
# Automatically determine file extension if missing
if 'ext' not in format:
format['ext'] = determine_ext(format['url']).lower()
+ # Automatically determine protocol if missing (useful for format
+ # selection purposes)
+ if 'protocol' not in format:
+ format['protocol'] = determine_protocol(format)
# Add HTTP headers, so that external programs can use them from the
# json output
full_format_info = info_dict.copy()
@@ -1312,7 +1326,7 @@ class YoutubeDL(object):
# only set the 'formats' fields if the original info_dict list them
# otherwise we end up with a circular reference, the first (and unique)
# element in the 'formats' field in info_dict is info_dict itself,
- # wich can't be exported to json
+ # which can't be exported to json
info_dict['formats'] = formats
if self.params.get('listformats'):
self.list_formats(info_dict)
@@ -1986,8 +2000,19 @@ class YoutubeDL(object):
https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
data_handler = compat_urllib_request_DataHandler()
+
+ # When passing our own FileHandler instance, build_opener won't add the
+ # default FileHandler and allows us to disable the file protocol, which
+ # can be used for malicious purposes (see
+ # https://github.com/rg3/youtube-dl/issues/8227)
+ file_handler = compat_urllib_request.FileHandler()
+
+ def file_open(*args, **kwargs):
+ raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
+ file_handler.file_open = file_open
+
opener = compat_urllib_request.build_opener(
- proxy_handler, https_handler, cookie_processor, ydlh, data_handler)
+ proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
# Delete the default user-agent header, which would otherwise apply in
# cases where our custom HTTP handler doesn't come into play