diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-08-04 22:29:23 +0200 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2015-08-04 22:29:23 +0200 |
commit | 232541df441741d3d55605f03e28ec3c34249a5b (patch) | |
tree | 88a8f7aa20f2c29c5b62867eb8d9af68bf2632f4 /youtube_dl/YoutubeDL.py | |
parent | a346b1ff57a94382e80fd4edd5a6d4b91a7cb45e (diff) |
[YoutubeDL] format spec: correctly handle dashes and other unused operators
'mp4-baseline-16x9' must be handled as a single string, but the '-' was treated as an operator.
Diffstat (limited to 'youtube_dl/YoutubeDL.py')
-rwxr-xr-x | youtube_dl/YoutubeDL.py | 33 |
1 files changed, 32 insertions, 1 deletions
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index c608ff91a..1446b3254 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -933,6 +933,37 @@ class YoutubeDL(object): else: filter_parts.append(string) + def _remove_unused_ops(tokens): + # Remove operators that we don't use and join them with the sourrounding strings + # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' + ALLOWED_OPS = ('/', '+', ',', '(', ')') + last_string, last_start, last_end, last_line = None, None, None, None + for type, string, start, end, line in tokens: + if type == tokenize.OP and string == '[': + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string, start, end, line + # everything inside brackets will be handled by _parse_filter + for type, string, start, end, line in tokens: + yield type, string, start, end, line + if type == tokenize.OP and string == ']': + break + elif type == tokenize.OP and string in ALLOWED_OPS: + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + last_string = None + yield type, string, start, end, line + elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: + if not last_string: + last_string = string + last_start = start + last_end = end + else: + last_string += string + if last_string: + yield tokenize.NAME, last_string, last_start, last_end, last_line + def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): selectors = [] current_selector = None @@ -1111,7 +1142,7 @@ class YoutubeDL(object): stream = io.BytesIO(format_spec.encode('utf-8')) try: - tokens = list(compat_tokenize_tokenize(stream.readline)) + tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline))) except tokenize.TokenError: raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) |