| author    | Sergey M. <dstftw@gmail.com> | 2015-08-06 00:04:40 +0600 |
|-----------|------------------------------|---------------------------|
| committer | Sergey M. <dstftw@gmail.com> | 2015-08-06 00:04:40 +0600 |
| commit    | 47f53ad95884d92c8e5be6ba5c35e2955b941b0c (patch) | |
| tree      | 0661c4d1aff57e428c1051e49cb96aef4b1b015c /youtube_dl | |
| parent    | c73cdd800f0dc7b465ac0b36d338875bb80c23aa (diff) | |
| parent    | 232541df441741d3d55605f03e28ec3c34249a5b (diff) | |
Merge pull request #6463 from jaimeMF/format_spec_fix_dashes
[YoutubeDL] format spec: correctly handle dashes and other unused operators
Diffstat (limited to 'youtube_dl')
-rwxr-xr-x | youtube_dl/YoutubeDL.py | 33 |
1 file changed, 32 insertions(+), 1 deletion(-)
```diff
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index c608ff91a..1446b3254 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -933,6 +933,37 @@ class YoutubeDL(object):
                 else:
                     filter_parts.append(string)
 
+        def _remove_unused_ops(tokens):
+            # Remove operators that we don't use and join them with the surrounding strings
+            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
+            ALLOWED_OPS = ('/', '+', ',', '(', ')')
+            last_string, last_start, last_end, last_line = None, None, None, None
+            for type, string, start, end, line in tokens:
+                if type == tokenize.OP and string == '[':
+                    if last_string:
+                        yield tokenize.NAME, last_string, last_start, last_end, last_line
+                        last_string = None
+                    yield type, string, start, end, line
+                    # everything inside brackets will be handled by _parse_filter
+                    for type, string, start, end, line in tokens:
+                        yield type, string, start, end, line
+                        if type == tokenize.OP and string == ']':
+                            break
+                elif type == tokenize.OP and string in ALLOWED_OPS:
+                    if last_string:
+                        yield tokenize.NAME, last_string, last_start, last_end, last_line
+                        last_string = None
+                    yield type, string, start, end, line
+                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
+                    if not last_string:
+                        last_string = string
+                        last_start = start
+                        last_end = end
+                    else:
+                        last_string += string
+            if last_string:
+                yield tokenize.NAME, last_string, last_start, last_end, last_line
+
         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
             selectors = []
             current_selector = None
@@ -1111,7 +1142,7 @@ class YoutubeDL(object):
         stream = io.BytesIO(format_spec.encode('utf-8'))
         try:
-            tokens = list(compat_tokenize_tokenize(stream.readline))
+            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
         except tokenize.TokenError:
             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
```
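The helper works purely at the token level: Python's `tokenize` module splits a format spec into `NAME`/`OP`/`NUMBER` tokens, so a format ID containing a dash (the commit's own example is `mp4-baseline-16x9`) would otherwise arrive at the parser as several separate tokens. `_remove_unused_ops` glues each such run back together and only passes through the operators the format-selection grammar actually uses (`/`, `+`, `,`, `(`, `)`), with `[...]` filter blocks forwarded untouched for `_parse_filter`. Below is a minimal, standalone sketch of that idea; it is not the youtube-dl code itself, the function name `join_unused_ops` is hypothetical, and it is simplified to yield plain strings instead of full five-element token tuples and to skip the bracket handling.

```python
# Standalone sketch (illustrative, not youtube-dl's actual implementation):
# glue together every run of tokens that is not one of the operators the
# format-spec grammar really uses, so dash-joined IDs survive tokenization.
import io
import tokenize

ALLOWED_OPS = ('/', '+', ',', '(', ')')


def join_unused_ops(tokens):
    last = None  # text accumulated for the current joined-up token
    for tok in tokens:
        if tok.type == tokenize.OP and tok.string in ALLOWED_OPS:
            if last is not None:
                yield last
                last = None
            yield tok.string
        elif tok.type in (tokenize.NAME, tokenize.NUMBER, tokenize.OP):
            # any other operator (notably '-') is fused with its neighbours
            last = (last or '') + tok.string
        # ENCODING/NEWLINE/ENDMARKER tokens are simply dropped in this sketch
    if last is not None:
        yield last


spec = 'mp4-baseline/bestvideo+bestaudio'
stream = io.BytesIO(spec.encode('utf-8'))
print(list(join_unused_ops(tokenize.tokenize(stream.readline))))
# expected output: ['mp4-baseline', '/', 'bestvideo', '+', 'bestaudio']
```

In the real `_remove_unused_ops` the joined text is re-emitted as a `tokenize.NAME` tuple (with the start/end/line fields of the first fragment), so the downstream `_parse_format_selection` sees `mp4-baseline-16x9` as a single format ID, while everything between `[` and `]` is yielded unchanged for the filter parser.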