aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2015-08-04 22:29:23 +0200
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2015-08-04 22:29:23 +0200
commit232541df441741d3d55605f03e28ec3c34249a5b (patch)
tree88a8f7aa20f2c29c5b62867eb8d9af68bf2632f4
parenta346b1ff57a94382e80fd4edd5a6d4b91a7cb45e (diff)
[YoutubeDL] format spec: correctly handle dashes and other unused operators
'mp4-baseline-16x9' must be handled as a single string, but the '-' was treated as an operator.
-rw-r--r--test/test_YoutubeDL.py6
-rwxr-xr-xyoutube_dl/YoutubeDL.py33
2 files changed, 38 insertions, 1 deletions
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 9a3c28f8c..0388c0bf3 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -105,6 +105,7 @@ class TestFormatSelection(unittest.TestCase):
def test_format_selection(self):
formats = [
{'format_id': '35', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
+ {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
{'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
{'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
{'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
@@ -136,6 +137,11 @@ class TestFormatSelection(unittest.TestCase):
downloaded = ydl.downloaded_info_dicts[0]
self.assertEqual(downloaded['format_id'], '35')
+ ydl = YDL({'format': 'example-with-dashes'})
+ ydl.process_ie_result(info_dict.copy())
+ downloaded = ydl.downloaded_info_dicts[0]
+ self.assertEqual(downloaded['format_id'], 'example-with-dashes')
+
def test_format_selection_audio(self):
formats = [
{'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index c608ff91a..1446b3254 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -933,6 +933,37 @@ class YoutubeDL(object):
else:
filter_parts.append(string)
+ def _remove_unused_ops(tokens):
+ # Remove operators that we don't use and join them with the sourrounding strings
+ # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
+ ALLOWED_OPS = ('/', '+', ',', '(', ')')
+ last_string, last_start, last_end, last_line = None, None, None, None
+ for type, string, start, end, line in tokens:
+ if type == tokenize.OP and string == '[':
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string, start, end, line
+ # everything inside brackets will be handled by _parse_filter
+ for type, string, start, end, line in tokens:
+ yield type, string, start, end, line
+ if type == tokenize.OP and string == ']':
+ break
+ elif type == tokenize.OP and string in ALLOWED_OPS:
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string, start, end, line
+ elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
+ if not last_string:
+ last_string = string
+ last_start = start
+ last_end = end
+ else:
+ last_string += string
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+
def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
selectors = []
current_selector = None
@@ -1111,7 +1142,7 @@ class YoutubeDL(object):
stream = io.BytesIO(format_spec.encode('utf-8'))
try:
- tokens = list(compat_tokenize_tokenize(stream.readline))
+ tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
except tokenize.TokenError:
raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))