about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Philipp Hagemeister <phihag@phihag.de> 2012-10-28 15:33:59 -0700
committer: Philipp Hagemeister <phihag@phihag.de> 2012-10-28 15:33:59 -0700
commit: aeca861f227abd282e1ee57b13d1bcba28fe40db (patch)
tree: 39e1a4b11aeb7290f77103611dba0a6dc718a3f8
parent: fe4d68e1967cc39cc00a06c3e2e722d51b8e1419 (diff)
parent: 42cb53fcfa2e8bbc7a96bc1a0ad1e90c1917dccd (diff)
Merge pull request #502 from FiloSottile/new_sanitize_filename
My sanitize_filename proposal
-rw-r--r-- test/test_utils.py 10
-rw-r--r-- youtube_dl/utils.py 16
2 files changed, 18 insertions, 8 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index eb0af703f..878840120 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -22,13 +22,13 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename(u'123'), u'123')
- self.assertEqual(u'abc_de', sanitize_filename(u'abc/de'))
- self.assertTrue(u'de' in sanitize_filename(u'abc/de'))
+ self.assertEqual(u'abc-de', sanitize_filename(u'abc/de'))
self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))
- self.assertEqual(u'abc_de', sanitize_filename(u'abc\\de'))
- self.assertEqual(u'abc_de', sanitize_filename(u'abc\\de'))
- self.assertTrue(u'de' in sanitize_filename(u'abc\\de'))
+ self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de'))
+ self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|'))
+ self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
+ self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
self.assertEqual(sanitize_filename(u'ä'), u'ä')
self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 839da17d0..a64937b4c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -194,10 +194,20 @@ def timeconvert(timestr):
def sanitize_filename(s):
"""Sanitizes a string so it could be used as part of a filename."""
def replace_insane(char):
- if char in u' .\\/|?*<>:"' or ord(char) < 32:
- return '_'
+ if char == '?' or ord(char) < 32 or ord(char) == 127:
+ return ''
+ elif char == '"':
+ return '\''
+ elif char == ':':
+ return ' -'
+ elif char in '\\/|*<>':
+ return '-'
return char
- return u''.join(map(replace_insane, s)).strip('_')
+
+ result = u''.join(map(replace_insane, s))
+ while '--' in result:
+ result = result.replace('--', '-')
+ return result.strip('-')
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """