aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Philipp Hagemeister <phihag@phihag.de> 2012-12-03 15:36:24 +0100
committer: Philipp Hagemeister <phihag@phihag.de> 2012-12-03 15:36:41 +0100
commit: 796173d08b514182eedc704541eb55d5c9e1dc0d (patch)
tree: fc25c18ca5724bfdf486106fe58dfa1d1c693d5c
parent: e575b6821ec185ef4f14c433ccc018d748c0fb60 (diff)
download: youtube-dl-796173d08b514182eedc704541eb55d5c9e1dc0d.tar.xz
Keep video IDs verbatim if possible (Closes #571)
-rw-r--r-- test/test_utils.py | 5
-rw-r--r-- youtube_dl/FileDownloader.py | 7
-rw-r--r-- youtube_dl/utils.py | 20
3 files changed, 21 insertions, 11 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index fd8190f51..8afc30370 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -79,6 +79,11 @@ class TestUtil(unittest.TestCase):
self.assertTrue(sanitize_filename('-', restricted=True) != '')
self.assertTrue(sanitize_filename(':', restricted=True) != '')
+ def test_sanitize_ids(self):
+ self.assertEquals(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
+ self.assertEquals(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
+ self.assertEquals(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')
+
def test_ordered_set(self):
self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
self.assertEqual(orderedSet([]), [])
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 48c8eb126..c471cc160 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -334,8 +334,11 @@ class FileDownloader(object):
template_dict['epoch'] = int(time.time())
template_dict['autonumber'] = u'%05d' % self._num_downloads
- template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items())
- template_dict = dict((k, sanitize_filename(compat_str(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items())
+ sanitize = lambda k,v: sanitize_filename(
+ u'NA' if v is None else compat_str(v),
+ restricted=self.params.get('restrictfilenames'),
+ is_id=(k==u'id'))
+ template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
filename = self.params['outtmpl'] % template_dict
return filename
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 7f73b8476..4dcf18991 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -317,9 +317,10 @@ def timeconvert(timestr):
timestamp = email.utils.mktime_tz(timetuple)
return timestamp
-def sanitize_filename(s, restricted=False):
+def sanitize_filename(s, restricted=False, is_id=False):
"""Sanitizes a string so it could be used as part of a filename.
If restricted is set, use a stricter subset of allowed characters.
+ Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
"""
def replace_insane(char):
if char == '?' or ord(char) < 32 or ord(char) == 127:
@@ -337,14 +338,15 @@ def sanitize_filename(s, restricted=False):
return char
result = u''.join(map(replace_insane, s))
- while '__' in result:
- result = result.replace('__', '_')
- result = result.strip('_')
- # Common case of "Foreign band name - English song title"
- if restricted and result.startswith('-_'):
- result = result[2:]
- if not result:
- result = '_'
+ if not is_id:
+ while '__' in result:
+ result = result.replace('__', '_')
+ result = result.strip('_')
+ # Common case of "Foreign band name - English song title"
+ if restricted and result.startswith('-_'):
+ result = result[2:]
+ if not result:
+ result = '_'
return result
def orderedSet(iterable):