From 796173d08b514182eedc704541eb55d5c9e1dc0d Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 3 Dec 2012 15:36:24 +0100 Subject: Keep video IDs verbatim if possible (Closes #571) --- youtube_dl/FileDownloader.py | 7 +++++-- youtube_dl/utils.py | 20 +++++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) (limited to 'youtube_dl') diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 48c8eb126..c471cc160 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -334,8 +334,11 @@ class FileDownloader(object): template_dict['epoch'] = int(time.time()) template_dict['autonumber'] = u'%05d' % self._num_downloads - template_dict = dict((key, u'NA' if val is None else val) for key, val in template_dict.items()) - template_dict = dict((k, sanitize_filename(compat_str(v), self.params.get('restrictfilenames'))) for k,v in template_dict.items()) + sanitize = lambda k,v: sanitize_filename( + u'NA' if v is None else compat_str(v), + restricted=self.params.get('restrictfilenames'), + is_id=(k==u'id')) + template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items()) filename = self.params['outtmpl'] % template_dict return filename diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 7f73b8476..4dcf18991 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -317,9 +317,10 @@ def timeconvert(timestr): timestamp = email.utils.mktime_tz(timetuple) return timestamp -def sanitize_filename(s, restricted=False): +def sanitize_filename(s, restricted=False, is_id=False): """Sanitizes a string so it could be used as part of a filename. If restricted is set, use a stricter subset of allowed characters. + Set is_id if this is not an arbitrary string, but an ID that should be kept if possible """ def replace_insane(char): if char == '?' or ord(char) < 32 or ord(char) == 127: @@ -337,14 +338,15 @@ def sanitize_filename(s, restricted=False): return char result = u''.join(map(replace_insane, s)) - while '__' in result: - result = result.replace('__', '_') - result = result.strip('_') - # Common case of "Foreign band name - English song title" - if restricted and result.startswith('-_'): - result = result[2:] - if not result: - result = '_' + if not is_id: + while '__' in result: + result = result.replace('__', '_') + result = result.strip('_') + # Common case of "Foreign band name - English song title" + if restricted and result.startswith('-_'): + result = result[2:] + if not result: + result = '_' return result def orderedSet(iterable): -- cgit v1.2.3