diff options
Diffstat (limited to 'youtube_dl/utils.py')
| -rw-r--r-- | youtube_dl/utils.py | 38 | 
1 files changed, 32 insertions, 6 deletions
| diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 839da17d0..4ace22c2f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -26,6 +26,11 @@ std_headers = {  	'Accept-Language': 'en-us,en;q=0.5',  } +try: +    compat_str = unicode # Python 2 +except NameError: +    compat_str = str +  def preferredencoding():  	"""Get preferred encoding. @@ -83,7 +88,6 @@ class IDParser(HTMLParser.HTMLParser):  		HTMLParser.HTMLParser.__init__(self)  	def error(self, message): -		print >> sys.stderr, self.getpos()  		if self.error_count > 10 or self.started:  			raise HTMLParser.HTMLParseError(message, self.getpos())  		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line @@ -190,14 +194,36 @@ def timeconvert(timestr):  	if timetuple is not None:  		timestamp = email.utils.mktime_tz(timetuple)  	return timestamp -	 -def sanitize_filename(s): -	"""Sanitizes a string so it could be used as part of a filename.""" + +def sanitize_filename(s, restricted=False): +	"""Sanitizes a string so it could be used as part of a filename. +	If restricted is set, use a stricter subset of allowed characters. +	"""  	def replace_insane(char): -		if char in u' .\\/|?*<>:"' or ord(char) < 32: +		if char == '?' or ord(char) < 32 or ord(char) == 127: +			return '' +		elif char == '"': +			return '' if restricted else '\'' +		elif char == ':': +			return '_-' if restricted else ' -' +		elif char in '\\/|*<>': +			return '_' +		if restricted and (char in '&\'' or char.isspace()): +			return '_' +		if restricted and ord(char) > 127:  			return '_'  		return char -	return u''.join(map(replace_insane, s)).strip('_') + +	result = u''.join(map(replace_insane, s)) +	while '__' in result: +		result = result.replace('__', '_') +	result = result.strip('_') +	# Common case of "Foreign band name - English song title" +	if restricted and result.startswith('-_'): +		result = result[2:] +	if not result: +		result = '_' +	return result  def orderedSet(iterable):  	""" Remove all duplicates from the input iterable """ | 
