diff options
Diffstat (limited to 'youtube_dl/utils.py')
| -rw-r--r-- | youtube_dl/utils.py | 42 | 
1 files changed, 34 insertions, 8 deletions
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 2853ba50f..4ace22c2f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -19,13 +19,18 @@ except ImportError:
 	import StringIO
 
 std_headers = {
-	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
+	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
 	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 	'Accept-Encoding': 'gzip, deflate',
 	'Accept-Language': 'en-us,en;q=0.5',
 }
 
+try:
+    compat_str = unicode # Python 2
+except NameError:
+    compat_str = str
+
 def preferredencoding():
 	"""Get preferred encoding.
 
@@ -83,7 +88,6 @@ class IDParser(HTMLParser.HTMLParser):
 		HTMLParser.HTMLParser.__init__(self)
 
 	def error(self, message):
-		print >> sys.stderr, self.getpos()
 		if self.error_count > 10 or self.started:
 			raise HTMLParser.HTMLParseError(message, self.getpos())
 		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
@@ -190,14 +194,36 @@ def timeconvert(timestr):
 	if timetuple is not None:
 		timestamp = email.utils.mktime_tz(timetuple)
 	return timestamp
-	
-def sanitize_filename(s):
-	"""Sanitizes a string so it could be used as part of a filename."""
+
+def sanitize_filename(s, restricted=False):
+	"""Sanitizes a string so it could be used as part of a filename.
+	If restricted is set, use a stricter subset of allowed characters.
+	"""
 	def replace_insane(char):
-		if char in u' .\\/|?*<>:"' or ord(char) < 32:
+		if char == '?' or ord(char) < 32 or ord(char) == 127:
+			return ''
+		elif char == '"':
+			return '' if restricted else '\''
+		elif char == ':':
+			return '_-' if restricted else ' -'
+		elif char in '\\/|*<>':
+			return '_'
+		if restricted and (char in '&\'' or char.isspace()):
+			return '_'
+		if restricted and ord(char) > 127:
 			return '_'
 		return char
-	return u''.join(map(replace_insane, s)).strip('_')
+
+	result = u''.join(map(replace_insane, s))
+	while '__' in result:
+		result = result.replace('__', '_')
+	result = result.strip('_')
+	# Common case of "Foreign band name - English song title"
+	if restricted and result.startswith('-_'):
+		result = result[2:]
+	if not result:
+		result = '_'
+	return result
 
 def orderedSet(iterable):
 	""" Remove all duplicates from the input iterable """
@@ -223,7 +249,7 @@ def encodeFilename(s):
 
 	assert type(s) == type(u'')
 
-	if sys.platform == 'win32' and sys.getwindowsversion().major >= 5:
+	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 		# Pass u'' directly to use Unicode APIs on Windows 2000 and up
 		# (Detecting Windows NT 4 is tricky because 'major >= 4' would
 		# match Windows 9x series as well. Besides, NT 4 is obsolete.)
