diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2012-11-26 23:58:46 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2012-11-26 23:58:46 +0100 | 
| commit | 1c469a9480e9d8bea45950898eb46e07b0c58290 (patch) | |
| tree | 36e38f86d0fc967a76c991ccd68b22c7a622024c | |
| parent | 71f36332dd9f17edef7c1f8d3b0bedc737b250e4 (diff) | |
New option --restrict-filenames
| -rw-r--r-- | README.md | 2 | ||||
| -rw-r--r-- | test/test_utils.py | 27 | ||||
| -rw-r--r-- | youtube-dl.1 | 4 | ||||
| -rw-r--r-- | youtube-dl.bash-completion | 2 | ||||
| -rw-r--r-- | youtube_dl/FileDownloader.py | 65 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 14 | 
7 files changed, 77 insertions, 41 deletions
| @@ -47,6 +47,8 @@ which means you can modify it, redistribute it or use it however you like.                               %(extractor)s for the provider (youtube, metacafe,                               etc), %(id)s for the video id and %% for a literal                               percent. Use - to output to stdout. +    --restrict-filenames     Avoid some characters such as "&" and spaces in +                             filenames      -a, --batch-file FILE    file containing URLs to download ('-' for stdin)      -w, --no-overwrites      do not overwrite files      -c, --continue           resume partially downloaded files diff --git a/test/test_utils.py b/test/test_utils.py index e7c6d5b3d..0a435ddc5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -30,11 +30,34 @@ class TestUtil(unittest.TestCase):  		self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))  		self.assertEqual(u'this - that', sanitize_filename(u'this: that')) +		self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T')  		self.assertEqual(sanitize_filename(u'ä'), u'ä')  		self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица') -		for forbidden in u'"\0\\/': -			self.assertTrue(forbidden not in sanitize_filename(forbidden)) +		forbidden = u'"\0\\/' +		for fc in forbidden: +			for fbc in forbidden: +				self.assertTrue(fbc not in sanitize_filename(fc)) + +	def test_sanitize_filename_restricted(self): +		self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc') +		self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e') + +		self.assertEqual(sanitize_filename(u'123', restricted=True), u'123') + +		self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True)) +		self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True)) + +		self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True)) +		self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True)) +		self.assertEqual(u'yes_no', 
sanitize_filename(u'yes? no', restricted=True)) +		self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True)) + +		forbidden = u'"\0\\/&: \'\t\n' +		for fc in forbidden: +			print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True))) +			for fbc in forbidden: +				self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))  	def test_ordered_set(self):  		self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7]) diff --git a/youtube-dl.1 b/youtube-dl.1 index cfaefd0c8..64120a8d2 100644 --- a/youtube-dl.1 +++ b/youtube-dl.1 @@ -59,6 +59,8 @@ redistribute it or use it however you like.  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. +--restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames  -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)  -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files  -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files @@ -210,7 +212,7 @@ Please note that Python 2.5 is not supported anymore.  .PP  Since June 2012 (#342) youtube-dl is packed as an executable zipfile,  simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on -some systems) or clone the git repo to see the code. +some systems) or clone the git repository, as laid out above.  If you modify the code, you can run it by executing the  \f[C]__main__.py\f[] file.  To recompile the executable, run \f[C]make\ youtube-dl\f[]. 
diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion index 76451a2b2..dee191cd4 100644 --- a/youtube-dl.bash-completion +++ b/youtube-dl.bash-completion @@ -3,7 +3,7 @@ __youtube-dl()      local cur prev opts      COMPREPLY=()      cur="${COMP_WORDS[COMP_CWORD]}" -    opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" +    opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"      if [[ ${cur} == * ]] ; then          COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 37a842cdd..4c79be432 100644 --- a/youtube_dl/FileDownloader.py +++ 
b/youtube_dl/FileDownloader.py @@ -44,37 +44,38 @@ class FileDownloader(object):  	Available options: -	username:         Username for authentication purposes. -	password:         Password for authentication purposes. -	usenetrc:         Use netrc for authentication instead. -	quiet:            Do not print messages to stdout. -	forceurl:         Force printing final URL. -	forcetitle:       Force printing title. -	forcethumbnail:   Force printing thumbnail URL. -	forcedescription: Force printing description. -	forcefilename:    Force printing final filename. -	simulate:         Do not download the video files. -	format:           Video format code. -	format_limit:     Highest quality format to try. -	outtmpl:          Template for output names. -	ignoreerrors:     Do not stop on download errors. -	ratelimit:        Download speed limit, in bytes/sec. -	nooverwrites:     Prevent overwriting files. -	retries:          Number of times to retry for HTTP error 5xx -	continuedl:       Try to continue downloads if possible. -	noprogress:       Do not print the progress bar. -	playliststart:    Playlist item to start at. -	playlistend:      Playlist item to end at. -	matchtitle:       Download only matching titles. -	rejecttitle:      Reject downloads for matching titles. -	logtostderr:      Log messages to stderr instead of stdout. -	consoletitle:     Display progress in console window's titlebar. -	nopart:           Do not use temporary .part files. -	updatetime:       Use the Last-modified header to set output file timestamps. -	writedescription: Write the video description to a .description file -	writeinfojson:    Write the video description to a .info.json file -	writesubtitles:   Write the video subtitles to a .srt file -	subtitleslang:    Language of the subtitles to download +	username:          Username for authentication purposes. +	password:          Password for authentication purposes. +	usenetrc:          Use netrc for authentication instead. 
+	quiet:             Do not print messages to stdout. +	forceurl:          Force printing final URL. +	forcetitle:        Force printing title. +	forcethumbnail:    Force printing thumbnail URL. +	forcedescription:  Force printing description. +	forcefilename:     Force printing final filename. +	simulate:          Do not download the video files. +	format:            Video format code. +	format_limit:      Highest quality format to try. +	outtmpl:           Template for output names. +	restrictfilenames: Do not allow "&" and spaces in file names +	ignoreerrors:      Do not stop on download errors. +	ratelimit:         Download speed limit, in bytes/sec. +	nooverwrites:      Prevent overwriting files. +	retries:           Number of times to retry for HTTP error 5xx +	continuedl:        Try to continue downloads if possible. +	noprogress:        Do not print the progress bar. +	playliststart:     Playlist item to start at. +	playlistend:       Playlist item to end at. +	matchtitle:        Download only matching titles. +	rejecttitle:       Reject downloads for matching titles. +	logtostderr:       Log messages to stderr instead of stdout. +	consoletitle:      Display progress in console window's titlebar. +	nopart:            Do not use temporary .part files. +	updatetime:        Use the Last-modified header to set output file timestamps. 
+	writedescription:  Write the video description to a .description file +	writeinfojson:     Write the video description to a .info.json file +	writesubtitles:    Write the video subtitles to a .srt file +	subtitleslang:     Language of the subtitles to download  	"""  	params = None @@ -349,7 +350,7 @@ class FileDownloader(object):  	def process_info(self, info_dict):  		"""Process a single dictionary returned by an InfoExtractor.""" -		info_dict['stitle'] = sanitize_filename(info_dict['title']) +		info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames'))  		reason = self._match_entry(info_dict)  		if reason is not None: diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index bf4b55f48..1109e05cd 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -272,6 +272,9 @@ def parseOpts():  			help='number downloaded files starting from 00000', default=False)  	filesystem.add_option('-o', '--output',  			dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. 
Use - to output to stdout.') +	filesystem.add_option('--restrict-filenames', +			action='store_true', dest='restrictfilenames', +			help='Avoid some characters such as "&" and spaces in filenames', default=False)  	filesystem.add_option('-a', '--batch-file',  			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')  	filesystem.add_option('-w', '--no-overwrites', @@ -485,6 +488,7 @@ def _real_main():  			or (opts.useid and u'%(id)s.%(ext)s')  			or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')  			or u'%(id)s.%(ext)s'), +		'restrictfilenames': opts.restrictfilenames,  		'ignoreerrors': opts.ignoreerrors,  		'ratelimit': opts.ratelimit,  		'nooverwrites': opts.nooverwrites, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 658fd2686..55f2fe02c 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -194,18 +194,22 @@ def timeconvert(timestr):  	if timetuple is not None:  		timestamp = email.utils.mktime_tz(timetuple)  	return timestamp -	 -def sanitize_filename(s): -	"""Sanitizes a string so it could be used as part of a filename.""" + +def sanitize_filename(s, restricted=False): +	"""Sanitizes a string so it could be used as part of a filename. +	If restricted is set, use a stricter subset of allowed characters. +	"""  	def replace_insane(char):  		if char == '?' or ord(char) < 32 or ord(char) == 127:  			return ''  		elif char == '"': -			return '\'' +			return '' if restricted else 'FOO\''  		elif char == ':': -			return ' -' +			return '_-' if restricted else ' -'  		elif char in '\\/|*<>':  			return '-' +		if restricted and (char in '&\'' or char.isspace()): +			return '_'  		return char  	result = u''.join(map(replace_insane, s)) | 
