aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilipp Hagemeister <phihag@phihag.de>2012-11-26 23:58:46 +0100
committerPhilipp Hagemeister <phihag@phihag.de>2012-11-26 23:58:46 +0100
commit1c469a9480e9d8bea45950898eb46e07b0c58290 (patch)
tree36e38f86d0fc967a76c991ccd68b22c7a622024c
parent71f36332dd9f17edef7c1f8d3b0bedc737b250e4 (diff)
New optoin --restrict-filenames
-rw-r--r--README.md2
-rw-r--r--test/test_utils.py27
-rw-r--r--youtube-dl.14
-rw-r--r--youtube-dl.bash-completion2
-rw-r--r--youtube_dl/FileDownloader.py65
-rw-r--r--youtube_dl/__init__.py4
-rw-r--r--youtube_dl/utils.py14
7 files changed, 77 insertions, 41 deletions
diff --git a/README.md b/README.md
index e267760d6..14acddbd0 100644
--- a/README.md
+++ b/README.md
@@ -47,6 +47,8 @@ which means you can modify it, redistribute it or use it however you like.
%(extractor)s for the provider (youtube, metacafe,
etc), %(id)s for the video id and %% for a literal
percent. Use - to output to stdout.
+ --restrict-filenames Avoid some characters such as "&" and spaces in
+ filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
-w, --no-overwrites do not overwrite files
-c, --continue resume partially downloaded files
diff --git a/test/test_utils.py b/test/test_utils.py
index e7c6d5b3d..0a435ddc5 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -30,11 +30,34 @@ class TestUtil(unittest.TestCase):
self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
+ self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T')
self.assertEqual(sanitize_filename(u'ä'), u'ä')
self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица')
- for forbidden in u'"\0\\/':
- self.assertTrue(forbidden not in sanitize_filename(forbidden))
+ forbidden = u'"\0\\/'
+ for fc in forbidden:
+ for fbc in forbidden:
+ self.assertTrue(fbc not in sanitize_filename(fc))
+
+ def test_sanitize_filename_restricted(self):
+ self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc')
+ self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e')
+
+ self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')
+
+ self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
+ self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
+
+ self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
+ self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))
+ self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
+ self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))
+
+ forbidden = u'"\0\\/&: \'\t\n'
+ for fc in forbidden:
+ print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True)))
+ for fbc in forbidden:
+ self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
def test_ordered_set(self):
self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7])
diff --git a/youtube-dl.1 b/youtube-dl.1
index cfaefd0c8..64120a8d2 100644
--- a/youtube-dl.1
+++ b/youtube-dl.1
@@ -59,6 +59,8 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
+--restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
-a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
-w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
-c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
@@ -210,7 +212,7 @@ Please note that Python 2.5 is not supported anymore.
.PP
Since June 2012 (#342) youtube-dl is packed as an executable zipfile,
simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on
-some systems) or clone the git repo to see the code.
+some systems) or clone the git repository, as laid out above.
If you modify the code, you can run it by executing the
\f[C]__main__.py\f[] file.
To recompile the executable, run \f[C]make\ youtube-dl\f[].
diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion
index 76451a2b2..dee191cd4 100644
--- a/youtube-dl.bash-completion
+++ b/youtube-dl.bash-completion
@@ -3,7 +3,7 @@ __youtube-dl()
local cur prev opts
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
- opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
+ opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
if [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index 37a842cdd..4c79be432 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -44,37 +44,38 @@ class FileDownloader(object):
Available options:
- username: Username for authentication purposes.
- password: Password for authentication purposes.
- usenetrc: Use netrc for authentication instead.
- quiet: Do not print messages to stdout.
- forceurl: Force printing final URL.
- forcetitle: Force printing title.
- forcethumbnail: Force printing thumbnail URL.
- forcedescription: Force printing description.
- forcefilename: Force printing final filename.
- simulate: Do not download the video files.
- format: Video format code.
- format_limit: Highest quality format to try.
- outtmpl: Template for output names.
- ignoreerrors: Do not stop on download errors.
- ratelimit: Download speed limit, in bytes/sec.
- nooverwrites: Prevent overwriting files.
- retries: Number of times to retry for HTTP error 5xx
- continuedl: Try to continue downloads if possible.
- noprogress: Do not print the progress bar.
- playliststart: Playlist item to start at.
- playlistend: Playlist item to end at.
- matchtitle: Download only matching titles.
- rejecttitle: Reject downloads for matching titles.
- logtostderr: Log messages to stderr instead of stdout.
- consoletitle: Display progress in console window's titlebar.
- nopart: Do not use temporary .part files.
- updatetime: Use the Last-modified header to set output file timestamps.
- writedescription: Write the video description to a .description file
- writeinfojson: Write the video description to a .info.json file
- writesubtitles: Write the video subtitles to a .srt file
- subtitleslang: Language of the subtitles to download
+ username: Username for authentication purposes.
+ password: Password for authentication purposes.
+ usenetrc: Use netrc for authentication instead.
+ quiet: Do not print messages to stdout.
+ forceurl: Force printing final URL.
+ forcetitle: Force printing title.
+ forcethumbnail: Force printing thumbnail URL.
+ forcedescription: Force printing description.
+ forcefilename: Force printing final filename.
+ simulate: Do not download the video files.
+ format: Video format code.
+ format_limit: Highest quality format to try.
+ outtmpl: Template for output names.
+ restrictfilenames: Do not allow "&" and spaces in file names
+ ignoreerrors: Do not stop on download errors.
+ ratelimit: Download speed limit, in bytes/sec.
+ nooverwrites: Prevent overwriting files.
+ retries: Number of times to retry for HTTP error 5xx
+ continuedl: Try to continue downloads if possible.
+ noprogress: Do not print the progress bar.
+ playliststart: Playlist item to start at.
+ playlistend: Playlist item to end at.
+ matchtitle: Download only matching titles.
+ rejecttitle: Reject downloads for matching titles.
+ logtostderr: Log messages to stderr instead of stdout.
+ consoletitle: Display progress in console window's titlebar.
+ nopart: Do not use temporary .part files.
+ updatetime: Use the Last-modified header to set output file timestamps.
+ writedescription: Write the video description to a .description file
+ writeinfojson: Write the video description to a .info.json file
+ writesubtitles: Write the video subtitles to a .srt file
+ subtitleslang: Language of the subtitles to download
"""
params = None
@@ -349,7 +350,7 @@ class FileDownloader(object):
def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor."""
- info_dict['stitle'] = sanitize_filename(info_dict['title'])
+ info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames'))
reason = self._match_entry(info_dict)
if reason is not None:
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index bf4b55f48..1109e05cd 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -272,6 +272,9 @@ def parseOpts():
help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output',
dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
+ filesystem.add_option('--restrict-filenames',
+ action='store_true', dest='restrictfilenames',
+ help='Avoid some characters such as "&" and spaces in filenames', default=False)
filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('-w', '--no-overwrites',
@@ -485,6 +488,7 @@ def _real_main():
or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(id)s.%(ext)s'),
+ 'restrictfilenames': opts.restrictfilenames,
'ignoreerrors': opts.ignoreerrors,
'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 658fd2686..55f2fe02c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -194,18 +194,22 @@ def timeconvert(timestr):
if timetuple is not None:
timestamp = email.utils.mktime_tz(timetuple)
return timestamp
-
-def sanitize_filename(s):
- """Sanitizes a string so it could be used as part of a filename."""
+
+def sanitize_filename(s, restricted=False):
+ """Sanitizes a string so it could be used as part of a filename.
+ If restricted is set, use a stricter subset of allowed characters.
+ """
def replace_insane(char):
if char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':
- return '\''
+ return '' if restricted else 'FOO\''
elif char == ':':
- return ' -'
+ return '_-' if restricted else ' -'
elif char in '\\/|*<>':
return '-'
+ if restricted and (char in '&\'' or char.isspace()):
+ return '_'
return char
result = u''.join(map(replace_insane, s))