aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- LATEST_VERSION 2
-rw-r--r-- Makefile 27
-rw-r--r-- README.md 15
-rw-r--r-- test/test_utils.py 36
-rwxr-xr-x youtube-dl bin 43730 -> 44592 bytes
-rw-r--r-- youtube-dl.1 19
-rw-r--r-- youtube-dl.bash-completion 2
-rwxr-xr-x youtube-dl.exe bin 3994108 -> 3995263 bytes
-rw-r--r-- youtube_dl/FileDownloader.py 105
-rw-r--r-- youtube_dl/InfoExtractors.py 142
-rw-r--r-- youtube_dl/PostProcessor.py 4
-rw-r--r-- youtube_dl/__init__.py 46
-rw-r--r-- youtube_dl/utils.py 30
13 files changed, 259 insertions, 169 deletions
diff --git a/LATEST_VERSION b/LATEST_VERSION
index d070c6ea3..5db716ac7 100644
--- a/LATEST_VERSION
+++ b/LATEST_VERSION
@@ -1 +1 @@
-2012.10.09
+2012.11.27
diff --git a/Makefile b/Makefile
index c3bbf2a7f..aea967148 100644
--- a/Makefile
+++ b/Makefile
@@ -5,12 +5,22 @@ clean:
rm -f youtube-dl youtube-dl.exe youtube-dl.1 LATEST_VERSION
PREFIX=/usr/local
+BINDIR=$(PREFIX)/bin
+MANDIR=$(PREFIX)/man
+SYSCONFDIR=/etc
+
install: youtube-dl youtube-dl.1 youtube-dl.bash-completion
- install -m 755 --owner root --group root youtube-dl $(PREFIX)/bin/
- install -m 644 --owner root --group root youtube-dl.1 $(PREFIX)/man/man1
- install -m 644 --owner root --group root youtube-dl.bash-completion /etc/bash_completion.d/youtube-dl
+ install -d $(DESTDIR)$(BINDIR)
+ install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
+ install -d $(DESTDIR)$(MANDIR)/man1
+ install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
+ install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
+ install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
+
+test:
+ nosetests2 --nocapture test
-.PHONY: all clean install README.md youtube-dl.bash-completion
+.PHONY: all clean install test README.md youtube-dl.bash-completion
# TODO un-phony README.md and youtube-dl.bash_completion by reading from .in files and generating from them
youtube-dl: youtube_dl/*.py
@@ -26,13 +36,13 @@ youtube-dl.exe: youtube_dl/*.py
README.md: youtube_dl/*.py
@options=$$(COLUMNS=80 python -m youtube_dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/## \1/') && \
header=$$(sed -e '/.*# OPTIONS/,$$ d' README.md) && \
- footer=$$(sed -e '1,/.*# FAQ/ d' README.md) && \
+ footer=$$(sed -e '1,/.*# CONFIGURATION/ d' README.md) && \
echo "$${header}" > README.md && \
echo >> README.md && \
echo '# OPTIONS' >> README.md && \
echo "$${options}" >> README.md&& \
echo >> README.md && \
- echo '# FAQ' >> README.md && \
+ echo '# CONFIGURATION' >> README.md && \
echo "$${footer}" >> README.md
youtube-dl.1: README.md
@@ -45,8 +55,3 @@ youtube-dl.bash-completion: README.md
LATEST_VERSION: youtube_dl/__init__.py
python -m youtube_dl --version > LATEST_VERSION
-
-test:
- nosetests2 --nocapture test
-
-.PHONY: default compile update update-latest update-readme test clean
diff --git a/README.md b/README.md
index 64a64c610..14acddbd0 100644
--- a/README.md
+++ b/README.md
@@ -36,9 +36,10 @@ which means you can modify it, redistribute it or use it however you like.
## Filesystem Options:
-t, --title use title in file name
- -l, --literal use literal title in file name
+ --id use video ID in file name
+ -l, --literal [deprecated] alias of --title
-A, --auto-number number downloaded files starting from 00000
- -o, --output TEMPLATE output filename template. Use %(stitle)s to get the
+ -o, --output TEMPLATE output filename template. Use %(title)s to get the
title, %(uploader)s for the uploader name,
%(autonumber)s to get an automatically incremented
number, %(ext)s for the filename extension,
@@ -46,6 +47,8 @@ which means you can modify it, redistribute it or use it however you like.
%(extractor)s for the provider (youtube, metacafe,
etc), %(id)s for the video id and %% for a literal
percent. Use - to output to stdout.
+ --restrict-filenames Avoid some characters such as "&" and spaces in
+ filenames
-a, --batch-file FILE file containing URLs to download ('-' for stdin)
-w, --no-overwrites do not overwrite files
-c, --continue resume partially downloaded files
@@ -91,7 +94,7 @@ which means you can modify it, redistribute it or use it however you like.
-n, --netrc use .netrc authentication data
## Post-processing Options:
- --extract-audio convert video files to audio-only files (requires
+ -x, --extract-audio convert video files to audio-only files (requires
ffmpeg or avconv and ffprobe or avprobe)
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav";
best by default
@@ -101,6 +104,10 @@ which means you can modify it, redistribute it or use it however you like.
-k, --keep-video keeps the video file on disk after the post-
processing; the video is erased by default
+# CONFIGURATION
+
+You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`.
+
# FAQ
### Can you please put the -b option back?
@@ -146,7 +153,7 @@ Please note that Python 2.5 is not supported anymore.
### What is this binary file? Where has the code gone?
-Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repo to see the code. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make compile`.
+Since June 2012 (#342) youtube-dl is packed as an executable zipfile; simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
### The exe throws a *Runtime error from Visual C++*
diff --git a/test/test_utils.py b/test/test_utils.py
index eb0af703f..0a435ddc5 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -22,17 +22,43 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename(u'123'), u'123')
- self.assertEqual(u'abc_de', sanitize_filename(u'abc/de'))
- self.assertTrue(u'de' in sanitize_filename(u'abc/de'))
+ self.assertEqual(u'abc-de', sanitize_filename(u'abc/de'))
self.assertFalse(u'/' in sanitize_filename(u'abc/de///'))
- self.assertEqual(u'abc_de', sanitize_filename(u'abc\\de'))
- self.assertEqual(u'abc_de', sanitize_filename(u'abc\\de'))
- self.assertTrue(u'de' in sanitize_filename(u'abc\\de'))
+ self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de'))
+ self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|'))
+ self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))
+ self.assertEqual(u'this - that', sanitize_filename(u'this: that'))
+ self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T')
self.assertEqual(sanitize_filename(u'ä'), u'ä')
self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица')
+ forbidden = u'"\0\\/'
+ for fc in forbidden:
+ for fbc in forbidden:
+ self.assertTrue(fbc not in sanitize_filename(fc))
+
+ def test_sanitize_filename_restricted(self):
+ self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc')
+ self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e')
+
+ self.assertEqual(sanitize_filename(u'123', restricted=True), u'123')
+
+ self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True))
+ self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True))
+
+ self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True))
+ self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))
+ self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))
+ self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True))
+
+ forbidden = u'"\0\\/&: \'\t\n'
+ for fc in forbidden:
+ print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True)))
+ for fbc in forbidden:
+ self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))
+
def test_ordered_set(self):
self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7])
self.assertEqual(orderedSet([]), [])
diff --git a/youtube-dl b/youtube-dl
index 4da0fcb96..064508692 100755
--- a/youtube-dl
+++ b/youtube-dl
Binary files differ
diff --git a/youtube-dl.1 b/youtube-dl.1
index a3100e9e4..64120a8d2 100644
--- a/youtube-dl.1
+++ b/youtube-dl.1
@@ -48,9 +48,10 @@ redistribute it or use it however you like.
.nf
\f[C]
-t,\ --title\ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ title\ in\ file\ name
--l,\ --literal\ \ \ \ \ \ \ \ \ \ \ \ use\ literal\ title\ in\ file\ name
+--id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ video\ ID\ in\ file\ name
+-l,\ --literal\ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ --title
-A,\ --auto-number\ \ \ \ \ \ \ \ number\ downloaded\ files\ starting\ from\ 00000
--o,\ --output\ TEMPLATE\ \ \ \ output\ filename\ template.\ Use\ %(stitle)s\ to\ get\ the
+-o,\ --output\ TEMPLATE\ \ \ \ output\ filename\ template.\ Use\ %(title)s\ to\ get\ the
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ title,\ %(uploader)s\ for\ the\ uploader\ name,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(autonumber)s\ to\ get\ an\ automatically\ incremented
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ number,\ %(ext)s\ for\ the\ filename\ extension,
@@ -58,6 +59,8 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout.
+--restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in
+\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames
-a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)
-w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files
-c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files
@@ -119,7 +122,7 @@ redistribute it or use it however you like.
.IP
.nf
\f[C]
---extract-audio\ \ \ \ \ \ \ \ \ \ convert\ video\ files\ to\ audio-only\ files\ (requires
+-x,\ --extract-audio\ \ \ \ \ \ convert\ video\ files\ to\ audio-only\ files\ (requires
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ffmpeg\ or\ avconv\ and\ ffprobe\ or\ avprobe)
--audio-format\ FORMAT\ \ \ \ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",\ or\ "wav";
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ best\ by\ default
@@ -130,6 +133,12 @@ redistribute it or use it however you like.
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processing;\ the\ video\ is\ erased\ by\ default
\f[]
.fi
+.SH CONFIGURATION
+.PP
+You can configure youtube-dl by placing default arguments (such as
+\f[C]--extract-audio\ --no-mtime\f[] to always extract the audio and not
+copy the mtime) into \f[C]/etc/youtube-dl.conf\f[] and/or
+\f[C]~/.local/config/youtube-dl.conf\f[].
.SH FAQ
.SS Can you please put the -b option back?
.PP
@@ -203,10 +212,10 @@ Please note that Python 2.5 is not supported anymore.
.PP
Since June 2012 (#342) youtube-dl is packed as an executable zipfile,
simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on
-some systems) or clone the git repo to see the code.
+some systems) or clone the git repository, as laid out above.
If you modify the code, you can run it by executing the
\f[C]__main__.py\f[] file.
-To recompile the executable, run \f[C]make\ compile\f[].
+To recompile the executable, run \f[C]make\ youtube-dl\f[].
.SS The exe throws a \f[I]Runtime error from Visual C++\f[]
.PP
To run the exe you need to install first the Microsoft Visual C++ 2008
diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion
index 1eca2adf3..dee191cd4 100644
--- a/youtube-dl.bash-completion
+++ b/youtube-dl.bash-completion
@@ -3,7 +3,7 @@ __youtube-dl()
local cur prev opts
COMPREPLY=()
cur="${COMP_WORDS[COMP_CWORD]}"
- opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
+ opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt"
if [[ ${cur} == * ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
diff --git a/youtube-dl.exe b/youtube-dl.exe
index 9341e800f..48ca04c29 100755
--- a/youtube-dl.exe
+++ b/youtube-dl.exe
Binary files differ
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py
index ed5a79f13..4c79be432 100644
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -44,37 +44,38 @@ class FileDownloader(object):
Available options:
- username: Username for authentication purposes.
- password: Password for authentication purposes.
- usenetrc: Use netrc for authentication instead.
- quiet: Do not print messages to stdout.
- forceurl: Force printing final URL.
- forcetitle: Force printing title.
- forcethumbnail: Force printing thumbnail URL.
- forcedescription: Force printing description.
- forcefilename: Force printing final filename.
- simulate: Do not download the video files.
- format: Video format code.
- format_limit: Highest quality format to try.
- outtmpl: Template for output names.
- ignoreerrors: Do not stop on download errors.
- ratelimit: Download speed limit, in bytes/sec.
- nooverwrites: Prevent overwriting files.
- retries: Number of times to retry for HTTP error 5xx
- continuedl: Try to continue downloads if possible.
- noprogress: Do not print the progress bar.
- playliststart: Playlist item to start at.
- playlistend: Playlist item to end at.
- matchtitle: Download only matching titles.
- rejecttitle: Reject downloads for matching titles.
- logtostderr: Log messages to stderr instead of stdout.
- consoletitle: Display progress in console window's titlebar.
- nopart: Do not use temporary .part files.
- updatetime: Use the Last-modified header to set output file timestamps.
- writedescription: Write the video description to a .description file
- writeinfojson: Write the video description to a .info.json file
- writesubtitles: Write the video subtitles to a .srt file
- subtitleslang: Language of the subtitles to download
+ username: Username for authentication purposes.
+ password: Password for authentication purposes.
+ usenetrc: Use netrc for authentication instead.
+ quiet: Do not print messages to stdout.
+ forceurl: Force printing final URL.
+ forcetitle: Force printing title.
+ forcethumbnail: Force printing thumbnail URL.
+ forcedescription: Force printing description.
+ forcefilename: Force printing final filename.
+ simulate: Do not download the video files.
+ format: Video format code.
+ format_limit: Highest quality format to try.
+ outtmpl: Template for output names.
+ restrictfilenames: Do not allow "&" and spaces in file names
+ ignoreerrors: Do not stop on download errors.
+ ratelimit: Download speed limit, in bytes/sec.
+ nooverwrites: Prevent overwriting files.
+ retries: Number of times to retry for HTTP error 5xx
+ continuedl: Try to continue downloads if possible.
+ noprogress: Do not print the progress bar.
+ playliststart: Playlist item to start at.
+ playlistend: Playlist item to end at.
+ matchtitle: Download only matching titles.
+ rejecttitle: Reject downloads for matching titles.
+ logtostderr: Log messages to stderr instead of stdout.
+ consoletitle: Display progress in console window's titlebar.
+ nopart: Do not use temporary .part files.
+ updatetime: Use the Last-modified header to set output file timestamps.
+ writedescription: Write the video description to a .description file
+ writeinfojson: Write the video metadata to a .info.json file
+ writesubtitles: Write the video subtitles to a .srt file
+ subtitleslang: Language of the subtitles to download
"""
params = None
@@ -139,23 +140,23 @@ class FileDownloader(object):
new_min = max(bytes / 2.0, 1.0)
new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
if elapsed_time < 0.001:
- return long(new_max)
+ return int(new_max)
rate = bytes / elapsed_time
if rate > new_max:
- return long(new_max)
+ return int(new_max)
if rate < new_min:
- return long(new_min)
- return long(rate)
+ return int(new_min)
+ return int(rate)
@staticmethod
def parse_bytes(bytestr):
- """Parse a string indicating a byte quantity into a long integer."""
+ """Parse a string indicating a byte quantity into an integer."""
matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
if matchobj is None:
return None
number = float(matchobj.group(1))
multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
- return long(round(number * multiplier))
+ return int(round(number * multiplier))
def add_info_extractor(self, ie):
"""Add an InfoExtractor object to the end of the list."""
@@ -180,7 +181,8 @@ class FileDownloader(object):
def to_stderr(self, message):
"""Print message to stderr."""
- print >>sys.stderr, message.encode(preferredencoding())
+ assert type(message) == type(u'')
+ sys.stderr.write((message + u'\n').encode(preferredencoding()))
def to_cons_title(self, message):
"""Set console/terminal window title to message."""
@@ -322,6 +324,7 @@ class FileDownloader(object):
template_dict = dict(info_dict)
template_dict['epoch'] = unicode(long(time.time()))
template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
+ template_dict['title'] = template_dict['stitle'] # Keep both for backwards compatibility
filename = self.params['outtmpl'] % template_dict
return filename
except (ValueError, KeyError), err:
@@ -333,17 +336,21 @@ class FileDownloader(object):
title = info_dict['title']
matchtitle = self.params.get('matchtitle', False)
- if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
- return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+ if matchtitle:
+ matchtitle = matchtitle.decode('utf8')
+ if not re.search(matchtitle, title, re.IGNORECASE):
+ return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
rejecttitle = self.params.get('rejecttitle', False)
- if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
- return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+ if rejecttitle:
+ rejecttitle = rejecttitle.decode('utf8')
+ if re.search(rejecttitle, title, re.IGNORECASE):
+ return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
return None
def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor."""
- info_dict['stitle'] = sanitize_filename(info_dict['title'])
+ info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames'))
reason = self._match_entry(info_dict)
if reason is not None:
@@ -359,17 +366,17 @@ class FileDownloader(object):
# Forced printings
if self.params.get('forcetitle', False):
- print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
+ print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forceurl', False):
- print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
+ print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
- print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
+ print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forcedescription', False) and 'description' in info_dict:
- print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
+ print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forcefilename', False) and filename is not None:
- print filename.encode(preferredencoding(), 'xmlcharrefreplace')
+ print(filename.encode(preferredencoding(), 'xmlcharrefreplace'))
if self.params.get('forceformat', False):
- print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
+ print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace'))
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 6e59ba8fd..35ba6cc5c 100644
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -213,9 +213,9 @@ class YoutubeIE(InfoExtractor):
return srt
def _print_formats(self, formats):
- print 'Available formats:'
+ print('Available formats:')
for x in formats:
- print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
+ print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
def _real_initialize(self):
if self._downloader is None:
@@ -238,7 +238,7 @@ class YoutubeIE(InfoExtractor):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err:
- self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
+ self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
return
# Set language
@@ -247,7 +247,7 @@ class YoutubeIE(InfoExtractor):
self.report_lang()
urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
+ self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
return
# No authentication to be performed
@@ -270,7 +270,7 @@ class YoutubeIE(InfoExtractor):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
return
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
+ self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
return
# Confirm age
@@ -283,7 +283,7 @@ class YoutubeIE(InfoExtractor):
self.report_age_confirmation()
age_results = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
return
def _real_extract(self, url):
@@ -305,7 +305,7 @@ class YoutubeIE(InfoExtractor):
try:
video_webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
# Attempt to extract SWF player URL
@@ -327,7 +327,7 @@ class YoutubeIE(InfoExtractor):
if 'token' in video_info:
break
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
if 'token' not in video_info:
if 'reason' in video_info:
@@ -390,7 +390,7 @@ class YoutubeIE(InfoExtractor):
try:
srt_list = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+ raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
if not srt_lang_list:
@@ -407,7 +407,7 @@ class YoutubeIE(InfoExtractor):
try:
srt_xml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+ raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
if not srt_xml:
raise Trouble(u'WARNING: unable to download video subtitles')
video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
@@ -526,7 +526,7 @@ class MetacafeIE(InfoExtractor):
self.report_disclaimer()
disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err))
return
# Confirm age
@@ -539,7 +539,7 @@ class MetacafeIE(InfoExtractor):
self.report_age_confirmation()
disclaimer = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
return
def _real_extract(self, url):
@@ -563,7 +563,7 @@ class MetacafeIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader and title from webpage
@@ -603,7 +603,7 @@ class MetacafeIE(InfoExtractor):
return
video_title = mobj.group(1).decode('utf-8')
- mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
+ mobj = re.search(r'submitter=(.*?);', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
@@ -656,7 +656,7 @@ class DailymotionIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader and title from webpage
@@ -692,9 +692,14 @@ class DailymotionIE(InfoExtractor):
video_title = unescapeHTML(mobj.group('title').decode('utf-8'))
video_uploader = u'NA'
- mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
+ mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
if mobj is None:
- self._downloader.trouble(u'WARNING: unable to extract uploader nickname')
+ # looking for official user
+ mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage)
+ if mobj_official is None:
+ self._downloader.trouble(u'WARNING: unable to extract uploader nickname')
+ else:
+ video_uploader = mobj_official.group(1)
else:
video_uploader = mobj.group(1)
@@ -749,7 +754,7 @@ class GoogleIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader, and title from webpage
@@ -788,7 +793,7 @@ class GoogleIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
if mobj is None:
@@ -844,7 +849,7 @@ class PhotobucketIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract URL, uploader, and title from webpage
@@ -914,7 +919,7 @@ class YahooIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
@@ -938,7 +943,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract uploader and title from webpage
@@ -996,7 +1001,7 @@ class YahooIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Extract media URL from playlist XML
@@ -1025,7 +1030,7 @@ class VimeoIE(InfoExtractor):
"""Information extractor for vimeo.com."""
# _VALID_URL matches Vimeo URLs
- _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
+ _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)'
IE_NAME = u'vimeo'
def __init__(self, downloader=None):
@@ -1054,7 +1059,7 @@ class VimeoIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
# Now we begin extracting as much information as we can from what we
@@ -1095,21 +1100,32 @@ class VimeoIE(InfoExtractor):
timestamp = config['request']['timestamp']
# Vimeo specific: extract video codec and quality information
+ # First consider quality, then codecs, then take everything
# TODO bind to format param
codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')]
- for codec in codecs:
- if codec[0] in config["video"]["files"]:
- video_codec = codec[0]
- video_extension = codec[1]
- if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd'
- else: quality = 'sd'
+ files = { 'hd': [], 'sd': [], 'other': []}
+ for codec_name, codec_extension in codecs:
+ if codec_name in config["video"]["files"]:
+ if 'hd' in config["video"]["files"][codec_name]:
+ files['hd'].append((codec_name, codec_extension, 'hd'))
+ elif 'sd' in config["video"]["files"][codec_name]:
+ files['sd'].append((codec_name, codec_extension, 'sd'))
+ else:
+ files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0]))
+
+ for quality in ('hd', 'sd', 'other'):
+ if len(files[quality]) > 0:
+ video_quality = files[quality][0][2]
+ video_codec = files[quality][0][0]
+ video_extension = files[quality][0][1]
+ self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality))
break
else:
self._downloader.trouble(u'ERROR: no known codec found')
return
video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
- %(video_id, sig, timestamp, quality, video_codec.upper())
+ %(video_id, sig, timestamp, video_quality, video_codec.upper())
return [{
'id': video_id,
@@ -1209,7 +1225,7 @@ class GenericIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
except ValueError, err:
# since this is the last-resort InfoExtractor, if
@@ -1330,7 +1346,7 @@ class YoutubeSearchIE(InfoExtractor):
try:
data = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download API page: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err))
return
api_response = json.loads(data)['data']
@@ -1407,7 +1423,7 @@ class GoogleSearchIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1490,7 +1506,7 @@ class YahooSearchIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1560,7 +1576,7 @@ class YoutubePlaylistIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1617,7 +1633,7 @@ class YoutubeChannelIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1680,7 +1696,7 @@ class YoutubeUserIE(InfoExtractor):
try:
page = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
# Extract video identifiers
@@ -1752,7 +1768,7 @@ class BlipTVUserIE(InfoExtractor):
mobj = re.search(r'data-users-id="([^"]+)"', page)
page_base = page_base % mobj.group(1)
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
return
@@ -1840,7 +1856,7 @@ class DepositFilesIE(InfoExtractor):
self.report_download_webpage(file_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err))
return
# Search for the real file URL
@@ -1957,7 +1973,7 @@ class FacebookIE(InfoExtractor):
else:
raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
except (IOError, netrc.NetrcParseError), err:
- self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
+ self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
return
if useremail is None:
@@ -1977,7 +1993,7 @@ class FacebookIE(InfoExtractor):
self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
return
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
+ self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
return
def _real_extract(self, url):
@@ -1994,7 +2010,7 @@ class FacebookIE(InfoExtractor):
page = urllib2.urlopen(request)
video_webpage = page.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
# Start extracting information
@@ -2128,13 +2144,13 @@ class BlipTVIE(InfoExtractor):
'urlhandle': urlh
}
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
return
if info is None: # Regular URL
try:
json_code = urlh.read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err))
return
try:
@@ -2202,7 +2218,7 @@ class MyVideoIE(InfoExtractor):
self.report_download_webpage(video_id)
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
self.report_extraction(video_id)
@@ -2464,7 +2480,7 @@ class CollegeHumorIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
@@ -2483,7 +2499,7 @@ class CollegeHumorIE(InfoExtractor):
try:
metaXml = urllib2.urlopen(xmlUrl).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err))
return
mdoc = xml.etree.ElementTree.fromstring(metaXml)
@@ -2529,7 +2545,7 @@ class XVideosIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
self.report_extraction(video_id)
@@ -2615,7 +2631,7 @@ class SoundcloudIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
self.report_extraction('%s/%s' % (uploader, slug_title))
@@ -2642,7 +2658,7 @@ class SoundcloudIE(InfoExtractor):
mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
if mobj:
description = mobj.group(1)
-
+
# upload date
upload_date = None
mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
@@ -2650,7 +2666,7 @@ class SoundcloudIE(InfoExtractor):
try:
upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
except Exception, e:
- self._downloader.to_stderr(str(e))
+ self._downloader.to_stderr(compat_str(e))
# for soundcloud, a request to a cross domain is required for cookies
request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
@@ -2694,7 +2710,7 @@ class InfoQIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
self.report_extraction(url)
@@ -2780,15 +2796,15 @@ class MixcloudIE(InfoExtractor):
return None
def _print_formats(self, formats):
- print 'Available formats:'
+ print('Available formats:')
for fmt in formats.keys():
for b in formats[fmt]:
try:
ext = formats[fmt][b][0]
- print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
+ print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
except TypeError: # we have no bitrate info
ext = formats[fmt][0]
- print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
+ print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
break
def _real_extract(self, url):
@@ -2808,7 +2824,7 @@ class MixcloudIE(InfoExtractor):
self.report_download_json(file_url)
jsonData = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err))
return
# parse JSON
@@ -2992,7 +3008,7 @@ class MTVIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
@@ -3025,7 +3041,7 @@ class MTVIE(InfoExtractor):
try:
metadataXml = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err))
+ self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err))
return
mdoc = xml.etree.ElementTree.fromstring(metadataXml)
@@ -3112,7 +3128,7 @@ class YoukuIE(InfoExtractor):
self.report_download_webpage(video_id)
jsondata = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
self.report_extraction(video_id)
@@ -3288,7 +3304,7 @@ class GooglePlusIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err))
return
# Extract update date
@@ -3330,7 +3346,7 @@ class GooglePlusIE(InfoExtractor):
try:
webpage = urllib2.urlopen(request).read()
except (urllib2.URLError, httplib.HTTPException, socket.error), err:
- self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+ self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
self.report_extract_vid_page(video_page)
diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py
index d14fe71a2..0501cc7f6 100644
--- a/youtube_dl/PostProcessor.py
+++ b/youtube_dl/PostProcessor.py
@@ -146,7 +146,7 @@ class FFmpegExtractAudioPP(PostProcessor):
if int(self._preferredquality) < 10:
more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
else:
- more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality]
+ more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
else:
# We convert the audio (lossy)
acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
@@ -156,7 +156,7 @@ class FFmpegExtractAudioPP(PostProcessor):
if int(self._preferredquality) < 10:
more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
else:
- more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality]
+ more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
if self._preferredcodec == 'aac':
more_opts += ['-f', 'adts']
if self._preferredcodec == 'm4a':
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index ad9a06c55..cbf1dd1a7 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -21,7 +21,7 @@ __authors__ = (
)
__license__ = 'Public Domain'
-__version__ = '2012.10.09'
+__version__ = '2012.11.27'
UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION'
@@ -48,7 +48,7 @@ from PostProcessor import *
def updateSelf(downloader, filename):
''' Update the program file with the latest version from the repository '''
# Note: downloader only used for options
-
+
if not os.access(filename, os.W_OK):
sys.exit('ERROR: no write permissions on %s' % filename)
@@ -66,7 +66,7 @@ def updateSelf(downloader, filename):
directory = os.path.dirname(exe)
if not os.access(directory, os.W_OK):
sys.exit('ERROR: no write permissions on %s' % directory)
-
+
try:
urlh = urllib2.urlopen(UPDATE_URL_EXE)
newcontent = urlh.read()
@@ -75,20 +75,18 @@ def updateSelf(downloader, filename):
outf.write(newcontent)
except (IOError, OSError), err:
sys.exit('ERROR: unable to download latest version')
-
+
try:
bat = os.path.join(directory, 'youtube-dl-updater.bat')
b = open(bat, 'w')
-
- print >> b, """
+ b.write("""
echo Updating youtube-dl...
ping 127.0.0.1 -n 5 -w 1000 > NUL
move /Y "%s.new" "%s"
del "%s"
- """ %(exe, exe, bat)
-
+ \n""" %(exe, exe, bat))
b.close()
-
+
os.startfile(bat)
except (IOError, OSError), err:
sys.exit('ERROR: unable to overwrite current version')
@@ -265,13 +263,18 @@ def parseOpts():
filesystem.add_option('-t', '--title',
action='store_true', dest='usetitle', help='use title in file name', default=False)
+ filesystem.add_option('--id',
+ action='store_true', dest='useid', help='use video ID in file name', default=False)
filesystem.add_option('-l', '--literal',
- action='store_true', dest='useliteral', help='use literal title in file name', default=False)
+ action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False)
filesystem.add_option('-A', '--auto-number',
action='store_true', dest='autonumber',
help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output',
- dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
+ dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')
+ filesystem.add_option('--restrict-filenames',
+ action='store_true', dest='restrictfilenames',
+ help='Avoid some characters such as "&" and spaces in filenames', default=False)
filesystem.add_option('-a', '--batch-file',
dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('-w', '--no-overwrites',
@@ -296,7 +299,7 @@ def parseOpts():
help='write video metadata to a .info.json file', default=False)
- postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
+ postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False,
help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
@@ -424,10 +427,10 @@ def _real_main():
parser.error(u'using .netrc conflicts with giving username/password')
if opts.password is not None and opts.username is None:
parser.error(u'account username missing')
- if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
- parser.error(u'using output template conflicts with using title, literal title or auto number')
- if opts.usetitle and opts.useliteral:
- parser.error(u'using title conflicts with using literal title')
+ if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
+ parser.error(u'using output template conflicts with using title, video ID or auto number')
+ if opts.usetitle and opts.useid:
+ parser.error(u'using title conflicts with using video ID')
if opts.username is not None and opts.password is None:
opts.password = getpass.getpass(u'Type account password and press return:')
if opts.ratelimit is not None:
@@ -478,15 +481,14 @@ def _real_main():
'format_limit': opts.format_limit,
'listformats': opts.listformats,
'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
- or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
- or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
+ or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s')
or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
- or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
- or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
- or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
- or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
+ or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
+ or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s')
+ or (opts.useid and u'%(id)s.%(ext)s')
or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
or u'%(id)s.%(ext)s'),
+ 'restrictfilenames': opts.restrictfilenames,
'ignoreerrors': opts.ignoreerrors,
'ratelimit': opts.ratelimit,
'nooverwrites': opts.nooverwrites,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 839da17d0..1f60d34ae 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -26,6 +26,11 @@ std_headers = {
'Accept-Language': 'en-us,en;q=0.5',
}
+try:
+ compat_str = unicode # Python 2
+except NameError:
+ compat_str = str
+
def preferredencoding():
"""Get preferred encoding.
@@ -83,7 +88,6 @@ class IDParser(HTMLParser.HTMLParser):
HTMLParser.HTMLParser.__init__(self)
def error(self, message):
- print >> sys.stderr, self.getpos()
if self.error_count > 10 or self.started:
raise HTMLParser.HTMLParseError(message, self.getpos())
self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
@@ -190,14 +194,28 @@ def timeconvert(timestr):
if timetuple is not None:
timestamp = email.utils.mktime_tz(timetuple)
return timestamp
-
-def sanitize_filename(s):
- """Sanitizes a string so it could be used as part of a filename."""
+
+def sanitize_filename(s, restricted=False):
+ """Sanitizes a string so it could be used as part of a filename.
+ If restricted is set, use a stricter subset of allowed characters.
+ """
def replace_insane(char):
- if char in u' .\\/|?*<>:"' or ord(char) < 32:
+ if char == '?' or ord(char) < 32 or ord(char) == 127:
+ return ''
+ elif char == '"':
+ return '' if restricted else '\''
+ elif char == ':':
+ return '_-' if restricted else ' -'
+ elif char in '\\/|*<>':
+ return '-'
+ if restricted and (char in '&\'' or char.isspace()):
return '_'
return char
- return u''.join(map(replace_insane, s)).strip('_')
+
+ result = u''.join(map(replace_insane, s))
+ while '--' in result:
+ result = result.replace('--', '-')
+ return result.strip('-')
def orderedSet(iterable):
""" Remove all duplicates from the input iterable """