diff options
-rw-r--r-- | LATEST_VERSION | 2 | ||||
-rw-r--r-- | Makefile | 27 | ||||
-rw-r--r-- | README.md | 15 | ||||
-rw-r--r-- | test/test_utils.py | 36 | ||||
-rwxr-xr-x | youtube-dl | bin | 43730 -> 44592 bytes | |||
-rw-r--r-- | youtube-dl.1 | 19 | ||||
-rw-r--r-- | youtube-dl.bash-completion | 2 | ||||
-rwxr-xr-x | youtube-dl.exe | bin | 3994108 -> 3995263 bytes | |||
-rw-r--r-- | youtube_dl/FileDownloader.py | 105 | ||||
-rw-r--r-- | youtube_dl/InfoExtractors.py | 142 | ||||
-rw-r--r-- | youtube_dl/PostProcessor.py | 4 | ||||
-rw-r--r-- | youtube_dl/__init__.py | 46 | ||||
-rw-r--r-- | youtube_dl/utils.py | 30 |
13 files changed, 259 insertions, 169 deletions
diff --git a/LATEST_VERSION b/LATEST_VERSION index d070c6ea3..5db716ac7 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2012.10.09 +2012.11.27 @@ -5,12 +5,22 @@ clean: rm -f youtube-dl youtube-dl.exe youtube-dl.1 LATEST_VERSION PREFIX=/usr/local +BINDIR=$(PREFIX)/bin +MANDIR=$(PREFIX)/man +SYSCONFDIR=/etc + install: youtube-dl youtube-dl.1 youtube-dl.bash-completion - install -m 755 --owner root --group root youtube-dl $(PREFIX)/bin/ - install -m 644 --owner root --group root youtube-dl.1 $(PREFIX)/man/man1 - install -m 644 --owner root --group root youtube-dl.bash-completion /etc/bash_completion.d/youtube-dl + install -d $(DESTDIR)$(BINDIR) + install -m 755 youtube-dl $(DESTDIR)$(BINDIR) + install -d $(DESTDIR)$(MANDIR)/man1 + install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1 + install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d + install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl + +test: + nosetests2 --nocapture test -.PHONY: all clean install README.md youtube-dl.bash-completion +.PHONY: all clean install test README.md youtube-dl.bash-completion # TODO un-phony README.md and youtube-dl.bash_completion by reading from .in files and generating from them youtube-dl: youtube_dl/*.py @@ -26,13 +36,13 @@ youtube-dl.exe: youtube_dl/*.py README.md: youtube_dl/*.py @options=$$(COLUMNS=80 python -m youtube_dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/## \1/') && \ header=$$(sed -e '/.*# OPTIONS/,$$ d' README.md) && \ - footer=$$(sed -e '1,/.*# FAQ/ d' README.md) && \ + footer=$$(sed -e '1,/.*# CONFIGURATION/ d' README.md) && \ echo "$${header}" > README.md && \ echo >> README.md && \ echo '# OPTIONS' >> README.md && \ echo "$${options}" >> README.md&& \ echo >> README.md && \ - echo '# FAQ' >> README.md && \ + echo '# CONFIGURATION' >> README.md && \ echo "$${footer}" >> README.md youtube-dl.1: README.md @@ -45,8 +55,3 @@ youtube-dl.bash-completion: README.md LATEST_VERSION: youtube_dl/__init__.py python -m youtube_dl --version > LATEST_VERSION - -test: - nosetests2 --nocapture test - -.PHONY: default compile update update-latest update-readme test clean @@ -36,9 +36,10 @@ which means you can modify it, redistribute it or use it however you like. ## Filesystem Options: -t, --title use title in file name - -l, --literal use literal title in file name + --id use video ID in file name + -l, --literal [deprecated] alias of --title -A, --auto-number number downloaded files starting from 00000 - -o, --output TEMPLATE output filename template. Use %(stitle)s to get the + -o, --output TEMPLATE output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, @@ -46,6 +47,8 @@ which means you can modify it, redistribute it or use it however you like. %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout. + --restrict-filenames Avoid some characters such as "&" and spaces in + filenames -a, --batch-file FILE file containing URLs to download ('-' for stdin) -w, --no-overwrites do not overwrite files -c, --continue resume partially downloaded files @@ -91,7 +94,7 @@ which means you can modify it, redistribute it or use it however you like. -n, --netrc use .netrc authentication data ## Post-processing Options: - --extract-audio convert video files to audio-only files (requires + -x, --extract-audio convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe) --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default @@ -101,6 +104,10 @@ which means you can modify it, redistribute it or use it however you like. -k, --keep-video keeps the video file on disk after the post- processing; the video is erased by default +# CONFIGURATION + +You can configure youtube-dl by placing default arguments (such as `--extract-audio --no-mtime` to always extract the audio and not copy the mtime) into `/etc/youtube-dl.conf` and/or `~/.local/config/youtube-dl.conf`. + # FAQ ### Can you please put the -b option back? @@ -146,7 +153,7 @@ Please note that Python 2.5 is not supported anymore. ### What is this binary file? Where has the code gone? -Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repo to see the code. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make compile`. +Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`. ### The exe throws a *Runtime error from Visual C++* diff --git a/test/test_utils.py b/test/test_utils.py index eb0af703f..0a435ddc5 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -22,17 +22,43 @@ class TestUtil(unittest.TestCase): self.assertEqual(sanitize_filename(u'123'), u'123') - self.assertEqual(u'abc_de', sanitize_filename(u'abc/de')) - self.assertTrue(u'de' in sanitize_filename(u'abc/de')) + self.assertEqual(u'abc-de', sanitize_filename(u'abc/de')) self.assertFalse(u'/' in sanitize_filename(u'abc/de///')) - self.assertEqual(u'abc_de', sanitize_filename(u'abc\\de')) - self.assertEqual(u'abc_de', sanitize_filename(u'abc\\de')) - self.assertTrue(u'de' in sanitize_filename(u'abc\\de')) + self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de')) + self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|')) + self.assertEqual(u'yes no', sanitize_filename(u'yes? no')) + self.assertEqual(u'this - that', sanitize_filename(u'this: that')) + self.assertEqual(sanitize_filename(u'AT&T'), u'AT&T') self.assertEqual(sanitize_filename(u'ä'), u'ä') self.assertEqual(sanitize_filename(u'кириллица'), u'кириллица') + forbidden = u'"\0\\/' + for fc in forbidden: + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc)) + + def test_sanitize_filename_restricted(self): + self.assertEqual(sanitize_filename(u'abc', restricted=True), u'abc') + self.assertEqual(sanitize_filename(u'abc_d-e', restricted=True), u'abc_d-e') + + self.assertEqual(sanitize_filename(u'123', restricted=True), u'123') + + self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True)) + self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True)) + + self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True)) + self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True)) + self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True)) + self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True)) + + forbidden = u'"\0\\/&: \'\t\n' + for fc in forbidden: + print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True))) + for fbc in forbidden: + self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) + def test_ordered_set(self): self.assertEqual(orderedSet([1,1,2,3,4,4,5,6,7,3,5]), [1,2,3,4,5,6,7]) self.assertEqual(orderedSet([]), []) diff --git a/youtube-dl b/youtube-dl Binary files differindex 4da0fcb96..064508692 100755 --- a/youtube-dl +++ b/youtube-dl diff --git a/youtube-dl.1 b/youtube-dl.1 index a3100e9e4..64120a8d2 100644 --- a/youtube-dl.1 +++ b/youtube-dl.1 @@ -48,9 +48,10 @@ redistribute it or use it however you like. .nf \f[C] -t,\ --title\ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ title\ in\ file\ name --l,\ --literal\ \ \ \ \ \ \ \ \ \ \ \ use\ literal\ title\ in\ file\ name +--id\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ use\ video\ ID\ in\ file\ name +-l,\ --literal\ \ \ \ \ \ \ \ \ \ \ \ [deprecated]\ alias\ of\ --title -A,\ --auto-number\ \ \ \ \ \ \ \ number\ downloaded\ files\ starting\ from\ 00000 --o,\ --output\ TEMPLATE\ \ \ \ output\ filename\ template.\ Use\ %(stitle)s\ to\ get\ the +-o,\ --output\ TEMPLATE\ \ \ \ output\ filename\ template.\ Use\ %(title)s\ to\ get\ the \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ title,\ %(uploader)s\ for\ the\ uploader\ name, \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(autonumber)s\ to\ get\ an\ automatically\ incremented \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ number,\ %(ext)s\ for\ the\ filename\ extension, @@ -58,6 +59,8 @@ redistribute it or use it however you like. \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe, \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. +--restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin) -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files @@ -119,7 +122,7 @@ redistribute it or use it however you like. .IP .nf \f[C] ---extract-audio\ \ \ \ \ \ \ \ \ \ convert\ video\ files\ to\ audio-only\ files\ (requires +-x,\ --extract-audio\ \ \ \ \ \ convert\ video\ files\ to\ audio-only\ files\ (requires \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ ffmpeg\ or\ avconv\ and\ ffprobe\ or\ avprobe) --audio-format\ FORMAT\ \ \ \ "best",\ "aac",\ "vorbis",\ "mp3",\ "m4a",\ or\ "wav"; \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ best\ by\ default @@ -130,6 +133,12 @@ redistribute it or use it however you like. \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ processing;\ the\ video\ is\ erased\ by\ default \f[] .fi +.SH CONFIGURATION +.PP +You can configure youtube-dl by placing default arguments (such as +\f[C]--extract-audio\ --no-mtime\f[] to always extract the audio and not +copy the mtime) into \f[C]/etc/youtube-dl.conf\f[] and/or +\f[C]~/.local/config/youtube-dl.conf\f[]. .SH FAQ .SS Can you please put the -b option back? .PP @@ -203,10 +212,10 @@ Please note that Python 2.5 is not supported anymore. .PP Since June 2012 (#342) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to \f[C]youtube-dl.zip\f[] first on -some systems) or clone the git repo to see the code. +some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the \f[C]__main__.py\f[] file. -To recompile the executable, run \f[C]make\ compile\f[]. +To recompile the executable, run \f[C]make\ youtube-dl\f[]. .SS The exe throws a \f[I]Runtime error from Visual C++\f[] .PP To run the exe you need to install first the Microsoft Visual C++ 2008 diff --git a/youtube-dl.bash-completion b/youtube-dl.bash-completion index 1eca2adf3..dee191cd4 100644 --- a/youtube-dl.bash-completion +++ b/youtube-dl.bash-completion @@ -3,7 +3,7 @@ __youtube-dl() local cur prev opts COMPREPLY=() cur="${COMP_WORDS[COMP_CWORD]}" - opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" + opts="--all-formats --audio-format --audio-quality --auto-number --batch-file --console-title --continue --cookies --dump-user-agent --extract-audio --format --get-description --get-filename --get-format --get-thumbnail --get-title --get-url --help --id --ignore-errors --keep-video --list-extractors --list-formats --literal --match-title --max-downloads --max-quality --netrc --no-continue --no-mtime --no-overwrites --no-part --no-progress --output --password --playlist-end --playlist-start --prefer-free-formats --quiet --rate-limit --reject-title --restrict-filenames --retries --simulate --skip-download --srt-lang --title --update --user-agent --username --verbose --version --write-description --write-info-json --write-srt" if [[ ${cur} == * ]] ; then COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) diff --git a/youtube-dl.exe b/youtube-dl.exe Binary files differindex 9341e800f..48ca04c29 100755 --- a/youtube-dl.exe +++ b/youtube-dl.exe diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index ed5a79f13..4c79be432 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -44,37 +44,38 @@ class FileDownloader(object): Available options: - username: Username for authentication purposes. - password: Password for authentication purposes. - usenetrc: Use netrc for authentication instead. - quiet: Do not print messages to stdout. - forceurl: Force printing final URL. - forcetitle: Force printing title. - forcethumbnail: Force printing thumbnail URL. - forcedescription: Force printing description. - forcefilename: Force printing final filename. - simulate: Do not download the video files. - format: Video format code. - format_limit: Highest quality format to try. - outtmpl: Template for output names. - ignoreerrors: Do not stop on download errors. - ratelimit: Download speed limit, in bytes/sec. - nooverwrites: Prevent overwriting files. - retries: Number of times to retry for HTTP error 5xx - continuedl: Try to continue downloads if possible. - noprogress: Do not print the progress bar. - playliststart: Playlist item to start at. - playlistend: Playlist item to end at. - matchtitle: Download only matching titles. - rejecttitle: Reject downloads for matching titles. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. - nopart: Do not use temporary .part files. - updatetime: Use the Last-modified header to set output file timestamps. - writedescription: Write the video description to a .description file - writeinfojson: Write the video description to a .info.json file - writesubtitles: Write the video subtitles to a .srt file - subtitleslang: Language of the subtitles to download + username: Username for authentication purposes. + password: Password for authentication purposes. + usenetrc: Use netrc for authentication instead. + quiet: Do not print messages to stdout. + forceurl: Force printing final URL. + forcetitle: Force printing title. + forcethumbnail: Force printing thumbnail URL. + forcedescription: Force printing description. + forcefilename: Force printing final filename. + simulate: Do not download the video files. + format: Video format code. + format_limit: Highest quality format to try. + outtmpl: Template for output names. + restrictfilenames: Do not allow "&" and spaces in file names + ignoreerrors: Do not stop on download errors. + ratelimit: Download speed limit, in bytes/sec. + nooverwrites: Prevent overwriting files. + retries: Number of times to retry for HTTP error 5xx + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + playliststart: Playlist item to start at. + playlistend: Playlist item to end at. + matchtitle: Download only matching titles. + rejecttitle: Reject downloads for matching titles. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. + writedescription: Write the video description to a .description file + writeinfojson: Write the video description to a .info.json file + writesubtitles: Write the video subtitles to a .srt file + subtitleslang: Language of the subtitles to download """ params = None @@ -139,23 +140,23 @@ class FileDownloader(object): new_min = max(bytes / 2.0, 1.0) new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB if elapsed_time < 0.001: - return long(new_max) + return int(new_max) rate = bytes / elapsed_time if rate > new_max: - return long(new_max) + return int(new_max) if rate < new_min: - return long(new_min) - return long(rate) + return int(new_min) + return int(rate) @staticmethod def parse_bytes(bytestr): - """Parse a string indicating a byte quantity into a long integer.""" + """Parse a string indicating a byte quantity into an integer.""" matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) if matchobj is None: return None number = float(matchobj.group(1)) multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) - return long(round(number * multiplier)) + return int(round(number * multiplier)) def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" @@ -180,7 +181,8 @@ class FileDownloader(object): def to_stderr(self, message): """Print message to stderr.""" - print >>sys.stderr, message.encode(preferredencoding()) + assert type(message) == type(u'') + sys.stderr.write((message + u'\n').encode(preferredencoding())) def to_cons_title(self, message): """Set console/terminal window title to message.""" @@ -322,6 +324,7 @@ class FileDownloader(object): template_dict = dict(info_dict) template_dict['epoch'] = unicode(long(time.time())) template_dict['autonumber'] = unicode('%05d' % self._num_downloads) + template_dict['title'] = template_dict['stitle'] # Keep both for backwards compatibility filename = self.params['outtmpl'] % template_dict return filename except (ValueError, KeyError), err: @@ -333,17 +336,21 @@ class FileDownloader(object): title = info_dict['title'] matchtitle = self.params.get('matchtitle', False) - if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): - return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' + if matchtitle: + matchtitle = matchtitle.decode('utf8') + if not re.search(matchtitle, title, re.IGNORECASE): + return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' rejecttitle = self.params.get('rejecttitle', False) - if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): - return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' + if rejecttitle: + rejecttitle = rejecttitle.decode('utf8') + if re.search(rejecttitle, title, re.IGNORECASE): + return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" - info_dict['stitle'] = sanitize_filename(info_dict['title']) + info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames')) reason = self._match_entry(info_dict) if reason is not None: @@ -359,17 +366,17 @@ class FileDownloader(object): # Forced printings if self.params.get('forcetitle', False): - print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forceurl', False): - print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: - print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcedescription', False) and 'description' in info_dict: - print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcefilename', False) and filename is not None: - print filename.encode(preferredencoding(), 'xmlcharrefreplace') + print(filename.encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forceformat', False): - print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')) # Do nothing else if in simulate mode if self.params.get('simulate', False): diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 6e59ba8fd..35ba6cc5c 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -213,9 +213,9 @@ class YoutubeIE(InfoExtractor): return srt def _print_formats(self, formats): - print 'Available formats:' + print('Available formats:') for x in formats: - print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')) + print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))) def _real_initialize(self): if self._downloader is None: @@ -238,7 +238,7 @@ class YoutubeIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) return # Set language @@ -247,7 +247,7 @@ class YoutubeIE(InfoExtractor): self.report_lang() urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err)) return # No authentication to be performed @@ -270,7 +270,7 @@ class YoutubeIE(InfoExtractor): self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) return # Confirm age @@ -283,7 +283,7 @@ class YoutubeIE(InfoExtractor): self.report_age_confirmation() age_results = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) return def _real_extract(self, url): @@ -305,7 +305,7 @@ class YoutubeIE(InfoExtractor): try: video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return # Attempt to extract SWF player URL @@ -327,7 +327,7 @@ class YoutubeIE(InfoExtractor): if 'token' in video_info: break except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) return if 'token' not in video_info: if 'reason' in video_info: @@ -390,7 +390,7 @@ class YoutubeIE(InfoExtractor): try: srt_list = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err)) + raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list) srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) if not srt_lang_list: @@ -407,7 +407,7 @@ class YoutubeIE(InfoExtractor): try: srt_xml = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err)) + raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) if not srt_xml: raise Trouble(u'WARNING: unable to download video subtitles') video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8')) @@ -526,7 +526,7 @@ class MetacafeIE(InfoExtractor): self.report_disclaimer() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err)) return # Confirm age @@ -539,7 +539,7 @@ class MetacafeIE(InfoExtractor): self.report_age_confirmation() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) return def _real_extract(self, url): @@ -563,7 +563,7 @@ class MetacafeIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader and title from webpage @@ -603,7 +603,7 @@ class MetacafeIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') - mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage) + mobj = re.search(r'submitter=(.*?);', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return @@ -656,7 +656,7 @@ class DailymotionIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader and title from webpage @@ -692,9 +692,14 @@ class DailymotionIE(InfoExtractor): video_title = unescapeHTML(mobj.group('title').decode('utf-8')) video_uploader = u'NA' - mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage) + mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage) if mobj is None: - self._downloader.trouble(u'WARNING: unable to extract uploader nickname') + # lookin for official user + mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage) + if mobj_official is None: + self._downloader.trouble(u'WARNING: unable to extract uploader nickname') + else: + video_uploader = mobj_official.group(1) else: video_uploader = mobj.group(1) @@ -749,7 +754,7 @@ class GoogleIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader, and title from webpage @@ -788,7 +793,7 @@ class GoogleIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage) if mobj is None: @@ -844,7 +849,7 @@ class PhotobucketIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader, and title from webpage @@ -914,7 +919,7 @@ class YahooIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return mobj = re.search(r'\("id", "([0-9]+)"\);', webpage) @@ -938,7 +943,7 @@ class YahooIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract uploader and title from webpage @@ -996,7 +1001,7 @@ class YahooIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract media URL from playlist XML @@ -1025,7 +1030,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' + _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)' IE_NAME = u'vimeo' def __init__(self, downloader=None): @@ -1054,7 +1059,7 @@ class VimeoIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Now we begin extracting as much information as we can from what we @@ -1095,21 +1100,32 @@ class VimeoIE(InfoExtractor): timestamp = config['request']['timestamp'] # Vimeo specific: extract video codec and quality information + # First consider quality, then codecs, then take everything # TODO bind to format param codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')] - for codec in codecs: - if codec[0] in config["video"]["files"]: - video_codec = codec[0] - video_extension = codec[1] - if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd' - else: quality = 'sd' + files = { 'hd': [], 'sd': [], 'other': []} + for codec_name, codec_extension in codecs: + if codec_name in config["video"]["files"]: + if 'hd' in config["video"]["files"][codec_name]: + files['hd'].append((codec_name, codec_extension, 'hd')) + elif 'sd' in config["video"]["files"][codec_name]: + files['sd'].append((codec_name, codec_extension, 'sd')) + else: + files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0])) + + for quality in ('hd', 'sd', 'other'): + if len(files[quality]) > 0: + video_quality = files[quality][0][2] + video_codec = files[quality][0][0] + video_extension = files[quality][0][1] + self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality)) break else: self._downloader.trouble(u'ERROR: no known codec found') return video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \ - %(video_id, sig, timestamp, quality, video_codec.upper()) + %(video_id, sig, timestamp, video_quality, video_codec.upper()) return [{ 'id': video_id, @@ -1209,7 +1225,7 @@ class GenericIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return except ValueError, err: # since this is the last-resort InfoExtractor, if @@ -1330,7 +1346,7 @@ class YoutubeSearchIE(InfoExtractor): try: data = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download API page: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err)) return api_response = json.loads(data)['data'] @@ -1407,7 +1423,7 @@ class GoogleSearchIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1490,7 +1506,7 @@ class YahooSearchIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1560,7 +1576,7 @@ class YoutubePlaylistIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1617,7 +1633,7 @@ class YoutubeChannelIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1680,7 +1696,7 @@ class YoutubeUserIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1752,7 +1768,7 @@ class BlipTVUserIE(InfoExtractor): mobj = re.search(r'data-users-id="([^"]+)"', page) page_base = page_base % mobj.group(1) except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return @@ -1840,7 +1856,7 @@ class DepositFilesIE(InfoExtractor): self.report_download_webpage(file_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err)) return # Search for the real file URL @@ -1957,7 +1973,7 @@ class FacebookIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) return if useremail is None: @@ -1977,7 +1993,7 @@ class FacebookIE(InfoExtractor): self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) return def _real_extract(self, url): @@ -1994,7 +2010,7 @@ class FacebookIE(InfoExtractor): page = urllib2.urlopen(request) video_webpage = page.read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return # Start extracting information @@ -2128,13 +2144,13 @@ class BlipTVIE(InfoExtractor): 'urlhandle': urlh } except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) return if info is None: # Regular URL try: json_code = urlh.read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err)) return try: @@ -2202,7 +2218,7 @@ class MyVideoIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return self.report_extraction(video_id) @@ -2464,7 +2480,7 @@ class CollegeHumorIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage) @@ -2483,7 +2499,7 @@ class CollegeHumorIE(InfoExtractor): try: metaXml = urllib2.urlopen(xmlUrl).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err)) return mdoc = xml.etree.ElementTree.fromstring(metaXml) @@ -2529,7 +2545,7 @@ class XVideosIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return self.report_extraction(video_id) @@ -2615,7 +2631,7 @@ class SoundcloudIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return self.report_extraction('%s/%s' % (uploader, slug_title)) @@ -2642,7 +2658,7 @@ class SoundcloudIE(InfoExtractor): mobj = re.search('track-description-value"><p>(.*?)</p>', webpage) if mobj: description = mobj.group(1) - + # upload date upload_date = None mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage) @@ -2650,7 +2666,7 @@ class SoundcloudIE(InfoExtractor): try: upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d') except Exception, e: - self._downloader.to_stderr(str(e)) + self._downloader.to_stderr(compat_str(e)) # for soundcloud, a request to a cross domain is required for cookies request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers) @@ -2694,7 +2710,7 @@ class InfoQIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return self.report_extraction(url) @@ -2780,15 +2796,15 @@ class MixcloudIE(InfoExtractor): return None def _print_formats(self, formats): - print 'Available formats:' + print('Available formats:') for fmt in formats.keys(): for b in formats[fmt]: try: ext = formats[fmt][b][0] - print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]) + print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])) except TypeError: # we have no bitrate info ext = formats[fmt][0] - print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]) + print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])) break def _real_extract(self, url): @@ -2808,7 +2824,7 @@ class MixcloudIE(InfoExtractor): self.report_download_json(file_url) jsonData = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err)) return # parse JSON @@ -2992,7 +3008,7 @@ class MTVIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage) @@ -3025,7 +3041,7 @@ class MTVIE(InfoExtractor): try: metadataXml = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err)) return mdoc = xml.etree.ElementTree.fromstring(metadataXml) @@ -3112,7 +3128,7 @@ class YoukuIE(InfoExtractor): self.report_download_webpage(video_id) jsondata = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return self.report_extraction(video_id) @@ -3288,7 +3304,7 @@ class GooglePlusIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err)) return # Extract update date @@ -3330,7 +3346,7 @@ class GooglePlusIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return self.report_extract_vid_page(video_page) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index d14fe71a2..0501cc7f6 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -146,7 +146,7 @@ class FFmpegExtractAudioPP(PostProcessor): if int(self._preferredquality) < 10: more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality] else: - more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality] + more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k'] else: # We convert the audio (lossy) acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec] @@ -156,7 +156,7 @@ class FFmpegExtractAudioPP(PostProcessor): if int(self._preferredquality) < 10: more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality] else: - more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality] + more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k'] if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] if self._preferredcodec == 'm4a': diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ad9a06c55..cbf1dd1a7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -21,7 +21,7 @@ __authors__ = ( ) __license__ = 'Public Domain' -__version__ = '2012.10.09' +__version__ = '2012.11.27' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION' @@ -48,7 +48,7 @@ from PostProcessor import * def updateSelf(downloader, filename): ''' Update the program file with the latest version from the repository ''' # Note: downloader only used for options - + if not os.access(filename, os.W_OK): sys.exit('ERROR: no write permissions on %s' % filename) @@ -66,7 +66,7 @@ def updateSelf(downloader, filename): directory = os.path.dirname(exe) if not os.access(directory, os.W_OK): sys.exit('ERROR: no write permissions on %s' % directory) - + try: urlh = urllib2.urlopen(UPDATE_URL_EXE) newcontent = urlh.read() @@ -75,20 +75,18 @@ def updateSelf(downloader, filename): outf.write(newcontent) except (IOError, OSError), err: sys.exit('ERROR: unable to download latest version') - + try: bat = os.path.join(directory, 'youtube-dl-updater.bat') b = open(bat, 'w') - - print >> b, """ + b.write(""" echo Updating youtube-dl... ping 127.0.0.1 -n 5 -w 1000 > NUL move /Y "%s.new" "%s" del "%s" - """ %(exe, exe, bat) - + \n""" %(exe, exe, bat)) b.close() - + os.startfile(bat) except (IOError, OSError), err: sys.exit('ERROR: unable to overwrite current version') @@ -265,13 +263,18 @@ def parseOpts(): filesystem.add_option('-t', '--title', action='store_true', dest='usetitle', help='use title in file name', default=False) + filesystem.add_option('--id', + action='store_true', dest='useid', help='use video ID in file name', default=False) filesystem.add_option('-l', '--literal', - action='store_true', dest='useliteral', help='use literal title in file name', default=False) + action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) filesystem.add_option('-A', '--auto-number', action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') + dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') + filesystem.add_option('--restrict-filenames', + action='store_true', dest='restrictfilenames', + help='Avoid some characters such as "&" and spaces in filenames', default=False) filesystem.add_option('-a', '--batch-file', dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', @@ -296,7 +299,7 @@ def parseOpts(): help='write video metadata to a .info.json file', default=False) - postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, + postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default') @@ -424,10 +427,10 @@ def _real_main(): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: parser.error(u'account username missing') - if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): - parser.error(u'using output template conflicts with using title, literal title or auto number') - if opts.usetitle and opts.useliteral: - parser.error(u'using title conflicts with using literal title') + if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): + parser.error(u'using output template conflicts with using title, video ID or auto number') + if opts.usetitle and opts.useid: + parser.error(u'using title conflicts with using video ID') if opts.username is not None and opts.password is None: opts.password = getpass.getpass(u'Type account password and press return:') if opts.ratelimit is not None: @@ -478,15 +481,14 @@ def _real_main(): 'format_limit': opts.format_limit, 'listformats': opts.listformats, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) - or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') - or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') - or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s') + or (opts.useid and u'%(id)s.%(ext)s') or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), + 'restrictfilenames': opts.restrictfilenames, 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 839da17d0..1f60d34ae 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -26,6 +26,11 @@ std_headers = { 'Accept-Language': 'en-us,en;q=0.5', } +try: + compat_str = unicode # Python 2 +except NameError: + compat_str = str + def preferredencoding(): """Get preferred encoding. @@ -83,7 +88,6 @@ class IDParser(HTMLParser.HTMLParser): HTMLParser.HTMLParser.__init__(self) def error(self, message): - print >> sys.stderr, self.getpos() if self.error_count > 10 or self.started: raise HTMLParser.HTMLParseError(message, self.getpos()) self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line @@ -190,14 +194,28 @@ def timeconvert(timestr): if timetuple is not None: timestamp = email.utils.mktime_tz(timetuple) return timestamp - -def sanitize_filename(s): - """Sanitizes a string so it could be used as part of a filename.""" + +def sanitize_filename(s, restricted=False): + """Sanitizes a string so it could be used as part of a filename. + If restricted is set, use a stricter subset of allowed characters. + """ def replace_insane(char): - if char in u' .\\/|?*<>:"' or ord(char) < 32: + if char == '?' or ord(char) < 32 or ord(char) == 127: + return '' + elif char == '"': + return '' if restricted else '\'' + elif char == ':': + return '_-' if restricted else ' -' + elif char in '\\/|*<>': + return '-' + if restricted and (char in '&\'' or char.isspace()): return '_' return char - return u''.join(map(replace_insane, s)).strip('_') + + result = u''.join(map(replace_insane, s)) + while '--' in result: + result = result.replace('--', '-') + return result.strip('-') def orderedSet(iterable): """ Remove all duplicates from the input iterable """ |