diff options
| -rw-r--r-- | LATEST_VERSION | 2 | ||||
| -rw-r--r-- | Makefile | 20 | ||||
| -rw-r--r-- | README.md | 102 | ||||
| -rwxr-xr-x | youtube-dl | 1436 | 
4 files changed, 1188 insertions, 372 deletions
| diff --git a/LATEST_VERSION b/LATEST_VERSION index 295c9c4fa..afa0b7718 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2011.03.29 +2011.09.13 diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..e725dc720 --- /dev/null +++ b/Makefile @@ -0,0 +1,20 @@ +default: update + +update: update-readme update-latest + +update-latest: +	./youtube-dl --version > LATEST_VERSION + +update-readme: +	@options=$$(COLUMNS=80 ./youtube-dl --help | sed -e '1,/.*General Options.*/ d' -e 's/^\W\{2\}\(\w\)/### \1/') && \ +		header=$$(sed -e '/.*## OPTIONS/,$$ d' README.md) && \ +		footer=$$(sed -e '1,/.*## FAQ/ d' README.md) && \ +		echo "$${header}" > README.md && \ +		echo -e '\n## OPTIONS' >> README.md && \ +		echo "$${options}" >> README.md&& \ +		echo -e '\n## FAQ' >> README.md && \ +		echo "$${footer}" >> README.md + + + +.PHONY: default update update-latest update-readme diff --git a/README.md b/README.md new file mode 100644 index 000000000..345a0b752 --- /dev/null +++ b/README.md @@ -0,0 +1,102 @@ +# youtube-dl + +## USAGE +youtube-dl [OPTIONS] URL + +## DESCRIPTION +**youtube-dl** is a small command-line program to download videos from +YouTube.com and a few more sites. It requires the Python interpreter, version +2.x (x being at least 5), and it is not platform specific. It should work in +your Unix box, in Windows or in Mac OS X. It is released to the public domain, +which means you can modify it, redistribute it or use it however you like. + +## OPTIONS +    -h, --help               print this help text and exit +    -v, --version            print program version and exit +    -U, --update             update this program to latest version +    -i, --ignore-errors      continue on download errors +    -r, --rate-limit LIMIT   download rate limit (e.g. 
50k or 44.6m) +    -R, --retries RETRIES    number of retries (default is 10) +    --playlist-start NUMBER  playlist video to start at (default is 1) +    --playlist-end NUMBER    playlist video to end at (default is last) +    --dump-user-agent        display the current browser identification + +### Filesystem Options: +    -t, --title              use title in file name +    -l, --literal            use literal title in file name +    -A, --auto-number        number downloaded files starting from 00000 +    -o, --output TEMPLATE    output filename template +    -a, --batch-file FILE    file containing URLs to download ('-' for stdin) +    -w, --no-overwrites      do not overwrite files +    -c, --continue           resume partially downloaded files +    --cookies FILE           file to dump cookie jar to +    --no-part                do not use .part files +    --no-mtime               do not use the Last-modified header to set the file +                             modification time +    --write-description      write video description to a .description file +    --write-info-json        write video metadata to a .info.json file + +### Verbosity / Simulation Options: +    -q, --quiet              activates quiet mode +    -s, --simulate           do not download video +    -g, --get-url            simulate, quiet but print URL +    -e, --get-title          simulate, quiet but print title +    --get-thumbnail          simulate, quiet but print thumbnail URL +    --get-description        simulate, quiet but print video description +    --get-filename           simulate, quiet but print output filename +    --no-progress            do not print progress bar +    --console-title          display progress in console titlebar + +### Video Format Options: +    -f, --format FORMAT      video format code +    --all-formats            download all available video formats +    --max-quality FORMAT     highest quality format to download + +### Authentication Options: +    
-u, --username USERNAME  account username +    -p, --password PASSWORD  account password +    -n, --netrc              use .netrc authentication data + +### Post-processing Options: +    --extract-audio          convert video files to audio-only files (requires +                             ffmpeg and ffprobe) +    --audio-format FORMAT    "best", "aac" or "mp3"; best by default + +## FAQ + +### Can you please put the -b option back? + +Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the -b option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the -f option and youtube-dl will try to download it. + +### I get HTTP error 402 when trying to download a video. What's this? + +Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering to provide a way to let you solve the CAPTCHA](https://github.com/phihag/youtube-dl/issues/8), but at the moment, your best course of action is pointing a webbrowser to the youtube URL, solving the CAPTCHA, and restart youtube-dl. + +### I have downloaded a video but how can I play it? + +Once the video is fully downloaded, use any video player, such as [vlc](http://www.videolan.org) or [mplayer](http://www.mplayerhq.hu/). + +### The links provided by youtube-dl -g are not working anymore + +The URLs youtube-dl outputs require the downloader to have the correct cookies. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. + +### ERROR: no fmt_url_map or conn information found in video info + +youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. 
You can update youtube-dl with `sudo youtube-dl --update`. + +## COPYRIGHT +**youtube-dl**: Copyright © 2006-2011 Ricardo Garcia Gonzalez. The program is +released into the public domain by the copyright holder. This README file was +originally written by Daniel Bolton (<https://github.com/dbbolton>) and is +likewise released into the public domain. + +## BUGS + +Bugs and suggestions should be reported at: <https://github.com/phihag/youtube-dl/issues> + +Please include: + +* Your exact command line, like `youtube-dl -t "http://www.youtube.com/watch?v=uHlDtZ6Oc3s&feature=channel_video_title"`. A common mistake is not to escape the `&`. Putting URLs in quotes should solve this problem. +* The output of `youtube-dl --version` +* The output of `python --version` +* The name and version of your Operating System ("Ubuntu 11.04 x64" or "Windows 7 x64" is usually enough). diff --git a/youtube-dl b/youtube-dl index 377ceff58..781dff758 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1,17 +1,26 @@  #!/usr/bin/env python  # -*- coding: utf-8 -*- -# Author: Ricardo Garcia Gonzalez -# Author: Danny Colligan -# Author: Benjamin Johnson -# Author: Vasyl' Vavrychuk -# Author: Witold Baryluk -# Author: Paweł Paprota -# Author: Gergely Imreh -# License: Public domain code + +__author__  = ( +	'Ricardo Garcia Gonzalez', +	'Danny Colligan', +	'Benjamin Johnson', +	'Vasyl\' Vavrychuk', +	'Witold Baryluk', +	'Paweł Paprota', +	'Gergely Imreh', +	'Rogério Brito', +	'Philipp Hagemeister', +	'Sören Schulze', +	) + +__license__ = 'Public Domain' +__version__ = '2011.09.13' + +UPDATE_URL = 'https://raw.github.com/phihag/youtube-dl/master/youtube-dl' +  import cookielib -import ctypes  import datetime -import email.utils  import gzip  import htmlentitydefs  import httplib @@ -23,22 +32,44 @@ import os.path  import re  import socket  import string -import StringIO  import subprocess  import sys  import time  import urllib  import urllib2 +import warnings  import zlib +if os.name == 'nt': +	import 
ctypes + +try: +	import email.utils +except ImportError: # Python 2.4 +	import email.Utils +try: +	import cStringIO as StringIO +except ImportError: +	import StringIO +  # parse_qs was moved from the cgi module to the urlparse module recently.  try:  	from urlparse import parse_qs  except ImportError:  	from cgi import parse_qs +try: +	import lxml.etree +except ImportError: +	pass # Handled below + +try: +	import xml.etree.ElementTree +except ImportError: # Python<2.5 +	pass # Not officially supported, but let it slip +  std_headers = { -	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11', +	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',  	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',  	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',  	'Accept-Encoding': 'gzip, deflate', @@ -47,6 +78,119 @@ std_headers = {  simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') +try: +	import json +except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): +	import re +	class json(object): +		@staticmethod +		def loads(s): +			s = s.decode('UTF-8') +			def raiseError(msg, i): +				raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:])) +			def skipSpace(i, expectMore=True): +				while i < len(s) and s[i] in ' \t\r\n': +					i += 1 +				if expectMore: +					if i >= len(s): +						raiseError('Premature end', i) +				return i +			def decodeEscape(match): +				esc = match.group(1) +				_STATIC = { +					'"': '"', +					'\\': '\\', +					'/': '/', +					'b': unichr(0x8), +					'f': unichr(0xc), +					'n': '\n', +					'r': '\r', +					't': '\t', +				} +				if esc in _STATIC: +					return _STATIC[esc] +				if esc[0] == 'u': +					if len(esc) == 1+4: +						return unichr(int(esc[1:5], 16)) +					if len(esc) == 5+6 and esc[5:7] == '\\u': +						hi = int(esc[1:5], 16) +						low 
= int(esc[7:11], 16) +						return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000) +				raise ValueError('Unknown escape ' + str(esc)) +			def parseString(i): +				i += 1 +				e = i +				while True: +					e = s.index('"', e) +					bslashes = 0 +					while s[e-bslashes-1] == '\\': +						bslashes += 1 +					if bslashes % 2 == 1: +						e += 1 +						continue +					break +				rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)') +				stri = rexp.sub(decodeEscape, s[i:e]) +				return (e+1,stri) +			def parseObj(i): +				i += 1 +				res = {} +				i = skipSpace(i) +				if s[i] == '}': # Empty dictionary +					return (i+1,res) +				while True: +					if s[i] != '"': +						raiseError('Expected a string object key', i) +					i,key = parseString(i) +					i = skipSpace(i) +					if i >= len(s) or s[i] != ':': +						raiseError('Expected a colon', i) +					i,val = parse(i+1) +					res[key] = val +					i = skipSpace(i) +					if s[i] == '}': +						return (i+1, res) +					if s[i] != ',': +						raiseError('Expected comma or closing curly brace', i) +					i = skipSpace(i+1) +			def parseArray(i): +				res = [] +				i = skipSpace(i+1) +				if s[i] == ']': # Empty array +					return (i+1,res) +				while True: +					i,val = parse(i) +					res.append(val) +					i = skipSpace(i) # Raise exception if premature end +					if s[i] == ']': +						return (i+1, res) +					if s[i] != ',': +						raiseError('Expected a comma or closing bracket', i) +					i = skipSpace(i+1) +			def parseDiscrete(i): +				for k,v in {'true': True, 'false': False, 'null': None}.items(): +					if s.startswith(k, i): +						return (i+len(k), v) +				raiseError('Not a boolean (or null)', i) +			def parseNumber(i): +				mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:]) +				if mobj is None: +					raiseError('Not a number', i) +				nums = mobj.group(1) +				if '.' 
in nums or 'e' in nums or 'E' in nums: +					return (i+len(nums), float(nums)) +				return (i+len(nums), int(nums)) +			CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete} +			def parse(i): +				i = skipSpace(i) +				i,res = CHARMAP.get(s[i], parseNumber)(i) +				i = skipSpace(i, False) +				return (i,res) +			i,res = parse(0) +			if i < len(s): +				raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')') +			return res +  def preferredencoding():  	"""Get preferred encoding. @@ -63,6 +207,7 @@ def preferredencoding():  			yield pref  	return yield_preferredencoding().next() +  def htmlentity_transform(matchobj):  	"""Transforms an HTML entity to a Unicode character. @@ -89,11 +234,13 @@ def htmlentity_transform(matchobj):  	# Unknown entity in name, return its literal representation  	return (u'&%s;' % entity) +  def sanitize_title(utitle):  	"""Sanitizes a video title so it could be used as part of a filename."""  	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)  	return utitle.replace(unicode(os.sep), u'%') +  def sanitize_open(filename, open_mode):  	"""Try to open the given filename, and slightly tweak it if this fails. @@ -120,13 +267,15 @@ def sanitize_open(filename, open_mode):  		stream = open(filename, open_mode)  		return (stream, filename) +  def timeconvert(timestr): -    """Convert RFC 2822 defined time string into system timestamp""" -    timestamp = None -    timetuple = email.utils.parsedate_tz(timestr) -    if timetuple is not None: -        timestamp = email.utils.mktime_tz(timetuple) -    return timestamp +	"""Convert RFC 2822 defined time string into system timestamp""" +	timestamp = None +	timetuple = email.utils.parsedate_tz(timestr) +	if timetuple is not None: +		timestamp = email.utils.mktime_tz(timetuple) +	return timestamp +  class DownloadError(Exception):  	"""Download Error exception. 
@@ -137,6 +286,7 @@ class DownloadError(Exception):  	"""  	pass +  class SameFileError(Exception):  	"""Same File exception. @@ -145,6 +295,7 @@ class SameFileError(Exception):  	"""  	pass +  class PostProcessingError(Exception):  	"""Post Processing exception. @@ -153,6 +304,7 @@ class PostProcessingError(Exception):  	"""  	pass +  class UnavailableVideoError(Exception):  	"""Unavailable Format exception. @@ -161,6 +313,7 @@ class UnavailableVideoError(Exception):  	"""  	pass +  class ContentTooShortError(Exception):  	"""Content Too Short exception. @@ -176,6 +329,7 @@ class ContentTooShortError(Exception):  		self.downloaded = downloaded  		self.expected = expected +  class YoutubeDLHandler(urllib2.HTTPHandler):  	"""Handler for HTTP requests and responses. @@ -185,11 +339,11 @@ class YoutubeDLHandler(urllib2.HTTPHandler):  	a particular request, the original request in the program code only has  	to include the HTTP header "Youtubedl-No-Compression", which will be  	removed before making the real request. -	 +  	Part of this code was copied from: -	  http://techknack.net/python-urllib2-handlers/ -	   +	http://techknack.net/python-urllib2-handlers/ +  	Andrew Rowls, the author of that code, agreed to release it to the  	public domain.  	""" @@ -200,7 +354,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler):  			return zlib.decompress(data, -zlib.MAX_WBITS)  		except zlib.error:  			return zlib.decompress(data) -	 +  	@staticmethod  	def addinfourl_wrapper(stream, headers, url, code):  		if hasattr(urllib2.addinfourl, 'getcode'): @@ -208,7 +362,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler):  		ret = urllib2.addinfourl(stream, headers, url)  		ret.code = code  		return ret -	 +  	def http_request(self, req):  		for h in std_headers:  			if h in req.headers: @@ -234,6 +388,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler):  			resp.msg = old_resp.msg  		return resp +  class FileDownloader(object):  	"""File Downloader class. 
@@ -286,6 +441,8 @@ class FileDownloader(object):  	consoletitle:     Display progress in console window's titlebar.  	nopart:           Do not use temporary .part files.  	updatetime:       Use the Last-modified header to set output file timestamps. +	writedescription: Write the video description to a .description file +	writeinfojson:    Write the video description to a .info.json file  	"""  	params = None @@ -305,16 +462,6 @@ class FileDownloader(object):  		self.params = params  	@staticmethod -	def pmkdir(filename): -		"""Create directory components in filename. Similar to Unix "mkdir -p".""" -		components = filename.split(os.sep) -		aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))] -		aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator -		for dir in aggregate: -			if not os.path.exists(dir): -				os.mkdir(dir) - -	@staticmethod  	def format_bytes(bytes):  		if bytes is None:  			return 'N/A' @@ -325,7 +472,7 @@ class FileDownloader(object):  		else:  			exponent = long(math.log(bytes, 1024.0))  		suffix = 'bkMGTPEZY'[exponent] -		converted = float(bytes) / float(1024**exponent) +		converted = float(bytes) / float(1024 ** exponent)  		return '%.2f%s' % (converted, suffix)  	@staticmethod @@ -463,7 +610,7 @@ class FileDownloader(object):  			os.rename(old_filename, new_filename)  		except (IOError, OSError), err:  			self.trouble(u'ERROR: unable to rename file') -	 +  	def try_utime(self, filename, last_modified_hdr):  		"""Try to set the last-modified time of the given file."""  		if last_modified_hdr is None: @@ -477,10 +624,18 @@ class FileDownloader(object):  		if filetime is None:  			return  		try: -			os.utime(filename,(time.time(), filetime)) +			os.utime(filename, (time.time(), filetime))  		except:  			pass +	def report_writedescription(self, descfn): +		""" Report that the description file is being written """ +		self.to_screen(u'[info] Writing video description to: %s' % descfn, 
ignore_encoding_errors=True) + +	def report_writeinfojson(self, infofn): +		""" Report that the metadata file has been written """ +		self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True) +  	def report_destination(self, filename):  		"""Report destination filename."""  		self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True) @@ -562,11 +717,44 @@ class FileDownloader(object):  			return  		try: -			self.pmkdir(filename) +			dn = os.path.dirname(filename) +			if dn != '' and not os.path.exists(dn): +				os.makedirs(dn)  		except (OSError, IOError), err: -			self.trouble(u'ERROR: unable to create directories: %s' % str(err)) +			self.trouble(u'ERROR: unable to create directory ' + unicode(err))  			return +		if self.params.get('writedescription', False): +			try: +				descfn = filename + '.description' +				self.report_writedescription(descfn) +				descfile = open(descfn, 'wb') +				try: +					descfile.write(info_dict['description'].encode('utf-8')) +				finally: +					descfile.close() +			except (OSError, IOError): +				self.trouble(u'ERROR: Cannot write description file ' + descfn) +				return + +		if self.params.get('writeinfojson', False): +			infofn = filename + '.info.json' +			self.report_writeinfojson(infofn) +			try: +				json.dump +			except (NameError,AttributeError): +				self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') +				return +			try: +				infof = open(infofn, 'wb') +				try: +					json.dump(info_dict, infof) +				finally: +					infof.close() +			except (OSError, IOError): +				self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn) +				return +  		try:  			success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))  		except (OSError, IOError), err: @@ -634,7 +822,7 @@ class FileDownloader(object):  		# Download using rtmpdump. 
rtmpdump returns exit code 2 when  		# the connection was interrumpted and resuming appears to be  		# possible. This is part of rtmpdump's normal usage, AFAIK. -		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename] +		basic_args = ['rtmpdump'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]  		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])  		while retval == 2 or retval == 1:  			prevsize = os.path.getsize(tmpfilename) @@ -680,7 +868,7 @@ class FileDownloader(object):  		# Request parameters in case of being able to resume  		if self.params.get('continuedl', False) and resume_len != 0:  			self.report_resuming_byte(resume_len) -			request.add_header('Range','bytes=%d-' % resume_len) +			request.add_header('Range', 'bytes=%d-' % resume_len)  			open_mode = 'ab'  		count = 0 @@ -706,7 +894,7 @@ class FileDownloader(object):  					else:  						# Examine the reported length  						if (content_length is not None and -						    (resume_len - 100 < long(content_length) < resume_len + 100)): +								(resume_len - 100 < long(content_length) < resume_len + 100)):  							# The file had already been fully downloaded.  							
# Explanation to the above condition: in issue #175 it was revealed that  							# YouTube sometimes adds or removes a few bytes from the end of the file, @@ -751,6 +939,7 @@ class FileDownloader(object):  			if stream is None:  				try:  					(stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) +					assert stream is not None  					filename = self.undo_temp_name(tmpfilename)  					self.report_destination(filename)  				except (OSError, IOError), err: @@ -772,6 +961,9 @@ class FileDownloader(object):  			# Apply rate limit  			self.slow_down(start, byte_counter - resume_len) +		if stream is None: +			self.trouble(u'\nERROR: Did not get any data blocks') +			return False  		stream.close()  		self.report_finish()  		if data_len is not None and byte_counter != data_len: @@ -784,6 +976,7 @@ class FileDownloader(object):  		return True +  class InfoExtractor(object):  	"""Information Extractor class. @@ -855,16 +1048,17 @@ class InfoExtractor(object):  		"""Real extraction process. Redefine in subclasses."""  		pass +  class YoutubeIE(InfoExtractor):  	"""Information extractor for youtube.com.""" -	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' +	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'  	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'  	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'  	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'  	_NETRC_MACHINE = 'youtube'  	# Listed in order of quality -	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] +	_available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']  	
_video_extensions = {  		'13': '3gp',  		'17': 'mp4', @@ -1009,7 +1203,7 @@ class YoutubeIE(InfoExtractor):  		self.report_video_info_webpage_download(video_id)  		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:  			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' -					   % (video_id, el_type)) +					% (video_id, el_type))  			request = urllib2.Request(video_info_url)  			try:  				video_info_webpage = urllib2.urlopen(request).read() @@ -1067,11 +1261,19 @@ class YoutubeIE(InfoExtractor):  					pass  		# description -		video_description = 'No description available.' -		if self._downloader.params.get('forcedescription', False): -			mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage) -			if mobj is not None: -				video_description = mobj.group(1) +		try: +			lxml.etree +		except NameError: +			video_description = u'No description available.' +			if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False): +				mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage) +				if mobj is not None: +					video_description = mobj.group(1).decode('utf-8') +		else: +			html_parser = lxml.etree.HTMLParser(encoding='utf-8') +			vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser) +			video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()')) +			# TODO use another parser  		# token  		video_token = urllib.unquote_plus(video_info['token'][0]) @@ -1079,8 +1281,15 @@ class YoutubeIE(InfoExtractor):  		# Decide which formats to download  		req_format = self._downloader.params.get('format', None) -		if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]: -			url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) +		if 'conn' in video_info and 
video_info['conn'][0].startswith('rtmp'): +			self.report_rtmp_download() +			video_url_list = [(None, video_info['conn'][0])] +		elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: +			url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') +			url_data = [parse_qs(uds) for uds in url_data_strs] +			url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data) +			url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data) +  			format_limit = self._downloader.params.get('format_limit', None)  			if format_limit is not None and format_limit in self._available_formats:  				format_list = self._available_formats[self._available_formats.index(format_limit):] @@ -1100,13 +1309,8 @@ class YoutubeIE(InfoExtractor):  					self._downloader.trouble(u'ERROR: requested format not available')  					return  				video_url_list = [(req_format, url_map[req_format])] # Specific format - -		elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): -			self.report_rtmp_download() -			video_url_list = [(None, video_info['conn'][0])] -  		else: -			self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') +			self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')  			return  		for format_param, video_real_url in video_url_list: @@ -1116,7 +1320,6 @@ class YoutubeIE(InfoExtractor):  			# Extension  			video_extension = self._video_extensions.get(format_param, 'flv') -			# Find the video URL in fmt_url_map or conn paramters  			try:  				# Process video information  				self._downloader.process_info({ @@ -1129,7 +1332,7 @@ class YoutubeIE(InfoExtractor):  					'ext':		video_extension.decode('utf-8'),  					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),  					'thumbnail':	video_thumbnail.decode('utf-8'), -					'description':	video_description.decode('utf-8'), +					'description':	
video_description,  					'player_url':	player_url,  				})  			except UnavailableVideoError, err: @@ -1374,6 +1577,7 @@ class DailymotionIE(InfoExtractor):  		except UnavailableVideoError:  			self._downloader.trouble(u'\nERROR: unable to download video') +  class GoogleIE(InfoExtractor):  	"""Information extractor for video.google.com.""" @@ -1467,7 +1671,6 @@ class GoogleIE(InfoExtractor):  		else:	# we need something to pass to process_info  			video_thumbnail = '' -  		try:  			# Process video information  			self._downloader.process_info({ @@ -1667,7 +1870,8 @@ class YahooIE(InfoExtractor):  			self._downloader.trouble(u'ERROR: unable to extract video description')  			return  		video_description = mobj.group(1).decode('utf-8') -		if not video_description: video_description = 'No description available.' +		if not video_description: +			video_description = 'No description available.'  		# Extract video height and width  		mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage) @@ -1688,8 +1892,8 @@ class YahooIE(InfoExtractor):  		yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents  		yv_bitrate = '700'  # according to Wikipedia this is hard-coded  		request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + -				          '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + -					  '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') +				'&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + +				'&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')  		try:  			self.report_download_webpage(video_id)  			webpage = urllib2.urlopen(request).read() @@ -1718,13 +1922,128 @@ class YahooIE(InfoExtractor):  				'thumbnail':	video_thumbnail.decode('utf-8'),  				'description':	video_description,  		
		'thumbnail':	video_thumbnail, -				'description':	video_description,  				'player_url':	None,  			})  		except UnavailableVideoError:  			self._downloader.trouble(u'\nERROR: unable to download video') +class VimeoIE(InfoExtractor): +	"""Information extractor for vimeo.com.""" + +	# _VALID_URL matches Vimeo URLs +	_VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' + +	def __init__(self, downloader=None): +		InfoExtractor.__init__(self, downloader) + +	@staticmethod +	def suitable(url): +		return (re.match(VimeoIE._VALID_URL, url) is not None) + +	def report_download_webpage(self, video_id): +		"""Report webpage download.""" +		self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id) + +	def report_extraction(self, video_id): +		"""Report information extraction.""" +		self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id) + +	def _real_initialize(self): +		return + +	def _real_extract(self, url, new_video=True): +		# Extract ID from URL +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) +			return + +		# At this point we have a new video +		self._downloader.increment_downloads() +		video_id = mobj.group(1) + +		# Retrieve video webpage to extract further information +		request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers) +		try: +			self.report_download_webpage(video_id) +			webpage = urllib2.urlopen(request).read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) +			return + +		# Now we begin extracting as much information as we can from what we +		# retrieved. First we extract the information common to all extractors, +		# and latter we extract those that are Vimeo specific. 
+		self.report_extraction(video_id) + +		# Extract title +		mobj = re.search(r'<caption>(.*?)</caption>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract video title') +			return +		video_title = mobj.group(1).decode('utf-8') +		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + +		# Extract uploader +		mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract video uploader') +			return +		video_uploader = mobj.group(1).decode('utf-8') + +		# Extract video thumbnail +		mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract video thumbnail') +			return +		video_thumbnail = mobj.group(1).decode('utf-8') + +		# # Extract video description +		# mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage) +		# if mobj is None: +		# 	self._downloader.trouble(u'ERROR: unable to extract video description') +		# 	return +		# video_description = mobj.group(1).decode('utf-8') +		# if not video_description: video_description = 'No description available.' +		video_description = 'Foo.' 
+ +		# Vimeo specific: extract request signature +		mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract request signature') +			return +		sig = mobj.group(1).decode('utf-8') + +		# Vimeo specific: Extract request signature expiration +		mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract request signature expiration') +			return +		sig_exp = mobj.group(1).decode('utf-8') + +		video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp) + +		try: +			# Process video information +			self._downloader.process_info({ +				'id':		video_id.decode('utf-8'), +				'url':		video_url, +				'uploader':	video_uploader, +				'upload_date':	u'NA', +				'title':	video_title, +				'stitle':	simple_title, +				'ext':		u'mp4', +				'thumbnail':	video_thumbnail.decode('utf-8'), +				'description':	video_description, +				'thumbnail':	video_thumbnail, +				'description':	video_description, +				'player_url':	None, +			}) +		except UnavailableVideoError: +			self._downloader.trouble(u'ERROR: unable to download video') + +  class GenericIE(InfoExtractor):  	"""Generic last-resort information extractor.""" @@ -1782,11 +2101,11 @@ class GenericIE(InfoExtractor):  			return  		video_url = urllib.unquote(mobj.group(1)) -		video_id  = os.path.basename(video_url) +		video_id = os.path.basename(video_url)  		# here's a fun little line of code for you:  		video_extension = os.path.splitext(video_id)[1][1:] -		video_id        = os.path.splitext(video_id)[0] +		video_id = os.path.splitext(video_id)[0]  		# it's tempting to parse this further, but you would  		# have to take into account all the variations like @@ -1859,7 +2178,7 @@ class YoutubeSearchIE(InfoExtractor):  		prefix, query = query.split(':')  		prefix = prefix[8:] -		query  = query.encode('utf-8') +		
query = query.encode('utf-8')  		if prefix == '':  			self._download_n_results(query, 1)  			return @@ -1873,7 +2192,7 @@ class YoutubeSearchIE(InfoExtractor):  					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))  					return  				elif n > self._max_youtube_results: -					self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n)) +					self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))  					n = self._max_youtube_results  				self._download_n_results(query, n)  				return @@ -1917,6 +2236,7 @@ class YoutubeSearchIE(InfoExtractor):  			pagenum = pagenum + 1 +  class GoogleSearchIE(InfoExtractor):  	"""Information Extractor for Google Video search queries."""  	_VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' @@ -1950,7 +2270,7 @@ class GoogleSearchIE(InfoExtractor):  		prefix, query = query.split(':')  		prefix = prefix[8:] -		query  = query.encode('utf-8') +		query = query.encode('utf-8')  		if prefix == '':  			self._download_n_results(query, 1)  			return @@ -1964,7 +2284,7 @@ class GoogleSearchIE(InfoExtractor):  					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))  					return  				elif n > self._max_google_results: -					self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n)) +					self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))  					n = self._max_google_results  				self._download_n_results(query, n)  				return @@ -2008,6 +2328,7 @@ class GoogleSearchIE(InfoExtractor):  			pagenum = pagenum + 1 +  class YahooSearchIE(InfoExtractor):  	"""Information Extractor for Yahoo! 
Video search queries."""  	_VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' @@ -2041,7 +2362,7 @@ class YahooSearchIE(InfoExtractor):  		prefix, query = query.split(':')  		prefix = prefix[8:] -		query  = query.encode('utf-8') +		query = query.encode('utf-8')  		if prefix == '':  			self._download_n_results(query, 1)  			return @@ -2055,7 +2376,7 @@ class YahooSearchIE(InfoExtractor):  					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))  					return  				elif n > self._max_yahoo_results: -					self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n)) +					self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))  					n = self._max_yahoo_results  				self._download_n_results(query, n)  				return @@ -2099,10 +2420,11 @@ class YahooSearchIE(InfoExtractor):  			pagenum = pagenum + 1 +  class YoutubePlaylistIE(InfoExtractor):  	"""Information Extractor for YouTube playlists.""" -	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' +	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'  	_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'  	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'  	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' @@ -2175,6 +2497,7 @@ class YoutubePlaylistIE(InfoExtractor):  			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)  		return +  class YoutubeUserIE(InfoExtractor):  	"""Information Extractor for YouTube users.""" @@ -2196,7 +2519,7 @@ class YoutubeUserIE(InfoExtractor):  	def report_download_page(self, username, start_index):  		"""Report attempt to download user page."""  		
self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % -				           (username, start_index, start_index + self._GDATA_PAGE_SIZE)) +				(username, start_index, start_index + self._GDATA_PAGE_SIZE))  	def _real_initialize(self):  		self._youtube_ie.initialize() @@ -2258,9 +2581,9 @@ class YoutubeUserIE(InfoExtractor):  			video_ids = video_ids[playliststart:]  		else:  			video_ids = video_ids[playliststart:playlistend] -			 +  		self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" % -				           (username, all_ids_count, len(video_ids))) +				(username, all_ids_count, len(video_ids)))  		for video_id in video_ids:  			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) @@ -2345,6 +2668,7 @@ class DepositFilesIE(InfoExtractor):  		except UnavailableVideoError, err:  			self._downloader.trouble(u'ERROR: unable to download file') +  class FacebookIE(InfoExtractor):  	"""Information Extractor for Facebook""" @@ -2511,10 +2835,7 @@ class FacebookIE(InfoExtractor):  					pass  		# description -		video_description = 'No description available.' 
-		if (self._downloader.params.get('forcedescription', False) and -		    'description' in video_info): -			video_description = video_info['description'] +		video_description = video_info.get('description', 'No description available.')  		url_map = video_info['video_urls']  		if len(url_map.keys()) > 0: @@ -2549,7 +2870,6 @@ class FacebookIE(InfoExtractor):  			# Extension  			video_extension = self._video_extensions.get(format_param, 'mp4') -			# Find the video URL in fmt_url_map or conn paramters  			try:  				# Process video information  				self._downloader.process_info({ @@ -2568,6 +2888,297 @@ class FacebookIE(InfoExtractor):  			except UnavailableVideoError, err:  				self._downloader.trouble(u'\nERROR: unable to download video') +class BlipTVIE(InfoExtractor): +	"""Information extractor for blip.tv""" + +	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$' +	_URL_EXT = r'^.*\.([a-z0-9]+)$' + +	@staticmethod +	def suitable(url): +		return (re.match(BlipTVIE._VALID_URL, url) is not None) + +	def report_extraction(self, file_id): +		"""Report information extraction.""" +		self._downloader.to_screen(u'[blip.tv] %s: Extracting information' % file_id) + +	def _simplify_title(self, title): +		res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) +		res = res.strip(ur'_') +		return res + +	def _real_extract(self, url): +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._downloader.trouble(u'ERROR: invalid URL: %s' % url) +			return + +		if '?' in url: +			cchar = '&' +		else: +			cchar = '?' 
+		json_url = url + cchar + 'skin=json&version=2&no_wrap=1' +		request = urllib2.Request(json_url) +		self.report_extraction(mobj.group(1)) +		try: +			json_code = urllib2.urlopen(request).read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) +			return +		try: +			json_data = json.loads(json_code) +			if 'Post' in json_data: +				data = json_data['Post'] +			else: +				data = json_data + +			upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') +			video_url = data['media']['url'] +			umobj = re.match(self._URL_EXT, video_url) +			if umobj is None: +				raise ValueError('Can not determine filename extension') +			ext = umobj.group(1) + +			self._downloader.increment_downloads() + +			info = { +				'id': data['item_id'], +				'url': video_url, +				'uploader': data['display_name'], +				'upload_date': upload_date, +				'title': data['title'], +				'stitle': self._simplify_title(data['title']), +				'ext': ext, +				'format': data['media']['mimeType'], +				'thumbnail': data['thumbnailUrl'], +				'description': data['description'], +				'player_url': data['embedUrl'] +			} +		except (ValueError,KeyError), err: +			self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err)) +			return + +		try: +			self._downloader.process_info(info) +		except UnavailableVideoError, err: +			self._downloader.trouble(u'\nERROR: unable to download video') + + +class MyVideoIE(InfoExtractor): +	"""Information Extractor for myvideo.de.""" + +	_VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' + +	def __init__(self, downloader=None): +		InfoExtractor.__init__(self, downloader) +	 +	@staticmethod +	def suitable(url): +		return (re.match(MyVideoIE._VALID_URL, url) is not None) + +	def report_download_webpage(self, video_id): +		"""Report webpage download.""" +		
self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id) + +	def report_extraction(self, video_id): +		"""Report information extraction.""" +		self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id) + +	def _real_initialize(self): +		return + +	def _real_extract(self,url): +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._download.trouble(u'ERROR: invalid URL: %s' % url) +			return + +		video_id = mobj.group(1) +		simple_title = mobj.group(2).decode('utf-8') +		# should actually not be necessary +		simple_title = sanitize_title(simple_title) +		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title) + +		# Get video webpage +		request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id) +		try: +			self.report_download_webpage(video_id) +			webpage = urllib2.urlopen(request).read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) +			return + +		self.report_extraction(video_id) +		mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />', +				 webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract media URL') +			return +		video_url = mobj.group(1) + ('/%s.flv' % video_id) + +		mobj = re.search('<title>([^<]+)</title>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract title') +			return + +		video_title = mobj.group(1) +		video_title = sanitize_title(video_title) + +		try: +			print(video_url) +			self._downloader.process_info({ +				'id':		video_id, +				'url':		video_url, +				'uploader':	u'NA', +				'upload_date':  u'NA', +				'title':	video_title, +				'stitle':	simple_title, +				'ext':		u'flv', +				'format':	u'NA', +				'player_url':	None, +			}) +		except UnavailableVideoError: +			self._downloader.trouble(u'\nERROR: 
Unable to download video') + +class ComedyCentralIE(InfoExtractor): +	"""Information extractor for The Daily Show and Colbert Report """ + +	_VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$' + +	@staticmethod +	def suitable(url): +		return (re.match(ComedyCentralIE._VALID_URL, url) is not None) + +	def report_extraction(self, episode_id): +		self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) +	 +	def report_config_download(self, episode_id): +		self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) + +	def report_player_url(self, episode_id): +		self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) + +	def _simplify_title(self, title): +		res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) +		res = res.strip(ur'_') +		return res + +	def _real_extract(self, url): +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._downloader.trouble(u'ERROR: invalid URL: %s' % url) +			return + +		if mobj.group('shortname'): +			if mobj.group('shortname') in ('tds', 'thedailyshow'): +				url = 'http://www.thedailyshow.com/full-episodes/' +			else: +				url = 'http://www.colbertnation.com/full-episodes/' +			mobj = re.match(self._VALID_URL, url) +			assert mobj is not None + +		dlNewest = not mobj.group('episode') +		if dlNewest: +			epTitle = mobj.group('showname') +		else: +			epTitle = mobj.group('episode') + +		req = urllib2.Request(url) +		self.report_extraction(epTitle) +		try: +			htmlHandle = urllib2.urlopen(req) +			html = htmlHandle.read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) +			return +		if dlNewest: +			url = htmlHandle.geturl() +			mobj = re.match(self._VALID_URL, url) +			if mobj 
is None: +				self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url) +				return +			if mobj.group('episode') == '': +				self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url) +				return +			epTitle = mobj.group('episode') + +		mMovieParams = re.findall('<param name="movie" value="(http://media.mtvnservices.com/(.*?:episode:([^:]*):)(.*?))"/>', html) +		if len(mMovieParams) == 0: +			self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) +			return +		show_id = mMovieParams[0][2] +		ACT_COUNT = { # TODO: Detect this dynamically +			'thedailyshow.com': 4, +			'colbertnation.com': 3, +		}.get(show_id, 4) +		OFFSET = { +			'thedailyshow.com': 1, +			'colbertnation.com': 1, +		}.get(show_id, 1) + +		first_player_url = mMovieParams[0][0] +		startMediaNum = int(mMovieParams[0][3]) + OFFSET +		movieId = mMovieParams[0][1] + +		playerReq = urllib2.Request(first_player_url) +		self.report_player_url(epTitle) +		try: +			playerResponse = urllib2.urlopen(playerReq) +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: unable to download player: %s' % unicode(err)) +			return +		player_url = playerResponse.geturl() + +		for actNum in range(ACT_COUNT): +			mediaNum = startMediaNum + actNum +			mediaId = movieId + str(mediaNum) +			configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' 
+ +						urllib.urlencode({'uri': mediaId})) +			configReq = urllib2.Request(configUrl) +			self.report_config_download(epTitle) +			try: +				configXml = urllib2.urlopen(configReq).read() +			except (urllib2.URLError, httplib.HTTPException, socket.error), err: +				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) +				return + +			cdoc = xml.etree.ElementTree.fromstring(configXml) +			turls = [] +			for rendition in cdoc.findall('.//rendition'): +				finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) +				turls.append(finfo) + +			if len(turls) == 0: +				self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum) + ': No videos found') +				continue + +			# For now, just pick the highest bitrate +			format,video_url = turls[-1] + +			self._downloader.increment_downloads() + +			effTitle = show_id.replace('.com', '') + '-' + epTitle +			info = { +				'id': str(mediaNum), +				'url': video_url, +				'uploader': show_id, +				'upload_date': 'NA', +				'title': effTitle, +				'stitle': self._simplify_title(effTitle), +				'ext': 'mp4', +				'format': format, +				'thumbnail': None, +				'description': 'TODO: Not yet supported', +				'player_url': player_url +			} + +			try: +				self._downloader.process_info(info) +			except UnavailableVideoError, err: +				self._downloader.trouble(u'\nERROR: unable to download ' + str(mediaNum)) +				continue + +  class PostProcessor(object):  	"""Post Processor class. 
@@ -2614,6 +3225,7 @@ class PostProcessor(object):  		"""  		return information # by default, do nothing +  class FFmpegExtractAudioPP(PostProcessor):  	def __init__(self, downloader=None, preferredcodec=None): @@ -2696,311 +3308,393 @@ class FFmpegExtractAudioPP(PostProcessor):  		information['filepath'] = new_path  		return information -### MAIN PROGRAM ### -if __name__ == '__main__': + +def updateSelf(downloader, filename): +	''' Update the program file with the latest version from the repository ''' +	# Note: downloader only used for options +	if not os.access(filename, os.W_OK): +		sys.exit('ERROR: no write permissions on %s' % filename) + +	downloader.to_screen('Updating to latest version...') +  	try: -		# Modules needed only when running the main program -		import getpass -		import optparse +		try: +			urlh = urllib.urlopen(UPDATE_URL) +			newcontent = urlh.read() +		finally: +			urlh.close() +	except (IOError, OSError), err: +		sys.exit('ERROR: unable to download latest version') -		# Function to update the program file with the latest version from the repository. 
-		def update_self(downloader, filename): -			# Note: downloader only used for options -			if not os.access(filename, os.W_OK): -				sys.exit('ERROR: no write permissions on %s' % filename) +	try: +		outf = open(filename, 'wb') +		try: +			outf.write(newcontent) +		finally: +			outf.close() +	except (IOError, OSError), err: +		sys.exit('ERROR: unable to overwrite current version') -			downloader.to_screen('Updating to latest stable version...') -			try: -				latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' -				latest_version = urllib.urlopen(latest_url).read().strip() -				prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version -				newcontent = urllib.urlopen(prog_url).read() -			except (IOError, OSError), err: -				sys.exit('ERROR: unable to download latest version') -			try: -				stream = open(filename, 'w') -				stream.write(newcontent) -				stream.close() -			except (IOError, OSError), err: -				sys.exit('ERROR: unable to overwrite current version') -			downloader.to_screen('Updated to version %s' % latest_version) - -		# Parse command line -		parser = optparse.OptionParser( -			usage='Usage: %prog [options] url...', -			version='2011.03.29', -			conflict_handler='resolve', -		) - -		parser.add_option('-h', '--help', -				action='help', help='print this help text and exit') -		parser.add_option('-v', '--version', -				action='version', help='print program version and exit') -		parser.add_option('-U', '--update', -				action='store_true', dest='update_self', help='update this program to latest stable version') -		parser.add_option('-i', '--ignore-errors', -				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) -		parser.add_option('-r', '--rate-limit', -				dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 
50k or 44.6m)') -		parser.add_option('-R', '--retries', -				dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) -		parser.add_option('--playlist-start', -				dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) -		parser.add_option('--playlist-end', -				dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) -		parser.add_option('--dump-user-agent', -				action='store_true', dest='dump_user_agent', -				help='display the current browser identification', default=False) - -		authentication = optparse.OptionGroup(parser, 'Authentication Options') -		authentication.add_option('-u', '--username', -				dest='username', metavar='USERNAME', help='account username') -		authentication.add_option('-p', '--password', -				dest='password', metavar='PASSWORD', help='account password') -		authentication.add_option('-n', '--netrc', -				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) -		parser.add_option_group(authentication) - -		video_format = optparse.OptionGroup(parser, 'Video Format Options') -		video_format.add_option('-f', '--format', -				action='store', dest='format', metavar='FORMAT', help='video format code') -		video_format.add_option('--all-formats', -				action='store_const', dest='format', help='download all available video formats', const='-1') -		video_format.add_option('--max-quality', -				action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') -		parser.add_option_group(video_format) - -		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') -		verbosity.add_option('-q', '--quiet', -				action='store_true', dest='quiet', help='activates quiet mode', default=False) -		verbosity.add_option('-s', '--simulate', -				action='store_true', dest='simulate', help='do not download video', default=False) -		verbosity.add_option('-g', 
'--get-url', -				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) -		verbosity.add_option('-e', '--get-title', -				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) -		verbosity.add_option('--get-thumbnail', -				action='store_true', dest='getthumbnail', -				help='simulate, quiet but print thumbnail URL', default=False) -		verbosity.add_option('--get-description', -				action='store_true', dest='getdescription', -				help='simulate, quiet but print video description', default=False) -		verbosity.add_option('--get-filename', -				action='store_true', dest='getfilename', -				help='simulate, quiet but print output filename', default=False) -		verbosity.add_option('--no-progress', -				action='store_true', dest='noprogress', help='do not print progress bar', default=False) -		verbosity.add_option('--console-title', -				action='store_true', dest='consoletitle', -				help='display progress in console titlebar', default=False) -		parser.add_option_group(verbosity) - -		filesystem = optparse.OptionGroup(parser, 'Filesystem Options') -		filesystem.add_option('-t', '--title', -				action='store_true', dest='usetitle', help='use title in file name', default=False) -		filesystem.add_option('-l', '--literal', -				action='store_true', dest='useliteral', help='use literal title in file name', default=False) -		filesystem.add_option('-A', '--auto-number', -				action='store_true', dest='autonumber', -				help='number downloaded files starting from 00000', default=False) -		filesystem.add_option('-o', '--output', -				dest='outtmpl', metavar='TEMPLATE', help='output filename template') -		filesystem.add_option('-a', '--batch-file', -				dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') -		filesystem.add_option('-w', '--no-overwrites', -				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) -		
filesystem.add_option('-c', '--continue', -				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) -		filesystem.add_option('--cookies', -				dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') -		filesystem.add_option('--no-part', -				action='store_true', dest='nopart', help='do not use .part files', default=False) -		filesystem.add_option('--no-mtime', -				action='store_false', dest='updatetime', -				help='do not use the Last-modified header to set the file modification time', default=True) -		parser.add_option_group(filesystem) - -		postproc = optparse.OptionGroup(parser, 'Post-processing Options') -		postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, -				help='convert video files to audio-only files (requires ffmpeg and ffprobe)') -		postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', -				help='"best", "aac" or "mp3"; best by default') -		parser.add_option_group(postproc) - -		(opts, args) = parser.parse_args() - -		# Open appropriate CookieJar -		if opts.cookiefile is None: -			jar = cookielib.CookieJar() -		else: -			try: -				jar = cookielib.MozillaCookieJar(opts.cookiefile) -				if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): -					jar.load() -			except (IOError, OSError), err: -				sys.exit(u'ERROR: unable to open cookie file') +	downloader.to_screen('Updated youtube-dl. 
Restart to use the new version.') -		# Dump user agent -		if opts.dump_user_agent: -			print std_headers['User-Agent'] -			sys.exit(0) +def parseOpts(): +	# Deferred imports +	import getpass +	import optparse -		# General configuration -		cookie_processor = urllib2.HTTPCookieProcessor(jar) -		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) -		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) +	def _format_option_string(option): +		''' ('-o', '--option') -> -o, --format METAVAR''' + +		opts = [] + +		if option._short_opts: opts.append(option._short_opts[0]) +		if option._long_opts: opts.append(option._long_opts[0]) +		if len(opts) > 1: opts.insert(1, ', ') + +		if option.takes_value(): opts.append(' %s' % option.metavar) + +		return "".join(opts) + +	def _find_term_columns(): +		columns = os.environ.get('COLUMNS', None) +		if columns: +			return int(columns) -		# Batch file verification -		batchurls = [] -		if opts.batchfile is not None: -			try: -				if opts.batchfile == '-': -					batchfd = sys.stdin -				else: -					batchfd = open(opts.batchfile, 'r') -				batchurls = batchfd.readlines() -				batchurls = [x.strip() for x in batchurls] -				batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] -			except IOError: -				sys.exit(u'ERROR: batch file could not be read') -		all_urls = batchurls + args - -		# Conflicting, missing and erroneous options -		if opts.usenetrc and (opts.username is not None or opts.password is not None): -			parser.error(u'using .netrc conflicts with giving username/password') -		if opts.password is not None and opts.username is None: -			parser.error(u'account username missing') -		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): -			parser.error(u'using output template conflicts with using title, literal title or auto number') -		if opts.usetitle and opts.useliteral: -			
parser.error(u'using title conflicts with using literal title') -		if opts.username is not None and opts.password is None: -			opts.password = getpass.getpass(u'Type account password and press return:') -		if opts.ratelimit is not None: -			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) -			if numeric_limit is None: -				parser.error(u'invalid rate limit specified') -			opts.ratelimit = numeric_limit -		if opts.retries is not None: -			try: -				opts.retries = long(opts.retries) -			except (TypeError, ValueError), err: -				parser.error(u'invalid retry count specified')  		try: -			opts.playliststart = long(opts.playliststart) -			if opts.playliststart <= 0: -				raise ValueError -		except (TypeError, ValueError), err: -			parser.error(u'invalid playlist start number specified') +			sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) +			out,err = sp.communicate() +			return int(out.split()[1]) +		except: +			pass +		return None + +	max_width = 80 +	max_help_position = 80 + +	# No need to wrap help messages if we're on a wide console +	columns = _find_term_columns() +	if columns: max_width = columns + +	fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) +	fmt.format_option_strings = _format_option_string + +	kw = { +		'version'   : __version__, +		'formatter' : fmt, +		'usage' : '%prog [options] url...', +		'conflict_handler' : 'resolve', +	} + +	parser = optparse.OptionParser(**kw) + +	# option groups +	general        = optparse.OptionGroup(parser, 'General Options') +	authentication = optparse.OptionGroup(parser, 'Authentication Options') +	video_format   = optparse.OptionGroup(parser, 'Video Format Options') +	postproc       = optparse.OptionGroup(parser, 'Post-processing Options') +	filesystem     = optparse.OptionGroup(parser, 'Filesystem Options') +	verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') + +	general.add_option('-h', '--help', +			
action='help', help='print this help text and exit') +	general.add_option('-v', '--version', +			action='version', help='print program version and exit') +	general.add_option('-U', '--update', +			action='store_true', dest='update_self', help='update this program to latest version') +	general.add_option('-i', '--ignore-errors', +			action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) +	general.add_option('-r', '--rate-limit', +			dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') +	general.add_option('-R', '--retries', +			dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) +	general.add_option('--playlist-start', +			dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) +	general.add_option('--playlist-end', +			dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) +	general.add_option('--dump-user-agent', +			action='store_true', dest='dump_user_agent', +			help='display the current browser identification', default=False) + +	authentication.add_option('-u', '--username', +			dest='username', metavar='USERNAME', help='account username') +	authentication.add_option('-p', '--password', +			dest='password', metavar='PASSWORD', help='account password') +	authentication.add_option('-n', '--netrc', +			action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) + + +	video_format.add_option('-f', '--format', +			action='store', dest='format', metavar='FORMAT', help='video format code') +	video_format.add_option('--all-formats', +			action='store_const', dest='format', help='download all available video formats', const='-1') +	video_format.add_option('--max-quality', +			action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + + +	verbosity.add_option('-q', '--quiet', +			action='store_true', 
dest='quiet', help='activates quiet mode', default=False) +	verbosity.add_option('-s', '--simulate', +			action='store_true', dest='simulate', help='do not download video', default=False) +	verbosity.add_option('-g', '--get-url', +			action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) +	verbosity.add_option('-e', '--get-title', +			action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) +	verbosity.add_option('--get-thumbnail', +			action='store_true', dest='getthumbnail', +			help='simulate, quiet but print thumbnail URL', default=False) +	verbosity.add_option('--get-description', +			action='store_true', dest='getdescription', +			help='simulate, quiet but print video description', default=False) +	verbosity.add_option('--get-filename', +			action='store_true', dest='getfilename', +			help='simulate, quiet but print output filename', default=False) +	verbosity.add_option('--no-progress', +			action='store_true', dest='noprogress', help='do not print progress bar', default=False) +	verbosity.add_option('--console-title', +			action='store_true', dest='consoletitle', +			help='display progress in console titlebar', default=False) + + +	filesystem.add_option('-t', '--title', +			action='store_true', dest='usetitle', help='use title in file name', default=False) +	filesystem.add_option('-l', '--literal', +			action='store_true', dest='useliteral', help='use literal title in file name', default=False) +	filesystem.add_option('-A', '--auto-number', +			action='store_true', dest='autonumber', +			help='number downloaded files starting from 00000', default=False) +	filesystem.add_option('-o', '--output', +			dest='outtmpl', metavar='TEMPLATE', help='output filename template') +	filesystem.add_option('-a', '--batch-file', +			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') +	filesystem.add_option('-w', '--no-overwrites', +			action='store_true', 
dest='nooverwrites', help='do not overwrite files', default=False) +	filesystem.add_option('-c', '--continue', +			action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) +	filesystem.add_option('--cookies', +			dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') +	filesystem.add_option('--no-part', +			action='store_true', dest='nopart', help='do not use .part files', default=False) +	filesystem.add_option('--no-mtime', +			action='store_false', dest='updatetime', +			help='do not use the Last-modified header to set the file modification time', default=True) +	filesystem.add_option('--write-description', +			action='store_true', dest='writedescription', +			help='write video description to a .description file', default=False) +	filesystem.add_option('--write-info-json', +			action='store_true', dest='writeinfojson', +			help='write video metadata to a .info.json file', default=False) + + +	postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, +			help='convert video files to audio-only files (requires ffmpeg and ffprobe)') +	postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', +			help='"best", "aac" or "mp3"; best by default') + + +	parser.add_option_group(general) +	parser.add_option_group(filesystem) +	parser.add_option_group(verbosity) +	parser.add_option_group(video_format) +	parser.add_option_group(authentication) +	parser.add_option_group(postproc) + +	opts, args = parser.parse_args() + +	return parser, opts, args + +def main(): +	parser, opts, args = parseOpts() + +	# Open appropriate CookieJar +	if opts.cookiefile is None: +		jar = cookielib.CookieJar() +	else:  		try: -			opts.playlistend = long(opts.playlistend) -			if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): -				raise ValueError -		except (TypeError, ValueError), err: -			parser.error(u'invalid playlist end 
number specified') -		if opts.extractaudio: -			if opts.audioformat not in ['best', 'aac', 'mp3']: -				parser.error(u'invalid audio format specified') - -		# Information extractors -		youtube_ie = YoutubeIE() -		metacafe_ie = MetacafeIE(youtube_ie) -		dailymotion_ie = DailymotionIE() -		youtube_pl_ie = YoutubePlaylistIE(youtube_ie) -		youtube_user_ie = YoutubeUserIE(youtube_ie) -		youtube_search_ie = YoutubeSearchIE(youtube_ie) -		google_ie = GoogleIE() -		google_search_ie = GoogleSearchIE(google_ie) -		photobucket_ie = PhotobucketIE() -		yahoo_ie = YahooIE() -		yahoo_search_ie = YahooSearchIE(yahoo_ie) -		deposit_files_ie = DepositFilesIE() -		facebook_ie = FacebookIE() -		generic_ie = GenericIE() - -		# File downloader -		fd = FileDownloader({ -			'usenetrc': opts.usenetrc, -			'username': opts.username, -			'password': opts.password, -			'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), -			'forceurl': opts.geturl, -			'forcetitle': opts.gettitle, -			'forcethumbnail': opts.getthumbnail, -			'forcedescription': opts.getdescription, -			'forcefilename': opts.getfilename, -			'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), -			'format': opts.format, -			'format_limit': opts.format_limit, -			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) -				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') -				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') -				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') -				or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') -				or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') -				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') -				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') -				or 
(opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') -				or u'%(id)s.%(ext)s'), -			'ignoreerrors': opts.ignoreerrors, -			'ratelimit': opts.ratelimit, -			'nooverwrites': opts.nooverwrites, -			'retries': opts.retries, -			'continuedl': opts.continue_dl, -			'noprogress': opts.noprogress, -			'playliststart': opts.playliststart, -			'playlistend': opts.playlistend, -			'logtostderr': opts.outtmpl == '-', -			'consoletitle': opts.consoletitle, -			'nopart': opts.nopart, -			'updatetime': opts.updatetime, -			}) -		fd.add_info_extractor(youtube_search_ie) -		fd.add_info_extractor(youtube_pl_ie) -		fd.add_info_extractor(youtube_user_ie) -		fd.add_info_extractor(metacafe_ie) -		fd.add_info_extractor(dailymotion_ie) -		fd.add_info_extractor(youtube_ie) -		fd.add_info_extractor(google_ie) -		fd.add_info_extractor(google_search_ie) -		fd.add_info_extractor(photobucket_ie) -		fd.add_info_extractor(yahoo_ie) -		fd.add_info_extractor(yahoo_search_ie) -		fd.add_info_extractor(deposit_files_ie) -		fd.add_info_extractor(facebook_ie) - -		# This must come last since it's the -		# fallback if none of the others work -		fd.add_info_extractor(generic_ie) - -		# PostProcessors -		if opts.extractaudio: -			fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) - -		# Update version -		if opts.update_self: -			update_self(fd, sys.argv[0]) - -		# Maybe do nothing -		if len(all_urls) < 1: -			if not opts.update_self: -				parser.error(u'you must provide at least one URL') +			jar = cookielib.MozillaCookieJar(opts.cookiefile) +			if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): +				jar.load() +		except (IOError, OSError), err: +			sys.exit(u'ERROR: unable to open cookie file') + +	# Dump user agent +	if opts.dump_user_agent: +		print std_headers['User-Agent'] +		sys.exit(0) + +	# General configuration +	cookie_processor = urllib2.HTTPCookieProcessor(jar) +	opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, 
YoutubeDLHandler()) +	urllib2.install_opener(opener) +	socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + +	# Batch file verification +	batchurls = [] +	if opts.batchfile is not None: +		try: +			if opts.batchfile == '-': +				batchfd = sys.stdin  			else: -				sys.exit() -		retcode = fd.download(all_urls) +				batchfd = open(opts.batchfile, 'r') +			batchurls = batchfd.readlines() +			batchurls = [x.strip() for x in batchurls] +			batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] +		except IOError: +			sys.exit(u'ERROR: batch file could not be read') +	all_urls = batchurls + args + +	# Conflicting, missing and erroneous options +	if opts.usenetrc and (opts.username is not None or opts.password is not None): +		parser.error(u'using .netrc conflicts with giving username/password') +	if opts.password is not None and opts.username is None: +		parser.error(u'account username missing') +	if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): +		parser.error(u'using output template conflicts with using title, literal title or auto number') +	if opts.usetitle and opts.useliteral: +		parser.error(u'using title conflicts with using literal title') +	if opts.username is not None and opts.password is None: +		opts.password = getpass.getpass(u'Type account password and press return:') +	if opts.ratelimit is not None: +		numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) +		if numeric_limit is None: +			parser.error(u'invalid rate limit specified') +		opts.ratelimit = numeric_limit +	if opts.retries is not None: +		try: +			opts.retries = long(opts.retries) +		except (TypeError, ValueError), err: +			parser.error(u'invalid retry count specified') +	try: +		opts.playliststart = int(opts.playliststart) +		if opts.playliststart <= 0: +			raise ValueError(u'Playlist start must be positive') +	except (TypeError, ValueError), err: +		parser.error(u'invalid playlist start number 
specified') +	try: +		opts.playlistend = int(opts.playlistend) +		if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): +			raise ValueError(u'Playlist end must be greater than playlist start') +	except (TypeError, ValueError), err: +		parser.error(u'invalid playlist end number specified') +	if opts.extractaudio: +		if opts.audioformat not in ['best', 'aac', 'mp3']: +			parser.error(u'invalid audio format specified') + +	# Information extractors +	youtube_ie = YoutubeIE() +	metacafe_ie = MetacafeIE(youtube_ie) +	dailymotion_ie = DailymotionIE() +	youtube_pl_ie = YoutubePlaylistIE(youtube_ie) +	youtube_user_ie = YoutubeUserIE(youtube_ie) +	youtube_search_ie = YoutubeSearchIE(youtube_ie) +	google_ie = GoogleIE() +	google_search_ie = GoogleSearchIE(google_ie) +	photobucket_ie = PhotobucketIE() +	yahoo_ie = YahooIE() +	yahoo_search_ie = YahooSearchIE(yahoo_ie) +	deposit_files_ie = DepositFilesIE() +	facebook_ie = FacebookIE() +	bliptv_ie = BlipTVIE() +	vimeo_ie = VimeoIE() +	myvideo_ie = MyVideoIE() +	comedycentral_ie = ComedyCentralIE() + +	generic_ie = GenericIE() + +	# File downloader +	fd = FileDownloader({ +		'usenetrc': opts.usenetrc, +		'username': opts.username, +		'password': opts.password, +		'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), +		'forceurl': opts.geturl, +		'forcetitle': opts.gettitle, +		'forcethumbnail': opts.getthumbnail, +		'forcedescription': opts.getdescription, +		'forcefilename': opts.getfilename, +		'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), +		'format': opts.format, +		'format_limit': opts.format_limit, +		'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) +			or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') +			or (opts.format == '-1' and opts.useliteral and 
u'%(title)s-%(id)s-%(format)s.%(ext)s') +			or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') +			or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') +			or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') +			or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') +			or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') +			or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') +			or u'%(id)s.%(ext)s'), +		'ignoreerrors': opts.ignoreerrors, +		'ratelimit': opts.ratelimit, +		'nooverwrites': opts.nooverwrites, +		'retries': opts.retries, +		'continuedl': opts.continue_dl, +		'noprogress': opts.noprogress, +		'playliststart': opts.playliststart, +		'playlistend': opts.playlistend, +		'logtostderr': opts.outtmpl == '-', +		'consoletitle': opts.consoletitle, +		'nopart': opts.nopart, +		'updatetime': opts.updatetime, +		'writedescription': opts.writedescription, +		'writeinfojson': opts.writeinfojson, +		}) +	fd.add_info_extractor(youtube_search_ie) +	fd.add_info_extractor(youtube_pl_ie) +	fd.add_info_extractor(youtube_user_ie) +	fd.add_info_extractor(metacafe_ie) +	fd.add_info_extractor(dailymotion_ie) +	fd.add_info_extractor(youtube_ie) +	fd.add_info_extractor(google_ie) +	fd.add_info_extractor(google_search_ie) +	fd.add_info_extractor(photobucket_ie) +	fd.add_info_extractor(yahoo_ie) +	fd.add_info_extractor(yahoo_search_ie) +	fd.add_info_extractor(deposit_files_ie) +	fd.add_info_extractor(facebook_ie) +	fd.add_info_extractor(bliptv_ie) +	fd.add_info_extractor(vimeo_ie) +	fd.add_info_extractor(myvideo_ie) +	fd.add_info_extractor(comedycentral_ie) + +	# This must come last since it's the +	# fallback if none of the others work +	fd.add_info_extractor(generic_ie) + +	# PostProcessors +	if opts.extractaudio: +		fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) + +	# Update version +	if opts.update_self: +		updateSelf(fd, sys.argv[0]) + +	# Maybe do nothing +	if 
len(all_urls) < 1: +		if not opts.update_self: +			parser.error(u'you must provide at least one URL') +		else: +			sys.exit() +	retcode = fd.download(all_urls) -		# Dump cookie jar if requested -		if opts.cookiefile is not None: -			try: -				jar.save() -			except (IOError, OSError), err: -				sys.exit(u'ERROR: unable to save cookie jar') +	# Dump cookie jar if requested +	if opts.cookiefile is not None: +		try: +			jar.save() +		except (IOError, OSError), err: +			sys.exit(u'ERROR: unable to save cookie jar') + +	sys.exit(retcode) -		sys.exit(retcode) +if __name__ == '__main__': +	try: +		main()  	except DownloadError:  		sys.exit(1)  	except SameFileError:  		sys.exit(u'ERROR: fixed output name but more than one file to download')  	except KeyboardInterrupt:  		sys.exit(u'\nERROR: Interrupted by user') + +# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: | 
