diff options
| author | Ricardo Garcia <sarbalap+freshmeat@gmail.com> | 2011-01-12 20:20:37 +0100 | 
|---|---|---|
| committer | Ricardo Garcia <sarbalap+freshmeat@gmail.com> | 2011-01-12 20:20:37 +0100 | 
| commit | 1987c2325a9764e24104fa8da23b467a5e33cf49 (patch) | |
| tree | 68c707e3e2c6c8defff4ed40f450476c5a771d5a | |
| parent | aac3fe0f4adaad809d25e335c17711c23d51eec0 (diff) | |
Add proper support for "gzip" and "deflate" encodings
| -rwxr-xr-x | youtube-dl | 89 | 
1 file changed, 72 insertions, 17 deletions
| diff --git a/youtube-dl b/youtube-dl index a09b498ba..13d42765b 100755 --- a/youtube-dl +++ b/youtube-dl @@ -8,6 +8,7 @@  import cookielib  import ctypes  import datetime +import gzip  import htmlentitydefs  import httplib  import locale @@ -18,11 +19,13 @@ import os.path  import re  import socket  import string +import StringIO  import subprocess  import sys  import time  import urllib  import urllib2 +import zlib  # parse_qs was moved from the cgi module to the urlparse module recently.  try: @@ -161,6 +164,56 @@ class ContentTooShortError(Exception):  		self.downloaded = downloaded  		self.expected = expected +class YoutubeDLHandler(urllib2.HTTPHandler): +	"""Handler for HTTP requests and responses. + +	This class, when installed with an OpenerDirector, automatically adds +	the standard headers to every HTTP request and handles gzipped and +	deflated responses from web servers. If compression is to be avoided in +	a particular request, the original request in the program code only has +	to include the HTTP header "Youtubedl-No-Compression", which will be +	removed before making the real request. +	 +	Part of this code was copied from: + +	  http://techknack.net/python-urllib2-handlers/ +	   +	Andrew Rowls, the author of that code, agreed to release it to the +	public domain. 
+	""" + +	@staticmethod +	def deflate(data): +		try: +			return zlib.decompress(data, -zlib.MAX_WBITS) +		except zlib.error: +			return zlib.decompress(data) +	 +	def http_request(self, req): +		for h in std_headers: +			if h in req.headers: +				del req.headers[h] +			req.add_header(h, std_headers[h]) +		if 'Youtubedl-no-compression' in req.headers: +			if 'Accept-encoding' in req.headers: +				del req.headers['Accept-encoding'] +			del req.headers['Youtubedl-no-compression'] +		return req + +	def http_response(self, req, resp): +		old_resp = resp +		# gzip +		if resp.headers.get('Content-encoding', '') == 'gzip': +			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r') +			resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) +			resp.msg = old_resp.msg +		# deflate +		if resp.headers.get('Content-encoding', '') == 'deflate': +			gz = StringIO.StringIO(self.deflate(resp.read())) +			resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) +			resp.msg = old_resp.msg +		return resp +  class FileDownloader(object):  	"""File Downloader class. 
@@ -559,8 +612,11 @@ class FileDownloader(object):  		tmpfilename = self.temp_name(filename)  		stream = None  		open_mode = 'wb' -		basic_request = urllib2.Request(url, None, std_headers) -		request = urllib2.Request(url, None, std_headers) + +		# Do not include the Accept-Encoding header +		headers = {'Youtubedl-no-compression': 'True'} +		basic_request = urllib2.Request(url, None, headers) +		request = urllib2.Request(url, None, headers)  		# Establish possible resume length  		if os.path.isfile(tmpfilename): @@ -822,7 +878,7 @@ class YoutubeIE(InfoExtractor):  				return  		# Set language -		request = urllib2.Request(self._LANG_URL, None, std_headers) +		request = urllib2.Request(self._LANG_URL)  		try:  			self.report_lang()  			urllib2.urlopen(request).read() @@ -842,7 +898,7 @@ class YoutubeIE(InfoExtractor):  				'username':	username,  				'password':	password,  				} -		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers) +		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))  		try:  			self.report_login()  			login_results = urllib2.urlopen(request).read() @@ -858,7 +914,7 @@ class YoutubeIE(InfoExtractor):  				'next_url':		'/',  				'action_confirm':	'Confirm',  				} -		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers) +		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))  		try:  			self.report_age_confirmation()  			age_results = urllib2.urlopen(request).read() @@ -876,7 +932,7 @@ class YoutubeIE(InfoExtractor):  		# Get video webpage  		self.report_video_webpage_download(video_id) -		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id, None, std_headers) +		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)  		try:  			video_webpage = urllib2.urlopen(request).read()  		except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -895,7 
+951,7 @@ class YoutubeIE(InfoExtractor):  		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:  			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'  					   % (video_id, el_type)) -			request = urllib2.Request(video_info_url, None, std_headers) +			request = urllib2.Request(video_info_url)  			try:  				video_info_webpage = urllib2.urlopen(request).read()  				video_info = parse_qs(video_info_webpage) @@ -1055,7 +1111,7 @@ class MetacafeIE(InfoExtractor):  	def _real_initialize(self):  		# Retrieve disclaimer -		request = urllib2.Request(self._DISCLAIMER, None, std_headers) +		request = urllib2.Request(self._DISCLAIMER)  		try:  			self.report_disclaimer()  			disclaimer = urllib2.urlopen(request).read() @@ -1068,7 +1124,7 @@ class MetacafeIE(InfoExtractor):  			'filters': '0',  			'submit': "Continue - I'm over 18",  			} -		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers) +		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))  		try:  			self.report_age_confirmation()  			disclaimer = urllib2.urlopen(request).read() @@ -1771,7 +1827,7 @@ class YoutubeSearchIE(InfoExtractor):  		while True:  			self.report_download_page(query, pagenum)  			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) -			request = urllib2.Request(result_url, None, std_headers) +			request = urllib2.Request(result_url)  			try:  				page = urllib2.urlopen(request).read()  			except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -1862,7 +1918,7 @@ class GoogleSearchIE(InfoExtractor):  		while True:  			self.report_download_page(query, pagenum)  			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) -			request = urllib2.Request(result_url, None, std_headers) +			request = urllib2.Request(result_url)  			try:  				page = urllib2.urlopen(request).read()  			except (urllib2.URLError, 
httplib.HTTPException, socket.error), err: @@ -1953,7 +2009,7 @@ class YahooSearchIE(InfoExtractor):  		while True:  			self.report_download_page(query, pagenum)  			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) -			request = urllib2.Request(result_url, None, std_headers) +			request = urllib2.Request(result_url)  			try:  				page = urllib2.urlopen(request).read()  			except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -2017,7 +2073,7 @@ class YoutubePlaylistIE(InfoExtractor):  		while True:  			self.report_download_page(playlist_id, pagenum) -			request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers) +			request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum))  			try:  				page = urllib2.urlopen(request).read()  			except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -2079,7 +2135,7 @@ class YoutubeUserIE(InfoExtractor):  		pagenum = 1  		self.report_download_page(username) -		request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers) +		request = urllib2.Request(self._TEMPLATE_URL % (username))  		try:  			page = urllib2.urlopen(request).read()  		except (urllib2.URLError, httplib.HTTPException, socket.error), err: @@ -2135,7 +2191,7 @@ class DepositFilesIE(InfoExtractor):  		# Retrieve file webpage with 'Free download' button pressed  		free_download_indication = { 'gateway_result' : '1' } -		request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers) +		request = urllib2.Request(url, urllib.urlencode(free_download_indication))  		try:  			self.report_download_webpage(file_id)  			webpage = urllib2.urlopen(request).read() @@ -2354,8 +2410,7 @@ if __name__ == '__main__':  		# General configuration  		cookie_processor = urllib2.HTTPCookieProcessor(jar) -		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) -		urllib2.install_opener(urllib2.build_opener(cookie_processor)) +		
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))  		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)  		# Batch file verification | 
