From bae611f216ac7b1f1a24a506da6dffc518d09d5b Mon Sep 17 00:00:00 2001
From: Arvydas Sidorenko <asido4@gmail.com>
Date: Sun, 1 Jul 2012 18:21:27 +0200
Subject: Simplified preferredencoding()

It is unclear why a generator (yield) was used just to return the
encoding; returning it directly simplifies the whole function.

Signed-off-by: Arvydas Sidorenko <asido4@gmail.com>
---
 youtube_dl/utils.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 2853ba50f..7faa046c8 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -32,15 +32,13 @@ def preferredencoding():
 	Returns the best encoding scheme for the system, based on
 	locale.getpreferredencoding() and some further tweaks.
 	"""
-	def yield_preferredencoding():
-		try:
-			pref = locale.getpreferredencoding()
-			u'TEST'.encode(pref)
-		except:
-			pref = 'UTF-8'
-		while True:
-			yield pref
-	return yield_preferredencoding().next()
+	try:
+		pref = locale.getpreferredencoding()
+		u'TEST'.encode(pref)
+	except:
+		pref = 'UTF-8'
+
+	return pref
 
 
 def htmlentity_transform(matchobj):
-- 
cgit v1.2.3


From 51937c086943a3bdbf6f707c75d041ed3b0ba743 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Mon, 26 Nov 2012 04:05:54 +0100
Subject: Add some parentheses around print for #180

---
 youtube_dl/utils.py | 1 -
 1 file changed, 1 deletion(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 56d046145..6f53337d4 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -83,7 +83,6 @@ class IDParser(HTMLParser.HTMLParser):
 		HTMLParser.HTMLParser.__init__(self)
 
 	def error(self, message):
-		#print >> sys.stderr, self.getpos()
 		if self.error_count > 10 or self.started:
 			raise HTMLParser.HTMLParseError(message, self.getpos())
 		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
-- 
cgit v1.2.3


From 92b91c18780938283c505f5662c458e049bf3567 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Mon, 26 Nov 2012 04:23:20 +0100
Subject: Use character instead of byte strings

---
 youtube_dl/utils.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 6f53337d4..658fd2686 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -26,6 +26,11 @@ std_headers = {
 	'Accept-Language': 'en-us,en;q=0.5',
 }
 
+try:
+    compat_str = unicode # Python 2
+except NameError:
+    compat_str = str
+
 def preferredencoding():
 	"""Get preferred encoding.
 
-- 
cgit v1.2.3


From 1c469a9480e9d8bea45950898eb46e07b0c58290 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Mon, 26 Nov 2012 23:58:46 +0100
Subject: New option --restrict-filenames

---
 youtube_dl/utils.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 658fd2686..55f2fe02c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -194,18 +194,22 @@ def timeconvert(timestr):
 	if timetuple is not None:
 		timestamp = email.utils.mktime_tz(timetuple)
 	return timestamp
-	
-def sanitize_filename(s):
-	"""Sanitizes a string so it could be used as part of a filename."""
+
+def sanitize_filename(s, restricted=False):
+	"""Sanitizes a string so it could be used as part of a filename.
+	If restricted is set, use a stricter subset of allowed characters.
+	"""
 	def replace_insane(char):
 		if char == '?' or ord(char) < 32 or ord(char) == 127:
 			return ''
 		elif char == '"':
-			return '\''
+			return '' if restricted else 'FOO\''
 		elif char == ':':
-			return ' -'
+			return '_-' if restricted else ' -'
 		elif char in '\\/|*<>':
 			return '-'
+		if restricted and (char in '&\'' or char.isspace()):
+			return '_'
 		return char
 
 	result = u''.join(map(replace_insane, s))
-- 
cgit v1.2.3


From 240089e5df640a12b1d300da05932c2f74ff8c69 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 00:14:12 +0100
Subject: remove accidental remnants

---
 youtube_dl/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 55f2fe02c..1f60d34ae 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -203,7 +203,7 @@ def sanitize_filename(s, restricted=False):
 		if char == '?' or ord(char) < 32 or ord(char) == 127:
 			return ''
 		elif char == '"':
-			return '' if restricted else 'FOO\''
+			return '' if restricted else '\''
 		elif char == ':':
 			return '_-' if restricted else ' -'
 		elif char in '\\/|*<>':
-- 
cgit v1.2.3


From 56781d3d2e476e2e109d0907d89548fd4da05058 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 12:46:09 +0100
Subject: Switch back to underline for invalid characters, and make restricted
 ASCII-only

---
 youtube_dl/utils.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 1f60d34ae..3339f56ec 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -207,15 +207,20 @@ def sanitize_filename(s, restricted=False):
 		elif char == ':':
 			return '_-' if restricted else ' -'
 		elif char in '\\/|*<>':
-			return '-'
+			return '_'
 		if restricted and (char in '&\'' or char.isspace()):
 			return '_'
+		if restricted and ord(char) > 127:
+			return '_'
 		return char
 
 	result = u''.join(map(replace_insane, s))
-	while '--' in result:
-		result = result.replace('--', '-')
-	return result.strip('-')
+	while '__' in result:
+		result = result.replace('__', '_')
+	result = result.strip('_')
+	if not result:
+		result = '_'
+	return result
 
 def orderedSet(iterable):
 	""" Remove all duplicates from the input iterable """
-- 
cgit v1.2.3


From 46cbda0be4bed00122a5cf43e640808e6c32222d Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 15:07:10 +0100
Subject: Minor filename encoding improvement in a common case

---
 youtube_dl/utils.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 3339f56ec..4ace22c2f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -218,6 +218,9 @@ def sanitize_filename(s, restricted=False):
 	while '__' in result:
 		result = result.replace('__', '_')
 	result = result.strip('_')
+	# Common case of "Foreign band name - English song title"
+	if restricted and result.startswith('-_'):
+		result = result[2:]
 	if not result:
 		result = '_'
 	return result
-- 
cgit v1.2.3


From dffe658bac71531dc6aa72088c4d98f6a556bfbf Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 17:15:33 +0100
Subject: Remove exclamation mark in --restrict-filenames mode

---
 youtube_dl/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4ace22c2f..a0c41081a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -208,7 +208,7 @@ def sanitize_filename(s, restricted=False):
 			return '_-' if restricted else ' -'
 		elif char in '\\/|*<>':
 			return '_'
-		if restricted and (char in '&\'' or char.isspace()):
+		if restricted and (char in '!&\'' or char.isspace()):
 			return '_'
 		if restricted and ord(char) > 127:
 			return '_'
@@ -316,7 +316,7 @@ class ContentTooShortError(Exception):
 
 class Trouble(Exception):
 	"""Trouble helper exception
-	
+
 	This is an exception to be handled with
 	FileDownloader.trouble
 	"""
-- 
cgit v1.2.3


From 0969bdd3053fc95c5f545caabdbc77628afec4b5 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 18:49:18 +0100
Subject: unify spacing

---
 youtube_dl/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 40d6823a0..45582ca11 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -27,9 +27,9 @@ std_headers = {
 }
 
 try:
-    compat_str = unicode # Python 2
+	compat_str = unicode # Python 2
 except NameError:
-    compat_str = str
+	compat_str = str
 
 def preferredencoding():
 	"""Get preferred encoding.
-- 
cgit v1.2.3


From b514df2034a7291971427588977dd612c4b5581f Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 18:55:35 +0100
Subject: Clean up with the help of pep8

---
 youtube_dl/utils.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 45582ca11..68a6fae1b 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -126,8 +126,10 @@ class IDParser(HTMLParser.HTMLParser):
 	handle_decl = handle_pi = unknown_decl = find_startpos
 
 	def get_result(self):
-		if self.result == None: return None
-		if len(self.result) != 3: return None
+		if self.result is None:
+			return None
+		if len(self.result) != 3:
+			return None
 		lines = self.html.split('\n')
 		lines = lines[self.result[1][0]-1:self.result[2][0]]
 		lines[0] = lines[0][self.result[1][1]:]
-- 
cgit v1.2.3


From dd109dee8ebd51a019529fa494e56651162ad6d9 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 19:02:37 +0100
Subject: Remove mentions of unicode

---
 youtube_dl/utils.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 68a6fae1b..bde446bcb 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -27,9 +27,9 @@ std_headers = {
 }
 
 try:
-	compat_str = unicode # Python 2
+	u = unicode # Python 2
 except NameError:
-	compat_str = str
+	u = str
 
 def preferredencoding():
 	"""Get preferred encoding.
@@ -47,7 +47,7 @@ def preferredencoding():
 
 
 def htmlentity_transform(matchobj):
-	"""Transforms an HTML entity to a Unicode character.
+	"""Transforms an HTML entity to a character.
 
 	This function receives a match object and is intended to be used with
 	the re.sub() function.
@@ -58,7 +58,6 @@ def htmlentity_transform(matchobj):
 	if entity in htmlentitydefs.name2codepoint:
 		return unichr(htmlentitydefs.name2codepoint[entity])
 
-	# Unicode character
 	mobj = re.match(ur'(?u)#(x?\d+)', entity)
 	if mobj is not None:
 		numstr = mobj.group(1)
@@ -67,7 +66,7 @@ def htmlentity_transform(matchobj):
 			numstr = u'0%s' % numstr
 		else:
 			base = 10
-		return unichr(long(numstr, base))
+		return unichr(int(numstr, base))
 
 	# Unknown entity in name, return its literal representation
 	return (u'&%s;' % entity)
@@ -235,7 +234,7 @@ def orderedSet(iterable):
 
 def unescapeHTML(s):
 	"""
-	@param s a string (of type unicode)
+	@param s a string
 	"""
 	assert type(s) == type(u'')
 
@@ -244,7 +243,7 @@ def unescapeHTML(s):
 
 def encodeFilename(s):
 	"""
-	@param s The name of the file (of type unicode)
+	@param s The name of the file
 	"""
 
 	assert type(s) == type(u'')
-- 
cgit v1.2.3


From 96731798dbdd5a8878ac5cf29b69c6c7c821311b Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 23:29:18 +0100
Subject: Rename util.u to util.compat_str

---
 youtube_dl/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index bde446bcb..4d098a377 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -27,9 +27,9 @@ std_headers = {
 }
 
 try:
-	u = unicode # Python 2
+	compat_str = unicode # Python 2
 except NameError:
-	u = str
+	compat_str = str
 
 def preferredencoding():
 	"""Get preferred encoding.
-- 
cgit v1.2.3


From e08bee320e7c2933590d108ff4f8546b4dff935f Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 23:31:55 +0100
Subject: Use except .. as everywhere (#180)

---
 youtube_dl/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4d098a377..29e1b0e97 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -177,7 +177,7 @@ def sanitize_open(filename, open_mode):
 			return (sys.stdout, filename)
 		stream = open(encodeFilename(filename), open_mode)
 		return (stream, filename)
-	except (IOError, OSError), err:
+	except (IOError, OSError) as err:
 		# In case of error, try to remove win32 forbidden chars
 		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
 
-- 
cgit v1.2.3


From 01ba00ca42899436c13439226ec61651a6ea6af0 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 23:54:09 +0100
Subject: Prepare urllib references for 2/3 compatibility

---
 youtube_dl/utils.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 29e1b0e97..12e32be98 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -9,7 +9,6 @@ import os
 import re
 import sys
 import zlib
-import urllib2
 import email.utils
 import json
 
@@ -31,6 +30,26 @@ try:
 except NameError:
 	compat_str = str
 
+try:
+	import urllib.request as compat_urllib_request
+except ImportError: # Python 2
+	import urllib2 as compat_urllib_request
+
+try:
+	import urllib.error as compat_urllib_error
+except ImportError: # Python 2
+	import urllib2 as compat_urllib_error
+
+try:
+	import urllib.parse as compat_urllib_parse
+except ImportError: # Python 2
+	import urllib2 as compat_urllib_parse
+
+try:
+	import http.cookiejar as compat_cookiejar
+except ImportError: # Python 2
+	import cookielib as compat_cookiejar
+
 def preferredencoding():
 	"""Get preferred encoding.
 
@@ -320,7 +339,7 @@ class Trouble(Exception):
 	FileDownloader.trouble
 	"""
 
-class YoutubeDLHandler(urllib2.HTTPHandler):
+class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 	"""Handler for HTTP requests and responses.
 
 	This class, when installed with an OpenerDirector, automatically adds
@@ -347,9 +366,9 @@ class YoutubeDLHandler(urllib2.HTTPHandler):
 
 	@staticmethod
 	def addinfourl_wrapper(stream, headers, url, code):
-		if hasattr(urllib2.addinfourl, 'getcode'):
-			return urllib2.addinfourl(stream, headers, url, code)
-		ret = urllib2.addinfourl(stream, headers, url)
+		if hasattr(compat_urllib_request.addinfourl, 'getcode'):
+			return compat_urllib_request.addinfourl(stream, headers, url, code)
+		ret = compat_urllib_request.addinfourl(stream, headers, url)
 		ret.code = code
 		return ret
 
-- 
cgit v1.2.3


From 89fb51dd2d4d7464b919f17b9d5d24a448319dfc Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 23:56:10 +0100
Subject: Remove ur references for Python 3.3 support

---
 youtube_dl/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 12e32be98..ccefc66a0 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -77,7 +77,7 @@ def htmlentity_transform(matchobj):
 	if entity in htmlentitydefs.name2codepoint:
 		return unichr(htmlentitydefs.name2codepoint[entity])
 
-	mobj = re.match(ur'(?u)#(x?\d+)', entity)
+	mobj = re.match(u'(?u)#(x?\\d+)', entity)
 	if mobj is not None:
 		numstr = mobj.group(1)
 		if numstr.startswith(u'x'):
@@ -198,7 +198,7 @@ def sanitize_open(filename, open_mode):
 		return (stream, filename)
 	except (IOError, OSError) as err:
 		# In case of error, try to remove win32 forbidden chars
-		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
+		filename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename)
 
 		# An exception here should be caught in the caller
 		stream = open(encodeFilename(filename), open_mode)
@@ -257,7 +257,7 @@ def unescapeHTML(s):
 	"""
 	assert type(s) == type(u'')
 
-	result = re.sub(ur'(?u)&(.+?);', htmlentity_transform, s)
+	result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
 	return result
 
 def encodeFilename(s):
-- 
cgit v1.2.3


From da779b4924eb3078233c7f5730e26cc73dd91a4a Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 27 Nov 2012 23:58:47 +0100
Subject: Fall back to urllib instead of urllib2 for Python 3's urllib.parse

---
 youtube_dl/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ccefc66a0..ac7e161af 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -43,7 +43,7 @@ except ImportError: # Python 2
 try:
 	import urllib.parse as compat_urllib_parse
 except ImportError: # Python 2
-	import urllib2 as compat_urllib_parse
+	import urllib as compat_urllib_parse
 
 try:
 	import http.cookiejar as compat_cookiejar
-- 
cgit v1.2.3


From 3e669f369f886dff8fa8272f3bfa37be6360a0ba Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 00:02:55 +0100
Subject: Py3 compat for unichr and htmlentitydefs

---
 youtube_dl/utils.py | 43 ++++++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ac7e161af..668338270 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 
 import gzip
-import htmlentitydefs
 import HTMLParser
 import locale
 import os
@@ -17,19 +16,6 @@ try:
 except ImportError:
 	import StringIO
 
-std_headers = {
-	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
-	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
-	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-	'Accept-Encoding': 'gzip, deflate',
-	'Accept-Language': 'en-us,en;q=0.5',
-}
-
-try:
-	compat_str = unicode # Python 2
-except NameError:
-	compat_str = str
-
 try:
 	import urllib.request as compat_urllib_request
 except ImportError: # Python 2
@@ -50,6 +36,29 @@ try:
 except ImportError: # Python 2
 	import cookielib as compat_cookiejar
 
+try:
+	import html.entities as compat_html_entities
+except NameError: # Python 2
+	import htmlentitydefs as compat_html_entities
+
+try:
+	compat_str = unicode # Python 2
+except NameError:
+	compat_str = str
+
+try:
+	compat_chr = unichr # Python 2
+except NameError:
+	compat_chr = chr
+
+
+std_headers = {
+	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
+	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+	'Accept-Encoding': 'gzip, deflate',
+	'Accept-Language': 'en-us,en;q=0.5',
+}
 def preferredencoding():
 	"""Get preferred encoding.
 
@@ -74,8 +83,8 @@ def htmlentity_transform(matchobj):
 	entity = matchobj.group(1)
 
 	# Known non-numeric HTML entity
-	if entity in htmlentitydefs.name2codepoint:
-		return unichr(htmlentitydefs.name2codepoint[entity])
+	if entity in compat_html_entities.name2codepoint:
+		return compat_chr(compat_html_entities.name2codepoint[entity])
 
 	mobj = re.match(u'(?u)#(x?\\d+)', entity)
 	if mobj is not None:
@@ -85,7 +94,7 @@ def htmlentity_transform(matchobj):
 			numstr = u'0%s' % numstr
 		else:
 			base = 10
-		return unichr(int(numstr, base))
+		return compat_chr(int(numstr, base))
 
 	# Unknown entity in name, return its literal representation
 	return (u'&%s;' % entity)
-- 
cgit v1.2.3


From a8156c1d2e4b2a7ac5e034c247c6fccaca15a21d Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 00:06:28 +0100
Subject: Python 3 version of HTMLParser

---
 youtube_dl/utils.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 668338270..c4917012b 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 
 import gzip
-import HTMLParser
 import locale
 import os
 import re
@@ -41,6 +40,11 @@ try:
 except NameError: # Python 2
 	import htmlentitydefs as compat_html_entities
 
+try:
+	import html.parser as compat_html_parser
+except NameError: # Python 2
+	import HTMLParser as compat_html_parser
+
 try:
 	compat_str = unicode # Python 2
 except NameError:
@@ -99,8 +103,8 @@ def htmlentity_transform(matchobj):
 	# Unknown entity in name, return its literal representation
 	return (u'&%s;' % entity)
 
-HTMLParser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
-class IDParser(HTMLParser.HTMLParser):
+compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
+class IDParser(compat_html_parser.HTMLParser):
 	"""Modified HTMLParser that isolates a tag with the specified id"""
 	def __init__(self, id):
 		self.id = id
@@ -110,11 +114,11 @@ class IDParser(HTMLParser.HTMLParser):
 		self.html = None
 		self.watch_startpos = False
 		self.error_count = 0
-		HTMLParser.HTMLParser.__init__(self)
+		compat_html_parser.HTMLParser.__init__(self)
 
 	def error(self, message):
 		if self.error_count > 10 or self.started:
-			raise HTMLParser.HTMLParseError(message, self.getpos())
+			raise compat_html_parser.HTMLParseError(message, self.getpos())
 		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
 		self.error_count += 1
 		self.goahead(1)
@@ -170,7 +174,7 @@ def get_element_by_id(id, html):
 	parser = IDParser(id)
 	try:
 		parser.loads(html)
-	except HTMLParser.HTMLParseError:
+	except compat_html_parser.HTMLParseError:
 		pass
 	return parser.get_result()
 
-- 
cgit v1.2.3


From 03f9daab34605f538294fdffb141ef5d9fc670e6 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 00:09:17 +0100
Subject: Use io.BytesIO instead of StringIO

---
 youtube_dl/utils.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c4917012b..ebff2e8f2 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import gzip
+import io
 import locale
 import os
 import re
@@ -10,11 +11,6 @@ import zlib
 import email.utils
 import json
 
-try:
-	import cStringIO as StringIO
-except ImportError:
-	import StringIO
-
 try:
 	import urllib.request as compat_urllib_request
 except ImportError: # Python 2
@@ -400,12 +396,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 		old_resp = resp
 		# gzip
 		if resp.headers.get('Content-encoding', '') == 'gzip':
-			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
+			gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
 			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 			resp.msg = old_resp.msg
 		# deflate
 		if resp.headers.get('Content-encoding', '') == 'deflate':
-			gz = StringIO.StringIO(self.deflate(resp.read()))
+			gz = io.BytesIO(self.deflate(resp.read()))
 			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 			resp.msg = old_resp.msg
 		return resp
-- 
cgit v1.2.3


From 348d0a7a18fd4aa2512418b8be48f5011fbd3f99 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 00:13:00 +0100
Subject: Py2/3 compatibility for http.client

---
 youtube_dl/utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index ebff2e8f2..370567705 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -41,6 +41,12 @@ try:
 except NameError: # Python 2
 	import HTMLParser as compat_html_parser
 
+try:
+	import http.client as compat_html_client
+except NameError: # Python 2
+	import httplib as compat_html_client
+
+
 try:
 	compat_str = unicode # Python 2
 except NameError:
-- 
cgit v1.2.3


From 9f37a9594147b71b1ce796219f75fc3a2bb544d3 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 00:17:12 +0100
Subject: Py2/3 parse_qs compatibility

---
 youtube_dl/utils.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 370567705..a27ac77dd 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -33,19 +33,23 @@ except ImportError: # Python 2
 
 try:
 	import html.entities as compat_html_entities
-except NameError: # Python 2
+except ImportError: # Python 2
 	import htmlentitydefs as compat_html_entities
 
 try:
 	import html.parser as compat_html_parser
-except NameError: # Python 2
+except ImportError: # Python 2
 	import HTMLParser as compat_html_parser
 
 try:
 	import http.client as compat_html_client
-except NameError: # Python 2
+except ImportError: # Python 2
 	import httplib as compat_html_client
 
+try:
+	from urllib.parse.parse_qs import parse_qs as compat_parse_qs
+except ImportError: # Python 2
+	from urlparse import parse_qs as compat_parse_qs
 
 try:
 	compat_str = unicode # Python 2
-- 
cgit v1.2.3


From 73dce4b2e4cb6eea951dbd682a92ad7508c957b0 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 00:17:59 +0100
Subject: Import from the correct module

---
 youtube_dl/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index a27ac77dd..41e6b8550 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -47,7 +47,7 @@ except ImportError: # Python 2
 	import httplib as compat_html_client
 
 try:
-	from urllib.parse.parse_qs import parse_qs as compat_parse_qs
+	from urllib.parse import parse_qs as compat_parse_qs
 except ImportError: # Python 2
 	from urlparse import parse_qs as compat_parse_qs
 
-- 
cgit v1.2.3


From 5bd9cc7a6a93fbd2b3b06cbdfbaaf8b7e89e8b7b Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 00:22:55 +0100
Subject: typo

---
 youtube_dl/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 41e6b8550..3fcb0927f 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -42,9 +42,9 @@ except ImportError: # Python 2
 	import HTMLParser as compat_html_parser
 
 try:
-	import http.client as compat_html_client
+	import http.client as compat_http_client
 except ImportError: # Python 2
-	import httplib as compat_html_client
+	import httplib as compat_http_client
 
 try:
 	from urllib.parse import parse_qs as compat_parse_qs
-- 
cgit v1.2.3


From 8cd10ac4efb5168f3ceb18ec94338bce73e166a7 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 00:46:21 +0100
Subject: Fix printing title etc.

---
 youtube_dl/utils.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 3fcb0927f..9db7b9d9d 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -61,7 +61,6 @@ try:
 except NameError:
 	compat_chr = chr
 
-
 std_headers = {
 	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
 	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
@@ -83,6 +82,12 @@ def preferredencoding():
 
 	return pref
 
+if sys.version_info < (3,0):
+	def compat_print(s):
+		print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
+else:
+	def compat_print(s):
+		print(s)
 
 def htmlentity_transform(matchobj):
 	"""Transforms an HTML entity to a character.
-- 
cgit v1.2.3


From e6137fd61d47ba8624e9baa84701adf929e38849 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 00:53:09 +0100
Subject: Remove superfluous encodings

---
 youtube_dl/utils.py | 1 +
 1 file changed, 1 insertion(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 9db7b9d9d..0aa350e64 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -87,6 +87,7 @@ if sys.version_info < (3,0):
 		print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 else:
 	def compat_print(s):
+		assert type(s) == type(u'')
 		print(s)
 
 def htmlentity_transform(matchobj):
-- 
cgit v1.2.3


From 0f00efed4c06fefcd4da7294cb3c92bccf081eaa Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 00:56:20 +0100
Subject: Woooohooo!  python3 youtube_dl BaW_jenozKc -t  works!

---
 youtube_dl/utils.py | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 0aa350e64..a5df62bf8 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -292,6 +292,10 @@ def encodeFilename(s):
 
 	assert type(s) == type(u'')
 
+	# Python 3 has a Unicode API
+	if sys.version_info >= (3, 0):
+		return s
+
 	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 		# Pass u'' directly to use Unicode APIs on Windows 2000 and up
 		# (Detecting Windows NT 4 is tricky because 'major >= 4' would
-- 
cgit v1.2.3


From 40b35b4aa6040ecc3ff7b3c9c8b908249633d86e Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 02:01:09 +0100
Subject: hack for apparently broken parse_qs in python2

---
 youtube_dl/utils.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index a5df62bf8..cf78e9dc8 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -49,7 +49,81 @@ except ImportError: # Python 2
 try:
 	from urllib.parse import parse_qs as compat_parse_qs
 except ImportError: # Python 2
-	from urlparse import parse_qs as compat_parse_qs
+	# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+	# Python 2's version is apparently totally broken
+	def _unquote(string, encoding='utf-8', errors='replace'):
+		if string == '':
+			return string
+		res = string.split('%')
+		if len(res) == 1:
+			return string
+		if encoding is None:
+			encoding = 'utf-8'
+		if errors is None:
+			errors = 'replace'
+		# pct_sequence: contiguous sequence of percent-encoded bytes, decoded
+		pct_sequence = b''
+		string = res[0]
+		for item in res[1:]:
+			try:
+				if not item:
+					raise ValueError
+				pct_sequence += item[:2].decode('hex')
+				rest = item[2:]
+				if not rest:
+					# This segment was just a single percent-encoded character.
+					# May be part of a sequence of code units, so delay decoding.
+					# (Stored in pct_sequence).
+					continue
+			except ValueError:
+				rest = '%' + item
+			# Encountered non-percent-encoded characters. Flush the current
+			# pct_sequence.
+			string += pct_sequence.decode(encoding, errors) + rest
+			pct_sequence = b''
+		if pct_sequence:
+			# Flush the final pct_sequence
+			string += pct_sequence.decode(encoding, errors)
+		return string
+
+	def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+				encoding='utf-8', errors='replace'):
+		qs, _coerce_result = qs, unicode
+		pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+		r = []
+		for name_value in pairs:
+			if not name_value and not strict_parsing:
+				continue
+			nv = name_value.split('=', 1)
+			if len(nv) != 2:
+				if strict_parsing:
+					raise ValueError("bad query field: %r" % (name_value,))
+				# Handle case of a control-name with no equal sign
+				if keep_blank_values:
+					nv.append('')
+				else:
+					continue
+			if len(nv[1]) or keep_blank_values:
+				name = nv[0].replace('+', ' ')
+				name = _unquote(name, encoding=encoding, errors=errors)
+				name = _coerce_result(name)
+				value = nv[1].replace('+', ' ')
+				value = _unquote(value, encoding=encoding, errors=errors)
+				value = _coerce_result(value)
+				r.append((name, value))
+		return r
+
+	def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+				encoding='utf-8', errors='replace'):
+		parsed_result = {}
+		pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+						encoding=encoding, errors=errors)
+		for name, value in pairs:
+			if name in parsed_result:
+				parsed_result[name].append(value)
+			else:
+				parsed_result[name] = [value]
+		return parsed_result
 
 try:
 	compat_str = unicode # Python 2
-- 
cgit v1.2.3


From 59ae15a507cbf93de13f8fda8444d2d9a251747a Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 02:04:46 +0100
Subject: Convert all tabs to 4 spaces (PEP8)

---
 youtube_dl/utils.py | 788 ++++++++++++++++++++++++++--------------------------
 1 file changed, 394 insertions(+), 394 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index cf78e9dc8..836138277 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -12,490 +12,490 @@ import email.utils
 import json
 
 try:
-	import urllib.request as compat_urllib_request
+    import urllib.request as compat_urllib_request
 except ImportError: # Python 2
-	import urllib2 as compat_urllib_request
+    import urllib2 as compat_urllib_request
 
 try:
-	import urllib.error as compat_urllib_error
+    import urllib.error as compat_urllib_error
 except ImportError: # Python 2
-	import urllib2 as compat_urllib_error
+    import urllib2 as compat_urllib_error
 
 try:
-	import urllib.parse as compat_urllib_parse
+    import urllib.parse as compat_urllib_parse
 except ImportError: # Python 2
-	import urllib as compat_urllib_parse
+    import urllib as compat_urllib_parse
 
 try:
-	import http.cookiejar as compat_cookiejar
+    import http.cookiejar as compat_cookiejar
 except ImportError: # Python 2
-	import cookielib as compat_cookiejar
+    import cookielib as compat_cookiejar
 
 try:
-	import html.entities as compat_html_entities
+    import html.entities as compat_html_entities
 except ImportError: # Python 2
-	import htmlentitydefs as compat_html_entities
+    import htmlentitydefs as compat_html_entities
 
 try:
-	import html.parser as compat_html_parser
+    import html.parser as compat_html_parser
 except ImportError: # Python 2
-	import HTMLParser as compat_html_parser
+    import HTMLParser as compat_html_parser
 
 try:
-	import http.client as compat_http_client
+    import http.client as compat_http_client
 except ImportError: # Python 2
-	import httplib as compat_http_client
+    import httplib as compat_http_client
 
 try:
-	from urllib.parse import parse_qs as compat_parse_qs
+    from urllib.parse import parse_qs as compat_parse_qs
 except ImportError: # Python 2
-	# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
-	# Python 2's version is apparently totally broken
-	def _unquote(string, encoding='utf-8', errors='replace'):
-		if string == '':
-			return string
-		res = string.split('%')
-		if len(res) == 1:
-			return string
-		if encoding is None:
-			encoding = 'utf-8'
-		if errors is None:
-			errors = 'replace'
-		# pct_sequence: contiguous sequence of percent-encoded bytes, decoded
-		pct_sequence = b''
-		string = res[0]
-		for item in res[1:]:
-			try:
-				if not item:
-					raise ValueError
-				pct_sequence += item[:2].decode('hex')
-				rest = item[2:]
-				if not rest:
-					# This segment was just a single percent-encoded character.
-					# May be part of a sequence of code units, so delay decoding.
-					# (Stored in pct_sequence).
-					continue
-			except ValueError:
-				rest = '%' + item
-			# Encountered non-percent-encoded characters. Flush the current
-			# pct_sequence.
-			string += pct_sequence.decode(encoding, errors) + rest
-			pct_sequence = b''
-		if pct_sequence:
-			# Flush the final pct_sequence
-			string += pct_sequence.decode(encoding, errors)
-		return string
-
-	def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
-				encoding='utf-8', errors='replace'):
-		qs, _coerce_result = qs, unicode
-		pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
-		r = []
-		for name_value in pairs:
-			if not name_value and not strict_parsing:
-				continue
-			nv = name_value.split('=', 1)
-			if len(nv) != 2:
-				if strict_parsing:
-					raise ValueError("bad query field: %r" % (name_value,))
-				# Handle case of a control-name with no equal sign
-				if keep_blank_values:
-					nv.append('')
-				else:
-					continue
-			if len(nv[1]) or keep_blank_values:
-				name = nv[0].replace('+', ' ')
-				name = _unquote(name, encoding=encoding, errors=errors)
-				name = _coerce_result(name)
-				value = nv[1].replace('+', ' ')
-				value = _unquote(value, encoding=encoding, errors=errors)
-				value = _coerce_result(value)
-				r.append((name, value))
-		return r
-
-	def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
-				encoding='utf-8', errors='replace'):
-		parsed_result = {}
-		pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
-						encoding=encoding, errors=errors)
-		for name, value in pairs:
-			if name in parsed_result:
-				parsed_result[name].append(value)
-			else:
-				parsed_result[name] = [value]
-		return parsed_result
+    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+    # Python 2's version is apparently totally broken
+    def _unquote(string, encoding='utf-8', errors='replace'):
+        if string == '':
+            return string
+        res = string.split('%')
+        if len(res) == 1:
+            return string
+        if encoding is None:
+            encoding = 'utf-8'
+        if errors is None:
+            errors = 'replace'
+        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
+        pct_sequence = b''
+        string = res[0]
+        for item in res[1:]:
+            try:
+                if not item:
+                    raise ValueError
+                pct_sequence += item[:2].decode('hex')
+                rest = item[2:]
+                if not rest:
+                    # This segment was just a single percent-encoded character.
+                    # May be part of a sequence of code units, so delay decoding.
+                    # (Stored in pct_sequence).
+                    continue
+            except ValueError:
+                rest = '%' + item
+            # Encountered non-percent-encoded characters. Flush the current
+            # pct_sequence.
+            string += pct_sequence.decode(encoding, errors) + rest
+            pct_sequence = b''
+        if pct_sequence:
+            # Flush the final pct_sequence
+            string += pct_sequence.decode(encoding, errors)
+        return string
+
+    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+                encoding='utf-8', errors='replace'):
+        qs, _coerce_result = qs, unicode
+        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+        r = []
+        for name_value in pairs:
+            if not name_value and not strict_parsing:
+                continue
+            nv = name_value.split('=', 1)
+            if len(nv) != 2:
+                if strict_parsing:
+                    raise ValueError("bad query field: %r" % (name_value,))
+                # Handle case of a control-name with no equal sign
+                if keep_blank_values:
+                    nv.append('')
+                else:
+                    continue
+            if len(nv[1]) or keep_blank_values:
+                name = nv[0].replace('+', ' ')
+                name = _unquote(name, encoding=encoding, errors=errors)
+                name = _coerce_result(name)
+                value = nv[1].replace('+', ' ')
+                value = _unquote(value, encoding=encoding, errors=errors)
+                value = _coerce_result(value)
+                r.append((name, value))
+        return r
+
+    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+                encoding='utf-8', errors='replace'):
+        parsed_result = {}
+        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+                        encoding=encoding, errors=errors)
+        for name, value in pairs:
+            if name in parsed_result:
+                parsed_result[name].append(value)
+            else:
+                parsed_result[name] = [value]
+        return parsed_result
 
 try:
-	compat_str = unicode # Python 2
+    compat_str = unicode # Python 2
 except NameError:
-	compat_str = str
+    compat_str = str
 
 try:
-	compat_chr = unichr # Python 2
+    compat_chr = unichr # Python 2
 except NameError:
-	compat_chr = chr
+    compat_chr = chr
 
 std_headers = {
-	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
-	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
-	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-	'Accept-Encoding': 'gzip, deflate',
-	'Accept-Language': 'en-us,en;q=0.5',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
+    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+    'Accept-Encoding': 'gzip, deflate',
+    'Accept-Language': 'en-us,en;q=0.5',
 }
 def preferredencoding():
-	"""Get preferred encoding.
+    """Get preferred encoding.
 
-	Returns the best encoding scheme for the system, based on
-	locale.getpreferredencoding() and some further tweaks.
-	"""
-	try:
-		pref = locale.getpreferredencoding()
-		u'TEST'.encode(pref)
-	except:
-		pref = 'UTF-8'
+    Returns the best encoding scheme for the system, based on
+    locale.getpreferredencoding() and some further tweaks.
+    """
+    try:
+        pref = locale.getpreferredencoding()
+        u'TEST'.encode(pref)
+    except:
+        pref = 'UTF-8'
 
-	return pref
+    return pref
 
 if sys.version_info < (3,0):
-	def compat_print(s):
-		print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
+    def compat_print(s):
+        print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
 else:
-	def compat_print(s):
-		assert type(s) == type(u'')
-		print(s)
+    def compat_print(s):
+        assert type(s) == type(u'')
+        print(s)
 
 def htmlentity_transform(matchobj):
-	"""Transforms an HTML entity to a character.
-
-	This function receives a match object and is intended to be used with
-	the re.sub() function.
-	"""
-	entity = matchobj.group(1)
-
-	# Known non-numeric HTML entity
-	if entity in compat_html_entities.name2codepoint:
-		return compat_chr(compat_html_entities.name2codepoint[entity])
-
-	mobj = re.match(u'(?u)#(x?\\d+)', entity)
-	if mobj is not None:
-		numstr = mobj.group(1)
-		if numstr.startswith(u'x'):
-			base = 16
-			numstr = u'0%s' % numstr
-		else:
-			base = 10
-		return compat_chr(int(numstr, base))
-
-	# Unknown entity in name, return its literal representation
-	return (u'&%s;' % entity)
+    """Transforms an HTML entity to a character.
+
+    This function receives a match object and is intended to be used with
+    the re.sub() function.
+    """
+    entity = matchobj.group(1)
+
+    # Known non-numeric HTML entity
+    if entity in compat_html_entities.name2codepoint:
+        return compat_chr(compat_html_entities.name2codepoint[entity])
+
+    mobj = re.match(u'(?u)#(x?\\d+)', entity)
+    if mobj is not None:
+        numstr = mobj.group(1)
+        if numstr.startswith(u'x'):
+            base = 16
+            numstr = u'0%s' % numstr
+        else:
+            base = 10
+        return compat_chr(int(numstr, base))
+
+    # Unknown entity in name, return its literal representation
+    return (u'&%s;' % entity)
 
 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
 class IDParser(compat_html_parser.HTMLParser):
-	"""Modified HTMLParser that isolates a tag with the specified id"""
-	def __init__(self, id):
-		self.id = id
-		self.result = None
-		self.started = False
-		self.depth = {}
-		self.html = None
-		self.watch_startpos = False
-		self.error_count = 0
-		compat_html_parser.HTMLParser.__init__(self)
-
-	def error(self, message):
-		if self.error_count > 10 or self.started:
-			raise compat_html_parser.HTMLParseError(message, self.getpos())
-		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
-		self.error_count += 1
-		self.goahead(1)
-
-	def loads(self, html):
-		self.html = html
-		self.feed(html)
-		self.close()
-
-	def handle_starttag(self, tag, attrs):
-		attrs = dict(attrs)
-		if self.started:
-			self.find_startpos(None)
-		if 'id' in attrs and attrs['id'] == self.id:
-			self.result = [tag]
-			self.started = True
-			self.watch_startpos = True
-		if self.started:
-			if not tag in self.depth: self.depth[tag] = 0
-			self.depth[tag] += 1
-
-	def handle_endtag(self, tag):
-		if self.started:
-			if tag in self.depth: self.depth[tag] -= 1
-			if self.depth[self.result[0]] == 0:
-				self.started = False
-				self.result.append(self.getpos())
-
-	def find_startpos(self, x):
-		"""Needed to put the start position of the result (self.result[1])
-		after the opening tag with the requested id"""
-		if self.watch_startpos:
-			self.watch_startpos = False
-			self.result.append(self.getpos())
-	handle_entityref = handle_charref = handle_data = handle_comment = \
-	handle_decl = handle_pi = unknown_decl = find_startpos
-
-	def get_result(self):
-		if self.result is None:
-			return None
-		if len(self.result) != 3:
-			return None
-		lines = self.html.split('\n')
-		lines = lines[self.result[1][0]-1:self.result[2][0]]
-		lines[0] = lines[0][self.result[1][1]:]
-		if len(lines) == 1:
-			lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
-		lines[-1] = lines[-1][:self.result[2][1]]
-		return '\n'.join(lines).strip()
+    """Modified HTMLParser that isolates a tag with the specified id"""
+    def __init__(self, id):
+        self.id = id
+        self.result = None
+        self.started = False
+        self.depth = {}
+        self.html = None
+        self.watch_startpos = False
+        self.error_count = 0
+        compat_html_parser.HTMLParser.__init__(self)
+
+    def error(self, message):
+        if self.error_count > 10 or self.started:
+            raise compat_html_parser.HTMLParseError(message, self.getpos())
+        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
+        self.error_count += 1
+        self.goahead(1)
+
+    def loads(self, html):
+        self.html = html
+        self.feed(html)
+        self.close()
+
+    def handle_starttag(self, tag, attrs):
+        attrs = dict(attrs)
+        if self.started:
+            self.find_startpos(None)
+        if 'id' in attrs and attrs['id'] == self.id:
+            self.result = [tag]
+            self.started = True
+            self.watch_startpos = True
+        if self.started:
+            if not tag in self.depth: self.depth[tag] = 0
+            self.depth[tag] += 1
+
+    def handle_endtag(self, tag):
+        if self.started:
+            if tag in self.depth: self.depth[tag] -= 1
+            if self.depth[self.result[0]] == 0:
+                self.started = False
+                self.result.append(self.getpos())
+
+    def find_startpos(self, x):
+        """Needed to put the start position of the result (self.result[1])
+        after the opening tag with the requested id"""
+        if self.watch_startpos:
+            self.watch_startpos = False
+            self.result.append(self.getpos())
+    handle_entityref = handle_charref = handle_data = handle_comment = \
+    handle_decl = handle_pi = unknown_decl = find_startpos
+
+    def get_result(self):
+        if self.result is None:
+            return None
+        if len(self.result) != 3:
+            return None
+        lines = self.html.split('\n')
+        lines = lines[self.result[1][0]-1:self.result[2][0]]
+        lines[0] = lines[0][self.result[1][1]:]
+        if len(lines) == 1:
+            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
+        lines[-1] = lines[-1][:self.result[2][1]]
+        return '\n'.join(lines).strip()
 
 def get_element_by_id(id, html):
-	"""Return the content of the tag with the specified id in the passed HTML document"""
-	parser = IDParser(id)
-	try:
-		parser.loads(html)
-	except compat_html_parser.HTMLParseError:
-		pass
-	return parser.get_result()
+    """Return the content of the tag with the specified id in the passed HTML document"""
+    parser = IDParser(id)
+    try:
+        parser.loads(html)
+    except compat_html_parser.HTMLParseError:
+        pass
+    return parser.get_result()
 
 
 def clean_html(html):
-	"""Clean an HTML snippet into a readable string"""
-	# Newline vs <br />
-	html = html.replace('\n', ' ')
-	html = re.sub('\s*<\s*br\s*/?\s*>\s*', '\n', html)
-	# Strip html tags
-	html = re.sub('<.*?>', '', html)
-	# Replace html entities
-	html = unescapeHTML(html)
-	return html
+    """Clean an HTML snippet into a readable string"""
+    # Newline vs <br />
+    html = html.replace('\n', ' ')
+    html = re.sub('\s*<\s*br\s*/?\s*>\s*', '\n', html)
+    # Strip html tags
+    html = re.sub('<.*?>', '', html)
+    # Replace html entities
+    html = unescapeHTML(html)
+    return html
 
 
 def sanitize_open(filename, open_mode):
-	"""Try to open the given filename, and slightly tweak it if this fails.
-
-	Attempts to open the given filename. If this fails, it tries to change
-	the filename slightly, step by step, until it's either able to open it
-	or it fails and raises a final exception, like the standard open()
-	function.
-
-	It returns the tuple (stream, definitive_file_name).
-	"""
-	try:
-		if filename == u'-':
-			if sys.platform == 'win32':
-				import msvcrt
-				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
-			return (sys.stdout, filename)
-		stream = open(encodeFilename(filename), open_mode)
-		return (stream, filename)
-	except (IOError, OSError) as err:
-		# In case of error, try to remove win32 forbidden chars
-		filename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename)
-
-		# An exception here should be caught in the caller
-		stream = open(encodeFilename(filename), open_mode)
-		return (stream, filename)
+    """Try to open the given filename, and slightly tweak it if this fails.
+
+    Attempts to open the given filename. If this fails, it tries to change
+    the filename slightly, step by step, until it's either able to open it
+    or it fails and raises a final exception, like the standard open()
+    function.
+
+    It returns the tuple (stream, definitive_file_name).
+    """
+    try:
+        if filename == u'-':
+            if sys.platform == 'win32':
+                import msvcrt
+                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+            return (sys.stdout, filename)
+        stream = open(encodeFilename(filename), open_mode)
+        return (stream, filename)
+    except (IOError, OSError) as err:
+        # In case of error, try to remove win32 forbidden chars
+        filename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename)
+
+        # An exception here should be caught in the caller
+        stream = open(encodeFilename(filename), open_mode)
+        return (stream, filename)
 
 
 def timeconvert(timestr):
-	"""Convert RFC 2822 defined time string into system timestamp"""
-	timestamp = None
-	timetuple = email.utils.parsedate_tz(timestr)
-	if timetuple is not None:
-		timestamp = email.utils.mktime_tz(timetuple)
-	return timestamp
+    """Convert RFC 2822 defined time string into system timestamp"""
+    timestamp = None
+    timetuple = email.utils.parsedate_tz(timestr)
+    if timetuple is not None:
+        timestamp = email.utils.mktime_tz(timetuple)
+    return timestamp
 
 def sanitize_filename(s, restricted=False):
-	"""Sanitizes a string so it could be used as part of a filename.
-	If restricted is set, use a stricter subset of allowed characters.
-	"""
-	def replace_insane(char):
-		if char == '?' or ord(char) < 32 or ord(char) == 127:
-			return ''
-		elif char == '"':
-			return '' if restricted else '\''
-		elif char == ':':
-			return '_-' if restricted else ' -'
-		elif char in '\\/|*<>':
-			return '_'
-		if restricted and (char in '!&\'' or char.isspace()):
-			return '_'
-		if restricted and ord(char) > 127:
-			return '_'
-		return char
-
-	result = u''.join(map(replace_insane, s))
-	while '__' in result:
-		result = result.replace('__', '_')
-	result = result.strip('_')
-	# Common case of "Foreign band name - English song title"
-	if restricted and result.startswith('-_'):
-		result = result[2:]
-	if not result:
-		result = '_'
-	return result
+    """Sanitizes a string so it could be used as part of a filename.
+    If restricted is set, use a stricter subset of allowed characters.
+    """
+    def replace_insane(char):
+        if char == '?' or ord(char) < 32 or ord(char) == 127:
+            return ''
+        elif char == '"':
+            return '' if restricted else '\''
+        elif char == ':':
+            return '_-' if restricted else ' -'
+        elif char in '\\/|*<>':
+            return '_'
+        if restricted and (char in '!&\'' or char.isspace()):
+            return '_'
+        if restricted and ord(char) > 127:
+            return '_'
+        return char
+
+    result = u''.join(map(replace_insane, s))
+    while '__' in result:
+        result = result.replace('__', '_')
+    result = result.strip('_')
+    # Common case of "Foreign band name - English song title"
+    if restricted and result.startswith('-_'):
+        result = result[2:]
+    if not result:
+        result = '_'
+    return result
 
 def orderedSet(iterable):
-	""" Remove all duplicates from the input iterable """
-	res = []
-	for el in iterable:
-		if el not in res:
-			res.append(el)
-	return res
+    """ Remove all duplicates from the input iterable """
+    res = []
+    for el in iterable:
+        if el not in res:
+            res.append(el)
+    return res
 
 def unescapeHTML(s):
-	"""
-	@param s a string
-	"""
-	assert type(s) == type(u'')
+    """
+    @param s a string
+    """
+    assert type(s) == type(u'')
 
-	result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
-	return result
+    result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
+    return result
 
 def encodeFilename(s):
-	"""
-	@param s The name of the file
-	"""
+    """
+    @param s The name of the file
+    """
 
-	assert type(s) == type(u'')
+    assert type(s) == type(u'')
 
-	# Python 3 has a Unicode API
-	if sys.version_info >= (3, 0):
-		return s
+    # Python 3 has a Unicode API
+    if sys.version_info >= (3, 0):
+        return s
 
-	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
-		# Pass u'' directly to use Unicode APIs on Windows 2000 and up
-		# (Detecting Windows NT 4 is tricky because 'major >= 4' would
-		# match Windows 9x series as well. Besides, NT 4 is obsolete.)
-		return s
-	else:
-		return s.encode(sys.getfilesystemencoding(), 'ignore')
+    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+        # Pass u'' directly to use Unicode APIs on Windows 2000 and up
+        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+        return s
+    else:
+        return s.encode(sys.getfilesystemencoding(), 'ignore')
 
 class DownloadError(Exception):
-	"""Download Error exception.
+    """Download Error exception.
 
-	This exception may be thrown by FileDownloader objects if they are not
-	configured to continue on errors. They will contain the appropriate
-	error message.
-	"""
-	pass
+    This exception may be thrown by FileDownloader objects if they are not
+    configured to continue on errors. They will contain the appropriate
+    error message.
+    """
+    pass
 
 
 class SameFileError(Exception):
-	"""Same File exception.
+    """Same File exception.
 
-	This exception will be thrown by FileDownloader objects if they detect
-	multiple files would have to be downloaded to the same file on disk.
-	"""
-	pass
+    This exception will be thrown by FileDownloader objects if they detect
+    multiple files would have to be downloaded to the same file on disk.
+    """
+    pass
 
 
 class PostProcessingError(Exception):
-	"""Post Processing exception.
+    """Post Processing exception.
 
-	This exception may be raised by PostProcessor's .run() method to
-	indicate an error in the postprocessing task.
-	"""
-	pass
+    This exception may be raised by PostProcessor's .run() method to
+    indicate an error in the postprocessing task.
+    """
+    pass
 
 class MaxDownloadsReached(Exception):
-	""" --max-downloads limit has been reached. """
-	pass
+    """ --max-downloads limit has been reached. """
+    pass
 
 
 class UnavailableVideoError(Exception):
-	"""Unavailable Format exception.
+    """Unavailable Format exception.
 
-	This exception will be thrown when a video is requested
-	in a format that is not available for that video.
-	"""
-	pass
+    This exception will be thrown when a video is requested
+    in a format that is not available for that video.
+    """
+    pass
 
 
 class ContentTooShortError(Exception):
-	"""Content Too Short exception.
+    """Content Too Short exception.
 
-	This exception may be raised by FileDownloader objects when a file they
-	download is too small for what the server announced first, indicating
-	the connection was probably interrupted.
-	"""
-	# Both in bytes
-	downloaded = None
-	expected = None
+    This exception may be raised by FileDownloader objects when a file they
+    download is too small for what the server announced first, indicating
+    the connection was probably interrupted.
+    """
+    # Both in bytes
+    downloaded = None
+    expected = None
 
-	def __init__(self, downloaded, expected):
-		self.downloaded = downloaded
-		self.expected = expected
+    def __init__(self, downloaded, expected):
+        self.downloaded = downloaded
+        self.expected = expected
 
 
 class Trouble(Exception):
-	"""Trouble helper exception
+    """Trouble helper exception
 
-	This is an exception to be handled with
-	FileDownloader.trouble
-	"""
+    This is an exception to be handled with
+    FileDownloader.trouble
+    """
 
 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
-	"""Handler for HTTP requests and responses.
-
-	This class, when installed with an OpenerDirector, automatically adds
-	the standard headers to every HTTP request and handles gzipped and
-	deflated responses from web servers. If compression is to be avoided in
-	a particular request, the original request in the program code only has
-	to include the HTTP header "Youtubedl-No-Compression", which will be
-	removed before making the real request.
-
-	Part of this code was copied from:
-
-	http://techknack.net/python-urllib2-handlers/
-
-	Andrew Rowls, the author of that code, agreed to release it to the
-	public domain.
-	"""
-
-	@staticmethod
-	def deflate(data):
-		try:
-			return zlib.decompress(data, -zlib.MAX_WBITS)
-		except zlib.error:
-			return zlib.decompress(data)
-
-	@staticmethod
-	def addinfourl_wrapper(stream, headers, url, code):
-		if hasattr(compat_urllib_request.addinfourl, 'getcode'):
-			return compat_urllib_request.addinfourl(stream, headers, url, code)
-		ret = compat_urllib_request.addinfourl(stream, headers, url)
-		ret.code = code
-		return ret
-
-	def http_request(self, req):
-		for h in std_headers:
-			if h in req.headers:
-				del req.headers[h]
-			req.add_header(h, std_headers[h])
-		if 'Youtubedl-no-compression' in req.headers:
-			if 'Accept-encoding' in req.headers:
-				del req.headers['Accept-encoding']
-			del req.headers['Youtubedl-no-compression']
-		return req
-
-	def http_response(self, req, resp):
-		old_resp = resp
-		# gzip
-		if resp.headers.get('Content-encoding', '') == 'gzip':
-			gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
-			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
-			resp.msg = old_resp.msg
-		# deflate
-		if resp.headers.get('Content-encoding', '') == 'deflate':
-			gz = io.BytesIO(self.deflate(resp.read()))
-			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
-			resp.msg = old_resp.msg
-		return resp
+    """Handler for HTTP requests and responses.
+
+    This class, when installed with an OpenerDirector, automatically adds
+    the standard headers to every HTTP request and handles gzipped and
+    deflated responses from web servers. If compression is to be avoided in
+    a particular request, the original request in the program code only has
+    to include the HTTP header "Youtubedl-No-Compression", which will be
+    removed before making the real request.
+
+    Part of this code was copied from:
+
+    http://techknack.net/python-urllib2-handlers/
+
+    Andrew Rowls, the author of that code, agreed to release it to the
+    public domain.
+    """
+
+    @staticmethod
+    def deflate(data):
+        try:
+            return zlib.decompress(data, -zlib.MAX_WBITS)
+        except zlib.error:
+            return zlib.decompress(data)
+
+    @staticmethod
+    def addinfourl_wrapper(stream, headers, url, code):
+        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
+            return compat_urllib_request.addinfourl(stream, headers, url, code)
+        ret = compat_urllib_request.addinfourl(stream, headers, url)
+        ret.code = code
+        return ret
+
+    def http_request(self, req):
+        for h in std_headers:
+            if h in req.headers:
+                del req.headers[h]
+            req.add_header(h, std_headers[h])
+        if 'Youtubedl-no-compression' in req.headers:
+            if 'Accept-encoding' in req.headers:
+                del req.headers['Accept-encoding']
+            del req.headers['Youtubedl-no-compression']
+        return req
+
+    def http_response(self, req, resp):
+        old_resp = resp
+        # gzip
+        if resp.headers.get('Content-encoding', '') == 'gzip':
+            gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
+            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+            resp.msg = old_resp.msg
+        # deflate
+        if resp.headers.get('Content-encoding', '') == 'deflate':
+            gz = io.BytesIO(self.deflate(resp.read()))
+            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
+            resp.msg = old_resp.msg
+        return resp
-- 
cgit v1.2.3


From 799c0763845dfb82d53ecae0080b276f447144de Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 04:51:27 +0100
Subject: collegehumor: able to download a single f4f file (not yet playable)

---
 youtube_dl/utils.py | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 836138277..4600dc967 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -26,6 +26,11 @@ try:
 except ImportError: # Python 2
     import urllib as compat_urllib_parse
 
+try:
+    from urllib.parse import urlparse as compat_urllib_parse_urlparse
+except ImportError: # Python 2
+    from urlparse import urlparse as compat_urllib_parse_urlparse
+
 try:
     import http.cookiejar as compat_cookiejar
 except ImportError: # Python 2
-- 
cgit v1.2.3


From 627dcfff398b00429c8b310f1c1775f9be6f0268 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Wed, 28 Nov 2012 12:59:27 +0100
Subject: Restrict more characters (Closes #566)

---
 youtube_dl/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4600dc967..7f73b8476 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -330,7 +330,7 @@ def sanitize_filename(s, restricted=False):
             return '_-' if restricted else ' -'
         elif char in '\\/|*<>':
             return '_'
-        if restricted and (char in '!&\'' or char.isspace()):
+        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
             return '_'
         if restricted and ord(char) > 127:
             return '_'
-- 
cgit v1.2.3


From 796173d08b514182eedc704541eb55d5c9e1dc0d Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Mon, 3 Dec 2012 15:36:24 +0100
Subject: Keep video IDs verbatim if possible (Closes #571)

---
 youtube_dl/utils.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 7f73b8476..4dcf18991 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -317,9 +317,10 @@ def timeconvert(timestr):
         timestamp = email.utils.mktime_tz(timetuple)
     return timestamp
 
-def sanitize_filename(s, restricted=False):
+def sanitize_filename(s, restricted=False, is_id=False):
     """Sanitizes a string so it could be used as part of a filename.
     If restricted is set, use a stricter subset of allowed characters.
+    Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
     """
     def replace_insane(char):
         if char == '?' or ord(char) < 32 or ord(char) == 127:
@@ -337,14 +338,15 @@ def sanitize_filename(s, restricted=False):
         return char
 
     result = u''.join(map(replace_insane, s))
-    while '__' in result:
-        result = result.replace('__', '_')
-    result = result.strip('_')
-    # Common case of "Foreign band name - English song title"
-    if restricted and result.startswith('-_'):
-        result = result[2:]
-    if not result:
-        result = '_'
+    if not is_id:
+        while '__' in result:
+            result = result.replace('__', '_')
+        result = result.strip('_')
+        # Common case of "Foreign band name - English song title"
+        if restricted and result.startswith('-_'):
+            result = result[2:]
+        if not result:
+            result = '_'
     return result
 
 def orderedSet(iterable):
-- 
cgit v1.2.3


From 0f8d03f81cb20ba0f2a4358b8111146b589d1c5d Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Fri, 7 Dec 2012 00:39:44 +0100
Subject: Let YoutubeDLHandler (transparent gzip) handle HTTPS URLs as well
 (Needed for #579)

---
 youtube_dl/utils.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4dcf18991..44f939053 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -506,3 +506,6 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
             resp.msg = old_resp.msg
         return resp
+
+    https_request = http_request
+    https_response = http_response
-- 
cgit v1.2.3


From 5910e210f48826c6d078ef3744b25d209535c3ae Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Sun, 16 Dec 2012 12:29:03 +0100
Subject: Fix --extract-audio on Python 3

---
 youtube_dl/utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 44f939053..25b67db06 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -51,6 +51,12 @@ try:
 except ImportError: # Python 2
     import httplib as compat_http_client
 
+try:
+    from subprocess import DEVNULL
+    compat_subprocess_get_DEVNULL = lambda: DEVNULL
+except ImportError:
+    compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
+
 try:
     from urllib.parse import parse_qs as compat_parse_qs
 except ImportError: # Python 2
-- 
cgit v1.2.3


From f4bfd65ff2bfce77a6953281c037ca8e516b7648 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Thu, 20 Dec 2012 13:13:24 +0100
Subject: Correct JSON writing (Closes #596)

---
 youtube_dl/utils.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 25b67db06..4e64f327a 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -3,6 +3,7 @@
 
 import gzip
 import io
+import json
 import locale
 import os
 import re
@@ -175,6 +176,18 @@ else:
         assert type(s) == type(u'')
         print(s)
 
+# In Python 2.x, json.dump expects a bytestream.
+# In Python 3.x, it writes to a character stream
+if sys.version_info < (3,0):
+    def write_json_file(obj, fn):
+        with open(fn, 'wb') as f:
+            json.dump(obj, f)
+else:
+    def write_json_file(obj, fn):
+        with open(fn, 'w', encoding='utf-8') as f:
+            json.dump(obj, f)
+
+
 def htmlentity_transform(matchobj):
     """Transforms an HTML entity to a character.
 
-- 
cgit v1.2.3


From 43e8fafd49f94ebf4776c84697e4b815750ec701 Mon Sep 17 00:00:00 2001
From: Nick Daniels <nick.daniels@forward.co.uk>
Date: Wed, 19 Dec 2012 14:21:14 +0000
Subject: Refactor IDParser to search for elements by any attribute not just ID

---
 youtube_dl/utils.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 4e64f327a..a5196b0ae 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -214,10 +214,11 @@ def htmlentity_transform(matchobj):
     return (u'&%s;' % entity)
 
 compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
-class IDParser(compat_html_parser.HTMLParser):
-    """Modified HTMLParser that isolates a tag with the specified id"""
-    def __init__(self, id):
-        self.id = id
+class AttrParser(compat_html_parser.HTMLParser):
+    """Modified HTMLParser that isolates a tag with the specified attribute"""
+    def __init__(self, attribute, value):
+        self.attribute = attribute
+        self.value = value
         self.result = None
         self.started = False
         self.depth = {}
@@ -242,7 +243,7 @@ class IDParser(compat_html_parser.HTMLParser):
         attrs = dict(attrs)
         if self.started:
             self.find_startpos(None)
-        if 'id' in attrs and attrs['id'] == self.id:
+        if self.attribute in attrs and attrs[self.attribute] == self.value:
             self.result = [tag]
             self.started = True
             self.watch_startpos = True
@@ -280,8 +281,12 @@ class IDParser(compat_html_parser.HTMLParser):
         return '\n'.join(lines).strip()
 
 def get_element_by_id(id, html):
-    """Return the content of the tag with the specified id in the passed HTML document"""
-    parser = IDParser(id)
+    """Return the content of the tag with the specified ID in the passed HTML document"""
+    return get_element_by_attribute("id", id, html)
+
+def get_element_by_attribute(attribute, value, html):
+    """Return the content of the tag with the specified attribute in the passed HTML document"""
+    parser = AttrParser(attribute, value)
     try:
         parser.loads(html)
     except compat_html_parser.HTMLParseError:
-- 
cgit v1.2.3


From 056d857571158264aefb8d9f7d47c0dad768be63 Mon Sep 17 00:00:00 2001
From: Filippo Valsorda <filippo.valsorda@gmail.com>
Date: Thu, 20 Dec 2012 11:26:38 +0100
Subject: refactor YouTube subtitles code, it was ugly (my bad)

---
 youtube_dl/utils.py | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index a5196b0ae..c18c9beed 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -465,14 +465,6 @@ class ContentTooShortError(Exception):
         self.downloaded = downloaded
         self.expected = expected
 
-
-class Trouble(Exception):
-    """Trouble helper exception
-
-    This is an exception to be handled with
-    FileDownloader.trouble
-    """
-
 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     """Handler for HTTP requests and responses.
 
-- 
cgit v1.2.3


From 6b3aef80ceba9b4715065be924dcb1f83ec36655 Mon Sep 17 00:00:00 2001
From: Filippo Valsorda <filippo.valsorda@gmail.com>
Date: Thu, 20 Dec 2012 16:30:55 +0100
Subject: better Vimeo tests; fixed a couple of VimeoIE fields

---
 youtube_dl/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c18c9beed..463804e18 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -298,7 +298,8 @@ def clean_html(html):
     """Clean an HTML snippet into a readable string"""
     # Newline vs <br />
     html = html.replace('\n', ' ')
-    html = re.sub('\s*<\s*br\s*/?\s*>\s*', '\n', html)
+    html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
+    html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
     # Strip html tags
     html = re.sub('<.*?>', '', html)
     # Replace html entities
-- 
cgit v1.2.3


From cb6ff87fbb05e421f77b57a79699c647866ceb09 Mon Sep 17 00:00:00 2001
From: Filippo Valsorda <filippo.valsorda@gmail.com>
Date: Wed, 26 Dec 2012 23:22:49 +0100
Subject: The new updates system, relies on gh-pages, secured by RSA, uses
 external web servers

---
 youtube_dl/utils.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 463804e18..7d6041929 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -410,6 +410,34 @@ def encodeFilename(s):
     else:
         return s.encode(sys.getfilesystemencoding(), 'ignore')
 
+def rsa_verify(message, signature, key):
+    from struct import pack
+    from hashlib import sha256
+    from sys import version_info
+    def b(x):
+        if version_info[0] == 2: return x
+        else: return x.encode('latin1')
+    assert(type(message) == type(b('')))
+    block_size = 0
+    n = key[0]
+    while n:
+        block_size += 1
+        n >>= 8
+    signature = pow(int(signature, 16), key[1], key[0])
+    raw_bytes = []
+    while signature:
+        raw_bytes.insert(0, pack("B", signature & 0xFF))
+        signature >>= 8
+    signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
+    if signature[0:2] != b('\x00\x01'): return False
+    signature = signature[2:]
+    if not b('\x00') in signature: return False
+    signature = signature[signature.index(b('\x00'))+1:]
+    if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
+    signature = signature[19:]
+    if signature != sha256(message).digest(): return False
+    return True
+
 class DownloadError(Exception):
     """Download Error exception.
 
-- 
cgit v1.2.3


From f427df17abc9508f88af9d904ac0520d610c0e9c Mon Sep 17 00:00:00 2001
From: Filippo Valsorda <filippo.valsorda@gmail.com>
Date: Sun, 30 Dec 2012 18:22:36 +0100
Subject: some fixes, pulled the codename from the code

---
 youtube_dl/utils.py | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 7d6041929..9784abb24 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -154,6 +154,7 @@ std_headers = {
     'Accept-Encoding': 'gzip, deflate',
     'Accept-Language': 'en-us,en;q=0.5',
 }
+
 def preferredencoding():
     """Get preferred encoding.
 
@@ -187,6 +188,11 @@ else:
         with open(fn, 'w', encoding='utf-8') as f:
             json.dump(obj, f)
 
+# Some library functions return bytestring on 2.X and unicode on 3.X
+def enforce_unicode(s, encoding='utf-8'):
+    if type(s) != type(u''):
+        return s.decode(encoding)
+    return s
 
 def htmlentity_transform(matchobj):
     """Transforms an HTML entity to a character.
-- 
cgit v1.2.3


From d5ed35b664628f40945959e5c07bfbf4c0bd134a Mon Sep 17 00:00:00 2001
From: Filippo Valsorda <filippo.valsorda@gmail.com>
Date: Sun, 30 Dec 2012 19:49:14 +0100
Subject: moved updating code to update.py

---
 youtube_dl/utils.py | 34 ----------------------------------
 1 file changed, 34 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 9784abb24..e9336bcdd 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -188,12 +188,6 @@ else:
         with open(fn, 'w', encoding='utf-8') as f:
             json.dump(obj, f)
 
-# Some library functions return bytestring on 2.X and unicode on 3.X
-def enforce_unicode(s, encoding='utf-8'):
-    if type(s) != type(u''):
-        return s.decode(encoding)
-    return s
-
 def htmlentity_transform(matchobj):
     """Transforms an HTML entity to a character.
 
@@ -416,34 +410,6 @@ def encodeFilename(s):
     else:
         return s.encode(sys.getfilesystemencoding(), 'ignore')
 
-def rsa_verify(message, signature, key):
-    from struct import pack
-    from hashlib import sha256
-    from sys import version_info
-    def b(x):
-        if version_info[0] == 2: return x
-        else: return x.encode('latin1')
-    assert(type(message) == type(b('')))
-    block_size = 0
-    n = key[0]
-    while n:
-        block_size += 1
-        n >>= 8
-    signature = pow(int(signature, 16), key[1], key[0])
-    raw_bytes = []
-    while signature:
-        raw_bytes.insert(0, pack("B", signature & 0xFF))
-        signature >>= 8
-    signature = (block_size - len(raw_bytes)) * b('\x00') + b('').join(raw_bytes)
-    if signature[0:2] != b('\x00\x01'): return False
-    signature = signature[2:]
-    if not b('\x00') in signature: return False
-    signature = signature[signature.index(b('\x00'))+1:]
-    if not signature.startswith(b('\x30\x31\x30\x0D\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00\x04\x20')): return False
-    signature = signature[19:]
-    if signature != sha256(message).digest(): return False
-    return True
-
 class DownloadError(Exception):
     """Download Error exception.
 
-- 
cgit v1.2.3


From 1c256f7047051bf351ed5aedb95d8e705685a06b Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Tue, 1 Jan 2013 20:27:53 +0100
Subject: ExtractorError for errors during extraction

---
 youtube_dl/utils.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index e9336bcdd..91e180326 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -410,6 +410,17 @@ def encodeFilename(s):
     else:
         return s.encode(sys.getfilesystemencoding(), 'ignore')
 
+
+class ExtractorError(Exception):
+    """Error during info extraction."""
+    def __init__(self, msg, tb=None):
+        """ tb is the original traceback (so that it can be printed out) """
+        super(ExtractorError, self).__init__(msg)
+        if tb is None:
+            tb = sys.exc_info()[2]
+        self.traceback = tb
+
+
 class DownloadError(Exception):
     """Download Error exception.
 
-- 
cgit v1.2.3


From 01951dda7a27d3bd2331f22ded5d33876cf1dad9 Mon Sep 17 00:00:00 2001
From: Philipp Hagemeister <phihag@phihag.de>
Date: Thu, 3 Jan 2013 15:39:55 +0100
Subject: Make ExtractorError usable for other causes

---
 youtube_dl/utils.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'youtube_dl/utils.py')

diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 91e180326..8f856ee8c 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -8,6 +8,7 @@ import locale
 import os
 import re
 import sys
+import traceback
 import zlib
 import email.utils
 import json
@@ -414,12 +415,15 @@ def encodeFilename(s):
 class ExtractorError(Exception):
     """Error during info extraction."""
     def __init__(self, msg, tb=None):
-        """ tb is the original traceback (so that it can be printed out) """
+        """ tb, if given, is the original traceback (so that it can be printed out). """
         super(ExtractorError, self).__init__(msg)
-        if tb is None:
-            tb = sys.exc_info()[2]
         self.traceback = tb
 
+    def format_traceback(self):
+        if self.traceback is None:
+            return None
+        return u''.join(traceback.format_tb(self.traceback))
+
 
 class DownloadError(Exception):
     """Download Error exception.
-- 
cgit v1.2.3