about summary refs log tree commit diff
diff options
context:
space:
mode:
author Filippo Valsorda <filippo.valsorda@gmail.com> 2012-03-18 22:15:58 +0100
committer Filippo Valsorda <filippo.valsorda@gmail.com> 2012-03-18 22:15:58 +0100
commit 6af22cf0efe393825534e19f82e3282a53625d19 (patch)
tree d907c8bf0644dc727a1d850f118eb5f435184dcc
parent ceba827e9aab563ae7c7190fc236ec1aa358ee59 (diff)
download youtube-dl-6af22cf0efe393825534e19f82e3282a53625d19.tar.xz
added support for HTTP redirects. Closes #315
-rwxr-xr-x youtube-dl 61
-rwxr-xr-x youtube_dl/__init__.py 60
2 files changed, 121 insertions, 0 deletions
diff --git a/youtube-dl b/youtube-dl
index 5224611d2..5a595901c 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -15,6 +15,7 @@ __authors__ = (
'Kevin Ngo',
'Ori Avtalion',
'shizeeg',
+ 'Filippo Valsorda',
)
__license__ = 'Public Domain'
@@ -2240,7 +2241,67 @@ class GenericIE(InfoExtractor):
"""Report information extraction."""
self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
+    def report_following_redirect(self, new_url):
+        """Report that a redirect to new_url is being followed."""
+        message = u'[redirect] Following redirect to %s' % new_url
+        self._downloader.to_screen(message)
+
+    def _test_redirect(self, url):
+        """Follow HTTP redirects for url and restart the download chain if it moved.
+
+        A HEAD request is sent (retried as a plain request when the server
+        answers 405) and redirects are followed.  If the final URL differs
+        from url, the new URL is reported and passed to the downloader.
+
+        Returns True when a redirect was followed and the new URL was handed
+        to self._downloader.download(); returns False when the final URL
+        equals url or when no installed handler could open url, so the
+        caller should carry on extracting from url itself.
+        """
+        def _forwardable_headers(req):
+            """Copy the headers of req, minus those describing a request body."""
+            return dict((k, v) for k, v in req.headers.items()
+                        if k.lower() not in ("content-length", "content-type"))
+
+        class HeadRequest(urllib2.Request):
+            """Request whose HTTP method is HEAD."""
+            def get_method(self):
+                return "HEAD"
+
+        class HEADRedirectHandler(urllib2.HTTPRedirectHandler):
+            """
+            Subclass the HTTPRedirectHandler to make it use our
+            HeadRequest also on the redirected URL
+            """
+            def redirect_request(self, req, fp, code, msg, headers, newurl):
+                if code in (301, 302, 303, 307):
+                    # Spaces are not valid in a URL; escape them before re-requesting.
+                    newurl = newurl.replace(' ', '%20')
+                    return HeadRequest(newurl,
+                                       headers=_forwardable_headers(req),
+                                       origin_req_host=req.get_origin_req_host(),
+                                       unverifiable=True)
+                else:
+                    raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+        class HTTPMethodFallback(urllib2.BaseHandler):
+            """
+            Fallback to GET if HEAD is not allowed (405 HTTP error)
+            """
+            def http_error_405(self, req, fp, code, msg, headers):
+                # Drain and close the 405 response before issuing the retry.
+                fp.read()
+                fp.close()
+
+                return self.parent.open(urllib2.Request(req.get_full_url(),
+                                                        headers=_forwardable_headers(req),
+                                                        origin_req_host=req.get_origin_req_host(),
+                                                        unverifiable=True))
+
+        # Build our opener
+        opener = urllib2.OpenerDirector()
+        for handler in [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
+                        HTTPMethodFallback, HEADRedirectHandler,
+                        urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]:
+            opener.add_handler(handler())
+
+        response = opener.open(HeadRequest(url))
+        if response is None:
+            # No UnknownHandler is installed above, so open() may return None
+            # when no handler accepts the request; treat that as "no redirect".
+            return False
+        try:
+            new_url = response.geturl()
+        finally:
+            # Only the final URL is needed; release the connection right away.
+            response.close()
+
+        if url == new_url:
+            return False
+
+        self.report_following_redirect(new_url)
+        self._downloader.download([new_url])
+        return True
+
def _real_extract(self, url):
+ if self._test_redirect(url): return
+
# At this point we have a new video
self._downloader.increment_downloads()
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 5f874b72f..5a595901c 100755
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -2241,7 +2241,67 @@ class GenericIE(InfoExtractor):
"""Report information extraction."""
self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
+    def report_following_redirect(self, new_url):
+        """Report that a redirect to new_url is being followed."""
+        message = u'[redirect] Following redirect to %s' % new_url
+        self._downloader.to_screen(message)
+
+    def _test_redirect(self, url):
+        """Follow HTTP redirects for url and restart the download chain if it moved.
+
+        A HEAD request is sent (retried as a plain request when the server
+        answers 405) and redirects are followed.  If the final URL differs
+        from url, the new URL is reported and passed to the downloader.
+
+        Returns True when a redirect was followed and the new URL was handed
+        to self._downloader.download(); returns False when the final URL
+        equals url or when no installed handler could open url, so the
+        caller should carry on extracting from url itself.
+        """
+        def _forwardable_headers(req):
+            """Copy the headers of req, minus those describing a request body."""
+            return dict((k, v) for k, v in req.headers.items()
+                        if k.lower() not in ("content-length", "content-type"))
+
+        class HeadRequest(urllib2.Request):
+            """Request whose HTTP method is HEAD."""
+            def get_method(self):
+                return "HEAD"
+
+        class HEADRedirectHandler(urllib2.HTTPRedirectHandler):
+            """
+            Subclass the HTTPRedirectHandler to make it use our
+            HeadRequest also on the redirected URL
+            """
+            def redirect_request(self, req, fp, code, msg, headers, newurl):
+                if code in (301, 302, 303, 307):
+                    # Spaces are not valid in a URL; escape them before re-requesting.
+                    newurl = newurl.replace(' ', '%20')
+                    return HeadRequest(newurl,
+                                       headers=_forwardable_headers(req),
+                                       origin_req_host=req.get_origin_req_host(),
+                                       unverifiable=True)
+                else:
+                    raise urllib2.HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+        class HTTPMethodFallback(urllib2.BaseHandler):
+            """
+            Fallback to GET if HEAD is not allowed (405 HTTP error)
+            """
+            def http_error_405(self, req, fp, code, msg, headers):
+                # Drain and close the 405 response before issuing the retry.
+                fp.read()
+                fp.close()
+
+                return self.parent.open(urllib2.Request(req.get_full_url(),
+                                                        headers=_forwardable_headers(req),
+                                                        origin_req_host=req.get_origin_req_host(),
+                                                        unverifiable=True))
+
+        # Build our opener
+        opener = urllib2.OpenerDirector()
+        for handler in [urllib2.HTTPHandler, urllib2.HTTPDefaultErrorHandler,
+                        HTTPMethodFallback, HEADRedirectHandler,
+                        urllib2.HTTPErrorProcessor, urllib2.HTTPSHandler]:
+            opener.add_handler(handler())
+
+        response = opener.open(HeadRequest(url))
+        if response is None:
+            # No UnknownHandler is installed above, so open() may return None
+            # when no handler accepts the request; treat that as "no redirect".
+            return False
+        try:
+            new_url = response.geturl()
+        finally:
+            # Only the final URL is needed; release the connection right away.
+            response.close()
+
+        if url == new_url:
+            return False
+
+        self.report_following_redirect(new_url)
+        self._downloader.download([new_url])
+        return True
+
def _real_extract(self, url):
+ if self._test_redirect(url): return
+
# At this point we have a new video
self._downloader.increment_downloads()