aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2016-03-31 22:58:38 +0600
committerSergey M․ <dstftw@gmail.com>2016-03-31 22:58:38 +0600
commit41d06b042427aa019200bb80a1935d12110f50b1 (patch)
tree87fbc423aec0f4ec30c6491d052acdd2a0c8a767 /youtube_dl/extractor
parent15d260ebaa48409112270685d306a5d9152260c4 (diff)
[extractor/common] Improve _request_webpage
* Do not ignore data, headers and query for Requests * Default values for headers and query switched to dicts since these are used by urllib itself
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r--youtube_dl/extractor/common.py19
1 files changed, 12 insertions, 7 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9b7ab8924..85ac0400c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -22,6 +22,7 @@ from ..compat import (
compat_str,
compat_urllib_error,
compat_urllib_parse_urlencode,
+ compat_urllib_request,
compat_urlparse,
)
from ..downloader.f4m import remove_encrypted_media
@@ -49,6 +50,7 @@ from ..utils import (
determine_protocol,
parse_duration,
mimetype2ext,
+ update_Request,
update_url_query,
)
@@ -347,7 +349,7 @@ class InfoExtractor(object):
def IE_NAME(self):
return compat_str(type(self).__name__[:-2])
- def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers=None, query=None):
+ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
""" Returns the response handle """
if note is None:
self.report_download_webpage(video_id)
@@ -357,11 +359,14 @@ class InfoExtractor(object):
else:
self.to_screen('%s: %s' % (video_id, note))
# data, headers and query params will be ignored for `Request` objects
- if isinstance(url_or_request, compat_str):
+ if isinstance(url_or_request, compat_urllib_request.Request):
+ url_or_request = update_Request(
+ url_or_request, data=data, headers=headers, query=query)
+ else:
if query:
url_or_request = update_url_query(url_or_request, query)
if data or headers:
- url_or_request = sanitized_Request(url_or_request, data, headers or {})
+ url_or_request = sanitized_Request(url_or_request, data, headers)
try:
return self._downloader.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -377,7 +382,7 @@ class InfoExtractor(object):
self._downloader.report_warning(errmsg)
return False
- def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers=None, query=None):
+ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}):
""" Returns a tuple (page content as string, URL handle) """
# Strip hashes from the URL (#1038)
if isinstance(url_or_request, (compat_str, str)):
@@ -470,7 +475,7 @@ class InfoExtractor(object):
return content
- def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers=None, query=None):
+ def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}):
""" Returns the data of the page as a string """
success = False
try_count = 0
@@ -491,7 +496,7 @@ class InfoExtractor(object):
def _download_xml(self, url_or_request, video_id,
note='Downloading XML', errnote='Unable to download XML',
- transform_source=None, fatal=True, encoding=None, data=None, headers=None, query=None):
+ transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}):
"""Return the xml as an xml.etree.ElementTree.Element"""
xml_string = self._download_webpage(
url_or_request, video_id, note, errnote, fatal=fatal, encoding=encoding, data=data, headers=headers, query=query)
@@ -505,7 +510,7 @@ class InfoExtractor(object):
note='Downloading JSON metadata',
errnote='Unable to download JSON metadata',
transform_source=None,
- fatal=True, encoding=None, data=None, headers=None, query=None):
+ fatal=True, encoding=None, data=None, headers={}, query={}):
json_string = self._download_webpage(
url_or_request, video_id, note, errnote, fatal=fatal,
encoding=encoding, data=data, headers=headers, query=query)