diff options
author | Sergey M․ <dstftw@gmail.com> | 2015-05-31 00:44:54 +0600 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2015-05-31 00:44:54 +0600 |
commit | 58bde34a236ff98f25fc109a94b3d393f0bbc9ee (patch) | |
tree | e48ff8c7eb1fcfe54eafe8f85f114245c8902562 /youtube_dl/extractor/generic.py | |
parent | 339516072be6865bf7e9316be81704ae69296c6b (diff) |
[extractor/generic] Force Accept-Encoding to any for extraction pass
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r-- | youtube_dl/extractor/generic.py | 17 |
1 files changed, 15 insertions, 2 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c9c92d686..ec1d9abbe 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,6 +11,7 @@ from ..compat import ( compat_urllib_parse, compat_urlparse, compat_xml_parse_error, + compat_urllib_request, ) from ..utils import ( determine_ext, @@ -916,7 +917,9 @@ class GenericIE(InfoExtractor): full_response = None if head_response is False: - full_response = self._request_webpage(url, video_id) + request = compat_urllib_request.Request(url) + request.add_header('Accept-Encoding', '*') + full_response = self._request_webpage(request, video_id) head_response = full_response # Check for direct link to a video @@ -941,7 +944,17 @@ class GenericIE(InfoExtractor): self._downloader.report_warning('Falling back on generic information extractor.') if not full_response: - full_response = self._request_webpage(url, video_id) + request = compat_urllib_request.Request(url) + # Some webservers may serve compressed content of rather big size (e.g. gzipped flac) + # making it impossible to download only chunk of the file (yet we need only 512kB to + # test whether it's HTML or not). According to youtube-dl default Accept-Encoding + # that will always result in downloading the whole file that is not desirable. + # Therefore for extraction pass we have to override Accept-Encoding to any in order + # to accept raw bytes and being able to download only a chunk. + # It may probably better to solve this by checking Content-Type for application/octet-stream + # after HEAD request finishes, but not sure if we can rely on this. + request.add_header('Accept-Encoding', '*') + full_response = self._request_webpage(request, video_id) # Maybe it's a direct link to a video? # Be careful not to download the whole thing! |