aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2015-05-31 00:44:54 +0600
committer: Sergey M․ <dstftw@gmail.com> 2015-05-31 00:44:54 +0600
commit: 58bde34a236ff98f25fc109a94b3d393f0bbc9ee (patch)
tree: e48ff8c7eb1fcfe54eafe8f85f114245c8902562
parent: 339516072be6865bf7e9316be81704ae69296c6b (diff)
download: youtube-dl-58bde34a236ff98f25fc109a94b3d393f0bbc9ee.tar.xz
[extractor/generic] Force Accept-Encoding to any for extraction pass
-rw-r--r-- youtube_dl/extractor/generic.py 17
1 file changed, 15 insertions, 2 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index c9c92d686..ec1d9abbe 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -11,6 +11,7 @@ from ..compat import (
compat_urllib_parse,
compat_urlparse,
compat_xml_parse_error,
+ compat_urllib_request,
)
from ..utils import (
determine_ext,
@@ -916,7 +917,9 @@ class GenericIE(InfoExtractor):
full_response = None
if head_response is False:
- full_response = self._request_webpage(url, video_id)
+ request = compat_urllib_request.Request(url)
+ request.add_header('Accept-Encoding', '*')
+ full_response = self._request_webpage(request, video_id)
head_response = full_response
# Check for direct link to a video
@@ -941,7 +944,17 @@ class GenericIE(InfoExtractor):
self._downloader.report_warning('Falling back on generic information extractor.')
if not full_response:
- full_response = self._request_webpage(url, video_id)
+ request = compat_urllib_request.Request(url)
+ # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
+ # making it impossible to download only a chunk of the file (yet we need only 512kB to
+ # test whether it's HTML or not). With youtube-dl's default Accept-Encoding, that
+ # will always result in downloading the whole file, which is not desirable.
+ # Therefore, for the extraction pass we have to override Accept-Encoding to any in order
+ # to accept raw bytes and be able to download only a chunk.
+ # It would probably be better to solve this by checking Content-Type for application/octet-stream
+ # after the HEAD request finishes, but it is not clear whether we can rely on this.
+ request.add_header('Accept-Encoding', '*')
+ full_response = self._request_webpage(request, video_id)
# Maybe it's a direct link to a video?
# Be careful not to download the whole thing!