aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXiao Di Guan <xdg@puxlit.net>2018-11-03 05:18:20 +1100
committerSergey M <dstftw@gmail.com>2018-11-03 01:18:20 +0700
commit95e42d7336d01f505d6551a21df52f3ae234e96b (patch)
tree795dc08fedf4db29ac64a938197bebf7f5f0ebfe
parentcf0db4d99785532d767d0ca1cc029c73d16bb045 (diff)
[extractor/common] Ensure response handle is not prematurely closed before it can be read if it matches expected_status (resolves #17195, closes #17846, resolves #17447)
-rw-r--r--test/helper.py10
-rw-r--r--test/test_InfoExtractor.py42
-rw-r--r--test/test_downloader_http.py12
-rw-r--r--test/test_http.py10
-rw-r--r--youtube_dl/extractor/common.py5
5 files changed, 57 insertions, 22 deletions
diff --git a/test/helper.py b/test/helper.py
index dfee217a9..aa9a1c9b2 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -7,6 +7,7 @@ import json
import os.path
import re
import types
+import ssl
import sys
import youtube_dl.extractor
@@ -244,3 +245,12 @@ def expect_warnings(ydl, warnings_re):
real_warning(w)
ydl.report_warning = _report_warning
+
+
+def http_server_port(httpd):
+ if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
+ # In Jython SSLSocket is not a subclass of socket.socket
+ sock = httpd.socket.sock
+ else:
+ sock = httpd.socket
+ return sock.getsockname()[1]
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 4833396a5..06be72616 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -9,11 +9,30 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, expect_dict, expect_value
-from youtube_dl.compat import compat_etree_fromstring
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from youtube_dl.compat import compat_etree_fromstring, compat_http_server
from youtube_dl.extractor.common import InfoExtractor
from youtube_dl.extractor import YoutubeIE, get_info_extractor
from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+import threading
+
+
+TEAPOT_RESPONSE_STATUS = 418
+TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
+
+
+class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+ def log_message(self, format, *args):
+ pass
+
+ def do_GET(self):
+ if self.path == '/teapot':
+ self.send_response(TEAPOT_RESPONSE_STATUS)
+ self.send_header('Content-Type', 'text/html; charset=utf-8')
+ self.end_headers()
+ self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
+ else:
+ assert False
class TestIE(InfoExtractor):
@@ -743,6 +762,25 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
for i in range(len(entries)):
expect_dict(self, entries[i], expected_entries[i])
+ def test_response_with_expected_status_returns_content(self):
+ # Checks for mitigations against the effects of
+ # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
+ # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
+ # or the underlying `_download_webpage_handle` returning no content
+ # when a response matches `expected_status`.
+
+ httpd = compat_http_server.HTTPServer(
+ ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
+ port = http_server_port(httpd)
+ server_thread = threading.Thread(target=httpd.serve_forever)
+ server_thread.daemon = True
+ server_thread.start()
+
+ (content, urlh) = self.ie._download_webpage_handle(
+ 'http://127.0.0.1:%d/teapot' % port, None,
+ expected_status=TEAPOT_RESPONSE_STATUS)
+ self.assertEqual(content, TEAPOT_RESPONSE_BODY)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 5cf2bf1a5..750472281 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -9,26 +9,16 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import try_rm
+from test.helper import http_server_port, try_rm
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server
from youtube_dl.downloader.http import HttpFD
from youtube_dl.utils import encodeFilename
-import ssl
import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-def http_server_port(httpd):
- if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
- # In Jython SSLSocket is not a subclass of socket.socket
- sock = httpd.socket.sock
- else:
- sock = httpd.socket
- return sock.getsockname()[1]
-
-
TEST_SIZE = 10 * 1024
diff --git a/test/test_http.py b/test/test_http.py
index 409fec9c8..3ee0a5dda 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -8,6 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from test.helper import http_server_port
from youtube_dl import YoutubeDL
from youtube_dl.compat import compat_http_server, compat_urllib_request
import ssl
@@ -16,15 +17,6 @@ import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
-def http_server_port(httpd):
- if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
- # In Jython SSLSocket is not a subclass of socket.socket
- sock = httpd.socket.sock
- else:
- sock = httpd.socket
- return sock.getsockname()[1]
-
-
class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8452125c8..e5f8136fc 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -606,6 +606,11 @@ class InfoExtractor(object):
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
if isinstance(err, compat_urllib_error.HTTPError):
if self.__can_accept_status_code(err, expected_status):
+ # Retain reference to error to prevent file object from
+ # being closed before it can be read. Works around the
+ # effects of <https://bugs.python.org/issue15002>
+ # introduced in Python 3.4.1.
+ err.fp._error = err
return err.fp
if errnote is False: