diff options
Diffstat (limited to 'test/test_networking.py')
-rw-r--r-- | test/test_networking.py | 434 |
1 files changed, 329 insertions, 105 deletions
diff --git a/test/test_networking.py b/test/test_networking.py index b67b521d9..b50f70d08 100644 --- a/test/test_networking.py +++ b/test/test_networking.py @@ -30,7 +30,7 @@ from http.cookiejar import CookieJar from test.conftest import validate_and_send from test.helper import FakeYDL, http_server_port, verify_address_availability from yt_dlp.cookies import YoutubeDLCookieJar -from yt_dlp.dependencies import brotli, requests, urllib3 +from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3 from yt_dlp.networking import ( HEADRequest, PUTRequest, @@ -57,7 +57,7 @@ from yt_dlp.networking.impersonate import ( ) from yt_dlp.utils import YoutubeDLError from yt_dlp.utils._utils import _YDLLogger as FakeLogger -from yt_dlp.utils.networking import HTTPHeaderDict +from yt_dlp.utils.networking import HTTPHeaderDict, std_headers TEST_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -79,6 +79,7 @@ def _build_proxy_handler(name): class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): protocol_version = 'HTTP/1.1' + default_request_version = 'HTTP/1.1' def log_message(self, format, *args): pass @@ -116,6 +117,8 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): def _read_data(self): if 'Content-Length' in self.headers: return self.rfile.read(int(self.headers['Content-Length'])) + else: + return b'' def do_POST(self): data = self._read_data() + str(self.headers).encode() @@ -199,7 +202,8 @@ class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler): self._headers() elif self.path.startswith('/308-to-headers'): self.send_response(308) - self.send_header('Location', '/headers') + # redirect to "localhost" for testing cookie redirection handling + self.send_header('Location', f'http://localhost:{self.connection.getsockname()[1]}/headers') self.send_header('Content-Length', '0') self.end_headers() elif self.path == '/trailing_garbage': @@ -314,7 +318,7 @@ class TestRequestHandlerBase: class TestHTTPRequestHandler(TestRequestHandlerBase): - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_verify_cert(self, handler): with handler() as rh: with pytest.raises(CertificateVerifyError): @@ -325,7 +329,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert r.status == 200 r.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_ssl_error(self, handler): # HTTPS server with too old TLS version # XXX: is there a better way to test this than to create a new server? @@ -339,11 +343,11 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): https_server_thread.start() with handler(verify=False) as rh: - with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info: + with pytest.raises(SSLError, match=r'(?i)ssl(?:v3|/tls).alert.handshake.failure') as exc_info: validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers')) assert not issubclass(exc_info.type, CertificateVerifyError) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_percent_encode(self, handler): with handler() as rh: # Unicode characters should be encoded with uppercase percent-encoding @@ -355,7 +359,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.status == 200 res.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) @pytest.mark.parametrize('path', [ '/a/b/./../../headers', '/redirect_dotsegments', @@ -371,6 +375,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.url == f'http://127.0.0.1:{self.http_port}/headers' res.close() + # Not supported by CurlCFFI (non-standard) @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) def test_unicode_path_redirection(self, handler): with handler() as rh: @@ -378,7 +383,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html' r.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_raise_http_error(self, handler): with handler() as rh: for bad_status in (400, 500, 599, 302): @@ -388,7 +393,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): # Should not raise an error validate_and_send(rh, Request('http://127.0.0.1:%d/gen_200' % self.http_port)).close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_response_url(self, handler): with handler() as rh: # Response url should be that of the last url in redirect chain @@ -399,62 +404,50 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200' res2.close() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) - def test_redirect(self, handler): + # Covers some basic cases we expect some level of consistency between request handlers for + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + @pytest.mark.parametrize('redirect_status,method,expected', [ + # A 303 must either use GET or HEAD for subsequent request + (303, 'POST', ('', 'GET', False)), + (303, 'HEAD', ('', 'HEAD', False)), + + # 301 and 302 turn POST only into a GET + (301, 'POST', ('', 'GET', False)), + (301, 'HEAD', ('', 'HEAD', False)), + (302, 'POST', ('', 'GET', False)), + (302, 'HEAD', ('', 'HEAD', False)), + + # 307 and 308 should not change method + (307, 'POST', ('testdata', 'POST', True)), + (308, 'POST', ('testdata', 'POST', True)), + (307, 'HEAD', ('', 'HEAD', False)), + (308, 'HEAD', ('', 'HEAD', False)), + ]) + def test_redirect(self, handler, redirect_status, method, expected): with handler() as rh: - def do_req(redirect_status, method, assert_no_content=False): - data = b'testdata' if method in ('POST', 'PUT') else None - res = validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data)) - - headers = b'' - data_sent = b'' - if data is not None: - data_sent += res.read(len(data)) - if data_sent != data: - headers += data_sent - data_sent = b'' - - headers += res.read() - - if assert_no_content or data is None: - assert b'Content-Type' not in headers - assert b'Content-Length' not in headers - else: - assert b'Content-Type' in headers - assert b'Content-Length' in headers - - return data_sent.decode(), res.headers.get('method', '') - - # A 303 must either use GET or HEAD for subsequent request - assert do_req(303, 'POST', True) == ('', 'GET') - assert do_req(303, 'HEAD') == ('', 'HEAD') - - assert do_req(303, 'PUT', True) == ('', 'GET') - - # 301 and 302 turn POST only into a GET - assert do_req(301, 'POST', True) == ('', 'GET') - assert do_req(301, 'HEAD') == ('', 'HEAD') - assert do_req(302, 'POST', True) == ('', 'GET') - assert do_req(302, 'HEAD') == ('', 'HEAD') - - assert do_req(301, 'PUT') == ('testdata', 'PUT') - assert do_req(302, 'PUT') == ('testdata', 'PUT') + data = b'testdata' if method == 'POST' else None + headers = {} + if data is not None: + headers['Content-Type'] = 'application/test' + res = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data, + headers=headers)) - # 307 and 308 should not change method - for m in ('POST', 'PUT'): - assert do_req(307, m) == ('testdata', m) - assert do_req(308, m) == ('testdata', m) + headers = b'' + data_recv = b'' + if data is not None: + data_recv += res.read(len(data)) + if data_recv != data: + headers += data_recv + data_recv = b'' - assert do_req(307, 'HEAD') == ('', 'HEAD') - assert do_req(308, 'HEAD') == ('', 'HEAD') + headers += res.read() - # These should not redirect and instead raise an HTTPError - for code in (300, 304, 305, 306): - with pytest.raises(HTTPError): - do_req(code, 'GET') + assert expected[0] == data_recv.decode() + assert expected[1] == res.headers.get('method') + assert expected[2] == ('content-length' in headers.decode().lower()) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_request_cookie_header(self, handler): # We should accept a Cookie header being passed as in normal headers and handle it appropriately. with handler() as rh: @@ -463,16 +456,17 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): rh, Request( f'http://127.0.0.1:{self.http_port}/headers', headers={'Cookie': 'test=test'})).read().decode() - assert 'Cookie: test=test' in res + assert 'cookie: test=test' in res.lower() # Specified Cookie header should be removed on any redirect res = validate_and_send( rh, Request( f'http://127.0.0.1:{self.http_port}/308-to-headers', - headers={'Cookie': 'test=test'})).read().decode() - assert 'Cookie: test=test' not in res + headers={'Cookie': 'test=test2'})).read().decode() + assert 'cookie: test=test2' not in res.lower() # Specified Cookie header should override global cookiejar for that request + # Whether cookies from the cookiejar is applied on the redirect is considered undefined for now cookiejar = YoutubeDLCookieJar() cookiejar.set_cookie(http.cookiejar.Cookie( version=0, name='test', value='ytdlp', port=None, port_specified=False, @@ -482,23 +476,23 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): with handler(cookiejar=cookiejar) as rh: data = validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test'})).read() - assert b'Cookie: test=ytdlp' not in data - assert b'Cookie: test=test' in data + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test3'})).read() + assert b'cookie: test=ytdlp' not in data.lower() + assert b'cookie: test=test3' in data.lower() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_redirect_loop(self, handler): with handler() as rh: with pytest.raises(HTTPError, match='redirect loop'): validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop')) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_incompleteread(self, handler): with handler(timeout=2) as rh: - with pytest.raises(IncompleteRead): + with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'): validate_and_send(rh, Request('http://127.0.0.1:%d/incompleteread' % self.http_port)).read() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_cookies(self, handler): cookiejar = YoutubeDLCookieJar() cookiejar.set_cookie(http.cookiejar.Cookie( @@ -507,47 +501,66 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): with handler(cookiejar=cookiejar) as rh: data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() - assert b'Cookie: test=ytdlp' in data + assert b'cookie: test=ytdlp' in data.lower() # Per request with handler() as rh: data = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read() - assert b'Cookie: test=ytdlp' in data + assert b'cookie: test=ytdlp' in data.lower() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_headers(self, handler): with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh: # Global Headers - data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() - assert b'Test1: test' in data + data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read().lower() + assert b'test1: test' in data # Per request headers, merged with global data = validate_and_send(rh, Request( - f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read() - assert b'Test1: test' in data - assert b'Test2: changed' in data - assert b'Test2: test2' not in data - assert b'Test3: test3' in data - - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) - def test_timeout(self, handler): + f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read().lower() + assert b'test1: test' in data + assert b'test2: changed' in data + assert b'test2: test2' not in data + assert b'test3: test3' in data + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + def test_read_timeout(self, handler): with handler() as rh: # Default timeout is 20 seconds, so this should go through validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_3')) + rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1')) - with handler(timeout=0.5) as rh: + with handler(timeout=0.1) as rh: with pytest.raises(TransportError): validate_and_send( - rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1')) + rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_5')) # Per request timeout, should override handler timeout validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4})) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) + def test_connect_timeout(self, handler): + # nothing should be listening on this port + connect_timeout_url = 'http://10.255.255.255' + with handler(timeout=0.01) as rh: + now = time.time() + with pytest.raises(TransportError): + validate_and_send( + rh, Request(connect_timeout_url)) + assert 0.01 <= time.time() - now < 20 + + with handler() as rh: + with pytest.raises(TransportError): + # Per request timeout, should override handler timeout + now = time.time() + validate_and_send( + rh, Request(connect_timeout_url, extensions={'timeout': 0.01})) + assert 0.01 <= time.time() - now < 20 + + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_source_address(self, handler): source_address = f'127.0.0.{random.randint(5, 255)}' # on some systems these loopback addresses we need for testing may not be available @@ -558,6 +571,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode() assert source_address == data + # Not supported by CurlCFFI @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) def test_gzip_trailing_garbage(self, handler): with handler() as rh: @@ -575,7 +589,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.headers.get('Content-Encoding') == 'br' assert res.read() == b'<html><video src="/vid.mp4" /></html>' - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_deflate(self, handler): with handler() as rh: res = validate_and_send( @@ -585,7 +599,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.headers.get('Content-Encoding') == 'deflate' assert res.read() == b'<html><video src="/vid.mp4" /></html>' - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_gzip(self, handler): with handler() as rh: res = validate_and_send( @@ -595,7 +609,7 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.headers.get('Content-Encoding') == 'gzip' assert res.read() == b'<html><video src="/vid.mp4" /></html>' - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_multiple_encodings(self, handler): with handler() as rh: for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'): @@ -606,17 +620,18 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.headers.get('Content-Encoding') == pair assert res.read() == b'<html><video src="/vid.mp4" /></html>' + # Not supported by curl_cffi @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) def test_unsupported_encoding(self, handler): with handler() as rh: res = validate_and_send( rh, Request( f'http://127.0.0.1:{self.http_port}/content-encoding', - headers={'ytdl-encoding': 'unsupported'})) + headers={'ytdl-encoding': 'unsupported', 'Accept-Encoding': '*'})) assert res.headers.get('Content-Encoding') == 'unsupported' assert res.read() == b'raw' - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_read(self, handler): with handler() as rh: res = validate_and_send( @@ -624,9 +639,12 @@ class TestHTTPRequestHandler(TestRequestHandlerBase): assert res.readable() assert res.read(1) == b'H' assert res.read(3) == b'ost' + assert res.read().decode().endswith('\n\n') + assert res.read() == b'' class TestHTTPProxy(TestRequestHandlerBase): + # Note: this only tests http urls over non-CONNECT proxy @classmethod def setup_class(cls): super().setup_class() @@ -646,7 +664,7 @@ class TestHTTPProxy(TestRequestHandlerBase): cls.geo_proxy_thread.daemon = True cls.geo_proxy_thread.start() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_http_proxy(self, handler): http_proxy = f'http://127.0.0.1:{self.proxy_port}' geo_proxy = f'http://127.0.0.1:{self.geo_port}' @@ -672,7 +690,7 @@ class TestHTTPProxy(TestRequestHandlerBase): assert res != f'normal: {real_url}' assert 'Accept' in res - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_noproxy(self, handler): with handler(proxies={'proxy': f'http://127.0.0.1:{self.proxy_port}'}) as rh: # NO_PROXY @@ -682,7 +700,7 @@ class TestHTTPProxy(TestRequestHandlerBase): 'utf-8') assert 'Accept' in nop_response - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_allproxy(self, handler): url = 'http://foo.com/bar' with handler() as rh: @@ -690,7 +708,7 @@ class TestHTTPProxy(TestRequestHandlerBase): 'utf-8') assert response == f'normal: {url}' - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_http_proxy_with_idn(self, handler): with handler(proxies={ 'http': f'http://127.0.0.1:{self.proxy_port}', @@ -702,7 +720,6 @@ class TestHTTPProxy(TestRequestHandlerBase): class TestClientCertificate: - @classmethod def setup_class(cls): certfn = os.path.join(TEST_DIR, 'testcert.pem') @@ -728,27 +745,27 @@ class TestClientCertificate: ) as rh: validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode() - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_certificate_combined_nopass(self, handler): self._run_test(handler, client_cert={ 'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'), }) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_certificate_nocombined_nopass(self, handler): self._run_test(handler, client_cert={ 'client_certificate': os.path.join(self.certdir, 'client.crt'), 'client_certificate_key': os.path.join(self.certdir, 'client.key'), }) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_certificate_combined_pass(self, handler): self._run_test(handler, client_cert={ 'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'), 'client_certificate_password': 'foobar', }) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_certificate_nocombined_pass(self, handler): self._run_test(handler, client_cert={ 'client_certificate': os.path.join(self.certdir, 'client.crt'), @@ -757,6 +774,18 @@ class TestClientCertificate: }) +@pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True) +class TestHTTPImpersonateRequestHandler(TestRequestHandlerBase): + def test_supported_impersonate_targets(self, handler): + with handler(headers=std_headers) as rh: + # note: this assumes the impersonate request handler supports the impersonate extension + for target in rh.supported_targets: + res = validate_and_send(rh, Request( + f'http://127.0.0.1:{self.http_port}/headers', extensions={'impersonate': target})) + assert res.status == 200 + assert std_headers['user-agent'].lower() not in res.read().decode().lower() + + class TestRequestHandlerMisc: """Misc generic tests for request handlers, not related to request or validation testing""" @pytest.mark.parametrize('handler,logger_name', [ @@ -935,6 +964,172 @@ class TestRequestsRequestHandler(TestRequestHandlerBase): assert called +@pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True) +class TestCurlCFFIRequestHandler(TestRequestHandlerBase): + + @pytest.mark.parametrize('params,extensions', [ + ({}, {'impersonate': ImpersonateTarget('chrome')}), + ({'impersonate': ImpersonateTarget('chrome', '110')}, {}), + ({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}), + ]) + def test_impersonate(self, handler, params, extensions): + with handler(headers=std_headers, **params) as rh: + res = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions=extensions)).read().decode() + assert 'sec-ch-ua: "Chromium";v="110"' in res + # Check that user agent is added over ours + assert 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36' in res + + def test_headers(self, handler): + with handler(headers=std_headers) as rh: + # Ensure curl-impersonate overrides our standard headers (usually added + res = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={ + 'impersonate': ImpersonateTarget('safari')}, headers={'x-custom': 'test', 'sec-fetch-mode': 'custom'})).read().decode().lower() + + assert std_headers['user-agent'].lower() not in res + assert std_headers['accept-language'].lower() not in res + assert std_headers['sec-fetch-mode'].lower() not in res + # other than UA, custom headers that differ from std_headers should be kept + assert 'sec-fetch-mode: custom' in res + assert 'x-custom: test' in res + # but when not impersonating don't remove std_headers + res = validate_and_send( + rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'x-custom': 'test'})).read().decode().lower() + # std_headers should be present + for k, v in std_headers.items(): + assert f'{k}: {v}'.lower() in res + + @pytest.mark.parametrize('raised,expected,match', [ + (lambda: curl_cffi.requests.errors.RequestsError( + '', code=curl_cffi.const.CurlECode.PARTIAL_FILE), IncompleteRead, None), + (lambda: curl_cffi.requests.errors.RequestsError( + '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None), + (lambda: curl_cffi.requests.errors.RequestsError( + '', code=curl_cffi.const.CurlECode.RECV_ERROR), TransportError, None), + ]) + def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match): + import curl_cffi.requests + + from yt_dlp.networking._curlcffi import CurlCFFIResponseAdapter + curl_res = curl_cffi.requests.Response() + res = CurlCFFIResponseAdapter(curl_res) + + def mock_read(*args, **kwargs): + try: + raise raised() + except Exception as e: + e.response = curl_res + raise + monkeypatch.setattr(res.fp, 'read', mock_read) + + with pytest.raises(expected, match=match) as exc_info: + res.read() + + assert exc_info.type is expected + + @pytest.mark.parametrize('raised,expected,match', [ + (lambda: curl_cffi.requests.errors.RequestsError( + '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None), + (lambda: curl_cffi.requests.errors.RequestsError( + '', code=curl_cffi.const.CurlECode.PEER_FAILED_VERIFICATION), CertificateVerifyError, None), + (lambda: curl_cffi.requests.errors.RequestsError( + '', code=curl_cffi.const.CurlECode.SSL_CONNECT_ERROR), SSLError, None), + (lambda: curl_cffi.requests.errors.RequestsError( + '', code=curl_cffi.const.CurlECode.TOO_MANY_REDIRECTS), HTTPError, None), + (lambda: curl_cffi.requests.errors.RequestsError( + '', code=curl_cffi.const.CurlECode.PROXY), ProxyError, None), + ]) + def test_request_error_mapping(self, handler, monkeypatch, raised, expected, match): + import curl_cffi.requests + curl_res = curl_cffi.requests.Response() + curl_res.status_code = 301 + + with handler() as rh: + original_get_instance = rh._get_instance + + def mock_get_instance(*args, **kwargs): + instance = original_get_instance(*args, **kwargs) + + def request(*_, **__): + try: + raise raised() + except Exception as e: + e.response = curl_res + raise + monkeypatch.setattr(instance, 'request', request) + return instance + + monkeypatch.setattr(rh, '_get_instance', mock_get_instance) + + with pytest.raises(expected) as exc_info: + rh.send(Request('http://fake')) + + assert exc_info.type is expected + + def test_response_reader(self, handler): + class FakeResponse: + def __init__(self, raise_error=False): + self.raise_error = raise_error + self.closed = False + + def iter_content(self): + yield b'foo' + yield b'bar' + yield b'z' + if self.raise_error: + raise Exception('test') + + def close(self): + self.closed = True + + from yt_dlp.networking._curlcffi import CurlCFFIResponseReader + + res = CurlCFFIResponseReader(FakeResponse()) + assert res.readable + assert res.bytes_read == 0 + assert res.read(1) == b'f' + assert res.bytes_read == 3 + assert res._buffer == b'oo' + + assert res.read(2) == b'oo' + assert res.bytes_read == 3 + assert res._buffer == b'' + + assert res.read(2) == b'ba' + assert res.bytes_read == 6 + assert res._buffer == b'r' + + assert res.read(3) == b'rz' + assert res.bytes_read == 7 + assert res._buffer == b'' + assert res.closed + assert res._response.closed + + # should handle no size param + res2 = CurlCFFIResponseReader(FakeResponse()) + assert res2.read() == b'foobarz' + assert res2.bytes_read == 7 + assert res2._buffer == b'' + assert res2.closed + + # should close on an exception + res3 = CurlCFFIResponseReader(FakeResponse(raise_error=True)) + with pytest.raises(Exception, match='test'): + res3.read() + assert res3._buffer == b'' + assert res3.bytes_read == 7 + assert res3.closed + + # buffer should be cleared on close + res4 = CurlCFFIResponseReader(FakeResponse()) + res4.read(2) + assert res4._buffer == b'o' + res4.close() + assert res4.closed + assert res4._buffer == b'' + + def run_validation(handler, error, req, **handler_kwargs): with handler(**handler_kwargs) as rh: if error: @@ -979,6 +1174,10 @@ class TestRequestHandlerValidation: ('ws', False, {}), ('wss', False, {}), ]), + ('CurlCFFI', [ + ('http', False, {}), + ('https', False, {}), + ]), (NoCheckRH, [('http', False, {})]), (ValidationRH, [('http', UnsupportedRequest, {})]) ] @@ -1002,6 +1201,14 @@ class TestRequestHandlerValidation: ('socks5', False), ('socks5h', False), ]), + ('CurlCFFI', 'http', [ + ('http', False), + ('https', False), + ('socks4', False), + ('socks4a', False), + ('socks5', False), + ('socks5h', False), + ]), (NoCheckRH, 'http', [('http', False)]), (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]), ('Websockets', 'ws', [('http', UnsupportedRequest)]), @@ -1019,6 +1226,10 @@ class TestRequestHandlerValidation: ('all', False), ('unrelated', False), ]), + ('CurlCFFI', [ + ('all', False), + ('unrelated', False), + ]), (NoCheckRH, [('all', False)]), (HTTPSupportedRH, [('all', UnsupportedRequest)]), (HTTPSupportedRH, [('no', UnsupportedRequest)]), @@ -1040,6 +1251,19 @@ class TestRequestHandlerValidation: ({'timeout': 'notatimeout'}, AssertionError), ({'unsupported': 'value'}, UnsupportedRequest), ]), + ('CurlCFFI', 'http', [ + ({'cookiejar': 'notacookiejar'}, AssertionError), + ({'cookiejar': YoutubeDLCookieJar()}, False), + ({'timeout': 1}, False), + ({'timeout': 'notatimeout'}, AssertionError), + ({'unsupported': 'value'}, UnsupportedRequest), + ({'impersonate': ImpersonateTarget('badtarget', None, None, None)}, UnsupportedRequest), + ({'impersonate': 123}, AssertionError), + ({'impersonate': ImpersonateTarget('chrome', None, None, None)}, False), + ({'impersonate': ImpersonateTarget(None, None, None, None)}, False), + ({'impersonate': ImpersonateTarget()}, False), + ({'impersonate': 'chrome'}, AssertionError) + ]), (NoCheckRH, 'http', [ ({'cookiejar': 'notacookiejar'}, False), ({'somerandom': 'test'}, False), # but any extension is allowed through @@ -1059,7 +1283,7 @@ class TestRequestHandlerValidation: def test_url_scheme(self, handler, scheme, fail, handler_kwargs): run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {})) - @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False)], indirect=['handler']) + @pytest.mark.parametrize('handler,fail', [('Urllib', False), ('Requests', False), ('CurlCFFI', False)], indirect=['handler']) def test_no_proxy(self, handler, fail): run_validation(handler, fail, Request('http://', proxies={'no': '127.0.0.1,github.com'})) run_validation(handler, fail, Request('http://'), proxies={'no': '127.0.0.1,github.com'}) @@ -1082,13 +1306,13 @@ class TestRequestHandlerValidation: run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'})) run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'}) - @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', HTTPSupportedRH, 'Requests', 'CurlCFFI'], indirect=True) def test_empty_proxy(self, handler): run_validation(handler, False, Request('http://', proxies={'http': None})) run_validation(handler, False, Request('http://'), proxies={'http': None}) @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c']) - @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True) + @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) def test_invalid_proxy_url(self, handler, proxy_url): run_validation(handler, UnsupportedRequest, Request('http://', proxies={'http': proxy_url})) |