From 86444f084b23c967d556039b22a67d80a72725ca Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 13 Sep 2016 17:04:52 +0200 Subject: cutils: Add SSE4 version Signed-off-by: Paolo Bonzini --- util/bufferiszero.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'util/bufferiszero.c') diff --git a/util/bufferiszero.c b/util/bufferiszero.c index 4af3caa1b6..bafd3d159c 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -113,6 +113,13 @@ ACCEL_BUFFER_ZERO(buffer_zero_sse2, 64, __m128i, SSE2_NONZERO) #endif #ifdef CONFIG_AVX2_OPT +#pragma GCC push_options +#pragma GCC target("sse4") +#include <smmintrin.h> +#define SSE4_NONZERO(X) !_mm_testz_si128((X), (X)) +ACCEL_BUFFER_ZERO(buffer_zero_sse4, 64, __m128i, SSE4_NONZERO) +#pragma GCC pop_options + #pragma GCC push_options #pragma GCC target("avx2") #include <immintrin.h> @@ -182,6 +189,9 @@ static bool select_accel_fn(const void *buf, size_t len) if (len % 128 == 0 && ibuf % 32 == 0 && (cpuid_cache & CACHE_AVX2)) { return buffer_zero_avx2(buf, len); } + if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE4)) { + return buffer_zero_sse4(buf, len); + } #endif if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE2)) { return buffer_zero_sse2(buf, len); -- cgit v1.2.3