aboutsummaryrefslogtreecommitdiffstats
path: root/tests/wc
diff options
context:
space:
mode:
author: Mathieu Bordere <mathieu@letmetweakit.com>, 2025-09-24 12:41:06 +0200
committer: Pádraig Brady <P@draigBrady.com>, 2025-09-30 14:09:37 +0100
commit: 67e9068c5f5fdae5666279717a4c19bdfe5c21de (patch)
tree: 3433b931659ca653d12b7d137e9f76cfdb454da2 /tests/wc
parent: maint: update valgrind instructions (diff)
download: coreutils-67e9068c5f5fdae5666279717a4c19bdfe5c21de.tar.gz
download: coreutils-67e9068c5f5fdae5666279717a4c19bdfe5c21de.zip
wc: add AVX512 function for line counting
* configure.ac: Add detection of AVX512 intrinsics for wc.
* src/local.mk: Build AVX512 wc libraries.
* src/wc.c: Add runtime detection of AVX512 intrinsics and call the appropriate function when detected.
* src/wc.h (wc_lines_avx512): Declare function.
* tests/wc/wc-cpu.sh: Add a test that disables AVX512 intrinsics.
* src/wc_avx512.c: New file containing the wc -l implementation using AVX512. The logic and code are reused from the AVX2 implementation with slight adaptations: __builtin_popcount is replaced by __builtin_popcountll, and the combination of _mm256_cmpeq_epi8 and _mm256_movemask_epi8 is replaced by a single call to _mm512_cmpeq_epi8_mask.
* NEWS: Mention the improvement.
Diffstat (limited to 'tests/wc')
-rwxr-xr-x  tests/wc/wc-cpu.sh  12
1 files changed, 10 insertions, 2 deletions
diff --git a/tests/wc/wc-cpu.sh b/tests/wc/wc-cpu.sh
index 1118fe14e..6ad4f5f9c 100755
--- a/tests/wc/wc-cpu.sh
+++ b/tests/wc/wc-cpu.sh
@@ -19,7 +19,7 @@
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
print_ver_ wc
-GLIBC_TUNABLES='glibc.cpu.hwcaps=-AVX2' \
+GLIBC_TUNABLES='glibc.cpu.hwcaps=-AVX2,-AVX512F' \
wc -l --debug /dev/null 2>debug || fail=1
grep 'using.*hardware support' debug && fail=1
@@ -27,8 +27,16 @@ lines=$(shuf -i 0-1000 | head -n1) || framework_failure_
seq 1000 | head -n "$lines" > lines || framework_failure_
wc_accelerated=$(wc -l < lines) || fail=1
-wc_base=$(GLIBC_TUNABLES='glibc.cpu.hwcaps=-AVX2' wc -l < lines) || fail=1
+wc_accelerated_no_avx512=$(
+ GLIBC_TUNABLES='glibc.cpu.hwcaps=-AVX512F' \
+ wc -l < lines
+ ) || fail=1
+wc_base=$(
+ GLIBC_TUNABLES='glibc.cpu.hwcaps=-AVX2,-AVX512F' \
+ wc -l < lines
+ ) || fail=1
test "$wc_accelerated" = "$wc_base" || fail=1
+test "$wc_accelerated_no_avx512" = "$wc_base" || fail=1
Exit $fail