aboutsummaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 17:13:53 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-14 17:13:53 -0800
commit1ac0884d5474fea8dc6ceabbd0e870d1bf4b7b42 (patch)
tree5158d582c0f5040a9c8a28fe9051881c5ec7b5eb /tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
parentMerge tag 'docs-5.11' of git://git.lwn.net/linux (diff)
parententry: Add syscall_exit_to_user_mode_work() (diff)
downloadlinux-1ac0884d5474fea8dc6ceabbd0e870d1bf4b7b42.tar.gz
linux-1ac0884d5474fea8dc6ceabbd0e870d1bf4b7b42.zip
Merge tag 'core-entry-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core entry/exit updates from Thomas Gleixner: "A set of updates for entry/exit handling: - More generalization of entry/exit functionality - The consolidation work to reclaim TIF flags on x86 and also for non-x86 specific TIF flags which are solely relevant for syscall related work and have been moved into their own storage space. The x86 specific part had to be merged in to avoid a major conflict. - The TIF_NOTIFY_SIGNAL work which replaces the inefficient signal delivery mode of task work and results in an impressive performance improvement for io_uring. The non-x86 consolidation of this is going to come seperate via Jens. - The selective syscall redirection facility which provides a clean and efficient way to support the non-Linux syscalls of WINE by catching them at syscall entry and redirecting them to the user space emulation. This can be utilized for other purposes as well and has been designed carefully to avoid overhead for the regular fastpath. This includes the core changes and the x86 support code. - Simplification of the context tracking entry/exit handling for the users of the generic entry code which guarantee the proper ordering and protection. - Preparatory changes to make the generic entry code accomodate S390 specific requirements which are mostly related to their syscall restart mechanism" * tag 'core-entry-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits) entry: Add syscall_exit_to_user_mode_work() entry: Add exit_to_user_mode() wrapper entry_Add_enter_from_user_mode_wrapper entry: Rename exit_to_user_mode() entry: Rename enter_from_user_mode() docs: Document Syscall User Dispatch selftests: Add benchmark for syscall user dispatch selftests: Add kselftest for syscall user dispatch entry: Support Syscall User Dispatch on common syscall entry kernel: Implement selective syscall userspace redirection signal: Expose SYS_USER_DISPATCH si_code type x86: vdso: Expose sigreturn address on vdso to the kernel MAINTAINERS: Add entry for common entry code entry: Fix boot for !CONFIG_GENERIC_ENTRY x86: Support HAVE_CONTEXT_TRACKING_OFFSTACK context_tracking: Only define schedule_user() on !HAVE_CONTEXT_TRACKING_OFFSTACK archs sched: Detect call to schedule from critical entry code context_tracking: Don't implement exception_enter/exit() on CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK context_tracking: Introduce HAVE_CONTEXT_TRACKING_OFFSTACK x86: Reclaim unused x86 TI flags ...
Diffstat (limited to 'tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c')
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c200
1 files changed, 200 insertions, 0 deletions
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
new file mode 100644
index 000000000000..6689f1183dbf
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020 Collabora Ltd.
+ *
+ * Benchmark and test syscall user dispatch
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/sysinfo.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+#ifndef PR_SET_SYSCALL_USER_DISPATCH
+# define PR_SET_SYSCALL_USER_DISPATCH 59
+# define PR_SYS_DISPATCH_OFF 0
+# define PR_SYS_DISPATCH_ON 1
+#endif
+
+#ifdef __NR_syscalls
+# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
+#else
+# define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */
+#endif
+
+/*
+ * To test returning from a sigsys with selector blocked, the test
+ * requires some per-architecture support (i.e. knowledge about the
+ * signal trampoline address). On i386, we know it is on the vdso, and
+ * a small trampoline is open-coded for x86_64. Other architectures
+ * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN
+ * out of the box, but don't enable them until they support syscall user
+ * dispatch.
+ */
+#if defined(__x86_64__) || defined(__i386__)
+#define TEST_BLOCKED_RETURN
+#endif
+
+#ifdef __x86_64__
+void* (syscall_dispatcher_start)(void);
+void* (syscall_dispatcher_end)(void);
+#else
+unsigned long syscall_dispatcher_start = 0;
+unsigned long syscall_dispatcher_end = 0;
+#endif
+
+unsigned long trapped_call_count = 0;
+unsigned long native_call_count = 0;
+
+char selector;
+#define SYSCALL_BLOCK (selector = PR_SYS_DISPATCH_ON)
+#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF)
+
+#define CALIBRATION_STEP 100000
+#define CALIBRATE_TO_SECS 5
+int factor;
+
+static double one_sysinfo_step(void)
+{
+ struct timespec t1, t2;
+ int i;
+ struct sysinfo info;
+
+ clock_gettime(CLOCK_MONOTONIC, &t1);
+ for (i = 0; i < CALIBRATION_STEP; i++)
+ sysinfo(&info);
+ clock_gettime(CLOCK_MONOTONIC, &t2);
+ return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec);
+}
+
+static void calibrate_set(void)
+{
+ double elapsed = 0;
+
+ printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS);
+
+ while (elapsed < 1) {
+ elapsed += one_sysinfo_step();
+ factor += CALIBRATE_TO_SECS;
+ }
+
+ printf("test iterations = %d\n", CALIBRATION_STEP * factor);
+}
+
+static double perf_syscall(void)
+{
+ unsigned int i;
+ double partial = 0;
+
+ for (i = 0; i < factor; ++i)
+ partial += one_sysinfo_step()/(CALIBRATION_STEP*factor);
+ return partial;
+}
+
+static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
+{
+ char buf[1024];
+ int len;
+
+ SYSCALL_UNBLOCK;
+
+ /* printf and friends are not signal-safe. */
+ len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall);
+ write(1, buf, len);
+
+ if (info->si_syscall == MAGIC_SYSCALL_1)
+ trapped_call_count++;
+ else
+ native_call_count++;
+
+#ifdef TEST_BLOCKED_RETURN
+ SYSCALL_BLOCK;
+#endif
+
+#ifdef __x86_64__
+ __asm__ volatile("movq $0xf, %rax");
+ __asm__ volatile("leaveq");
+ __asm__ volatile("add $0x8, %rsp");
+ __asm__ volatile("syscall_dispatcher_start:");
+ __asm__ volatile("syscall");
+ __asm__ volatile("nop"); /* Landing pad within dispatcher area */
+ __asm__ volatile("syscall_dispatcher_end:");
+#endif
+
+}
+
+int main(void)
+{
+ struct sigaction act;
+ double time1, time2;
+ int ret;
+ sigset_t mask;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&mask);
+
+ act.sa_sigaction = handle_sigsys;
+ act.sa_flags = SA_SIGINFO;
+ act.sa_mask = mask;
+
+ calibrate_set();
+
+ time1 = perf_syscall();
+ printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9);
+
+ ret = sigaction(SIGSYS, &act, NULL);
+ if (ret) {
+ perror("Error sigaction:");
+ exit(-1);
+ }
+
+ fprintf(stderr, "Enabling syscall trapping.\n");
+
+ if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
+ syscall_dispatcher_start,
+ (syscall_dispatcher_end - syscall_dispatcher_start + 1),
+ &selector)) {
+ perror("prctl failed\n");
+ exit(-1);
+ }
+
+ SYSCALL_BLOCK;
+ syscall(MAGIC_SYSCALL_1);
+
+#ifdef TEST_BLOCKED_RETURN
+ if (selector == PR_SYS_DISPATCH_OFF) {
+ fprintf(stderr, "Failed to return with selector blocked.\n");
+ exit(-1);
+ }
+#endif
+
+ SYSCALL_UNBLOCK;
+
+ if (!trapped_call_count) {
+ fprintf(stderr, "syscall trapping does not work.\n");
+ exit(-1);
+ }
+
+ time2 = perf_syscall();
+
+ if (native_call_count) {
+ perror("syscall trapping intercepted more syscalls than expected\n");
+ exit(-1);
+ }
+
+ printf("trapped_call_count %lu, native_call_count %lu.\n",
+ trapped_call_count, native_call_count);
+ printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9);
+ printf("Interception overhead: %.1lf%% (+%.0lfns).\n",
+ 100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1));
+ return 0;
+
+}