diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-14 17:13:53 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-12-14 17:13:53 -0800 |
| commit | 1ac0884d5474fea8dc6ceabbd0e870d1bf4b7b42 (patch) | |
| tree | 5158d582c0f5040a9c8a28fe9051881c5ec7b5eb /tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c | |
| parent | Merge tag 'docs-5.11' of git://git.lwn.net/linux (diff) | |
| parent | entry: Add syscall_exit_to_user_mode_work() (diff) | |
| download | linux-1ac0884d5474fea8dc6ceabbd0e870d1bf4b7b42.tar.gz linux-1ac0884d5474fea8dc6ceabbd0e870d1bf4b7b42.zip | |
Merge tag 'core-entry-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull core entry/exit updates from Thomas Gleixner:
"A set of updates for entry/exit handling:
- More generalization of entry/exit functionality
- The consolidation work to reclaim TIF flags on x86 and also for
non-x86 specific TIF flags which are solely relevant for syscall
related work and have been moved into their own storage space. The
x86 specific part had to be merged in to avoid a major conflict.
- The TIF_NOTIFY_SIGNAL work which replaces the inefficient signal
delivery mode of task work and results in an impressive performance
improvement for io_uring. The non-x86 consolidation of this is
going to come seperate via Jens.
- The selective syscall redirection facility which provides a clean
and efficient way to support the non-Linux syscalls of WINE by
catching them at syscall entry and redirecting them to the user
space emulation. This can be utilized for other purposes as well
and has been designed carefully to avoid overhead for the regular
fastpath. This includes the core changes and the x86 support code.
- Simplification of the context tracking entry/exit handling for the
users of the generic entry code which guarantee the proper ordering
and protection.
- Preparatory changes to make the generic entry code accomodate S390
specific requirements which are mostly related to their syscall
restart mechanism"
* tag 'core-entry-2020-12-14' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (36 commits)
entry: Add syscall_exit_to_user_mode_work()
entry: Add exit_to_user_mode() wrapper
entry_Add_enter_from_user_mode_wrapper
entry: Rename exit_to_user_mode()
entry: Rename enter_from_user_mode()
docs: Document Syscall User Dispatch
selftests: Add benchmark for syscall user dispatch
selftests: Add kselftest for syscall user dispatch
entry: Support Syscall User Dispatch on common syscall entry
kernel: Implement selective syscall userspace redirection
signal: Expose SYS_USER_DISPATCH si_code type
x86: vdso: Expose sigreturn address on vdso to the kernel
MAINTAINERS: Add entry for common entry code
entry: Fix boot for !CONFIG_GENERIC_ENTRY
x86: Support HAVE_CONTEXT_TRACKING_OFFSTACK
context_tracking: Only define schedule_user() on !HAVE_CONTEXT_TRACKING_OFFSTACK archs
sched: Detect call to schedule from critical entry code
context_tracking: Don't implement exception_enter/exit() on CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK
context_tracking: Introduce HAVE_CONTEXT_TRACKING_OFFSTACK
x86: Reclaim unused x86 TI flags
...
Diffstat (limited to 'tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c')
| -rw-r--r-- | tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c | 200 |
1 files changed, 200 insertions, 0 deletions
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c new file mode 100644 index 000000000000..6689f1183dbf --- /dev/null +++ b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020 Collabora Ltd. + * + * Benchmark and test syscall user dispatch + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <signal.h> +#include <errno.h> +#include <time.h> +#include <sys/time.h> +#include <unistd.h> +#include <sys/sysinfo.h> +#include <sys/prctl.h> +#include <sys/syscall.h> + +#ifndef PR_SET_SYSCALL_USER_DISPATCH +# define PR_SET_SYSCALL_USER_DISPATCH 59 +# define PR_SYS_DISPATCH_OFF 0 +# define PR_SYS_DISPATCH_ON 1 +#endif + +#ifdef __NR_syscalls +# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */ +#else +# define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */ +#endif + +/* + * To test returning from a sigsys with selector blocked, the test + * requires some per-architecture support (i.e. knowledge about the + * signal trampoline address). On i386, we know it is on the vdso, and + * a small trampoline is open-coded for x86_64. Other architectures + * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN + * out of the box, but don't enable them until they support syscall user + * dispatch. + */ +#if defined(__x86_64__) || defined(__i386__) +#define TEST_BLOCKED_RETURN +#endif + +#ifdef __x86_64__ +void* (syscall_dispatcher_start)(void); +void* (syscall_dispatcher_end)(void); +#else +unsigned long syscall_dispatcher_start = 0; +unsigned long syscall_dispatcher_end = 0; +#endif + +unsigned long trapped_call_count = 0; +unsigned long native_call_count = 0; + +char selector; +#define SYSCALL_BLOCK (selector = PR_SYS_DISPATCH_ON) +#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF) + +#define CALIBRATION_STEP 100000 +#define CALIBRATE_TO_SECS 5 +int factor; + +static double one_sysinfo_step(void) +{ + struct timespec t1, t2; + int i; + struct sysinfo info; + + clock_gettime(CLOCK_MONOTONIC, &t1); + for (i = 0; i < CALIBRATION_STEP; i++) + sysinfo(&info); + clock_gettime(CLOCK_MONOTONIC, &t2); + return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec); +} + +static void calibrate_set(void) +{ + double elapsed = 0; + + printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS); + + while (elapsed < 1) { + elapsed += one_sysinfo_step(); + factor += CALIBRATE_TO_SECS; + } + + printf("test iterations = %d\n", CALIBRATION_STEP * factor); +} + +static double perf_syscall(void) +{ + unsigned int i; + double partial = 0; + + for (i = 0; i < factor; ++i) + partial += one_sysinfo_step()/(CALIBRATION_STEP*factor); + return partial; +} + +static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) +{ + char buf[1024]; + int len; + + SYSCALL_UNBLOCK; + + /* printf and friends are not signal-safe. */ + len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall); + write(1, buf, len); + + if (info->si_syscall == MAGIC_SYSCALL_1) + trapped_call_count++; + else + native_call_count++; + +#ifdef TEST_BLOCKED_RETURN + SYSCALL_BLOCK; +#endif + +#ifdef __x86_64__ + __asm__ volatile("movq $0xf, %rax"); + __asm__ volatile("leaveq"); + __asm__ volatile("add $0x8, %rsp"); + __asm__ volatile("syscall_dispatcher_start:"); + __asm__ volatile("syscall"); + __asm__ volatile("nop"); /* Landing pad within dispatcher area */ + __asm__ volatile("syscall_dispatcher_end:"); +#endif + +} + +int main(void) +{ + struct sigaction act; + double time1, time2; + int ret; + sigset_t mask; + + memset(&act, 0, sizeof(act)); + sigemptyset(&mask); + + act.sa_sigaction = handle_sigsys; + act.sa_flags = SA_SIGINFO; + act.sa_mask = mask; + + calibrate_set(); + + time1 = perf_syscall(); + printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9); + + ret = sigaction(SIGSYS, &act, NULL); + if (ret) { + perror("Error sigaction:"); + exit(-1); + } + + fprintf(stderr, "Enabling syscall trapping.\n"); + + if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, + syscall_dispatcher_start, + (syscall_dispatcher_end - syscall_dispatcher_start + 1), + &selector)) { + perror("prctl failed\n"); + exit(-1); + } + + SYSCALL_BLOCK; + syscall(MAGIC_SYSCALL_1); + +#ifdef TEST_BLOCKED_RETURN + if (selector == PR_SYS_DISPATCH_OFF) { + fprintf(stderr, "Failed to return with selector blocked.\n"); + exit(-1); + } +#endif + + SYSCALL_UNBLOCK; + + if (!trapped_call_count) { + fprintf(stderr, "syscall trapping does not work.\n"); + exit(-1); + } + + time2 = perf_syscall(); + + if (native_call_count) { + perror("syscall trapping intercepted more syscalls than expected\n"); + exit(-1); + } + + printf("trapped_call_count %lu, native_call_count %lu.\n", + trapped_call_count, native_call_count); + printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9); + printf("Interception overhead: %.1lf%% (+%.0lfns).\n", + 100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1)); + return 0; + +} |
