aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h16
-rw-r--r--tools/iio/iio_event_monitor.c1
-rw-r--r--tools/include/uapi/linux/kvm.h3
-rw-r--r--tools/lib/traceevent/event-parse.c59
-rw-r--r--tools/lib/traceevent/event-parse.h5
-rw-r--r--tools/lib/traceevent/parse-filter.c5
-rw-r--r--tools/objtool/check.c8
-rw-r--r--tools/perf/builtin-trace.c2
-rw-r--r--tools/perf/util/data-convert-bt.c2
-rw-r--r--tools/perf/util/evsel.c2
-rw-r--r--tools/perf/util/python.c2
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c2
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c2
-rw-r--r--tools/perf/util/sort.c2
-rw-r--r--tools/testing/selftests/Makefile3
-rw-r--r--tools/testing/selftests/alsa/.gitignore1
-rw-r--r--tools/testing/selftests/alsa/Makefile9
-rw-r--r--tools/testing/selftests/alsa/mixer-test.c705
-rw-r--r--tools/testing/selftests/kvm/.gitignore1
-rw-r--r--tools/testing/selftests/kvm/Makefile16
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c2
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c50
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_irq.c853
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic.h26
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic_v3.h (renamed from tools/testing/selftests/kvm/lib/aarch64/gic_v3.h)12
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h3
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/vgic.h18
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h409
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_base.h399
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h135
-rw-r--r--tools/testing/selftests/kvm/include/ucall_common.h59
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h26
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic.c66
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_private.h11
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_v3.c206
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c82
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/vgic.c103
-rw-r--r--tools/testing/selftests/kvm/lib/guest_modes.c59
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c126
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c362
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/ucall.c87
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c95
-rw-r--r--tools/testing/selftests/kvm/x86_64/amx_test.c448
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c59
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c184
-rwxr-xr-xtools/testing/selftests/lkdtm/stack-entropy.sh16
-rwxr-xr-xtools/testing/selftests/powerpc/security/mitigation-patching.sh5
-rw-r--r--tools/testing/selftests/powerpc/security/spectre_v2.c2
-rw-r--r--tools/testing/selftests/powerpc/signal/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/signal/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/signal/sigreturn_kernel.c132
-rw-r--r--tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c43
-rw-r--r--tools/testing/selftests/vm/charge_reserved_hugetlb.sh34
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c42
-rw-r--r--tools/testing/selftests/vm/hugepage-mremap.c46
-rw-r--r--tools/testing/selftests/vm/hugetlb_reparenting_test.sh21
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh2
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c31
-rw-r--r--tools/testing/selftests/vm/write_hugetlb_memory.sh2
-rw-r--r--tools/tracing/rtla/Makefile102
-rw-r--r--tools/tracing/rtla/README.txt36
-rw-r--r--tools/tracing/rtla/src/osnoise.c875
-rw-r--r--tools/tracing/rtla/src/osnoise.h91
-rw-r--r--tools/tracing/rtla/src/osnoise_hist.c801
-rw-r--r--tools/tracing/rtla/src/osnoise_top.c579
-rw-r--r--tools/tracing/rtla/src/rtla.c87
-rw-r--r--tools/tracing/rtla/src/timerlat.c72
-rw-r--r--tools/tracing/rtla/src/timerlat.h4
-rw-r--r--tools/tracing/rtla/src/timerlat_hist.c822
-rw-r--r--tools/tracing/rtla/src/timerlat_top.c618
-rw-r--r--tools/tracing/rtla/src/trace.c192
-rw-r--r--tools/tracing/rtla/src/trace.h27
-rw-r--r--tools/tracing/rtla/src/utils.c433
-rw-r--r--tools/tracing/rtla/src/utils.h56
78 files changed, 9334 insertions, 575 deletions
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 5a776a08f78c..2da3316bb559 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -373,9 +373,23 @@ struct kvm_debugregs {
__u64 reserved[9];
};
-/* for KVM_CAP_XSAVE */
+/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */
struct kvm_xsave {
+ /*
+ * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
+ * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * respectively, when invoked on the vm file descriptor.
+ *
+ * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
+ * will always be at least 4096. Currently, it is only greater
+ * than 4096 if a dynamic feature has been enabled with
+ * ``arch_prctl()``, but this may change in the future.
+ *
+ * The offsets of the state save areas in struct kvm_xsave follow
+ * the contents of CPUID leaf 0xD on the host.
+ */
__u32 region[1024];
+ __u32 extra[0];
};
#define KVM_MAX_XCRS 16
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index 0076437f6e3f..b94a16ba5c6c 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -279,6 +279,7 @@ static void print_event(struct iio_event_data *event)
printf(", direction: %s", iio_ev_dir_text[dir]);
printf("\n");
+ fflush(stdout);
}
/* Enable or disable events in sysfs if the knob is available */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 1daa45268de2..f066637ee206 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1131,6 +1131,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
+#define KVM_CAP_XSAVE2 207
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1551,6 +1552,8 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_XSAVE */
#define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave)
#define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct kvm_xsave)
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave)
/* Available with KVM_CAP_XCRS */
#define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs)
#define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs)
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index fe58843d047c..8e24c4c78c7f 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1367,6 +1367,14 @@ static int field_is_dynamic(struct tep_format_field *field)
return 0;
}
+static int field_is_relative_dynamic(struct tep_format_field *field)
+{
+ if (strncmp(field->type, "__rel_loc", 9) == 0)
+ return 1;
+
+ return 0;
+}
+
static int field_is_long(struct tep_format_field *field)
{
/* includes long long */
@@ -1622,6 +1630,8 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **
field->flags |= TEP_FIELD_IS_STRING;
if (field_is_dynamic(field))
field->flags |= TEP_FIELD_IS_DYNAMIC;
+ if (field_is_relative_dynamic(field))
+ field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE;
if (field_is_long(field))
field->flags |= TEP_FIELD_IS_LONG;
@@ -2928,7 +2938,7 @@ process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
arg->type = TEP_PRINT_STRING;
arg->string.string = token;
- arg->string.offset = -1;
+ arg->string.field = NULL;
if (read_expected(TEP_EVENT_DELIM, ")") < 0)
goto out_err;
@@ -2957,7 +2967,7 @@ process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar
arg->type = TEP_PRINT_BITMASK;
arg->bitmask.bitmask = token;
- arg->bitmask.offset = -1;
+ arg->bitmask.field = NULL;
if (read_expected(TEP_EVENT_DELIM, ")") < 0)
goto out_err;
@@ -3123,19 +3133,23 @@ process_function(struct tep_event *event, struct tep_print_arg *arg,
free_token(token);
return process_int_array(event, arg, tok);
}
- if (strcmp(token, "__get_str") == 0) {
+ if (strcmp(token, "__get_str") == 0 ||
+ strcmp(token, "__get_rel_str") == 0) {
free_token(token);
return process_str(event, arg, tok);
}
- if (strcmp(token, "__get_bitmask") == 0) {
+ if (strcmp(token, "__get_bitmask") == 0 ||
+ strcmp(token, "__get_rel_bitmask") == 0) {
free_token(token);
return process_bitmask(event, arg, tok);
}
- if (strcmp(token, "__get_dynamic_array") == 0) {
+ if (strcmp(token, "__get_dynamic_array") == 0 ||
+ strcmp(token, "__get_rel_dynamic_array") == 0) {
free_token(token);
return process_dynamic_array(event, arg, tok);
}
- if (strcmp(token, "__get_dynamic_array_len") == 0) {
+ if (strcmp(token, "__get_dynamic_array_len") == 0 ||
+ strcmp(token, "__get_rel_dynamic_array_len") == 0) {
free_token(token);
return process_dynamic_array_len(event, arg, tok);
}
@@ -4163,14 +4177,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
case TEP_PRINT_STRING: {
int str_offset;
- if (arg->string.offset == -1) {
- struct tep_format_field *f;
+ if (!arg->string.field)
+ arg->string.field = tep_find_any_field(event, arg->string.string);
+ if (!arg->string.field)
+ break;
- f = tep_find_any_field(event, arg->string.string);
- arg->string.offset = f->offset;
- }
- str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset));
+ str_offset = data2host4(tep,
+ *(unsigned int *)(data + arg->string.field->offset));
str_offset &= 0xffff;
+ if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE)
+ str_offset += arg->string.field->offset + arg->string.field->size;
print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
break;
}
@@ -4181,15 +4197,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
int bitmask_offset;
int bitmask_size;
- if (arg->bitmask.offset == -1) {
- struct tep_format_field *f;
-
- f = tep_find_any_field(event, arg->bitmask.bitmask);
- arg->bitmask.offset = f->offset;
- }
- bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset));
+ if (!arg->bitmask.field)
+ arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask);
+ if (!arg->bitmask.field)
+ break;
+ bitmask_offset = data2host4(tep,
+ *(unsigned int *)(data + arg->bitmask.field->offset));
bitmask_size = bitmask_offset >> 16;
bitmask_offset &= 0xffff;
+ if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE)
+ bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size;
print_bitmask_to_seq(tep, s, format, len_arg,
data + bitmask_offset, bitmask_size);
break;
@@ -5109,6 +5126,8 @@ void tep_print_field(struct trace_seq *s, void *data,
offset = val;
len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
@@ -6987,6 +7006,8 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,
data + offset, field->size);
*len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
} else
*len = field->size;
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index a67ad9a5b835..41d4f9f6a843 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -125,6 +125,7 @@ enum tep_format_flags {
TEP_FIELD_IS_LONG = 32,
TEP_FIELD_IS_FLAG = 64,
TEP_FIELD_IS_SYMBOLIC = 128,
+ TEP_FIELD_IS_RELATIVE = 256,
};
struct tep_format_field {
@@ -153,12 +154,12 @@ struct tep_print_arg_atom {
struct tep_print_arg_string {
char *string;
- int offset;
+ struct tep_format_field *field;
};
struct tep_print_arg_bitmask {
char *bitmask;
- int offset;
+ struct tep_format_field *field;
};
struct tep_print_arg_field {
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 368826bb5a57..5df177070d53 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1712,8 +1712,11 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *
if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) {
addr = *(unsigned int *)val;
- val = record->data + (addr & 0xffff);
size = addr >> 16;
+ addr &= 0xffff;
+ if (arg->str.field->flags & TEP_FIELD_IS_RELATIVE)
+ addr += arg->str.field->offset + arg->str.field->size;
+ val = record->data + addr;
}
/*
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 8c1931eab5f1..c2d2ab9a2861 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -168,14 +168,16 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"panic",
"do_exit",
"do_task_dead",
- "__module_put_and_exit",
- "complete_and_exit",
+ "kthread_exit",
+ "make_task_dead",
+ "__module_put_and_kthread_exit",
+ "kthread_complete_and_exit",
"__reiserfs_panic",
"lbug_with_loc",
"fortify_panic",
"usercopy_abort",
"machine_real_restart",
- "rewind_stack_do_exit",
+ "rewind_stack_and_make_dead",
"kunit_try_catch_throw",
"xen_start_kernel",
"cpu_bringup_and_idle",
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 46bca1f9ab9e..32844d8a0ea5 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2726,6 +2726,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
offset = format_field__intval(field, sample, evsel->needs_swap);
syscall_arg.len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
val = (uintptr_t)(sample->raw_data + offset);
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 8f7705bbc2da..9e0aee276df8 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -318,6 +318,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
offset = tmp_val;
len = offset >> 16;
offset &= 0xffff;
+ if (flags & TEP_FIELD_IS_RELATIVE)
+ offset += fmtf->offset + fmtf->size;
}
if (flags & TEP_FIELD_IS_ARRAY) {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7660e0bf3b50..2f6b18af49e5 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2688,6 +2688,8 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char
if (field->flags & TEP_FIELD_IS_DYNAMIC) {
offset = *(int *)(sample->raw_data + field->offset);
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
return sample->raw_data + offset;
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 95fb53899bcd..f3e5131f183c 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -428,6 +428,8 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
offset = val;
len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 32a721b3e9a5..a5d945415bbc 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -392,6 +392,8 @@ static void perl_process_tracepoint(struct perf_sample *sample,
if (field->flags & TEP_FIELD_IS_DYNAMIC) {
offset = *(int *)(data + field->offset);
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
} else
offset = field->offset;
XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index bd95d60018a9..f5ad0e62227a 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -944,6 +944,8 @@ static void python_process_tracepoint(struct perf_sample *sample,
offset = val;
len = offset >> 16;
offset &= 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
}
if (field->flags & TEP_FIELD_IS_STRING &&
is_printable_array(data + offset, len)) {
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index e417e47f51b9..cfba8c337783 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2381,6 +2381,8 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
tep_read_number_field(field, a->raw_data, &dyn);
offset = dyn & 0xffff;
size = (dyn >> 16) & 0xffff;
+ if (field->flags & TEP_FIELD_IS_RELATIVE)
+ offset += field->offset + field->size;
/* record max width for output */
if (size > hde->dynamic_len)
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c852eb40c4f7..d08fe4cfe811 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-TARGETS = arm64
+TARGETS += alsa
+TARGETS += arm64
TARGETS += bpf
TARGETS += breakpoints
TARGETS += capabilities
diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore
new file mode 100644
index 000000000000..3bb7c41266a8
--- /dev/null
+++ b/tools/testing/selftests/alsa/.gitignore
@@ -0,0 +1 @@
+mixer-test
diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile
new file mode 100644
index 000000000000..f64d9090426d
--- /dev/null
+++ b/tools/testing/selftests/alsa/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+
+CFLAGS += $(shell pkg-config --cflags alsa)
+LDLIBS += $(shell pkg-config --libs alsa)
+
+TEST_GEN_PROGS := mixer-test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
new file mode 100644
index 000000000000..17f158d7a767
--- /dev/null
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -0,0 +1,705 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// kselftest for the ALSA mixer API
+//
+// Original author: Mark Brown <broonie@kernel.org>
+// Copyright (c) 2021 Arm Limited
+
+// This test will iterate over all cards detected in the system, exercising
+// every mixer control it can find. This may conflict with other system
+// software if there is audio activity so is best run on a system with a
+// minimal active userspace.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <math.h>
+#include <errno.h>
+#include <assert.h>
+#include <alsa/asoundlib.h>
+#include <poll.h>
+#include <stdint.h>
+
+#include "../kselftest.h"
+
+#define TESTS_PER_CONTROL 3
+
+struct card_data {
+ snd_ctl_t *handle;
+ int card;
+ int num_ctls;
+ snd_ctl_elem_list_t *ctls;
+ struct card_data *next;
+};
+
+struct ctl_data {
+ const char *name;
+ snd_ctl_elem_id_t *id;
+ snd_ctl_elem_info_t *info;
+ snd_ctl_elem_value_t *def_val;
+ int elem;
+ struct card_data *card;
+ struct ctl_data *next;
+};
+
+static const char *alsa_config =
+"ctl.hw {\n"
+" @args [ CARD ]\n"
+" @args.CARD.type string\n"
+" type hw\n"
+" card $CARD\n"
+"}\n"
+;
+
+int num_cards = 0;
+int num_controls = 0;
+struct card_data *card_list = NULL;
+struct ctl_data *ctl_list = NULL;
+
+#ifdef SND_LIB_VER
+#if SND_LIB_VERSION >= SND_LIB_VER(1, 2, 6)
+#define LIB_HAS_LOAD_STRING
+#endif
+#endif
+
+#ifndef LIB_HAS_LOAD_STRING
+int snd_config_load_string(snd_config_t **config, const char *s, size_t size)
+{
+ snd_input_t *input;
+ snd_config_t *dst;
+ int err;
+
+ assert(config && s);
+ if (size == 0)
+ size = strlen(s);
+ err = snd_input_buffer_open(&input, s, size);
+ if (err < 0)
+ return err;
+ err = snd_config_top(&dst);
+ if (err < 0) {
+ snd_input_close(input);
+ return err;
+ }
+ err = snd_config_load(dst, input);
+ snd_input_close(input);
+ if (err < 0) {
+ snd_config_delete(dst);
+ return err;
+ }
+ *config = dst;
+ return 0;
+}
+#endif
+
+void find_controls(void)
+{
+ char name[32];
+ int card, ctl, err;
+ struct card_data *card_data;
+ struct ctl_data *ctl_data;
+ snd_config_t *config;
+
+ card = -1;
+ if (snd_card_next(&card) < 0 || card < 0)
+ return;
+
+ err = snd_config_load_string(&config, alsa_config, strlen(alsa_config));
+ if (err < 0) {
+ ksft_print_msg("Unable to parse custom alsa-lib configuration: %s\n",
+ snd_strerror(err));
+ ksft_exit_fail();
+ }
+
+ while (card >= 0) {
+ sprintf(name, "hw:%d", card);
+
+ card_data = malloc(sizeof(*card_data));
+ if (!card_data)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_open_lconf(&card_data->handle, name, 0, config);
+ if (err < 0) {
+ ksft_print_msg("Failed to get hctl for card %d: %s\n",
+ card, snd_strerror(err));
+ goto next_card;
+ }
+
+ /* Count controls */
+ snd_ctl_elem_list_malloc(&card_data->ctls);
+ snd_ctl_elem_list(card_data->handle, card_data->ctls);
+ card_data->num_ctls = snd_ctl_elem_list_get_count(card_data->ctls);
+
+ /* Enumerate control information */
+ snd_ctl_elem_list_alloc_space(card_data->ctls, card_data->num_ctls);
+ snd_ctl_elem_list(card_data->handle, card_data->ctls);
+
+ card_data->card = num_cards++;
+ card_data->next = card_list;
+ card_list = card_data;
+
+ num_controls += card_data->num_ctls;
+
+ for (ctl = 0; ctl < card_data->num_ctls; ctl++) {
+ ctl_data = malloc(sizeof(*ctl_data));
+ if (!ctl_data)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ ctl_data->card = card_data;
+ ctl_data->elem = ctl;
+ ctl_data->name = snd_ctl_elem_list_get_name(card_data->ctls,
+ ctl);
+
+ err = snd_ctl_elem_id_malloc(&ctl_data->id);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_elem_info_malloc(&ctl_data->info);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_elem_value_malloc(&ctl_data->def_val);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ snd_ctl_elem_list_get_id(card_data->ctls, ctl,
+ ctl_data->id);
+ snd_ctl_elem_info_set_id(ctl_data->info, ctl_data->id);
+ err = snd_ctl_elem_info(card_data->handle,
+ ctl_data->info);
+ if (err < 0) {
+ ksft_print_msg("%s getting info for %d\n",
+ snd_strerror(err),
+ ctl_data->name);
+ }
+
+ snd_ctl_elem_value_set_id(ctl_data->def_val,
+ ctl_data->id);
+
+ ctl_data->next = ctl_list;
+ ctl_list = ctl_data;
+ }
+
+ next_card:
+ if (snd_card_next(&card) < 0) {
+ ksft_print_msg("snd_card_next");
+ break;
+ }
+ }
+
+ snd_config_delete(config);
+}
+
+bool ctl_value_index_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val,
+ int index)
+{
+ long int_val;
+ long long int64_val;
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_NONE:
+ ksft_print_msg("%s.%d Invalid control type NONE\n",
+ ctl->name, index);
+ return false;
+
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ int_val = snd_ctl_elem_value_get_boolean(val, index);
+ switch (int_val) {
+ case 0:
+ case 1:
+ break;
+ default:
+ ksft_print_msg("%s.%d Invalid boolean value %ld\n",
+ ctl->name, index, int_val);
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ int_val = snd_ctl_elem_value_get_integer(val, index);
+
+ if (int_val < snd_ctl_elem_info_get_min(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld less than minimum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_min(ctl->info));
+ return false;
+ }
+
+ if (int_val > snd_ctl_elem_info_get_max(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld more than maximum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_max(ctl->info));
+ return false;
+ }
+
+ /* Only check step size if there is one and we're in bounds */
+ if (snd_ctl_elem_info_get_step(ctl->info) &&
+ (int_val - snd_ctl_elem_info_get_min(ctl->info) %
+ snd_ctl_elem_info_get_step(ctl->info))) {
+ ksft_print_msg("%s.%d value %ld invalid for step %ld minimum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_step(ctl->info),
+ snd_ctl_elem_info_get_min(ctl->info));
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ int64_val = snd_ctl_elem_value_get_integer64(val, index);
+
+ if (int64_val < snd_ctl_elem_info_get_min64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld less than minimum %lld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_min64(ctl->info));
+ return false;
+ }
+
+ if (int64_val > snd_ctl_elem_info_get_max64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld more than maximum %lld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_max(ctl->info));
+ return false;
+ }
+
+ /* Only check step size if there is one and we're in bounds */
+ if (snd_ctl_elem_info_get_step64(ctl->info) &&
+ (int64_val - snd_ctl_elem_info_get_min64(ctl->info)) %
+ snd_ctl_elem_info_get_step64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld invalid for step %lld minimum %lld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_step64(ctl->info),
+ snd_ctl_elem_info_get_min64(ctl->info));
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ int_val = snd_ctl_elem_value_get_enumerated(val, index);
+
+ if (int_val < 0) {
+ ksft_print_msg("%s.%d negative value %ld for enumeration\n",
+ ctl->name, index, int_val);
+ return false;
+ }
+
+ if (int_val >= snd_ctl_elem_info_get_items(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld more than item count %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_items(ctl->info));
+ return false;
+ }
+ break;
+
+ default:
+ /* No tests for other types */
+ break;
+ }
+
+ return true;
+}
+
+/*
+ * Check that the provided value meets the constraints for the
+ * provided control.
+ */
+bool ctl_value_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val)
+{
+ int i;
+ bool valid = true;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++)
+ if (!ctl_value_index_valid(ctl, val, i))
+ valid = false;
+
+ return valid;
+}
+
+/*
+ * Check that we can read the default value and it is valid. Write
+ * tests use the read value to restore the default.
+ */
+void test_ctl_get_value(struct ctl_data *ctl)
+{
+ int err;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("get_value.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ /* Can't test reading on an unreadable control */
+ if (!snd_ctl_elem_info_is_readable(ctl->info)) {
+ ksft_print_msg("%s is not readable\n", ctl->name);
+ ksft_test_result_skip("get_value.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ err = snd_ctl_elem_read(ctl->card->handle, ctl->def_val);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_elem_read() failed: %s\n",
+ snd_strerror(err));
+ goto out;
+ }
+
+ if (!ctl_value_valid(ctl, ctl->def_val))
+ err = -EINVAL;
+
+out:
+ ksft_test_result(err >= 0, "get_value.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+bool show_mismatch(struct ctl_data *ctl, int index,
+ snd_ctl_elem_value_t *read_val,
+ snd_ctl_elem_value_t *expected_val)
+{
+ long long expected_int, read_int;
+
+ /*
+ * We factor out the code to compare values representable as
+ * integers, ensure that check doesn't log otherwise.
+ */
+ expected_int = 0;
+ read_int = 0;
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ expected_int = snd_ctl_elem_value_get_boolean(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_boolean(read_val, index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ expected_int = snd_ctl_elem_value_get_integer(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_integer(read_val, index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ expected_int = snd_ctl_elem_value_get_integer64(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_integer64(read_val,
+ index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ expected_int = snd_ctl_elem_value_get_enumerated(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_enumerated(read_val,
+ index);
+ break;
+
+ default:
+ break;
+ }
+
+ if (expected_int != read_int) {
+ /*
+ * NOTE: The volatile attribute means that the hardware
+ * can voluntarily change the state of control element
+ * independent of any operation by software.
+ */
+ bool is_volatile = snd_ctl_elem_info_is_volatile(ctl->info);
+ ksft_print_msg("%s.%d expected %lld but read %lld, is_volatile %d\n",
+ ctl->name, index, expected_int, read_int, is_volatile);
+ return !is_volatile;
+ } else {
+ return false;
+ }
+}
+
+/*
+ * Write a value then if possible verify that we get the expected
+ * result. An optional expected value can be provided if we expect
+ * the write to fail, for verifying that invalid writes don't corrupt
+ * anything.
+ */
+int write_and_verify(struct ctl_data *ctl,
+ snd_ctl_elem_value_t *write_val,
+ snd_ctl_elem_value_t *expected_val)
+{
+ int err, i;
+ bool error_expected, mismatch_shown;
+ snd_ctl_elem_value_t *read_val, *w_val;
+ snd_ctl_elem_value_alloca(&read_val);
+ snd_ctl_elem_value_alloca(&w_val);
+
+ /*
+ * We need to copy the write value since writing can modify
+ * the value which causes surprises, and allocate an expected
+ * value if we expect to read back what we wrote.
+ */
+ snd_ctl_elem_value_copy(w_val, write_val);
+ if (expected_val) {
+ error_expected = true;
+ } else {
+ error_expected = false;
+ snd_ctl_elem_value_alloca(&expected_val);
+ snd_ctl_elem_value_copy(expected_val, write_val);
+ }
+
+ /*
+ * Do the write, if we have an expected value ignore the error
+ * and carry on to validate the expected value.
+ */
+ err = snd_ctl_elem_write(ctl->card->handle, w_val);
+ if (err < 0 && !error_expected) {
+ ksft_print_msg("snd_ctl_elem_write() failed: %s\n",
+ snd_strerror(err));
+ return err;
+ }
+
+ /* Can we do the verification part? */
+ if (!snd_ctl_elem_info_is_readable(ctl->info))
+ return err;
+
+ snd_ctl_elem_value_set_id(read_val, ctl->id);
+
+ err = snd_ctl_elem_read(ctl->card->handle, read_val);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_elem_read() failed: %s\n",
+ snd_strerror(err));
+ return err;
+ }
+
+ /*
+ * Use the libray to compare values, if there's a mismatch
+ * carry on and try to provide a more useful diagnostic than
+ * just "mismatch".
+ */
+ if (!snd_ctl_elem_value_compare(expected_val, read_val))
+ return 0;
+
+ mismatch_shown = false;
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++)
+ if (show_mismatch(ctl, i, read_val, expected_val))
+ mismatch_shown = true;
+
+ if (!mismatch_shown)
+ ksft_print_msg("%s read and written values differ\n",
+ ctl->name);
+
+ return -1;
+}
+
+/*
+ * Make sure we can write the default value back to the control, this
+ * should validate that at least some write works.
+ */
+void test_ctl_write_default(struct ctl_data *ctl)
+{
+ int err;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ if (!snd_ctl_elem_info_is_writable(ctl->info)) {
+ ksft_print_msg("%s is not writeable\n", ctl->name);
+ ksft_test_result_skip("write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ /* No idea what the default was for unreadable controls */
+ if (!snd_ctl_elem_info_is_readable(ctl->info)) {
+ ksft_print_msg("%s couldn't read default\n", ctl->name);
+ ksft_test_result_skip("write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ err = write_and_verify(ctl, ctl->def_val, NULL);
+
+ ksft_test_result(err >= 0, "write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+bool test_ctl_write_valid_boolean(struct ctl_data *ctl)
+{
+ int err, i, j;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = 0; j < 2; j++) {
+ snd_ctl_elem_value_set_boolean(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+bool test_ctl_write_valid_integer(struct ctl_data *ctl)
+{
+ int err;
+ int i;
+ long j, step;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ step = snd_ctl_elem_info_get_step(ctl->info);
+ if (!step)
+ step = 1;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = snd_ctl_elem_info_get_min(ctl->info);
+ j <= snd_ctl_elem_info_get_max(ctl->info); j += step) {
+
+ snd_ctl_elem_value_set_integer(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+
+ return !fail;
+}
+
+bool test_ctl_write_valid_integer64(struct ctl_data *ctl)
+{
+ int err, i;
+ long long j, step;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ step = snd_ctl_elem_info_get_step64(ctl->info);
+ if (!step)
+ step = 1;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = snd_ctl_elem_info_get_min64(ctl->info);
+ j <= snd_ctl_elem_info_get_max64(ctl->info); j += step) {
+
+ snd_ctl_elem_value_set_integer64(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+bool test_ctl_write_valid_enumerated(struct ctl_data *ctl)
+{
+ int err, i, j;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = 0; j < snd_ctl_elem_info_get_items(ctl->info); j++) {
+ snd_ctl_elem_value_set_enumerated(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+void test_ctl_write_valid(struct ctl_data *ctl)
+{
+ bool pass;
+ int err;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ if (!snd_ctl_elem_info_is_writable(ctl->info)) {
+ ksft_print_msg("%s is not writeable\n", ctl->name);
+ ksft_test_result_skip("write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ pass = test_ctl_write_valid_boolean(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ pass = test_ctl_write_valid_integer(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ pass = test_ctl_write_valid_integer64(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ pass = test_ctl_write_valid_enumerated(ctl);
+ break;
+
+ default:
+ /* No tests for this yet */
+ ksft_test_result_skip("write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ /* Restore the default value to minimise disruption */
+ err = write_and_verify(ctl, ctl->def_val, NULL);
+ if (err < 0)
+ pass = false;
+
+ ksft_test_result(pass, "write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+int main(void)
+{
+ struct ctl_data *ctl;
+
+ ksft_print_header();
+
+ find_controls();
+
+ ksft_set_plan(num_controls * TESTS_PER_CONTROL);
+
+ for (ctl = ctl_list; ctl != NULL; ctl = ctl->next) {
+ /*
+ * Must test get_value() before we write anything, the
+ * test stores the default value for later cleanup.
+ */
+ test_ctl_get_value(ctl);
+ test_ctl_write_default(ctl);
+ test_ctl_write_valid(ctl);
+ }
+
+ ksft_exit_pass();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 3cb5ac5da087..8c129961accf 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -4,6 +4,7 @@
/aarch64/get-reg-list
/aarch64/psci_cpu_on_test
/aarch64/vgic_init
+/aarch64/vgic_irq
/s390x/memop
/s390x/resets
/s390x/sync_regs_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 17342b575e85..ee8cf2149824 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -32,11 +32,16 @@ endif
ifeq ($(ARCH),s390)
UNAME_M := s390x
endif
+# Set UNAME_M riscv compile/install to work
+ifeq ($(ARCH),riscv)
+ UNAME_M := riscv
+endif
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
+LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
@@ -77,6 +82,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
+TEST_GEN_PROGS_x86_64 += x86_64/amx_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
@@ -96,6 +102,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
+TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
@@ -119,6 +126,13 @@ TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS_s390x += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv += demand_paging_test
+TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
+TEST_GEN_PROGS_riscv += kvm_page_table_test
+TEST_GEN_PROGS_riscv += set_memory_region_test
+TEST_GEN_PROGS_riscv += kvm_binary_stats_test
+
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
@@ -133,7 +147,7 @@ endif
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
- -I$(<D) -Iinclude/$(UNAME_M) -I..
+ -I$(<D) -Iinclude/$(UNAME_M) -I.. $(EXTRA_CFLAGS)
no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
$(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index bf6a45b0b8dc..9ad38bd360a4 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -382,7 +382,7 @@ static struct kvm_vm *test_vm_create(void)
ucall_init(vm, NULL);
test_init_timer_irq(vm);
- vgic_v3_setup(vm, nr_vcpus, GICD_BASE_GPA, GICR_BASE_GPA);
+ vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
/* Make all the test's cmdline args visible to the guest */
sync_global_to_guest(vm, test_args);
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index cc898181faab..f769fc6cd927 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -1014,6 +1014,22 @@ static __u64 sve_rejects_set[] = {
KVM_REG_ARM64_SVE_VLS,
};
+static __u64 pauth_addr_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 1, 0), /* APIAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 1), /* APIAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 2), /* APIBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 3), /* APIBKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 0), /* APDAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 1), /* APDAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 2), /* APDBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 3) /* APDBKEYHI_EL1 */
+};
+
+static __u64 pauth_generic_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 3, 0), /* APGAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */
+};
+
#define BASE_SUBLIST \
{ "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
#define VREGS_SUBLIST \
@@ -1025,6 +1041,21 @@ static __u64 sve_rejects_set[] = {
{ "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
.regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
.rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+#define PAUTH_SUBLIST \
+ { \
+ .name = "pauth_address", \
+ .capability = KVM_CAP_ARM_PTRAUTH_ADDRESS, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_ADDRESS, \
+ .regs = pauth_addr_regs, \
+ .regs_n = ARRAY_SIZE(pauth_addr_regs), \
+ }, \
+ { \
+ .name = "pauth_generic", \
+ .capability = KVM_CAP_ARM_PTRAUTH_GENERIC, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_GENERIC, \
+ .regs = pauth_generic_regs, \
+ .regs_n = ARRAY_SIZE(pauth_generic_regs), \
+ }
static struct vcpu_config vregs_config = {
.sublists = {
@@ -1056,11 +1087,30 @@ static struct vcpu_config sve_pmu_config = {
{0},
},
};
+static struct vcpu_config pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
static struct vcpu_config *vcpu_configs[] = {
&vregs_config,
&vregs_pmu_config,
&sve_config,
&sve_pmu_config,
+ &pauth_config,
+ &pauth_pmu_config,
};
static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
new file mode 100644
index 000000000000..e6c7d7f8fbd1
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -0,0 +1,853 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_irq.c - Test userspace injection of IRQs
+ *
+ * This test validates the injection of IRQs from userspace using various
+ * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the
+ * host to inject a specific intid via a GUEST_SYNC call, and then checks that
+ * it received it.
+ */
+
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+#include <sys/eventfd.h>
+#include <linux/sizes.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "vgic.h"
+
+#define GICD_BASE_GPA 0x08000000ULL
+#define GICR_BASE_GPA 0x080A0000ULL
+#define VCPU_ID 0
+
+/*
+ * Stores the user specified args; it's passed to the guest and to every test
+ * function.
+ */
+struct test_args {
+ uint32_t nr_irqs; /* number of KVM supported IRQs. */
+ bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
+ bool level_sensitive; /* 1 is level, 0 is edge */
+ int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
+ bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+};
+
+/*
+ * KVM implements 32 priority levels:
+ * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8
+ *
+ * Note that these macros will still be correct in the case that KVM implements
+ * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2.
+ */
+#define KVM_NUM_PRIOS 32
+#define KVM_PRIO_SHIFT 3 /* steps of 8 = 1 << 3 */
+#define KVM_PRIO_STEPS (1 << KVM_PRIO_SHIFT) /* 8 */
+#define LOWEST_PRIO (KVM_NUM_PRIOS - 1)
+#define CPU_PRIO_MASK (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */
+#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1)
+#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
+
+static void *dist = (void *)GICD_BASE_GPA;
+static void *redist = (void *)GICR_BASE_GPA;
+
+/*
+ * The kvm_inject_* utilities are used by the guest to ask the host to inject
+ * interrupts (e.g., using the KVM_IRQ_LINE ioctl).
+ */
+
+typedef enum {
+ KVM_INJECT_EDGE_IRQ_LINE = 1,
+ KVM_SET_IRQ_LINE,
+ KVM_SET_IRQ_LINE_HIGH,
+ KVM_SET_LEVEL_INFO_HIGH,
+ KVM_INJECT_IRQFD,
+ KVM_WRITE_ISPENDR,
+ KVM_WRITE_ISACTIVER,
+} kvm_inject_cmd;
+
+struct kvm_inject_args {
+ kvm_inject_cmd cmd;
+ uint32_t first_intid;
+ uint32_t num;
+ int level;
+ bool expect_failure;
+};
+
+/* Used on the guest side to perform the hypercall. */
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure);
+
+/* Used on the host side to get the hypercall info. */
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args);
+
+#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure) \
+ kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure)
+
+#define KVM_INJECT_MULTI(cmd, intid, num) \
+ _KVM_INJECT_MULTI(cmd, intid, num, false)
+
+#define _KVM_INJECT(cmd, intid, expect_failure) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure)
+
+#define KVM_INJECT(cmd, intid) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, false)
+
+#define KVM_ACTIVATE(cmd, intid) \
+ kvm_inject_call(cmd, intid, 1, 1, false);
+
+struct kvm_inject_desc {
+ kvm_inject_cmd cmd;
+ /* can inject PPIs, PPIs, and/or SPIs. */
+ bool sgi, ppi, spi;
+};
+
+static struct kvm_inject_desc inject_edge_fns[] = {
+ /* sgi ppi spi */
+ { KVM_INJECT_EDGE_IRQ_LINE, false, false, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, true, false, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc inject_level_fns[] = {
+ /* sgi ppi spi */
+ { KVM_SET_IRQ_LINE_HIGH, false, true, true },
+ { KVM_SET_LEVEL_INFO_HIGH, false, true, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, false, true, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc set_active_fns[] = {
+ /* sgi ppi spi */
+ { KVM_WRITE_ISACTIVER, true, true, true },
+ { 0, },
+};
+
+#define for_each_inject_fn(t, f) \
+ for ((f) = (t); (f)->cmd; (f)++)
+
+#define for_each_supported_inject_fn(args, t, f) \
+ for_each_inject_fn(t, f) \
+ if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD)
+
+#define for_each_supported_activate_fn(args, t, f) \
+ for_each_supported_inject_fn((args), (t), (f))
+
+/* Shared between the guest main thread and the IRQ handlers. */
+volatile uint64_t irq_handled;
+volatile uint32_t irqnr_received[MAX_SPI + 1];
+
+static void reset_stats(void)
+{
+ int i;
+
+ irq_handled = 0;
+ for (i = 0; i <= MAX_SPI; i++)
+ irqnr_received[i] = 0;
+}
+
+static uint64_t gic_read_ap1r0(void)
+{
+ uint64_t reg = read_sysreg_s(SYS_ICV_AP1R0_EL1);
+
+ dsb(sy);
+ return reg;
+}
+
+static void gic_write_ap1r0(uint64_t val)
+{
+ write_sysreg_s(val, SYS_ICV_AP1R0_EL1);
+ isb();
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level);
+
+static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
+{
+ uint32_t intid = gic_get_and_ack_irq();
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ GUEST_ASSERT(gic_irq_get_active(intid));
+
+ if (!level_sensitive)
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+
+ if (level_sensitive)
+ guest_set_irq_line(intid, 0);
+
+ GUEST_ASSERT(intid < MAX_SPI);
+ irqnr_received[intid] += 1;
+ irq_handled += 1;
+
+ gic_set_eoi(intid);
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ if (eoi_split)
+ gic_set_dir(intid);
+
+ GUEST_ASSERT(!gic_irq_get_active(intid));
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+}
+
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure)
+{
+ struct kvm_inject_args args = {
+ .cmd = cmd,
+ .first_intid = first_intid,
+ .num = num,
+ .level = level,
+ .expect_failure = expect_failure,
+ };
+ GUEST_SYNC(&args);
+}
+
+#define GUEST_ASSERT_IAR_EMPTY() \
+do { \
+ uint32_t _intid; \
+ _intid = gic_get_and_ack_irq(); \
+ GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
+} while (0)
+
+#define CAT_HELPER(a, b) a ## b
+#define CAT(a, b) CAT_HELPER(a, b)
+#define PREFIX guest_irq_handler_
+#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev))
+#define GENERATE_GUEST_IRQ_HANDLER(split, lev) \
+static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs) \
+{ \
+ guest_irq_generic_handler(split, lev); \
+}
+
+GENERATE_GUEST_IRQ_HANDLER(0, 0);
+GENERATE_GUEST_IRQ_HANDLER(0, 1);
+GENERATE_GUEST_IRQ_HANDLER(1, 0);
+GENERATE_GUEST_IRQ_HANDLER(1, 1);
+
+static void (*guest_irq_handlers[2][2])(struct ex_regs *) = {
+ {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),},
+ {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),},
+};
+
+static void reset_priorities(struct test_args *args)
+{
+ int i;
+
+ for (i = 0; i < args->nr_irqs; i++)
+ gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level)
+{
+ kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
+}
+
+static void test_inject_fail(struct test_args *args,
+ uint32_t intid, kvm_inject_cmd cmd)
+{
+ reset_stats();
+
+ _KVM_INJECT(cmd, intid, true);
+ /* no IRQ to handle on entry */
+
+ GUEST_ASSERT_EQ(irq_handled, 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+static void guest_inject(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t i;
+
+ reset_stats();
+
+ /* Cycle over all priorities to make things more interesting. */
+ for (i = first_intid; i < num + first_intid; i++)
+ gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3);
+
+ asm volatile("msr daifset, #2" : : : "memory");
+ KVM_INJECT_MULTI(cmd, first_intid, num);
+
+ while (irq_handled < num) {
+ asm volatile("wfi\n"
+ "msr daifclr, #2\n"
+ /* handle IRQ */
+ "msr daifset, #2\n"
+ : : : "memory");
+ }
+ asm volatile("msr daifclr, #2" : : : "memory");
+
+ GUEST_ASSERT_EQ(irq_handled, num);
+ for (i = first_intid; i < num + first_intid; i++)
+ GUEST_ASSERT_EQ(irqnr_received[i], 1);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+/*
+ * Restore the active state of multiple concurrent IRQs (given by
+ * concurrent_irqs). This does what a live-migration would do on the
+ * destination side assuming there are some active IRQs that were not
+ * deactivated yet.
+ */
+static void guest_restore_active(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t prio, intid, ap1r;
+ int i;
+
+ /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+ * in descending order, so intid+1 can preempt intid.
+ */
+ for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+ /* In a real migration, KVM would restore all GIC state before running
+ * guest code.
+ */
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+ KVM_ACTIVATE(cmd, intid);
+ ap1r = gic_read_ap1r0();
+ ap1r |= 1U << i;
+ gic_write_ap1r0(ap1r);
+ }
+
+ /* This is where the "migration" would occur. */
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+ gic_set_eoi(intid);
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+
+ for (i = 0; i < num; i++)
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+/*
+ * Polls the IAR until it's not a spurious interrupt.
+ *
+ * This function should only be used in test_inject_preemption (with IRQs
+ * masked).
+ */
+static uint32_t wait_for_and_activate_irq(void)
+{
+ uint32_t intid;
+
+ do {
+ asm volatile("wfi" : : : "memory");
+ intid = gic_get_and_ack_irq();
+ } while (intid == IAR_SPURIOUS);
+
+ return intid;
+}
+
+/*
+ * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and
+ * handle them without handling the actual exceptions. This is done by masking
+ * interrupts for the whole test.
+ */
+static void test_inject_preemption(struct test_args *args,
+ uint32_t first_intid, int num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t intid, prio, step = KVM_PRIO_STEPS;
+ int i;
+
+ /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+ * in descending order, so intid+1 can preempt intid.
+ */
+ for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+ local_irq_disable();
+
+ for (i = 0; i < num; i++) {
+ uint32_t tmp;
+ intid = i + first_intid;
+ KVM_INJECT(cmd, intid);
+ /* Each successive IRQ will preempt the previous one. */
+ tmp = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(tmp, intid);
+ if (args->level_sensitive)
+ guest_set_irq_line(intid, 0);
+ }
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+ gic_set_eoi(intid);
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+
+ local_irq_enable();
+
+ for (i = 0; i < num; i++)
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
+{
+ uint32_t nr_irqs = args->nr_irqs;
+
+ if (f->sgi) {
+ guest_inject(args, MIN_SGI, 1, f->cmd);
+ guest_inject(args, 0, 16, f->cmd);
+ }
+
+ if (f->ppi)
+ guest_inject(args, MIN_PPI, 1, f->cmd);
+
+ if (f->spi) {
+ guest_inject(args, MIN_SPI, 1, f->cmd);
+ guest_inject(args, nr_irqs - 1, 1, f->cmd);
+ guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd);
+ }
+}
+
+static void test_injection_failure(struct test_args *args,
+ struct kvm_inject_desc *f)
+{
+ uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
+ test_inject_fail(args, bad_intid[i], f->cmd);
+}
+
+static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
+{
+ /*
+ * Test up to 4 levels of preemption. The reason is that KVM doesn't
+ * currently implement the ability to have more than the number-of-LRs
+ * number of concurrently active IRQs. The number of LRs implemented is
+ * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
+ */
+ if (f->sgi)
+ test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+
+ if (f->ppi)
+ test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+
+ if (f->spi)
+ test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+}
+
+static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
+{
+ /* Test up to 4 active IRQs. Same reason as in test_preemption. */
+ if (f->sgi)
+ guest_restore_active(args, MIN_SGI, 4, f->cmd);
+
+ if (f->ppi)
+ guest_restore_active(args, MIN_PPI, 4, f->cmd);
+
+ if (f->spi)
+ guest_restore_active(args, MIN_SPI, 4, f->cmd);
+}
+
+static void guest_code(struct test_args args)
+{
+ uint32_t i, nr_irqs = args.nr_irqs;
+ bool level_sensitive = args.level_sensitive;
+ struct kvm_inject_desc *f, *inject_fns;
+
+ gic_init(GIC_V3, 1, dist, redist);
+
+ for (i = 0; i < nr_irqs; i++)
+ gic_irq_enable(i);
+
+ for (i = MIN_SPI; i < nr_irqs; i++)
+ gic_irq_set_config(i, !args.level_sensitive);
+
+ gic_set_eoi_split(args.eoi_split);
+
+ reset_priorities(&args);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ inject_fns = level_sensitive ? inject_level_fns
+ : inject_edge_fns;
+
+ local_irq_enable();
+
+ /* Start the tests. */
+ for_each_supported_inject_fn(&args, inject_fns, f) {
+ test_injection(&args, f);
+ test_preemption(&args, f);
+ test_injection_failure(&args, f);
+ }
+
+ /* Restore the active state of IRQs. This would happen when live
+ * migrating IRQs in the middle of being handled.
+ */
+ for_each_supported_activate_fn(&args, set_active_fns, f)
+ test_restore_active(&args, f);
+
+ GUEST_DONE();
+}
+
+static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
+ struct test_args *test_args, bool expect_failure)
+{
+ int ret;
+
+ if (!expect_failure) {
+ kvm_arm_irq_line(vm, intid, level);
+ } else {
+ /* The interface doesn't allow larger intid's. */
+ if (intid > KVM_ARM_IRQ_NUM_MASK)
+ return;
+
+ ret = _kvm_arm_irq_line(vm, intid, level);
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause KVM_IRQ_LINE "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ }
+}
+
+void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
+ bool expect_failure)
+{
+ if (!expect_failure) {
+ kvm_irq_set_level_info(gic_fd, intid, level);
+ } else {
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+ /*
+ * The kernel silently fails for invalid SPIs and SGIs (which
+ * are not level-sensitive). It only checks for intid to not
+ * spill over 1U << 10 (the max reserved SPI). Also, callers
+ * are supposed to mask the intid with 0x3ff (1023).
+ */
+ if (intid > VGIC_MAX_RESERVED)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ struct kvm_irq_routing *routing;
+ int ret;
+ uint64_t i;
+
+ assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
+
+ routing = kvm_gsi_routing_create();
+ for (i = intid; i < (uint64_t)intid + num; i++)
+ kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
+
+ if (!expect_failure) {
+ kvm_gsi_routing_write(vm, routing);
+ } else {
+ ret = _kvm_gsi_routing_write(vm, routing);
+ /* The kernel only checks for KVM_IRQCHIP_NUM_PINS. */
+ if (intid >= KVM_IRQCHIP_NUM_PINS)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %u did not cause KVM_SET_GSI_ROUTING "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
+ uint32_t vcpu, bool expect_failure)
+{
+ /*
+ * Ignore this when expecting failure as invalid intids will lead to
+ * either trying to inject SGIs when we configured the test to be
+ * level_sensitive (or the reverse), or inject large intids which
+ * will lead to writing above the ISPENDR register space (and we
+ * don't want to do that either).
+ */
+ if (!expect_failure)
+ kvm_irq_write_ispendr(gic_fd, intid, vcpu);
+}
+
+static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ int fd[MAX_SPI];
+ uint64_t val;
+ int ret, f;
+ uint64_t i;
+
+ /*
+ * There is no way to try injecting an SGI or PPI as the interface
+ * starts counting from the first SPI (above the private ones), so just
+ * exit.
+ */
+ if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid))
+ return;
+
+ kvm_set_gsi_routing_irqchip_check(vm, intid, num,
+ kvm_max_routes, expect_failure);
+
+ /*
+ * If expect_failure, then just to inject anyway. These
+ * will silently fail. And in any case, the guest will check
+ * that no actual interrupt was injected for those cases.
+ */
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ fd[f] = eventfd(0, 0);
+ TEST_ASSERT(fd[f] != -1,
+ "eventfd failed, errno: %i\n", errno);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ struct kvm_irqfd irqfd = {
+ .fd = fd[f],
+ .gsi = i - MIN_SPI,
+ };
+ assert(i <= (uint64_t)UINT_MAX);
+ vm_ioctl(vm, KVM_IRQFD, &irqfd);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ val = 1;
+ ret = write(fd[f], &val, sizeof(uint64_t));
+ TEST_ASSERT(ret == sizeof(uint64_t),
+ "Write to KVM_IRQFD failed with ret: %d\n", ret);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+ close(fd[f]);
+}
+
+/* handles the valid case: intid=0xffffffff num=1 */
+#define for_each_intid(first, num, tmp, i) \
+ for ((tmp) = (i) = (first); \
+ (tmp) < (uint64_t)(first) + (uint64_t)(num); \
+ (tmp)++, (i)++)
+
+static void run_guest_cmd(struct kvm_vm *vm, int gic_fd,
+ struct kvm_inject_args *inject_args,
+ struct test_args *test_args)
+{
+ kvm_inject_cmd cmd = inject_args->cmd;
+ uint32_t intid = inject_args->first_intid;
+ uint32_t num = inject_args->num;
+ int level = inject_args->level;
+ bool expect_failure = inject_args->expect_failure;
+ uint64_t tmp;
+ uint32_t i;
+
+ /* handles the valid case: intid=0xffffffff num=1 */
+ assert(intid < UINT_MAX - num || num == 1);
+
+ switch (cmd) {
+ case KVM_INJECT_EDGE_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 0, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, level, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_LEVEL_INFO_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_set_level_info_check(gic_fd, i, 1,
+ expect_failure);
+ break;
+ case KVM_INJECT_IRQFD:
+ kvm_routing_and_irqfd_check(vm, intid, num,
+ test_args->kvm_max_routes,
+ expect_failure);
+ break;
+ case KVM_WRITE_ISPENDR:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_ispendr_check(gic_fd, i,
+ VCPU_ID, expect_failure);
+ break;
+ case KVM_WRITE_ISACTIVER:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_isactiver(gic_fd, i, VCPU_ID);
+ break;
+ default:
+ break;
+ }
+}
+
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args)
+{
+ struct kvm_inject_args *kvm_args_hva;
+ vm_vaddr_t kvm_args_gva;
+
+ kvm_args_gva = uc->args[1];
+ kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
+ memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args));
+}
+
+static void print_args(struct test_args *args)
+{
+ printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n",
+ args->nr_irqs, args->level_sensitive,
+ args->eoi_split);
+}
+
+static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
+{
+ struct ucall uc;
+ int gic_fd;
+ struct kvm_vm *vm;
+ struct kvm_inject_args inject_args;
+
+ struct test_args args = {
+ .nr_irqs = nr_irqs,
+ .level_sensitive = level_sensitive,
+ .eoi_split = eoi_split,
+ .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING),
+ .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD),
+ };
+
+ print_args(&args);
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ ucall_init(vm, NULL);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ /* Setup the guest args page (so it gets the args). */
+ vcpu_args_set(vm, 0, 1, args);
+
+ gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
+ GICD_BASE_GPA, GICR_BASE_GPA);
+
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
+ guest_irq_handlers[args.eoi_split][args.level_sensitive]);
+
+ while (1) {
+ vcpu_run(vm, VCPU_ID);
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ kvm_inject_get_call(vm, &uc, &inject_args);
+ run_guest_cmd(vm, gic_fd, &inject_args, &args);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
+ (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
+static void help(const char *name)
+{
+ printf(
+ "\n"
+ "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name);
+ printf(" -n: specify number of IRQs to setup the vgic with. "
+ "It has to be a multiple of 32 and between 64 and 1024.\n");
+ printf(" -e: if 1 then EOI is split into a write to DIR on top "
+ "of writing EOI.\n");
+ printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0).");
+ puts("");
+ exit(1);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t nr_irqs = 64;
+ bool default_args = true;
+ bool level_sensitive = false;
+ int opt;
+ bool eoi_split = false;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
+ switch (opt) {
+ case 'n':
+ nr_irqs = atoi(optarg);
+ if (nr_irqs > 1024 || nr_irqs % 32)
+ help(argv[0]);
+ break;
+ case 'e':
+ eoi_split = (bool)atoi(optarg);
+ default_args = false;
+ break;
+ case 'l':
+ level_sensitive = (bool)atoi(optarg);
+ default_args = false;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ /* If the user just specified nr_irqs and/or gic_version, then run all
+ * combinations.
+ */
+ if (default_args) {
+ test_vgic(nr_irqs, false /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+ } else {
+ test_vgic(nr_irqs, level_sensitive, eoi_split);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h
index 85dd1e53048e..b217ea17cac5 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic.h
+++ b/tools/testing/selftests/kvm/include/aarch64/gic.h
@@ -11,11 +11,37 @@ enum gic_type {
GIC_TYPE_MAX,
};
+#define MIN_SGI 0
+#define MIN_PPI 16
+#define MIN_SPI 32
+#define MAX_SPI 1019
+#define IAR_SPURIOUS 1023
+
+#define INTID_IS_SGI(intid) (0 <= (intid) && (intid) < MIN_PPI)
+#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI)
+#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
+
void gic_init(enum gic_type type, unsigned int nr_cpus,
void *dist_base, void *redist_base);
void gic_irq_enable(unsigned int intid);
void gic_irq_disable(unsigned int intid);
unsigned int gic_get_and_ack_irq(void);
void gic_set_eoi(unsigned int intid);
+void gic_set_dir(unsigned int intid);
+
+/*
+ * Sets the EOI mode. When split is false, EOI just drops the priority. When
+ * split is true, EOI drops the priority and deactivates the interrupt.
+ */
+void gic_set_eoi_split(bool split);
+void gic_set_priority_mask(uint64_t mask);
+void gic_set_priority(uint32_t intid, uint32_t prio);
+void gic_irq_set_active(unsigned int intid);
+void gic_irq_clear_active(unsigned int intid);
+bool gic_irq_get_active(unsigned int intid);
+void gic_irq_set_pending(unsigned int intid);
+void gic_irq_clear_pending(unsigned int intid);
+bool gic_irq_get_pending(unsigned int intid);
+void gic_irq_set_config(unsigned int intid, bool is_edge);
#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
index b51536d469a6..ba0886e8a2bb 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h
+++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
@@ -16,8 +16,12 @@
#define GICD_IGROUPR 0x0080
#define GICD_ISENABLER 0x0100
#define GICD_ICENABLER 0x0180
+#define GICD_ISPENDR 0x0200
+#define GICD_ICPENDR 0x0280
#define GICD_ICACTIVER 0x0380
+#define GICD_ISACTIVER 0x0300
#define GICD_IPRIORITYR 0x0400
+#define GICD_ICFGR 0x0C00
/*
* The assumption is that the guest runs in a non-secure mode.
@@ -49,16 +53,24 @@
#define GICR_IGROUPR0 GICD_IGROUPR
#define GICR_ISENABLER0 GICD_ISENABLER
#define GICR_ICENABLER0 GICD_ICENABLER
+#define GICR_ISPENDR0 GICD_ISPENDR
+#define GICR_ISACTIVER0 GICD_ISACTIVER
#define GICR_ICACTIVER0 GICD_ICACTIVER
+#define GICR_ICENABLER GICD_ICENABLER
+#define GICR_ICACTIVER GICD_ICACTIVER
#define GICR_IPRIORITYR0 GICD_IPRIORITYR
/* CPU interface registers */
#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
+#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+#define SYS_ICV_AP1R0_EL1 sys_reg(3, 0, 12, 9, 0)
+
#define ICC_PMR_DEF_PRIO 0xf0
#define ICC_SRE_EL1_SRE (1U << 0)
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 27d8e1bb5b36..8f9f46979a00 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -113,6 +113,9 @@ enum {
#define ESR_EC_WP_CURRENT 0x35
#define ESR_EC_BRK_INS 0x3c
+void aarch64_get_supported_page_sizes(uint32_t ipa,
+ bool *ps4k, bool *ps16k, bool *ps64k);
+
void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h
index 0ecfb253893c..4442081221a0 100644
--- a/tools/testing/selftests/kvm/include/aarch64/vgic.h
+++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h
@@ -14,7 +14,21 @@
((uint64_t)(flags) << 12) | \
index)
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
uint64_t gicd_base_gpa, uint64_t gicr_base_gpa);
-#endif /* SELFTEST_KVM_VGIC_H */
+#define VGIC_MAX_RESERVED 1023
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+
+/* The vcpu arg only applies to private interrupts. */
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu);
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu);
+
+#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
+
+#endif // SELFTEST_KVM_VGIC_H
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 2d62edc49d67..c9286811a4cb 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -7,412 +7,7 @@
#ifndef SELFTEST_KVM_UTIL_H
#define SELFTEST_KVM_UTIL_H
-#include "test_util.h"
-
-#include "asm/kvm.h"
-#include "linux/list.h"
-#include "linux/kvm.h"
-#include <sys/ioctl.h>
-
-#include "sparsebit.h"
-
-#define KVM_DEV_PATH "/dev/kvm"
-#define KVM_MAX_VCPUS 512
-
-#define NSEC_PER_SEC 1000000000L
-
-/*
- * Callers of kvm_util only have an incomplete/opaque description of the
- * structure kvm_util is using to maintain the state of a VM.
- */
-struct kvm_vm;
-
-typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
-typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
-
-/* Minimum allocated guest virtual and physical addresses */
-#define KVM_UTIL_MIN_VADDR 0x2000
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
-#define DEFAULT_GUEST_PHY_PAGES 512
-#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
-#define DEFAULT_STACK_PGS 5
-
-enum vm_guest_mode {
- VM_MODE_P52V48_4K,
- VM_MODE_P52V48_64K,
- VM_MODE_P48V48_4K,
- VM_MODE_P48V48_64K,
- VM_MODE_P40V48_4K,
- VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
- VM_MODE_P47V64_4K,
- VM_MODE_P44V64_4K,
- NUM_VM_MODES,
-};
-
-#if defined(__aarch64__)
-
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#elif defined(__x86_64__)
-
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#elif defined(__s390x__)
-
-#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 16)
-
-#endif
-
-#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
-#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
-
-struct vm_guest_mode_params {
- unsigned int pa_bits;
- unsigned int va_bits;
- unsigned int page_size;
- unsigned int page_shift;
-};
-extern const struct vm_guest_mode_params vm_guest_mode_params[];
-
-int open_path_or_exit(const char *path, int flags);
-int open_kvm_dev_path_or_exit(void);
-int kvm_check_cap(long cap);
-int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
-int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
- struct kvm_enable_cap *cap);
-void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
-const char *vm_guest_mode_string(uint32_t i);
-
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-void kvm_vm_free(struct kvm_vm *vmp);
-void kvm_vm_restart(struct kvm_vm *vmp, int perm);
-void kvm_vm_release(struct kvm_vm *vmp);
-void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
-void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages);
-uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
-
-int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
- size_t len);
-
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
-
-void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-/*
- * VM VCPU Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by @vcpuid, within the VM
- * given by @vm, to the FILE stream given by @stream.
- */
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
- uint8_t indent);
-
-void vm_create_irqchip(struct kvm_vm *vm);
-
-void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
-
-void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
-void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
-void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
-void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
-vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
-vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
-
-void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages);
-void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
-void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
-vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
-void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
-
-/*
- * Address Guest Virtual to Guest Physical
- *
- * Input Args:
- * vm - Virtual Machine
- * gva - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- * Equivalent VM physical address
- *
- * Returns the VM physical address of the translated VM virtual
- * address given by @gva.
- */
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
-
-struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_guest_debug *debug);
-void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state);
-struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
-void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
-
-/*
- * VM VCPU Args Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * num - number of arguments
- * ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first @num function input registers of the VCPU with @vcpuid,
- * per the C calling convention of the architecture, to the values given
- * as variable args. Each of the variable args is expected to be of type
- * uint64_t. The maximum @num can be is specific to the architecture.
- */
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
-
-void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
-void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
-#ifdef __KVM_HAVE_VCPU_EVENTS
-void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-#endif
-#ifdef __x86_64__
-void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state);
-int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state, bool ignore_error);
-#endif
-void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
-
-int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
-int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
-int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd);
-int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test);
-int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
- void *val, bool write);
-int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
- void *val, bool write);
-
-int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr);
-int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr);
-int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr, void *val, bool write);
-int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
- uint64_t attr, void *val, bool write);
-
-const char *exit_reason_str(unsigned int exit_reason);
-
-void virt_pgd_alloc(struct kvm_vm *vm);
-
-/*
- * VM Virtual Page Map
- *
- * Input Args:
- * vm - Virtual Machine
- * vaddr - VM Virtual Address
- * paddr - VM Physical Address
- * memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within @vm, creates a virtual translation for the page starting
- * at @vaddr to the page starting at @paddr.
- */
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
-
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
- uint32_t memslot);
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot);
-vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
-
-/*
- * Create a VM with reasonable defaults
- *
- * Input Args:
- * vcpuid - The id of the single VCPU to add to the VM.
- * extra_mem_pages - The number of extra pages to add (this will
- * decide how much extra space we will need to
- * setup the page tables using memslot 0)
- * guest_code - The vCPU's entry point
- *
- * Output Args: None
- *
- * Return:
- * Pointer to opaque structure that describes the created VM.
- */
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code);
-
-/* Same as vm_create_default, but can be used for more than one vcpu */
-struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[]);
-
-/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */
-struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
- uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
- uint32_t num_percpu_pages, void *guest_code,
- uint32_t vcpuids[]);
-
-/*
- * Adds a vCPU with reasonable defaults (e.g. a stack)
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - The id of the VCPU to add to the VM.
- * guest_code - The vCPU's entry point
- */
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
-
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
-
-unsigned int vm_get_page_size(struct kvm_vm *vm);
-unsigned int vm_get_page_shift(struct kvm_vm *vm);
-unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
-uint64_t vm_get_max_gfn(struct kvm_vm *vm);
-int vm_get_fd(struct kvm_vm *vm);
-
-unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
-unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
-unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
-static inline unsigned int
-vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
-{
- unsigned int n;
- n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
-#ifdef __s390x__
- /* s390 requires 1M aligned guest sizes */
- n = (n + 255) & ~255;
-#endif
- return n;
-}
-
-struct kvm_userspace_memory_region *
-kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
- uint64_t end);
-
-struct kvm_dirty_log *
-allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
-
-int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
-
-#define sync_global_to_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(_p, &(g), sizeof(g)); \
-})
-
-#define sync_global_from_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(&(g), _p, sizeof(g)); \
-})
-
-void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
-
-/* Common ucalls */
-enum {
- UCALL_NONE,
- UCALL_SYNC,
- UCALL_ABORT,
- UCALL_DONE,
- UCALL_UNHANDLED,
-};
-
-#define UCALL_MAX_ARGS 6
-
-struct ucall {
- uint64_t cmd;
- uint64_t args[UCALL_MAX_ARGS];
-};
-
-void ucall_init(struct kvm_vm *vm, void *arg);
-void ucall_uninit(struct kvm_vm *vm);
-void ucall(uint64_t cmd, int nargs, ...);
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
-
-#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
- ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
-#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
-#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2 + _nargs, \
- "Failed guest assert: " \
- _condstr, __LINE__, _args); \
-} while (0)
-
-#define GUEST_ASSERT(_condition) \
- __GUEST_ASSERT(_condition, #_condition, 0, 0)
-
-#define GUEST_ASSERT_1(_condition, arg1) \
- __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
-
-#define GUEST_ASSERT_2(_condition, arg1, arg2) \
- __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
-
-#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
- __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
-
-#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
- __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
-
-#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
-
-int vm_get_stats_fd(struct kvm_vm *vm);
-int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
-
-uint32_t guest_get_vcpuid(void);
+#include "kvm_util_base.h"
+#include "ucall_common.h"
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
new file mode 100644
index 000000000000..66775de26952
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -0,0 +1,399 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/kvm_util_base.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+#ifndef SELFTEST_KVM_UTIL_BASE_H
+#define SELFTEST_KVM_UTIL_BASE_H
+
+#include "test_util.h"
+
+#include "asm/kvm.h"
+#include "linux/list.h"
+#include "linux/kvm.h"
+#include <sys/ioctl.h>
+
+#include "sparsebit.h"
+
+#define KVM_DEV_PATH "/dev/kvm"
+#define KVM_MAX_VCPUS 512
+
+#define NSEC_PER_SEC 1000000000L
+
+/*
+ * Callers of kvm_util only have an incomplete/opaque description of the
+ * structure kvm_util is using to maintain the state of a VM.
+ */
+struct kvm_vm;
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+#define DEFAULT_GUEST_PHY_PAGES 512
+#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
+#define DEFAULT_STACK_PGS 5
+
+enum vm_guest_mode {
+ VM_MODE_P52V48_4K,
+ VM_MODE_P52V48_64K,
+ VM_MODE_P48V48_4K,
+ VM_MODE_P48V48_16K,
+ VM_MODE_P48V48_64K,
+ VM_MODE_P40V48_4K,
+ VM_MODE_P40V48_16K,
+ VM_MODE_P40V48_64K,
+ VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_P47V64_4K,
+ VM_MODE_P44V64_4K,
+ VM_MODE_P36V48_4K,
+ VM_MODE_P36V48_16K,
+ VM_MODE_P36V48_64K,
+ VM_MODE_P36V47_16K,
+ NUM_VM_MODES,
+};
+
+#if defined(__aarch64__)
+
+extern enum vm_guest_mode vm_mode_default;
+
+#define VM_MODE_DEFAULT vm_mode_default
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__x86_64__)
+
+#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__s390x__)
+
+#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 16)
+
+#elif defined(__riscv)
+
+#if __riscv_xlen == 32
+#error "RISC-V 32-bit kvm selftests not supported"
+#endif
+
+#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#endif
+
+#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
+#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
+
+struct vm_guest_mode_params {
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+extern const struct vm_guest_mode_params vm_guest_mode_params[];
+
+int open_path_or_exit(const char *path, int flags);
+int open_kvm_dev_path_or_exit(void);
+int kvm_check_cap(long cap);
+int vm_check_cap(struct kvm_vm *vm, long cap);
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
+int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_enable_cap *cap);
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
+const char *vm_guest_mode_string(uint32_t i);
+
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
+void kvm_vm_free(struct kvm_vm *vmp);
+void kvm_vm_restart(struct kvm_vm *vmp, int perm);
+void kvm_vm_release(struct kvm_vm *vmp);
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
+void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+ uint64_t first_page, uint32_t num_pages);
+uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
+
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
+ size_t len);
+
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+/*
+ * VM VCPU Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by @vcpuid, within the VM
+ * given by @vm, to the FILE stream given by @stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
+ uint8_t indent);
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+
+void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+ void *arg);
+int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
+ void *arg);
+void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
+void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ unsigned int npages);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Returns the VM physical address of the translated VM virtual
+ * address given by @gva.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_guest_debug *debug);
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_mp_state *mp_state);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+
+/*
+ * VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first @num function input registers of the VCPU with @vcpuid,
+ * per the C calling convention of the architecture, to the values given
+ * as variable args. Each of the variable args is expected to be of type
+ * uint64_t. The maximum @num can be is specific to the architecture.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+
+void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_sregs *sregs);
+void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_fpu *fpu);
+void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_fpu *fpu);
+void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
+void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
+#ifdef __KVM_HAVE_VCPU_EVENTS
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+#endif
+#ifdef __x86_64__
+void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_nested_state *state);
+int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_nested_state *state, bool ignore_error);
+#endif
+void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid);
+
+int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
+int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr);
+int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd);
+int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test);
+int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
+ void *val, bool write);
+int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
+ void *val, bool write);
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+
+int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr);
+int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr);
+int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr, void *val, bool write);
+int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+ uint64_t attr, void *val, bool write);
+
+#define KVM_MAX_IRQ_ROUTES 4096
+
+struct kvm_irq_routing *kvm_gsi_routing_create(void);
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin);
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+void virt_pgd_alloc(struct kvm_vm *vm);
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within @vm, creates a virtual translation for the page starting
+ * at @vaddr to the page starting at @paddr.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
+
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+ uint32_t memslot);
+vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot);
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
+
+/*
+ * Create a VM with reasonable defaults
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * extra_mem_pages - The number of extra pages to add (this will
+ * decide how much extra space we will need to
+ * setup the page tables using memslot 0)
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+ void *guest_code);
+
+/* Same as vm_create_default, but can be used for more than one vcpu */
+struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages,
+ uint32_t num_percpu_pages, void *guest_code,
+ uint32_t vcpuids[]);
+
+/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */
+struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
+ uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
+ uint32_t num_percpu_pages, void *guest_code,
+ uint32_t vcpuids[]);
+
+/*
+ * Adds a vCPU with reasonable defaults (e.g. a stack)
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+void vm_xsave_req_perm(void);
+
+bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+
+unsigned int vm_get_page_size(struct kvm_vm *vm);
+unsigned int vm_get_page_shift(struct kvm_vm *vm);
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
+uint64_t vm_get_max_gfn(struct kvm_vm *vm);
+int vm_get_fd(struct kvm_vm *vm);
+
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
+unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
+unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
+static inline unsigned int
+vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+ unsigned int n;
+ n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
+#ifdef __s390x__
+ /* s390 requires 1M aligned guest sizes */
+ n = (n + 255) & ~255;
+#endif
+ return n;
+}
+
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+ uint64_t end);
+
+struct kvm_dirty_log *
+allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
+
+int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
+
+#define sync_global_to_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(_p, &(g), sizeof(g)); \
+})
+
+#define sync_global_from_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(&(g), _p, sizeof(g)); \
+})
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
+
+int vm_get_stats_fd(struct kvm_vm *vm);
+int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
+
+uint32_t guest_get_vcpuid(void);
+
+#endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
new file mode 100644
index 000000000000..dc284c6bdbc3
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V processor specific defines
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include "kvm_util.h"
+#include <linux/stringify.h>
+
+static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx,
+ uint64_t size)
+{
+ return KVM_REG_RISCV | type | idx | size;
+}
+
+#if __riscv_xlen == 64
+#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U64
+#else
+#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U32
+#endif
+
+#define RISCV_CONFIG_REG(name) __kvm_reg_id(KVM_REG_RISCV_CONFIG, \
+ KVM_REG_RISCV_CONFIG_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_CORE_REG(name) __kvm_reg_id(KVM_REG_RISCV_CORE, \
+ KVM_REG_RISCV_CORE_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_CSR_REG(name) __kvm_reg_id(KVM_REG_RISCV_CSR, \
+ KVM_REG_RISCV_CSR_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_TIMER_REG(name) __kvm_reg_id(KVM_REG_RISCV_TIMER, \
+ KVM_REG_RISCV_TIMER_REG(name), \
+ KVM_REG_SIZE_U64)
+
+static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
+ unsigned long *addr)
+{
+ struct kvm_one_reg reg;
+
+ reg.id = id;
+ reg.addr = (unsigned long)addr;
+ vcpu_get_reg(vm, vcpuid, &reg);
+}
+
+static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id,
+ unsigned long val)
+{
+ struct kvm_one_reg reg;
+
+ reg.id = id;
+ reg.addr = (unsigned long)&val;
+ vcpu_set_reg(vm, vcpuid, &reg);
+}
+
+/* L3 index Bit[47:39] */
+#define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL
+#define PGTBL_L3_INDEX_SHIFT 39
+#define PGTBL_L3_BLOCK_SHIFT 39
+#define PGTBL_L3_BLOCK_SIZE 0x0000008000000000ULL
+#define PGTBL_L3_MAP_MASK (~(PGTBL_L3_BLOCK_SIZE - 1))
+/* L2 index Bit[38:30] */
+#define PGTBL_L2_INDEX_MASK 0x0000007FC0000000ULL
+#define PGTBL_L2_INDEX_SHIFT 30
+#define PGTBL_L2_BLOCK_SHIFT 30
+#define PGTBL_L2_BLOCK_SIZE 0x0000000040000000ULL
+#define PGTBL_L2_MAP_MASK (~(PGTBL_L2_BLOCK_SIZE - 1))
+/* L1 index Bit[29:21] */
+#define PGTBL_L1_INDEX_MASK 0x000000003FE00000ULL
+#define PGTBL_L1_INDEX_SHIFT 21
+#define PGTBL_L1_BLOCK_SHIFT 21
+#define PGTBL_L1_BLOCK_SIZE 0x0000000000200000ULL
+#define PGTBL_L1_MAP_MASK (~(PGTBL_L1_BLOCK_SIZE - 1))
+/* L0 index Bit[20:12] */
+#define PGTBL_L0_INDEX_MASK 0x00000000001FF000ULL
+#define PGTBL_L0_INDEX_SHIFT 12
+#define PGTBL_L0_BLOCK_SHIFT 12
+#define PGTBL_L0_BLOCK_SIZE 0x0000000000001000ULL
+#define PGTBL_L0_MAP_MASK (~(PGTBL_L0_BLOCK_SIZE - 1))
+
+#define PGTBL_PTE_ADDR_MASK 0x003FFFFFFFFFFC00ULL
+#define PGTBL_PTE_ADDR_SHIFT 10
+#define PGTBL_PTE_RSW_MASK 0x0000000000000300ULL
+#define PGTBL_PTE_RSW_SHIFT 8
+#define PGTBL_PTE_DIRTY_MASK 0x0000000000000080ULL
+#define PGTBL_PTE_DIRTY_SHIFT 7
+#define PGTBL_PTE_ACCESSED_MASK 0x0000000000000040ULL
+#define PGTBL_PTE_ACCESSED_SHIFT 6
+#define PGTBL_PTE_GLOBAL_MASK 0x0000000000000020ULL
+#define PGTBL_PTE_GLOBAL_SHIFT 5
+#define PGTBL_PTE_USER_MASK 0x0000000000000010ULL
+#define PGTBL_PTE_USER_SHIFT 4
+#define PGTBL_PTE_EXECUTE_MASK 0x0000000000000008ULL
+#define PGTBL_PTE_EXECUTE_SHIFT 3
+#define PGTBL_PTE_WRITE_MASK 0x0000000000000004ULL
+#define PGTBL_PTE_WRITE_SHIFT 2
+#define PGTBL_PTE_READ_MASK 0x0000000000000002ULL
+#define PGTBL_PTE_READ_SHIFT 1
+#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_EXECUTE_MASK | \
+ PGTBL_PTE_WRITE_MASK | \
+ PGTBL_PTE_READ_MASK)
+#define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL
+#define PGTBL_PTE_VALID_SHIFT 0
+
+#define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE
+#define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT
+
+#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
+#define SATP_MODE_39 _AC(0x8000000000000000, UL)
+#define SATP_MODE_48 _AC(0x9000000000000000, UL)
+#define SATP_ASID_BITS 16
+#define SATP_ASID_SHIFT 44
+#define SATP_ASID_MASK _AC(0xFFFF, UL)
+
+#define SBI_EXT_EXPERIMENTAL_START 0x08000000
+#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
+
+#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
+
+struct sbiret {
+ long error;
+ long value;
+};
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5);
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
new file mode 100644
index 000000000000..9eecc9d40b79
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/kvm_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+#ifndef SELFTEST_KVM_UCALL_COMMON_H
+#define SELFTEST_KVM_UCALL_COMMON_H
+
+/* Common ucalls */
+enum {
+ UCALL_NONE,
+ UCALL_SYNC,
+ UCALL_ABORT,
+ UCALL_DONE,
+ UCALL_UNHANDLED,
+};
+
+#define UCALL_MAX_ARGS 6
+
+struct ucall {
+ uint64_t cmd;
+ uint64_t args[UCALL_MAX_ARGS];
+};
+
+void ucall_init(struct kvm_vm *vm, void *arg);
+void ucall_uninit(struct kvm_vm *vm);
+void ucall(uint64_t cmd, int nargs, ...);
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
+
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
+ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
+#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_DONE() ucall(UCALL_DONE, 0)
+#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \
+ if (!(_condition)) \
+ ucall(UCALL_ABORT, 2 + _nargs, \
+ "Failed guest assert: " \
+ _condstr, __LINE__, _args); \
+} while (0)
+
+#define GUEST_ASSERT(_condition) \
+ __GUEST_ASSERT(_condition, #_condition, 0, 0)
+
+#define GUEST_ASSERT_1(_condition, arg1) \
+ __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
+
+#define GUEST_ASSERT_2(_condition, arg1, arg2) \
+ __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
+
+#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
+ __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
+
+#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
+ __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
+
+#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+
+#endif /* SELFTEST_KVM_UCALL_COMMON_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 05e65ca1c30c..e94ba0fc67d8 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -10,8 +10,10 @@
#include <assert.h>
#include <stdint.h>
+#include <syscall.h>
#include <asm/msr-index.h>
+#include <asm/prctl.h>
#include "../kvm_util.h"
@@ -92,6 +94,21 @@ struct desc_ptr {
uint64_t address;
} __attribute__((packed));
+struct kvm_x86_state {
+ struct kvm_xsave *xsave;
+ struct kvm_vcpu_events events;
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ struct kvm_xcrs xcrs;
+ struct kvm_sregs sregs;
+ struct kvm_debugregs debugregs;
+ union {
+ struct kvm_nested_state nested;
+ char nested_[16384];
+ };
+ struct kvm_msrs msrs;
+};
+
static inline uint64_t get_desc64_base(const struct desc64 *desc)
{
return ((uint64_t)desc->base3 << 32) |
@@ -348,10 +365,10 @@ static inline unsigned long get_xmm(int n)
bool is_intel_cpu(void);
-struct kvm_x86_state;
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_x86_state *state);
+void kvm_x86_state_cleanup(struct kvm_x86_state *state);
struct kvm_msr_list *kvm_get_msr_index_list(void);
uint64_t kvm_get_feature_msr(uint64_t msr_index);
@@ -443,4 +460,11 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
/* VMX_EPT_VPID_CAP bits */
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
+#define XSTATE_XTILE_CFG_BIT 17
+#define XSTATE_XTILE_DATA_BIT 18
+
+#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
+#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT)
+#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK | \
+ XSTATE_XTILE_DATA_MASK)
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c
index fff4fc27504d..55668631d546 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c
@@ -93,3 +93,69 @@ void gic_set_eoi(unsigned int intid)
GUEST_ASSERT(gic_common_ops);
gic_common_ops->gic_write_eoir(intid);
}
+
+void gic_set_dir(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_dir(intid);
+}
+
+void gic_set_eoi_split(bool split)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_eoi_split(split);
+}
+
+void gic_set_priority_mask(uint64_t pmr)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority_mask(pmr);
+}
+
+void gic_set_priority(unsigned int intid, unsigned int prio)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority(intid, prio);
+}
+
+void gic_irq_set_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_active(intid);
+}
+
+void gic_irq_clear_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_active(intid);
+}
+
+bool gic_irq_get_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_active(intid);
+}
+
+void gic_irq_set_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_pending(intid);
+}
+
+void gic_irq_clear_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_pending(intid);
+}
+
+bool gic_irq_get_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_pending(intid);
+}
+
+void gic_irq_set_config(unsigned int intid, bool is_edge)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_config(intid, is_edge);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
index d81d739433dc..75d07313c893 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
@@ -14,6 +14,17 @@ struct gic_common_ops {
void (*gic_irq_disable)(unsigned int intid);
uint64_t (*gic_read_iar)(void);
void (*gic_write_eoir)(uint32_t irq);
+ void (*gic_write_dir)(uint32_t irq);
+ void (*gic_set_eoi_split)(bool split);
+ void (*gic_set_priority_mask)(uint64_t mask);
+ void (*gic_set_priority)(uint32_t intid, uint32_t prio);
+ void (*gic_irq_set_active)(uint32_t intid);
+ void (*gic_irq_clear_active)(uint32_t intid);
+ bool (*gic_irq_get_active)(uint32_t intid);
+ void (*gic_irq_set_pending)(uint32_t intid);
+ void (*gic_irq_clear_pending)(uint32_t intid);
+ bool (*gic_irq_get_pending)(uint32_t intid);
+ void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
};
extern const struct gic_common_ops gicv3_ops;
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
index 2dbf3339b62e..00f613c0583c 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -19,7 +19,8 @@ struct gicv3_data {
unsigned int nr_spis;
};
-#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+#define DIST_BIT (1U << 31)
enum gicv3_intid_range {
SGI_RANGE,
@@ -50,6 +51,14 @@ static void gicv3_gicr_wait_for_rwp(void *redist_base)
}
}
+static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
+{
+ if (cpu_or_dist & DIST_BIT)
+ gicv3_gicd_wait_for_rwp();
+ else
+ gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]);
+}
+
static enum gicv3_intid_range get_intid_range(unsigned int intid)
{
switch (intid) {
@@ -81,39 +90,175 @@ static void gicv3_write_eoir(uint32_t irq)
isb();
}
-static void
-gicv3_config_irq(unsigned int intid, unsigned int offset)
+static void gicv3_write_dir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_DIR_EL1);
+ isb();
+}
+
+static void gicv3_set_priority_mask(uint64_t mask)
+{
+ write_sysreg_s(mask, SYS_ICC_PMR_EL1);
+}
+
+static void gicv3_set_eoi_split(bool split)
+{
+ uint32_t val;
+
+ /* All other fields are read-only, so no need to read CTLR first. In
+ * fact, the kernel does the same.
+ */
+ val = split ? (1U << 1) : 0;
+ write_sysreg_s(val, SYS_ICC_CTLR_EL1);
+ isb();
+}
+
+uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
+{
+ void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
+ : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ return readl(base + offset);
+}
+
+void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
+{
+ void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
+ : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ writel(reg_val, base + offset);
+}
+
+uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
+{
+ return gicv3_reg_readl(cpu_or_dist, offset) & mask;
+}
+
+void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
+ uint32_t mask, uint32_t reg_val)
+{
+ uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
+
+ tmp |= (reg_val & mask);
+ gicv3_reg_writel(cpu_or_dist, offset, tmp);
+}
+
+/*
+ * We use a single offset for the distributor and redistributor maps as they
+ * have the same value in both. The only exceptions are registers that only
+ * exist in one and not the other, like GICR_WAKER that doesn't exist in the
+ * distributor map. Such registers are conveniently marked as reserved in the
+ * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
+ * marked as "Reserved" in the Distributor map.
+ */
+static void gicv3_access_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field,
+ bool write, uint32_t *val)
{
uint32_t cpu = guest_get_vcpuid();
- uint32_t mask = 1 << (intid % 32);
enum gicv3_intid_range intid_range = get_intid_range(intid);
- void *reg;
-
- /* We care about 'cpu' only for SGIs or PPIs */
- if (intid_range == SGI_RANGE || intid_range == PPI_RANGE) {
- GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
-
- reg = sgi_base_from_redist(gicv3_data.redist_base[cpu]) +
- offset;
- writel(mask, reg);
- gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu]);
- } else if (intid_range == SPI_RANGE) {
- reg = gicv3_data.dist_base + offset + (intid / 32) * 4;
- writel(mask, reg);
- gicv3_gicd_wait_for_rwp();
- } else {
- GUEST_ASSERT(0);
- }
+ uint32_t fields_per_reg, index, mask, shift;
+ uint32_t cpu_or_dist;
+
+ GUEST_ASSERT(bits_per_field <= reg_bits);
+ GUEST_ASSERT(*val < (1U << bits_per_field));
+ /* Some registers like IROUTER are 64 bit long. Those are currently not
+ * supported by readl nor writel, so just asserting here until then.
+ */
+ GUEST_ASSERT(reg_bits == 32);
+
+ fields_per_reg = reg_bits / bits_per_field;
+ index = intid % fields_per_reg;
+ shift = index * bits_per_field;
+ mask = ((1U << bits_per_field) - 1) << shift;
+
+ /* Set offset to the actual register holding intid's config. */
+ offset += (intid / fields_per_reg) * (reg_bits / 8);
+
+ cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
+
+ if (write)
+ gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
+ *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
+}
+
+static void gicv3_write_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
+{
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, true, &val);
+}
+
+static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field)
+{
+ uint32_t val;
+
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, false, &val);
+ return val;
+}
+
+static void gicv3_set_priority(uint32_t intid, uint32_t prio)
+{
+ gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
+}
+
+/* Sets the intid to be level-sensitive or edge-triggered. */
+static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
+{
+ uint32_t val;
+
+ /* N/A for private interrupts. */
+ GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
+ val = is_edge ? 2 : 0;
+ gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
+}
+
+static void gicv3_irq_enable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_disable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_set_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_active(uint32_t intid)
+{
+ return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
+}
+
+static void gicv3_irq_set_pending(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
}
-static void gicv3_irq_enable(unsigned int intid)
+static void gicv3_irq_clear_pending(uint32_t intid)
{
- gicv3_config_irq(intid, GICD_ISENABLER);
+ gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
}
-static void gicv3_irq_disable(unsigned int intid)
+static bool gicv3_irq_get_pending(uint32_t intid)
{
- gicv3_config_irq(intid, GICD_ICENABLER);
+ return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
}
static void gicv3_enable_redist(void *redist_base)
@@ -237,4 +382,15 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_disable = gicv3_irq_disable,
.gic_read_iar = gicv3_read_iar,
.gic_write_eoir = gicv3_write_eoir,
+ .gic_write_dir = gicv3_write_dir,
+ .gic_set_priority_mask = gicv3_set_priority_mask,
+ .gic_set_eoi_split = gicv3_set_eoi_split,
+ .gic_set_priority = gicv3_set_priority,
+ .gic_irq_set_active = gicv3_irq_set_active,
+ .gic_irq_clear_active = gicv3_irq_clear_active,
+ .gic_irq_get_active = gicv3_irq_get_active,
+ .gic_irq_set_pending = gicv3_irq_set_pending,
+ .gic_irq_clear_pending = gicv3_irq_clear_pending,
+ .gic_irq_get_pending = gicv3_irq_get_pending,
+ .gic_irq_set_config = gicv3_irq_set_config,
};
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index b4eeeafd2a70..9343d82519b4 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -8,6 +8,7 @@
#include <linux/compiler.h>
#include <assert.h>
+#include "guest_modes.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"
@@ -237,6 +238,7 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init
get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1);
get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1);
+ /* Configure base granule size */
switch (vm->mode) {
case VM_MODE_P52V48_4K:
TEST_FAIL("AArch64 does not support 4K sized pages "
@@ -245,25 +247,47 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init
TEST_FAIL("AArch64 does not support 4K sized pages "
"with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
+ case VM_MODE_P48V48_64K:
+ case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ break;
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
break;
case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ /* Configure output size */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_64K:
+ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ break;
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P48V48_16K:
case VM_MODE_P48V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
break;
case VM_MODE_P40V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
- break;
+ case VM_MODE_P40V48_16K:
case VM_MODE_P40V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
break;
+ case VM_MODE_P36V48_4K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V48_64K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
@@ -432,3 +456,47 @@ uint32_t guest_get_vcpuid(void)
{
return read_sysreg(tpidr_el1);
}
+
+void aarch64_get_supported_page_sizes(uint32_t ipa,
+ bool *ps4k, bool *ps16k, bool *ps64k)
+{
+ struct kvm_vcpu_init preferred_init;
+ int kvm_fd, vm_fd, vcpu_fd, err;
+ uint64_t val;
+ struct kvm_one_reg reg = {
+ .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
+ .addr = (uint64_t)&val,
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, ipa);
+ TEST_ASSERT(vm_fd >= 0, "Can't create VM");
+
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ TEST_ASSERT(vcpu_fd >= 0, "Can't create vcpu");
+
+ err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
+ TEST_ASSERT(err == 0, "Can't get target");
+ err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
+ TEST_ASSERT(err == 0, "Can't get init vcpu");
+
+ err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ TEST_ASSERT(err == 0, "Can't get MMFR0");
+
+ *ps4k = ((val >> 28) & 0xf) != 0xf;
+ *ps64k = ((val >> 24) & 0xf) == 0;
+ *ps16k = ((val >> 20) & 0xf) != 0;
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+}
+
+/*
+ * arm64 doesn't have a true default mode, so start by computing the
+ * available IPA space and page sizes early.
+ */
+void __attribute__((constructor)) init_guest_modes(void)
+{
+ guest_modes_append_default();
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
index b9b271ff520d..b3a0fca0d780 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -5,11 +5,14 @@
#include <linux/kvm.h>
#include <linux/sizes.h>
+#include <asm/kvm_para.h>
#include <asm/kvm.h>
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
/*
* vGIC-v3 default host setup
@@ -28,7 +31,7 @@
* redistributor regions of the guest. Since it depends on the number of
* vCPUs for the VM, it must be called after all the vCPUs have been created.
*/
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
uint64_t gicd_base_gpa, uint64_t gicr_base_gpa)
{
int gic_fd;
@@ -50,6 +53,13 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
/* Distributor setup */
gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+
+ kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS,
+ 0, &nr_irqs, true);
+
+ kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+
kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa, true);
nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
@@ -68,3 +78,94 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
return gic_fd;
}
+
+/* should only work for level sensitive interrupts */
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ uint64_t attr = 32 * (intid / 32);
+ uint64_t index = intid % 32;
+ uint64_t val;
+ int ret;
+
+ ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val, false);
+ if (ret != 0)
+ return ret;
+
+ val |= 1U << index;
+ ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val, true);
+ return ret;
+}
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+
+ TEST_ASSERT(ret == 0, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
+
+ TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
+ "doesn't allow injecting SGIs. There's no mask for it.");
+
+ if (INTID_IS_PPI(intid))
+ irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+ else
+ irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+ return _kvm_irq_line(vm, irq, level);
+}
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ int ret = _kvm_arm_irq_line(vm, intid, level);
+
+ TEST_ASSERT(ret == 0, "KVM_IRQ_LINE failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+static void vgic_poke_irq(int gic_fd, uint32_t intid,
+ uint32_t vcpu, uint64_t reg_off)
+{
+ uint64_t reg = intid / 32;
+ uint64_t index = intid % 32;
+ uint64_t attr = reg_off + reg * 4;
+ uint64_t val;
+ bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
+
+ /* Check that the addr part of the attr is within 32 bits. */
+ assert(attr <= KVM_DEV_ARM_VGIC_OFFSET_MASK);
+
+ uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+ : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
+
+ if (intid_is_private) {
+ /* TODO: only vcpu 0 implemented for now. */
+ assert(vcpu == 0);
+ attr += SZ_64K;
+ }
+
+ /* All calls will succeed, even with invalid intid's, as long as the
+ * addr part of the attr is within 32 bits (checked above). An invalid
+ * intid will just make the read/writes point to above the intended
+ * register space (i.e., ICPENDR after ISPENDR).
+ */
+ kvm_device_access(gic_fd, group, attr, &val, false);
+ val |= 1ULL << index;
+ kvm_device_access(gic_fd, group, attr, &val, true);
+}
+
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
+}
+
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
+}
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
index c330f414ef96..8784013b747c 100644
--- a/tools/testing/selftests/kvm/lib/guest_modes.c
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -4,22 +4,59 @@
*/
#include "guest_modes.h"
+#ifdef __aarch64__
+#include "processor.h"
+enum vm_guest_mode vm_mode_default;
+#endif
+
struct guest_mode guest_modes[NUM_VM_MODES];
void guest_modes_append_default(void)
{
+#ifndef __aarch64__
guest_mode_append(VM_MODE_DEFAULT, true, true);
-
-#ifdef __aarch64__
- guest_mode_append(VM_MODE_P40V48_64K, true, true);
+#else
{
unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+ bool ps4k, ps16k, ps64k;
+ int i;
+
+ aarch64_get_supported_page_sizes(limit, &ps4k, &ps16k, &ps64k);
+
+ vm_mode_default = NUM_VM_MODES;
+
if (limit >= 52)
- guest_mode_append(VM_MODE_P52V48_64K, true, true);
+ guest_mode_append(VM_MODE_P52V48_64K, ps64k, ps64k);
if (limit >= 48) {
- guest_mode_append(VM_MODE_P48V48_4K, true, true);
- guest_mode_append(VM_MODE_P48V48_64K, true, true);
+ guest_mode_append(VM_MODE_P48V48_4K, ps4k, ps4k);
+ guest_mode_append(VM_MODE_P48V48_16K, ps16k, ps16k);
+ guest_mode_append(VM_MODE_P48V48_64K, ps64k, ps64k);
+ }
+ if (limit >= 40) {
+ guest_mode_append(VM_MODE_P40V48_4K, ps4k, ps4k);
+ guest_mode_append(VM_MODE_P40V48_16K, ps16k, ps16k);
+ guest_mode_append(VM_MODE_P40V48_64K, ps64k, ps64k);
+ if (ps4k)
+ vm_mode_default = VM_MODE_P40V48_4K;
}
+ if (limit >= 36) {
+ guest_mode_append(VM_MODE_P36V48_4K, ps4k, ps4k);
+ guest_mode_append(VM_MODE_P36V48_16K, ps16k, ps16k);
+ guest_mode_append(VM_MODE_P36V48_64K, ps64k, ps64k);
+ guest_mode_append(VM_MODE_P36V47_16K, ps16k, ps16k);
+ }
+
+ /*
+ * Pick the first supported IPA size if the default
+ * isn't available.
+ */
+ for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) {
+ if (guest_modes[i].supported && guest_modes[i].enabled)
+ vm_mode_default = i;
+ }
+
+ TEST_ASSERT(vm_mode_default != NUM_VM_MODES,
+ "No supported mode!");
}
#endif
#ifdef __s390x__
@@ -38,6 +75,16 @@ void guest_modes_append_default(void)
guest_mode_append(VM_MODE_P47V64_4K, true, true);
}
#endif
+#ifdef __riscv
+ {
+ unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS);
+
+ if (sz >= 52)
+ guest_mode_append(VM_MODE_P52V48_4K, true, true);
+ if (sz >= 48)
+ guest_mode_append(VM_MODE_P48V48_4K, true, true);
+ }
+#endif
}
void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 53d2b5d04b82..4a645dc77f34 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -85,6 +85,33 @@ int kvm_check_cap(long cap)
return ret;
}
+/* VM Check Capability
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return:
+ * On success, the Value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
+ *
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+int vm_check_cap(struct kvm_vm *vm, long cap)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap);
+ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n"
+ " rc: %i errno: %i", ret, errno);
+
+ return ret;
+}
+
/* VM Enable Capability
*
* Input Args:
@@ -166,12 +193,18 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages",
[VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages",
[VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages",
+ [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages",
[VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages",
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
+ [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
[VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
[VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
+ [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
+ [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
+ [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
+ [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
@@ -185,12 +218,18 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
[VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 },
[VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 },
[VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 },
+ [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 },
[VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 },
[VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
+ [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
[VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
[VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
[VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
[VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
+ [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
+ [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
+ [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
+ [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
@@ -252,9 +291,19 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
vm->pgtable_levels = 3;
break;
case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
vm->pgtable_levels = 4;
break;
case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
+ vm->pgtable_levels = 3;
+ break;
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ vm->pgtable_levels = 4;
+ break;
+ case VM_MODE_P36V47_16K:
vm->pgtable_levels = 3;
break;
case VM_MODE_PXXV48_4K:
@@ -344,6 +393,11 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
struct kvm_vm *vm;
int i;
+ /*
+ * Permission needs to be requested before KVM_SET_CPUID2.
+ */
+ vm_xsave_req_perm();
+
/* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
@@ -2087,6 +2141,78 @@ int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
}
/*
+ * IRQ related functions.
+ */
+
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
+{
+ struct kvm_irq_level irq_level = {
+ .irq = irq,
+ .level = level,
+ };
+
+ return _vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
+}
+
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
+{
+ int ret = _kvm_irq_line(vm, irq, level);
+
+ TEST_ASSERT(ret >= 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", ret, errno);
+}
+
+struct kvm_irq_routing *kvm_gsi_routing_create(void)
+{
+ struct kvm_irq_routing *routing;
+ size_t size;
+
+ size = sizeof(struct kvm_irq_routing);
+ /* Allocate space for the max number of entries: this wastes 196 KBs. */
+ size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
+ routing = calloc(1, size);
+ assert(routing);
+
+ return routing;
+}
+
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin)
+{
+ int i;
+
+ assert(routing);
+ assert(routing->nr < KVM_MAX_IRQ_ROUTES);
+
+ i = routing->nr;
+ routing->entries[i].gsi = gsi;
+ routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+ routing->entries[i].flags = 0;
+ routing->entries[i].u.irqchip.irqchip = 0;
+ routing->entries[i].u.irqchip.pin = pin;
+ routing->nr++;
+}
+
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
+{
+ int ret;
+
+ assert(routing);
+ ret = ioctl(vm_get_fd(vm), KVM_SET_GSI_ROUTING, routing);
+ free(routing);
+
+ return ret;
+}
+
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
+{
+ int ret;
+
+ ret = _kvm_gsi_routing_write(vm, routing);
+ TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/*
* VM Dump
*
* Input Args:
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
new file mode 100644
index 000000000000..d377f2603d98
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V code
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/compiler.h>
+#include <assert.h>
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+ return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
+ PGTBL_PAGE_SIZE_SHIFT;
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return PGTBL_PAGE_SIZE / sizeof(uint64_t);
+}
+
+static uint64_t pte_index_mask[] = {
+ PGTBL_L0_INDEX_MASK,
+ PGTBL_L1_INDEX_MASK,
+ PGTBL_L2_INDEX_MASK,
+ PGTBL_L3_INDEX_MASK,
+};
+
+static uint32_t pte_index_shift[] = {
+ PGTBL_L0_INDEX_SHIFT,
+ PGTBL_L1_INDEX_SHIFT,
+ PGTBL_L2_INDEX_SHIFT,
+ PGTBL_L3_INDEX_SHIFT,
+};
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ TEST_ASSERT(level > -1,
+ "Negative page table level (%d) not possible", level);
+ TEST_ASSERT(level < vm->pgtable_levels,
+ "Invalid page table level (%d)", level);
+
+ return (gva & pte_index_mask[level]) >> pte_index_shift[level];
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm)
+{
+ if (!vm->pgd_created) {
+ vm_paddr_t paddr = vm_phy_pages_alloc(vm,
+ page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+ vm->pgd = paddr;
+ vm->pgd_created = true;
+ }
+}
+
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint64_t *ptep, next_ppn;
+ int level = vm->pgtable_levels - 1;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
+ if (!*ptep) {
+ next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, vaddr, level) * 8;
+ if (!*ptep && level > 0) {
+ next_ppn = vm_alloc_page_table(vm) >>
+ PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+ }
+
+ paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK;
+}
+
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+ int level = vm->pgtable_levels - 1;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+ }
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d",
+ gva, level);
+ exit(1);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+ uint64_t page, int level)
+{
+#ifdef DEBUG
+ static const char *const type[] = { "pte", "pmd", "pud", "p4d"};
+ uint64_t pte, *ptep;
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "",
+ type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+#endif
+}
+
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level = vm->pgtable_levels - 1;
+ uint64_t pgd, *ptep;
+
+ if (!vm->pgd_created)
+ return;
+
+ for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
+ ptep = addr_gpa2hva(vm, pgd);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "",
+ pgd, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+}
+
+void riscv_vcpu_mmu_setup(struct kvm_vm *vm, int vcpuid)
+{
+ unsigned long satp;
+
+ /*
+ * The RISC-V Sv48 MMU mode supports 56-bit physical address
+ * for 48-bit virtual address with 4KB last level page size.
+ */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
+ satp |= SATP_MODE_48;
+
+ set_reg(vm, vcpuid, RISCV_CSR_REG(satp), satp);
+}
+
+void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+{
+ struct kvm_riscv_core core;
+
+ get_reg(vm, vcpuid, RISCV_CORE_REG(mode), &core.mode);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc), &core.regs.pc);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.ra), &core.regs.ra);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp), &core.regs.sp);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), &core.regs.gp);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.tp), &core.regs.tp);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t0), &core.regs.t0);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t1), &core.regs.t1);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t2), &core.regs.t2);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s0), &core.regs.s0);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s1), &core.regs.s1);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a0), &core.regs.a0);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a1), &core.regs.a1);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a2), &core.regs.a2);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a3), &core.regs.a3);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a4), &core.regs.a4);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a5), &core.regs.a5);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a6), &core.regs.a6);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a7), &core.regs.a7);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s2), &core.regs.s2);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s3), &core.regs.s3);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s4), &core.regs.s4);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s5), &core.regs.s5);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s6), &core.regs.s6);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s7), &core.regs.s7);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s8), &core.regs.s8);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s9), &core.regs.s9);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s10), &core.regs.s10);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s11), &core.regs.s11);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t3), &core.regs.t3);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t4), &core.regs.t4);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t5), &core.regs.t5);
+ get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t6), &core.regs.t6);
+
+ fprintf(stream,
+ " MODE: 0x%lx\n", core.mode);
+ fprintf(stream,
+ " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n",
+ core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp);
+ fprintf(stream,
+ " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n",
+ core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2);
+ fprintf(stream,
+ " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n",
+ core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1);
+ fprintf(stream,
+ " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n",
+ core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5);
+ fprintf(stream,
+ " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n",
+ core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3);
+ fprintf(stream,
+ " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n",
+ core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7);
+ fprintf(stream,
+ " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n",
+ core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11);
+ fprintf(stream,
+ " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n",
+ core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
+}
+
+static void guest_hang(void)
+{
+ while (1)
+ ;
+}
+
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+ int r;
+ size_t stack_size = vm->page_size == 4096 ?
+ DEFAULT_STACK_PGS * vm->page_size :
+ vm->page_size;
+ unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_RISCV_GUEST_STACK_VADDR_MIN);
+ unsigned long current_gp = 0;
+ struct kvm_mp_state mps;
+
+ vm_vcpu_add(vm, vcpuid);
+ riscv_vcpu_mmu_setup(vm, vcpuid);
+
+ /*
+ * With SBI HSM support in KVM RISC-V, all secondary VCPUs are
+ * powered-off by default so we ensure that all secondary VCPUs
+ * are powered-on using KVM_SET_MP_STATE ioctl().
+ */
+ mps.mp_state = KVM_MP_STATE_RUNNABLE;
+ r = _vcpu_ioctl(vm, vcpuid, KVM_SET_MP_STATE, &mps);
+ TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r);
+
+ /* Setup global pointer of guest to be same as the host */
+ asm volatile (
+ "add %0, gp, zero" : "=r" (current_gp) : : "memory");
+ set_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), current_gp);
+
+ /* Setup stack pointer and program counter of guest */
+ set_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp),
+ stack_vaddr + stack_size);
+ set_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc),
+ (unsigned long)guest_code);
+
+ /* Setup default exception vector of guest */
+ set_reg(vm, vcpuid, RISCV_CSR_REG(stvec),
+ (unsigned long)guest_hang);
+}
+
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+ va_list ap;
+ uint64_t id = RISCV_CORE_REG(regs.a0);
+ int i;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ " num: %u\n", num);
+
+ va_start(ap, num);
+
+ for (i = 0; i < num; i++) {
+ switch (i) {
+ case 0:
+ id = RISCV_CORE_REG(regs.a0);
+ break;
+ case 1:
+ id = RISCV_CORE_REG(regs.a1);
+ break;
+ case 2:
+ id = RISCV_CORE_REG(regs.a2);
+ break;
+ case 3:
+ id = RISCV_CORE_REG(regs.a3);
+ break;
+ case 4:
+ id = RISCV_CORE_REG(regs.a4);
+ break;
+ case 5:
+ id = RISCV_CORE_REG(regs.a5);
+ break;
+ case 6:
+ id = RISCV_CORE_REG(regs.a6);
+ break;
+ case 7:
+ id = RISCV_CORE_REG(regs.a7);
+ break;
+ };
+ set_reg(vm, vcpuid, id, va_arg(ap, uint64_t));
+ }
+
+ va_end(ap);
+}
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+}
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
new file mode 100644
index 000000000000..9e42d8248fa6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/kvm.h>
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+
+void ucall_init(struct kvm_vm *vm, void *arg)
+{
+}
+
+void ucall_uninit(struct kvm_vm *vm)
+{
+}
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5)
+{
+ register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
+ register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
+ register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
+ register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
+ register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
+ register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
+ register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
+ register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
+ struct sbiret ret;
+
+ asm volatile (
+ "ecall"
+ : "+r" (a0), "+r" (a1)
+ : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
+ : "memory");
+ ret.error = a0;
+ ret.value = a1;
+
+ return ret;
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+ struct ucall uc = {
+ .cmd = cmd,
+ };
+ va_list va;
+ int i;
+
+ nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+ uc.args[i] = va_arg(va, uint64_t);
+ va_end(va);
+
+ sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, 0, (vm_vaddr_t)&uc,
+ 0, 0, 0, 0, 0);
+}
+
+uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+{
+ struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct ucall ucall = {};
+
+ if (uc)
+ memset(uc, 0, sizeof(*uc));
+
+ if (run->exit_reason == KVM_EXIT_RISCV_SBI &&
+ run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT &&
+ run->riscv_sbi.function_id == 0) {
+ memcpy(&ucall, addr_gva2hva(vm, run->riscv_sbi.args[0]),
+ sizeof(ucall));
+
+ vcpu_run_complete_io(vm, vcpu_id);
+ if (uc)
+ memcpy(uc, &ucall, sizeof(ucall));
+ }
+
+ return ucall.cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index eef7b34756d5..babb0f28575c 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -650,6 +650,45 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
vcpu_sregs_set(vm, vcpuid, &sregs);
}
+#define CPUID_XFD_BIT (1 << 4)
+static bool is_xfd_supported(void)
+{
+ int eax, ebx, ecx, edx;
+ const int leaf = 0xd, subleaf = 0x1;
+
+ __asm__ __volatile__(
+ "cpuid"
+ : /* output */ "=a"(eax), "=b"(ebx),
+ "=c"(ecx), "=d"(edx)
+ : /* input */ "0"(leaf), "2"(subleaf));
+
+ return !!(eax & CPUID_XFD_BIT);
+}
+
+void vm_xsave_req_perm(void)
+{
+ unsigned long bitmask;
+ long rc;
+
+ if (!is_xfd_supported())
+ return;
+
+ rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
+ XSTATE_XTILE_DATA_BIT);
+ /*
+ * The older kernel version(<5.15) can't support
+ * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
+ */
+ if (rc)
+ return;
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+ TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+ TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK,
+ "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
+ bitmask);
+}
+
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
struct kvm_mp_state mp_state;
@@ -1017,21 +1056,6 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
sregs_dump(stream, &sregs, indent + 4);
}
-struct kvm_x86_state {
- struct kvm_vcpu_events events;
- struct kvm_mp_state mp_state;
- struct kvm_regs regs;
- struct kvm_xsave xsave;
- struct kvm_xcrs xcrs;
- struct kvm_sregs sregs;
- struct kvm_debugregs debugregs;
- union {
- struct kvm_nested_state nested;
- char nested_[16384];
- };
- struct kvm_msrs msrs;
-};
-
static int kvm_get_num_msrs_fd(int kvm_fd)
{
struct kvm_msr_list nmsrs;
@@ -1069,6 +1093,22 @@ struct kvm_msr_list *kvm_get_msr_index_list(void)
return list;
}
+static int vcpu_save_xsave_state(struct kvm_vm *vm, struct vcpu *vcpu,
+ struct kvm_x86_state *state)
+{
+ int size;
+
+ size = vm_check_cap(vm, KVM_CAP_XSAVE2);
+ if (!size)
+ size = sizeof(struct kvm_xsave);
+
+ state->xsave = malloc(size);
+ if (size == sizeof(struct kvm_xsave))
+ return ioctl(vcpu->fd, KVM_GET_XSAVE, state->xsave);
+ else
+ return ioctl(vcpu->fd, KVM_GET_XSAVE2, state->xsave);
+}
+
struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
@@ -1112,7 +1152,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
r);
- r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
+ r = vcpu_save_xsave_state(vm, vcpu, state);
TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
r);
@@ -1157,24 +1197,25 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int r;
- r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
+ r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
r);
+ r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
+ TEST_ASSERT(r == state->msrs.nmsrs,
+ "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
+ r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
+
if (kvm_check_cap(KVM_CAP_XCRS)) {
r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
r);
}
- r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
+ r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
r);
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
- TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
- r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
-
r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
r);
@@ -1198,6 +1239,12 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s
}
}
+void kvm_x86_state_cleanup(struct kvm_x86_state *state)
+{
+ free(state->xsave);
+ free(state);
+}
+
bool is_intel_cpu(void)
{
int eax, ebx, ecx, edx;
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
new file mode 100644
index 000000000000..523c1e99ed64
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * amx tests
+ *
+ * Copyright (C) 2021, Intel, Inc.
+ *
+ * Tests for amx #NM exception and save/restore.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define VCPU_ID 0
+#define X86_FEATURE_XSAVE (1 << 26)
+#define X86_FEATURE_OSXSAVE (1 << 27)
+
+#define PAGE_SIZE (1 << 12)
+#define NUM_TILES 8
+#define TILE_SIZE 1024
+#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
+
+/* Tile configuration associated: */
+#define MAX_TILES 16
+#define RESERVED_BYTES 14
+
+#define XFEATURE_XTILECFG 17
+#define XFEATURE_XTILEDATA 18
+#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG)
+#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA)
+#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
+
+#define TILE_CPUID 0x1d
+#define XSTATE_CPUID 0xd
+#define TILE_PALETTE_CPUID_SUBLEAVE 0x1
+#define XSTATE_USER_STATE_SUBLEAVE 0x0
+
+#define XSAVE_HDR_OFFSET 512
+
+struct xsave_data {
+ u8 area[XSAVE_SIZE];
+} __aligned(64);
+
+struct tile_config {
+ u8 palette_id;
+ u8 start_row;
+ u8 reserved[RESERVED_BYTES];
+ u16 colsb[MAX_TILES];
+ u8 rows[MAX_TILES];
+};
+
+struct tile_data {
+ u8 data[NUM_TILES * TILE_SIZE];
+};
+
+struct xtile_info {
+ u16 bytes_per_tile;
+ u16 bytes_per_row;
+ u16 max_names;
+ u16 max_rows;
+ u32 xsave_offset;
+ u32 xsave_size;
+};
+
+static struct xtile_info xtile;
+
+static inline u64 __xgetbv(u32 index)
+{
+ u32 eax, edx;
+
+ asm volatile("xgetbv;"
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax + ((u64)edx << 32);
+}
+
+static inline void __xsetbv(u32 index, u64 value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
+static inline void __ldtilecfg(void *cfg)
+{
+ asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
+ : : "a"(cfg));
+}
+
+static inline void __tileloadd(void *tile)
+{
+ asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
+ : : "a"(tile), "d"(0));
+}
+
+static inline void __tilerelease(void)
+{
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
+}
+
+static inline void __xsavec(struct xsave_data *data, uint64_t rfbm)
+{
+ uint32_t rfbm_lo = rfbm;
+ uint32_t rfbm_hi = rfbm >> 32;
+
+ asm volatile("xsavec (%%rdi)"
+ : : "D" (data), "a" (rfbm_lo), "d" (rfbm_hi)
+ : "memory");
+}
+
+static inline void check_cpuid_xsave(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ eax = 1;
+ ecx = 0;
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (!(ecx & X86_FEATURE_XSAVE))
+ GUEST_ASSERT(!"cpuid: no CPU xsave support!");
+ if (!(ecx & X86_FEATURE_OSXSAVE))
+ GUEST_ASSERT(!"cpuid: no OS xsave support!");
+}
+
+static bool check_xsave_supports_xtile(void)
+{
+ return __xgetbv(0) & XFEATURE_MASK_XTILE;
+}
+
+static bool enum_xtile_config(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ eax = TILE_CPUID;
+ ecx = TILE_PALETTE_CPUID_SUBLEAVE;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (!eax || !ebx || !ecx)
+ return false;
+
+ xtile.max_names = ebx >> 16;
+ if (xtile.max_names < NUM_TILES)
+ return false;
+
+ xtile.bytes_per_tile = eax >> 16;
+ if (xtile.bytes_per_tile < TILE_SIZE)
+ return false;
+
+ xtile.bytes_per_row = ebx;
+ xtile.max_rows = ecx;
+
+ return true;
+}
+
+static bool enum_xsave_tile(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ eax = XSTATE_CPUID;
+ ecx = XFEATURE_XTILEDATA;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (!eax || !ebx)
+ return false;
+
+ xtile.xsave_offset = ebx;
+ xtile.xsave_size = eax;
+
+ return true;
+}
+
+static bool check_xsave_size(void)
+{
+ u32 eax, ebx, ecx, edx;
+ bool valid = false;
+
+ eax = XSTATE_CPUID;
+ ecx = XSTATE_USER_STATE_SUBLEAVE;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ if (ebx && ebx <= XSAVE_SIZE)
+ valid = true;
+
+ return valid;
+}
+
+static bool check_xtile_info(void)
+{
+ bool ret = false;
+
+ if (!check_xsave_size())
+ return ret;
+
+ if (!enum_xsave_tile())
+ return ret;
+
+ if (!enum_xtile_config())
+ return ret;
+
+ if (sizeof(struct tile_data) >= xtile.xsave_size)
+ ret = true;
+
+ return ret;
+}
+
+static void set_tilecfg(struct tile_config *cfg)
+{
+ int i;
+
+ /* Only palette id 1 */
+ cfg->palette_id = 1;
+ for (i = 0; i < xtile.max_names; i++) {
+ cfg->colsb[i] = xtile.bytes_per_row;
+ cfg->rows[i] = xtile.max_rows;
+ }
+}
+
+static void set_xstatebv(void *data, uint64_t bv)
+{
+ *(uint64_t *)(data + XSAVE_HDR_OFFSET) = bv;
+}
+
+static u64 get_xstatebv(void *data)
+{
+ return *(u64 *)(data + XSAVE_HDR_OFFSET);
+}
+
+static void init_regs(void)
+{
+ uint64_t cr4, xcr0;
+
+ /* turn on CR4.OSXSAVE */
+ cr4 = get_cr4();
+ cr4 |= X86_CR4_OSXSAVE;
+ set_cr4(cr4);
+
+ xcr0 = __xgetbv(0);
+ xcr0 |= XFEATURE_MASK_XTILE;
+ __xsetbv(0x0, xcr0);
+}
+
+static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
+ struct tile_data *tiledata,
+ struct xsave_data *xsave_data)
+{
+ init_regs();
+ check_cpuid_xsave();
+ GUEST_ASSERT(check_xsave_supports_xtile());
+ GUEST_ASSERT(check_xtile_info());
+
+ /* check xtile configs */
+ GUEST_ASSERT(xtile.xsave_offset == 2816);
+ GUEST_ASSERT(xtile.xsave_size == 8192);
+ GUEST_ASSERT(xtile.max_names == 8);
+ GUEST_ASSERT(xtile.bytes_per_tile == 1024);
+ GUEST_ASSERT(xtile.bytes_per_row == 64);
+ GUEST_ASSERT(xtile.max_rows == 16);
+ GUEST_SYNC(1);
+
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(2);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ GUEST_SYNC(3);
+ /* Check save/restore when trap to userspace */
+ __tileloadd(tiledata);
+ GUEST_SYNC(4);
+ __tilerelease();
+ GUEST_SYNC(5);
+ /* bit 18 not in the XCOMP_BV after xsavec() */
+ set_xstatebv(xsave_data, XFEATURE_MASK_XTILEDATA);
+ __xsavec(xsave_data, XFEATURE_MASK_XTILEDATA);
+ GUEST_ASSERT((get_xstatebv(xsave_data) & XFEATURE_MASK_XTILEDATA) == 0);
+
+ /* xfd=0x40000, disable amx tiledata */
+ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILEDATA);
+ GUEST_SYNC(6);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILEDATA);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ /* Trigger #NM exception */
+ __tileloadd(tiledata);
+ GUEST_SYNC(10);
+
+ GUEST_DONE();
+}
+
+void guest_nm_handler(struct ex_regs *regs)
+{
+ /* Check if #NM is triggered by XFEATURE_MASK_XTILEDATA */
+ GUEST_SYNC(7);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA);
+ GUEST_SYNC(8);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILEDATA);
+ /* Clear xfd_err */
+ wrmsr(MSR_IA32_XFD_ERR, 0);
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(9);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_cpuid_entry2 *entry;
+ struct kvm_regs regs1, regs2;
+ bool amx_supported = false;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_x86_state *state;
+ int xsave_restore_size = 0;
+ vm_vaddr_t amx_cfg, tiledata, xsavedata;
+ struct ucall uc;
+ u32 amx_offset;
+ int stage, ret;
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ entry = kvm_get_supported_cpuid_entry(1);
+ if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+ print_skip("XSAVE feature not supported");
+ exit(KSFT_SKIP);
+ }
+
+ if (kvm_get_cpuid_max_basic() >= 0xd) {
+ entry = kvm_get_supported_cpuid_index(0xd, 0);
+ amx_supported = entry && !!(entry->eax & XFEATURE_MASK_XTILE);
+ if (!amx_supported) {
+ print_skip("AMX is not supported by the vCPU (eax=0x%x)", entry->eax);
+ exit(KSFT_SKIP);
+ }
+ /* Get xsave/restore max size */
+ xsave_restore_size = entry->ecx;
+ }
+
+ run = vcpu_state(vm, VCPU_ID);
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ /* Register #NM handler */
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
+
+ /* amx cfg for guest_code */
+ amx_cfg = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
+
+ /* amx tiledata for guest_code */
+ tiledata = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
+
+ /* xsave data for guest_code */
+ xsavedata = vm_vaddr_alloc_pages(vm, 3);
+ memset(addr_gva2hva(vm, xsavedata), 0, 3 * getpagesize());
+ vcpu_args_set(vm, VCPU_ID, 3, amx_cfg, tiledata, xsavedata);
+
+ for (stage = 1; ; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Stage %d: unexpected exit reason: %u (%s),\n",
+ stage, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ switch (uc.args[1]) {
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
+ break;
+ case 4:
+ case 10:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+
+ /* Compacted mode, get amx offset by xsave area
+ * size subtract 8K amx size.
+ */
+ amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+ state = vcpu_save_state(vm, VCPU_ID);
+ void *amx_start = (void *)state->xsave + amx_offset;
+ void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
+ /* Only check TMM0 register, 1 tile */
+ ret = memcmp(amx_start, tiles_data, TILE_SIZE);
+ TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret);
+ kvm_x86_state_cleanup(state);
+ break;
+ case 9:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ fprintf(stderr, "UCALL_DONE\n");
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_load_state(vm, VCPU_ID, state);
+ run = vcpu_state(vm, VCPU_ID);
+ kvm_x86_state_cleanup(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 2b46dcca86a8..4c7841dfd481 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -129,7 +129,7 @@ static void save_restore_vm(struct kvm_vm *vm)
vcpu_set_hv_cpuid(vm, VCPU_ID);
vcpu_enable_evmcs(vm, VCPU_ID);
vcpu_load_state(vm, VCPU_ID, state);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, &regs2);
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
index 29b18d565cf4..80056bbbb003 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -21,7 +21,7 @@
#define NR_LOCK_TESTING_THREADS 3
#define NR_LOCK_TESTING_ITERATIONS 10000
-static void sev_ioctl(int vm_fd, int cmd_id, void *data)
+static int __sev_ioctl(int vm_fd, int cmd_id, void *data, __u32 *fw_error)
{
struct kvm_sev_cmd cmd = {
.id = cmd_id,
@@ -31,9 +31,19 @@ static void sev_ioctl(int vm_fd, int cmd_id, void *data)
int ret;
ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
- TEST_ASSERT((ret == 0 || cmd.error == SEV_RET_SUCCESS),
+ *fw_error = cmd.error;
+ return ret;
+}
+
+static void sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+ int ret;
+ __u32 fw_error;
+
+ ret = __sev_ioctl(vm_fd, cmd_id, data, &fw_error);
+ TEST_ASSERT(ret == 0 && fw_error == SEV_RET_SUCCESS,
"%d failed: return code: %d, errno: %d, fw error: %d",
- cmd_id, ret, errno, cmd.error);
+ cmd_id, ret, errno, fw_error);
}
static struct kvm_vm *sev_vm_create(bool es)
@@ -225,12 +235,45 @@ static void sev_mirror_create(int dst_fd, int src_fd)
TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno);
}
+static void verify_mirror_allowed_cmds(int vm_fd)
+{
+ struct kvm_sev_guest_status status;
+
+ for (int cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+ int ret;
+ __u32 fw_error;
+
+ /*
+ * These commands are allowed for mirror VMs, all others are
+ * not.
+ */
+ switch (cmd_id) {
+ case KVM_SEV_LAUNCH_UPDATE_VMSA:
+ case KVM_SEV_GUEST_STATUS:
+ case KVM_SEV_DBG_DECRYPT:
+ case KVM_SEV_DBG_ENCRYPT:
+ continue;
+ default:
+ break;
+ }
+
+ /*
+ * These commands should be disallowed before the data
+ * parameter is examined so NULL is OK here.
+ */
+ ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able call command: %d. ret: %d, errno: %d\n",
+ cmd_id, ret, errno);
+ }
+
+ sev_ioctl(vm_fd, KVM_SEV_GUEST_STATUS, &status);
+}
+
static void test_sev_mirror(bool es)
{
struct kvm_vm *src_vm, *dst_vm;
- struct kvm_sev_launch_start start = {
- .policy = es ? SEV_POLICY_ES : 0
- };
int i;
src_vm = sev_vm_create(es);
@@ -241,10 +284,12 @@ static void test_sev_mirror(bool es)
/* Check that we can complete creation of the mirror VM. */
for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
vm_vcpu_add(dst_vm, i);
- sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_START, &start);
+
if (es)
sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+ verify_mirror_allowed_cmds(dst_vm->fd);
+
kvm_vm_free(src_vm);
kvm_vm_free(dst_vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index d0fe2fdce58c..2da8eb8e2d96 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -212,7 +212,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
}
done:
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 32854c1462ad..2e0a92da8ff5 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -218,7 +218,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, &regs2);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index a07480aed397..ff92e25b6f1e 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -244,7 +244,7 @@ int main(int argc, char *argv[])
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);
- free(state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
vcpu_regs_get(vm, VCPU_ID, &regs2);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index a0699f00b3d6..478e0ae8b93e 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -14,6 +14,9 @@
#include <stdint.h>
#include <time.h>
#include <sched.h>
+#include <signal.h>
+
+#include <sys/eventfd.h>
#define VCPU_ID 5
@@ -22,10 +25,15 @@
#define SHINFO_REGION_SLOT 10
#define PAGE_SIZE 4096
+#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (2 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT 11
+
+#define SHINFO_ADDR (SHINFO_REGION_GPA)
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
+#define SHINFO_VADDR (SHINFO_REGION_GVA)
#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
@@ -73,15 +81,37 @@ struct vcpu_info {
struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */
+struct shared_info {
+ struct vcpu_info vcpu_info[32];
+ unsigned long evtchn_pending[64];
+ unsigned long evtchn_mask[64];
+ struct pvclock_wall_clock wc;
+ uint32_t wc_sec_hi;
+ /* arch_shared_info here */
+};
+
#define RUNSTATE_running 0
#define RUNSTATE_runnable 1
#define RUNSTATE_blocked 2
#define RUNSTATE_offline 3
+static const char *runstate_names[] = {
+ "running",
+ "runnable",
+ "blocked",
+ "offline"
+};
+
+struct {
+ struct kvm_irq_routing info;
+ struct kvm_irq_routing_entry entries[2];
+} irq_routes;
+
static void evtchn_handler(struct ex_regs *regs)
{
struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
vi->evtchn_upcall_pending = 0;
+ vi->evtchn_pending_sel = 0;
GUEST_SYNC(0x20);
}
@@ -127,7 +157,25 @@ static void guest_code(void)
GUEST_SYNC(6);
GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
- GUEST_DONE();
+ /* Attempt to deliver a *masked* interrupt */
+ GUEST_SYNC(7);
+
+ /* Wait until we see the bit set */
+ struct shared_info *si = (void *)SHINFO_VADDR;
+ while (!si->evtchn_pending[0])
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+
+ /* Now deliver an *unmasked* interrupt */
+ GUEST_SYNC(8);
+
+ while (!si->evtchn_pending[1])
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+
+ /* Change memslots and deliver an interrupt */
+ GUEST_SYNC(9);
+
+ for (;;)
+ __asm__ __volatile__ ("rep nop" : : : "memory");
}
static int cmp_timespec(struct timespec *a, struct timespec *b)
@@ -144,9 +192,18 @@ static int cmp_timespec(struct timespec *a, struct timespec *b)
return 0;
}
+static void handle_alrm(int sig)
+{
+ TEST_FAIL("IRQ delivery timed out");
+}
+
int main(int argc, char *argv[])
{
struct timespec min_ts, max_ts, vm_ts;
+ bool verbose;
+
+ verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
+ !strncmp(argv[1], "--verbose", 10));
int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
@@ -155,6 +212,7 @@ int main(int argc, char *argv[])
}
bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+ bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
clock_gettime(CLOCK_REALTIME, &min_ts);
@@ -166,6 +224,11 @@ int main(int argc, char *argv[])
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
+ struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
+
+ int zero_fd = open("/dev/zero", O_RDONLY);
+ TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
+
struct kvm_xen_hvm_config hvmc = {
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
.msr = XEN_HYPERCALL_MSR,
@@ -184,6 +247,16 @@ int main(int argc, char *argv[])
};
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+ /*
+ * Test what happens when the HVA of the shinfo page is remapped after
+ * the kernel has a reference to it. But make sure we copy the clock
+ * info over since that's only set at setup time, and we test it later.
+ */
+ struct pvclock_wall_clock wc_copy = shinfo->wc;
+ void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
+ TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
+ shinfo->wc = wc_copy;
+
struct kvm_xen_vcpu_attr vi = {
.type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
.u.gpa = VCPU_INFO_ADDR,
@@ -214,6 +287,49 @@ int main(int argc, char *argv[])
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
}
+ int irq_fd[2] = { -1, -1 };
+
+ if (do_eventfd_tests) {
+ irq_fd[0] = eventfd(0, 0);
+ irq_fd[1] = eventfd(0, 0);
+
+ /* Unexpected, but not a KVM failure */
+ if (irq_fd[0] == -1 || irq_fd[1] == -1)
+ do_eventfd_tests = false;
+ }
+
+ if (do_eventfd_tests) {
+ irq_routes.info.nr = 2;
+
+ irq_routes.entries[0].gsi = 32;
+ irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[0].u.xen_evtchn.port = 15;
+ irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID;
+ irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ irq_routes.entries[1].gsi = 33;
+ irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[1].u.xen_evtchn.port = 66;
+ irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID;
+ irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes);
+
+ struct kvm_irqfd ifd = { };
+
+ ifd.fd = irq_fd[0];
+ ifd.gsi = 32;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ ifd.fd = irq_fd[1];
+ ifd.gsi = 33;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ struct sigaction sa = { };
+ sa.sa_handler = handle_alrm;
+ sigaction(SIGALRM, &sa, NULL);
+ }
+
struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
vinfo->evtchn_upcall_pending = 0;
@@ -248,6 +364,8 @@ int main(int argc, char *argv[])
switch (uc.args[1]) {
case 0:
+ if (verbose)
+ printf("Delivering evtchn upcall\n");
evtchn_irq_expected = true;
vinfo->evtchn_upcall_pending = 1;
break;
@@ -256,11 +374,16 @@ int main(int argc, char *argv[])
TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
if (!do_runstate_tests)
goto done;
+ if (verbose)
+ printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
rst.u.runstate.state = uc.args[1];
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
break;
+
case 4:
+ if (verbose)
+ printf("Testing RUNSTATE_ADJUST\n");
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
memset(&rst.u, 0, sizeof(rst.u));
rst.u.runstate.state = (uint64_t)-1;
@@ -274,6 +397,8 @@ int main(int argc, char *argv[])
break;
case 5:
+ if (verbose)
+ printf("Testing RUNSTATE_DATA\n");
rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
memset(&rst.u, 0, sizeof(rst.u));
rst.u.runstate.state = RUNSTATE_running;
@@ -282,16 +407,54 @@ int main(int argc, char *argv[])
rst.u.runstate.time_offline = 0x5a;
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
break;
+
case 6:
+ if (verbose)
+ printf("Testing steal time\n");
/* Yield until scheduler delay exceeds target */
rundelay = get_run_delay() + MIN_STEAL_TIME;
do {
sched_yield();
} while (get_run_delay() < rundelay);
break;
+
+ case 7:
+ if (!do_eventfd_tests)
+ goto done;
+ if (verbose)
+ printf("Testing masked event channel\n");
+ shinfo->evtchn_mask[0] = 0x8000;
+ eventfd_write(irq_fd[0], 1UL);
+ alarm(1);
+ break;
+
+ case 8:
+ if (verbose)
+ printf("Testing unmasked event channel\n");
+ /* Unmask that, but deliver the other one */
+ shinfo->evtchn_pending[0] = 0;
+ shinfo->evtchn_mask[0] = 0;
+ eventfd_write(irq_fd[1], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case 9:
+ if (verbose)
+ printf("Testing event channel after memslot change\n");
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
+ eventfd_write(irq_fd[0], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
case 0x20:
TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
evtchn_irq_expected = false;
+ if (shinfo->evtchn_pending[1] &&
+ shinfo->evtchn_pending[0])
+ goto done;
break;
}
break;
@@ -318,6 +481,16 @@ int main(int argc, char *argv[])
ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+ if (verbose) {
+ printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
+ printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
+ ti->tsc_shift, ti->flags);
+ printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
+ ti2->tsc_shift, ti2->flags);
+ }
+
vm_ts.tv_sec = wc->sec;
vm_ts.tv_nsec = wc->nsec;
TEST_ASSERT(wc->version && !(wc->version & 1),
@@ -341,6 +514,15 @@ int main(int argc, char *argv[])
};
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
+ if (verbose) {
+ printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
+ rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
+ rs->state, rs->state_entry_time);
+ for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
+ printf("State %s: %" PRIu64 " ns\n",
+ runstate_names[i], rs->time[i]);
+ }
+ }
TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
"State entry time mismatch");
diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh
index 1b4d95d575f8..14fedeef762e 100755
--- a/tools/testing/selftests/lkdtm/stack-entropy.sh
+++ b/tools/testing/selftests/lkdtm/stack-entropy.sh
@@ -4,13 +4,27 @@
# Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test.
set -e
samples="${1:-1000}"
+TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT
+KSELFTEST_SKIP_TEST=4
+
+# Verify we have LKDTM available in the kernel.
+if [ ! -r $TRIGGER ] ; then
+ /sbin/modprobe -q lkdtm || true
+ if [ ! -r $TRIGGER ] ; then
+ echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)"
+ else
+ echo "Cannot write $TRIGGER (need to run as root?)"
+ fi
+ # Skip this test
+ exit $KSELFTEST_SKIP_TEST
+fi
# Capture dmesg continuously since it may fill up depending on sample size.
log=$(mktemp -t stack-entropy-XXXXXX)
dmesg --follow >"$log" & pid=$!
report=-1
for i in $(seq 1 $samples); do
- echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT
+ echo "REPORT_STACK" > $TRIGGER
if [ -t 1 ]; then
percent=$(( 100 * $i / $samples ))
if [ "$percent" -ne "$report" ]; then
diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
index b0b20e0b4e30..f43aa4b77fba 100755
--- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh
+++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
@@ -44,7 +44,10 @@ mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush
for m in $mitigations
do
- do_one "$m" &
+ if [[ -f /sys/kernel/debug/powerpc/$m ]]
+ then
+ do_one "$m" &
+ fi
done
echo "Spawned threads enabling/disabling mitigations ..."
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index adc2b7294e5f..83647b8277e7 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -193,7 +193,7 @@ int spectre_v2_test(void)
* We are not vulnerable and reporting otherwise, so
* missing such a mismatch is safe.
*/
- if (state == VULNERABLE)
+ if (miss_percent > 95)
return 4;
return 1;
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
index ce3375cd8e73..9d0915777fed 100644
--- a/tools/testing/selftests/powerpc/signal/.gitignore
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -4,3 +4,5 @@ signal_tm
sigfuz
sigreturn_vdso
sig_sc_double_restart
+sigreturn_kernel
+sigreturn_unaligned
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index d6ae54663aed..f679d260afc8 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
+TEST_GEN_PROGS += sigreturn_kernel
+TEST_GEN_PROGS += sigreturn_unaligned
CFLAGS += -maltivec
$(OUTPUT)/signal_tm: CFLAGS += -mhtm
diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c
new file mode 100644
index 000000000000..0a1b6e591eee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that we can't sigreturn to kernel addresses, or to kernel mode.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+#define MSR_PR (1ul << 14)
+
+static volatile unsigned long long sigreturn_addr;
+static volatile unsigned long long sigreturn_msr_mask;
+
+static void sigusr1_handler(int signo, siginfo_t *si, void *uc_ptr)
+{
+ ucontext_t *uc = (ucontext_t *)uc_ptr;
+
+ if (sigreturn_addr)
+ UCONTEXT_NIA(uc) = sigreturn_addr;
+
+ if (sigreturn_msr_mask)
+ UCONTEXT_MSR(uc) &= sigreturn_msr_mask;
+}
+
+static pid_t fork_child(void)
+{
+ pid_t pid;
+
+ pid = fork();
+ if (pid == 0) {
+ raise(SIGUSR1);
+ exit(0);
+ }
+
+ return pid;
+}
+
+static int expect_segv(pid_t pid)
+{
+ int child_ret;
+
+ waitpid(pid, &child_ret, 0);
+ FAIL_IF(WIFEXITED(child_ret));
+ FAIL_IF(!WIFSIGNALED(child_ret));
+ FAIL_IF(WTERMSIG(child_ret) != 11);
+
+ return 0;
+}
+
+int test_sigreturn_kernel(void)
+{
+ struct sigaction act;
+ int child_ret, i;
+ pid_t pid;
+
+ act.sa_sigaction = sigusr1_handler;
+ act.sa_flags = SA_SIGINFO;
+ sigemptyset(&act.sa_mask);
+
+ FAIL_IF(sigaction(SIGUSR1, &act, NULL));
+
+ for (i = 0; i < 2; i++) {
+ // Return to kernel
+ sigreturn_addr = 0xcull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to kernel virtual
+ sigreturn_addr = 0xc008ull << 48;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return out of range
+ sigreturn_addr = 0xc010ull << 48;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to no-man's land, just below PAGE_OFFSET
+ sigreturn_addr = (0xcull << 60) - (64 * 1024);
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to no-man's land, above TASK_SIZE_4PB
+ sigreturn_addr = 0x1ull << 52;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xd space
+ sigreturn_addr = 0xdull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xe space
+ sigreturn_addr = 0xeull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xf space
+ sigreturn_addr = 0xfull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Attempt to set PR=0 for 2nd loop (should be blocked by kernel)
+ sigreturn_msr_mask = ~MSR_PR;
+ }
+
+ printf("All children killed as expected\n");
+
+ // Don't change address, just MSR, should return to user as normal
+ sigreturn_addr = 0;
+ sigreturn_msr_mask = ~MSR_PR;
+ pid = fork_child();
+ waitpid(pid, &child_ret, 0);
+ FAIL_IF(!WIFEXITED(child_ret));
+ FAIL_IF(WIFSIGNALED(child_ret));
+ FAIL_IF(WEXITSTATUS(child_ret) != 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_sigreturn_kernel, "sigreturn_kernel");
+}
diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c
new file mode 100644
index 000000000000..6e58ee4f0fdf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test sigreturn to an unaligned address, ie. low 2 bits set.
+ * Nothing bad should happen.
+ * This was able to trigger warnings with CONFIG_PPC_RFI_SRR_DEBUG=y.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+static void sigusr1_handler(int signo, siginfo_t *info, void *ptr)
+{
+ ucontext_t *uc = ptr;
+
+ UCONTEXT_NIA(uc) |= 3;
+}
+
+static int test_sigreturn_unaligned(void)
+{
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_sigaction = sigusr1_handler;
+ action.sa_flags = SA_SIGINFO;
+
+ FAIL_IF(sigaction(SIGUSR1, &action, NULL) == -1);
+
+ raise(SIGUSR1);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_sigreturn_unaligned, "sigreturn_unaligned");
+}
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
index fe8fcfb334e0..a5cb4b09a46c 100644
--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
@@ -24,19 +24,23 @@ if [[ "$1" == "-cgroup-v2" ]]; then
reservation_usage_file=rsvd.current
fi
-cgroup_path=/dev/cgroup/memory
-if [[ ! -e $cgroup_path ]]; then
- mkdir -p $cgroup_path
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+ cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
mount -t cgroup2 none $cgroup_path
- else
+ do_umount=1
+ fi
+ echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
+else
+ cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $cgroup_path
+ do_umount=1
fi
fi
-
-if [[ $cgroup2 ]]; then
- echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control
-fi
+export cgroup_path
function cleanup() {
if [[ $cgroup2 ]]; then
@@ -108,7 +112,7 @@ function setup_cgroup() {
function wait_for_hugetlb_memory_to_get_depleted() {
local cgroup="$1"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get depleted.
while [ $(cat $path) != 0 ]; do
echo Waiting for hugetlb memory to get depleted.
@@ -121,7 +125,7 @@ function wait_for_hugetlb_memory_to_get_reserved() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory reservation to reach size $size.
@@ -134,7 +138,7 @@ function wait_for_hugetlb_memory_to_get_written() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory to reach size $size.
@@ -574,5 +578,7 @@ for populate in "" "-o"; do
done # populate
done # method
-umount $cgroup_path
-rmdir $cgroup_path
+if [[ $do_umount ]]; then
+ umount $cgroup_path
+ rmdir $cgroup_path
+fi
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 864f126ffd78..203323967b50 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1251,6 +1251,48 @@ TEST_F(hmm, anon_teardown)
/*
* Test memory snapshot without faulting in pages accessed by the device.
*/
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
TEST_F(hmm2, snapshot)
{
struct hmm_buffer *buffer;
diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c
index 257df94697a5..2a7c33631a29 100644
--- a/tools/testing/selftests/vm/hugepage-mremap.c
+++ b/tools/testing/selftests/vm/hugepage-mremap.c
@@ -4,7 +4,11 @@
*
* Example of remapping huge page memory in a user application using the
* mremap system call. Code assumes a hugetlbfs filesystem is mounted
- * at './huge'. The code will use 10MB worth of huge pages.
+ * at './huge'. The amount of memory used by this test is decided by a command
+ * line argument in MBs. If missing, the default amount is 10MB.
+ *
+ * To make sure the test triggers pmd sharing and goes through the 'unshare'
+ * path in the mremap code use 1GB (1024) or more.
*/
#define _GNU_SOURCE
@@ -18,8 +22,10 @@
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
-#define LENGTH (1UL * 1024 * 1024 * 1024)
+#define DEFAULT_LENGTH_MB 10UL
+#define MB_TO_BYTES(x) (x * 1024 * 1024)
+#define FILE_NAME "huge/hugepagefile"
#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
@@ -28,20 +34,20 @@ static void check_bytes(char *addr)
printf("First hex is %x\n", *((unsigned int *)addr));
}
-static void write_bytes(char *addr)
+static void write_bytes(char *addr, size_t len)
{
unsigned long i;
- for (i = 0; i < LENGTH; i++)
+ for (i = 0; i < len; i++)
*(addr + i) = (char)i;
}
-static int read_bytes(char *addr)
+static int read_bytes(char *addr, size_t len)
{
unsigned long i;
check_bytes(addr);
- for (i = 0; i < LENGTH; i++)
+ for (i = 0; i < len; i++)
if (*(addr + i) != (char)i) {
printf("Mismatch at %lu\n", i);
return 1;
@@ -99,11 +105,19 @@ static void register_region_with_uffd(char *addr, size_t len)
}
}
-int main(void)
+int main(int argc, char *argv[])
{
+ /* Read memory length as the first arg if valid, otherwise fallback to
+ * the default length. Any additional args are ignored.
+ */
+ size_t length = argc > 1 ? (size_t)atoi(argv[1]) : 0UL;
+
+ length = length > 0 ? length : DEFAULT_LENGTH_MB;
+ length = MB_TO_BYTES(length);
+
int ret = 0;
- int fd = open("/huge/test", O_CREAT | O_RDWR, 0755);
+ int fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
if (fd < 0) {
perror("Open failed");
@@ -112,7 +126,7 @@ int main(void)
/* mmap to a PUD aligned address to hopefully trigger pmd sharing. */
unsigned long suggested_addr = 0x7eaa40000000;
- void *haddr = mmap((void *)suggested_addr, LENGTH, PROTECTION,
+ void *haddr = mmap((void *)suggested_addr, length, PROTECTION,
MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
printf("Map haddr: Returned address is %p\n", haddr);
if (haddr == MAP_FAILED) {
@@ -122,7 +136,7 @@ int main(void)
/* mmap again to a dummy address to hopefully trigger pmd sharing. */
suggested_addr = 0x7daa40000000;
- void *daddr = mmap((void *)suggested_addr, LENGTH, PROTECTION,
+ void *daddr = mmap((void *)suggested_addr, length, PROTECTION,
MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
printf("Map daddr: Returned address is %p\n", daddr);
if (daddr == MAP_FAILED) {
@@ -132,16 +146,16 @@ int main(void)
suggested_addr = 0x7faa40000000;
void *vaddr =
- mmap((void *)suggested_addr, LENGTH, PROTECTION, FLAGS, -1, 0);
+ mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0);
printf("Map vaddr: Returned address is %p\n", vaddr);
if (vaddr == MAP_FAILED) {
perror("mmap2");
exit(1);
}
- register_region_with_uffd(haddr, LENGTH);
+ register_region_with_uffd(haddr, length);
- void *addr = mremap(haddr, LENGTH, LENGTH,
+ void *addr = mremap(haddr, length, length,
MREMAP_MAYMOVE | MREMAP_FIXED, vaddr);
if (addr == MAP_FAILED) {
perror("mremap");
@@ -150,10 +164,10 @@ int main(void)
printf("Mremap: Returned address is %p\n", addr);
check_bytes(addr);
- write_bytes(addr);
- ret = read_bytes(addr);
+ write_bytes(addr, length);
+ ret = read_bytes(addr, length);
- munmap(addr, LENGTH);
+ munmap(addr, length);
return ret;
}
diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
index 4a9a3afe9fd4..bf2d2a684edf 100644
--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
@@ -18,19 +18,24 @@ if [[ "$1" == "-cgroup-v2" ]]; then
usage_file=current
fi
-CGROUP_ROOT='/dev/cgroup/memory'
-MNT='/mnt/huge/'
-if [[ ! -e $CGROUP_ROOT ]]; then
- mkdir -p $CGROUP_ROOT
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+ CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup2 none $CGROUP_ROOT
- sleep 1
- echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
- else
+ do_umount=1
+ fi
+ echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
+else
+ CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $CGROUP_ROOT
+ do_umount=1
fi
fi
+MNT='/mnt/huge/'
function get_machine_hugepage_size() {
hpz=$(grep -i hugepagesize /proc/meminfo)
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index a24d30af3094..75d401741394 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -111,7 +111,7 @@ fi
echo "-----------------------"
echo "running hugepage-mremap"
echo "-----------------------"
-./hugepage-mremap
+./hugepage-mremap 256
if [ $? -ne 0 ]; then
echo "[FAIL]"
exitcode=1
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 9354a5e0321c..d3fd24f9fae8 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -87,7 +87,7 @@ static bool test_uffdio_minor = false;
static bool map_shared;
static int shm_fd;
-static int huge_fd = -1; /* only used for hugetlb_shared test */
+static int huge_fd;
static char *huge_fd_off0;
static unsigned long long *count_verify;
static int uffd = -1;
@@ -223,9 +223,6 @@ static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
static void hugetlb_release_pages(char *rel_area)
{
- if (huge_fd == -1)
- return;
-
if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
rel_area == huge_fd_off0 ? 0 : nr_pages * page_size,
nr_pages * page_size))
@@ -238,17 +235,17 @@ static void hugetlb_allocate_area(void **alloc_area)
char **alloc_area_alias;
*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- map_shared ? MAP_SHARED :
- MAP_PRIVATE | MAP_HUGETLB |
+ (map_shared ? MAP_SHARED : MAP_PRIVATE) |
+ MAP_HUGETLB |
(*alloc_area == area_src ? 0 : MAP_NORESERVE),
- huge_fd,
- *alloc_area == area_src ? 0 : nr_pages * page_size);
+ huge_fd, *alloc_area == area_src ? 0 :
+ nr_pages * page_size);
if (*alloc_area == MAP_FAILED)
err("mmap of hugetlbfs file failed");
if (map_shared) {
area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- MAP_SHARED,
+ MAP_SHARED | MAP_HUGETLB,
huge_fd, *alloc_area == area_src ? 0 :
nr_pages * page_size);
if (area_alias == MAP_FAILED)
@@ -648,7 +645,7 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
if (ret != sizeof(*msg)) {
if (ret < 0) {
- if (errno == EAGAIN)
+ if (errno == EAGAIN || errno == EINTR)
return 1;
err("blocking read error");
} else {
@@ -724,8 +721,11 @@ static void *uffd_poll_thread(void *arg)
for (;;) {
ret = poll(pollfd, 2, -1);
- if (ret <= 0)
+ if (ret <= 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
err("poll error: %d", ret);
+ }
if (pollfd[1].revents & POLLIN) {
if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
err("read pipefd error");
@@ -1417,7 +1417,6 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize)
static int userfaultfd_stress(void)
{
void *area;
- char *tmp_area;
unsigned long nr;
struct uffdio_register uffdio_register;
struct uffd_stats uffd_stats[nr_cpus];
@@ -1528,13 +1527,9 @@ static int userfaultfd_stress(void)
count_verify[nr], nr);
/* prepare next bounce */
- tmp_area = area_src;
- area_src = area_dst;
- area_dst = tmp_area;
+ swap(area_src, area_dst);
- tmp_area = area_src_alias;
- area_src_alias = area_dst_alias;
- area_dst_alias = tmp_area;
+ swap(area_src_alias, area_dst_alias);
uffd_stats_report(uffd_stats, nr_cpus);
}
diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh
index d3d0d108924d..70a02301f4c2 100644
--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh
+++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh
@@ -14,7 +14,7 @@ want_sleep=$8
reserve=$9
echo "Putting task in cgroup '$cgroup'"
-echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs
+echo $$ > ${cgroup_path:-/dev/cgroup/memory}/"$cgroup"/cgroup.procs
echo "Method is $method"
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile
new file mode 100644
index 000000000000..2d52ff0bff7d
--- /dev/null
+++ b/tools/tracing/rtla/Makefile
@@ -0,0 +1,102 @@
+NAME := rtla
+VERSION := 0.5
+
+# From libtracefs:
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+$(call allow-override,PKG_CONFIG,pkg-config)
+$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/)
+$(call allow-override,LDCONFIG,ldconfig)
+
+INSTALL = install
+FOPTS := -flto=auto -ffat-lto-objects -fexceptions -fstack-protector-strong \
+ -fasynchronous-unwind-tables -fstack-clash-protection
+WOPTS := -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -Wno-maybe-uninitialized
+
+TRACEFS_HEADERS := $$($(PKG_CONFIG) --cflags libtracefs)
+
+CFLAGS := -O -g -DVERSION=\"$(VERSION)\" $(FOPTS) $(MOPTS) $(WOPTS) $(TRACEFS_HEADERS)
+LDFLAGS := -ggdb
+LIBS := $$($(PKG_CONFIG) --libs libtracefs) -lprocps
+
+SRC := $(wildcard src/*.c)
+HDR := $(wildcard src/*.h)
+OBJ := $(SRC:.c=.o)
+DIRS := src
+FILES := Makefile README.txt
+CEXT := bz2
+TARBALL := $(NAME)-$(VERSION).tar.$(CEXT)
+TAROPTS := -cvjf $(TARBALL)
+BINDIR := /usr/bin
+DATADIR := /usr/share
+DOCDIR := $(DATADIR)/doc
+MANDIR := $(DATADIR)/man
+LICDIR := $(DATADIR)/licenses
+SRCTREE := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))
+
+# If running from the tarball, man pages are stored in the Documentation
+# dir. If running from the kernel source, man pages are stored in
+# Documentation/tools/rtla/.
+ifneq ($(wildcard Documentation/.*),)
+DOCSRC = Documentation/
+else
+DOCSRC = $(SRCTREE)/../../../Documentation/tools/rtla/
+endif
+
+.PHONY: all
+all: rtla
+
+rtla: $(OBJ) doc
+ $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS)
+
+static: $(OBJ)
+ $(CC) -o rtla-static $(LDFLAGS) --static $(OBJ) $(LIBS) -lpthread -ldl
+
+.PHONY: install
+install: doc_install
+ $(INSTALL) -d -m 755 $(DESTDIR)$(BINDIR)
+ $(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR)
+ $(STRIP) $(DESTDIR)$(BINDIR)/rtla
+ @test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise
+ ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/osnoise
+ @test ! -f $(DESTDIR)$(BINDIR)/timerlat || rm $(DESTDIR)$(BINDIR)/timerlat
+ ln -s $(DESTDIR)$(BINDIR)/rtla $(DESTDIR)$(BINDIR)/timerlat
+
+.PHONY: clean tarball
+clean: doc_clean
+ @test ! -f rtla || rm rtla
+ @test ! -f rtla-static || rm rtla-static
+ @test ! -f src/rtla.o || rm src/rtla.o
+ @test ! -f $(TARBALL) || rm -f $(TARBALL)
+ @rm -rf *~ $(OBJ) *.tar.$(CEXT)
+
+tarball: clean
+ rm -rf $(NAME)-$(VERSION) && mkdir $(NAME)-$(VERSION)
+ cp -r $(DIRS) $(FILES) $(NAME)-$(VERSION)
+ mkdir $(NAME)-$(VERSION)/Documentation/
+ cp -rp $(SRCTREE)/../../../Documentation/tools/rtla/* $(NAME)-$(VERSION)/Documentation/
+ tar $(TAROPTS) --exclude='*~' $(NAME)-$(VERSION)
+ rm -rf $(NAME)-$(VERSION)
+
+.PHONY: doc doc_clean doc_install
+doc:
+ $(MAKE) -C $(DOCSRC)
+
+doc_clean:
+ $(MAKE) -C $(DOCSRC) clean
+
+doc_install:
+ $(MAKE) -C $(DOCSRC) install
diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt
new file mode 100644
index 000000000000..6c88446f7e74
--- /dev/null
+++ b/tools/tracing/rtla/README.txt
@@ -0,0 +1,36 @@
+RTLA: Real-Time Linux Analysis tools
+
+The rtla is a meta-tool that includes a set of commands that
+aims to analyze the real-time properties of Linux. But, instead of
+testing Linux as a black box, rtla leverages kernel tracing
+capabilities to provide precise information about the properties
+and root causes of unexpected results.
+
+Installing RTLA
+
+RTLA depends on some libraries and tools. More precisely, it depends on the
+following libraries:
+
+ - libtracefs
+ - libtraceevent
+ - procps
+
+It also depends on python3-docutils to compile man pages.
+
+For development, we suggest the following steps for compiling rtla:
+
+ $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git
+ $ cd libtraceevent/
+ $ make
+ $ sudo make install
+ $ cd ..
+ $ git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git
+ $ cd libtracefs/
+ $ make
+ $ sudo make install
+ $ cd ..
+ $ cd $rtla_src
+ $ make
+ $ sudo make install
+
+For further information, please refer to the rtla man page.
diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c
new file mode 100644
index 000000000000..7b73d1eccd0e
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise.c
@@ -0,0 +1,875 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "osnoise.h"
+#include "utils.h"
+
+/*
+ * osnoise_get_cpus - return the original "osnoise/cpus" content
+ *
+ * It also saves the value to be restored.
+ */
+char *osnoise_get_cpus(struct osnoise_context *context)
+{
+ if (context->curr_cpus)
+ return context->curr_cpus;
+
+ if (context->orig_cpus)
+ return context->orig_cpus;
+
+ context->orig_cpus = tracefs_instance_file_read(NULL, "osnoise/cpus", NULL);
+
+ /*
+ * The error value (NULL) is the same for tracefs_instance_file_read()
+ * and this functions, so:
+ */
+ return context->orig_cpus;
+}
+
+/*
+ * osnoise_set_cpus - configure osnoise to run on *cpus
+ *
+ * "osnoise/cpus" file is used to set the cpus in which osnoise/timerlat
+ * will run. This function opens this file, saves the current value,
+ * and set the cpus passed as argument.
+ */
+int osnoise_set_cpus(struct osnoise_context *context, char *cpus)
+{
+ char *orig_cpus = osnoise_get_cpus(context);
+ char buffer[1024];
+ int retval;
+
+ if (!orig_cpus)
+ return -1;
+
+ context->curr_cpus = strdup(cpus);
+ if (!context->curr_cpus)
+ return -1;
+
+ snprintf(buffer, 1024, "%s\n", cpus);
+
+ debug_msg("setting cpus to %s from %s", cpus, context->orig_cpus);
+
+ retval = tracefs_instance_file_write(NULL, "osnoise/cpus", buffer);
+ if (retval < 0) {
+ free(context->curr_cpus);
+ context->curr_cpus = NULL;
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_cpus - restore the original "osnoise/cpus"
+ *
+ * osnoise_set_cpus() saves the original data for the "osnoise/cpus"
+ * file. This function restore the original config it was previously
+ * modified.
+ */
+void osnoise_restore_cpus(struct osnoise_context *context)
+{
+ int retval;
+
+ if (!context->orig_cpus)
+ return;
+
+ if (!context->curr_cpus)
+ return;
+
+ /* nothing to do? */
+ if (!strcmp(context->orig_cpus, context->curr_cpus))
+ goto out_done;
+
+ debug_msg("restoring cpus to %s", context->orig_cpus);
+
+ retval = tracefs_instance_file_write(NULL, "osnoise/cpus", context->orig_cpus);
+ if (retval < 0)
+ err_msg("could not restore original osnoise cpus\n");
+
+out_done:
+ free(context->curr_cpus);
+ context->curr_cpus = NULL;
+}
+
+/*
+ * osnoise_put_cpus - restore cpus config and cleanup data
+ */
+void osnoise_put_cpus(struct osnoise_context *context)
+{
+ osnoise_restore_cpus(context);
+
+ if (!context->orig_cpus)
+ return;
+
+ free(context->orig_cpus);
+ context->orig_cpus = NULL;
+}
+
+/*
+ * osnoise_read_ll_config - read a long long value from a config
+ *
+ * returns -1 on error.
+ */
+static long long osnoise_read_ll_config(char *rel_path)
+{
+ long long retval;
+ char *buffer;
+
+ buffer = tracefs_instance_file_read(NULL, rel_path, NULL);
+ if (!buffer)
+ return -1;
+
+ /* get_llong_from_str returns -1 on error */
+ retval = get_llong_from_str(buffer);
+
+ debug_msg("reading %s returned %lld\n", rel_path, retval);
+
+ free(buffer);
+
+ return retval;
+}
+
+/*
+ * osnoise_write_ll_config - write a long long value to a config in rel_path
+ *
+ * returns -1 on error.
+ */
+static long long osnoise_write_ll_config(char *rel_path, long long value)
+{
+ char buffer[BUFF_U64_STR_SIZE];
+ long long retval;
+
+ snprintf(buffer, sizeof(buffer), "%lld\n", value);
+
+ debug_msg("setting %s to %lld\n", rel_path, value);
+
+ retval = tracefs_instance_file_write(NULL, rel_path, buffer);
+ return retval;
+}
+
+/*
+ * osnoise_get_runtime - return the original "osnoise/runtime_us" value
+ *
+ * It also saves the value to be restored.
+ */
+unsigned long long osnoise_get_runtime(struct osnoise_context *context)
+{
+ long long runtime_us;
+
+ if (context->runtime_us != OSNOISE_TIME_INIT_VAL)
+ return context->runtime_us;
+
+ if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL)
+ return context->orig_runtime_us;
+
+ runtime_us = osnoise_read_ll_config("osnoise/runtime_us");
+ if (runtime_us < 0)
+ goto out_err;
+
+ context->orig_runtime_us = runtime_us;
+ return runtime_us;
+
+out_err:
+ return OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_get_period - return the original "osnoise/period_us" value
+ *
+ * It also saves the value to be restored.
+ */
+unsigned long long osnoise_get_period(struct osnoise_context *context)
+{
+ long long period_us;
+
+ if (context->period_us != OSNOISE_TIME_INIT_VAL)
+ return context->period_us;
+
+ if (context->orig_period_us != OSNOISE_TIME_INIT_VAL)
+ return context->orig_period_us;
+
+ period_us = osnoise_read_ll_config("osnoise/period_us");
+ if (period_us < 0)
+ goto out_err;
+
+ context->orig_period_us = period_us;
+ return period_us;
+
+out_err:
+ return OSNOISE_TIME_INIT_VAL;
+}
+
+static int __osnoise_write_runtime(struct osnoise_context *context,
+ unsigned long long runtime)
+{
+ int retval;
+
+ if (context->orig_runtime_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/runtime_us", runtime);
+ if (retval < 0)
+ return -1;
+
+ context->runtime_us = runtime;
+ return 0;
+}
+
+static int __osnoise_write_period(struct osnoise_context *context,
+ unsigned long long period)
+{
+ int retval;
+
+ if (context->orig_period_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/period_us", period);
+ if (retval < 0)
+ return -1;
+
+ context->period_us = period;
+ return 0;
+}
+
+/*
+ * osnoise_set_runtime_period - set osnoise runtime and period
+ *
+ * Osnoise's runtime and period are related as runtime <= period.
+ * Thus, this function saves the original values, and then tries
+ * to set the runtime and period if they are != 0.
+ */
+int osnoise_set_runtime_period(struct osnoise_context *context,
+ unsigned long long runtime,
+ unsigned long long period)
+{
+ unsigned long long curr_runtime_us;
+ unsigned long long curr_period_us;
+ int retval;
+
+ if (!period && !runtime)
+ return 0;
+
+ curr_runtime_us = osnoise_get_runtime(context);
+ curr_period_us = osnoise_get_period(context);
+
+ /* error getting any value? */
+ if (curr_period_us == OSNOISE_TIME_INIT_VAL || curr_runtime_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ if (!period) {
+ if (runtime > curr_period_us)
+ return -1;
+ return __osnoise_write_runtime(context, runtime);
+ } else if (!runtime) {
+ if (period < curr_runtime_us)
+ return -1;
+ return __osnoise_write_period(context, period);
+ }
+
+ if (runtime > curr_period_us) {
+ retval = __osnoise_write_period(context, period);
+ if (retval)
+ return -1;
+ retval = __osnoise_write_runtime(context, runtime);
+ if (retval)
+ return -1;
+ } else {
+ retval = __osnoise_write_runtime(context, runtime);
+ if (retval)
+ return -1;
+ retval = __osnoise_write_period(context, period);
+ if (retval)
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_runtime_period - restore the original runtime and period
+ */
+void osnoise_restore_runtime_period(struct osnoise_context *context)
+{
+ unsigned long long orig_runtime = context->orig_runtime_us;
+ unsigned long long orig_period = context->orig_period_us;
+ unsigned long long curr_runtime = context->runtime_us;
+ unsigned long long curr_period = context->period_us;
+ int retval;
+
+ if ((orig_runtime == OSNOISE_TIME_INIT_VAL) && (orig_period == OSNOISE_TIME_INIT_VAL))
+ return;
+
+ if ((orig_period == curr_period) && (orig_runtime == curr_runtime))
+ goto out_done;
+
+ retval = osnoise_set_runtime_period(context, orig_runtime, orig_period);
+ if (retval)
+ err_msg("Could not restore original osnoise runtime/period\n");
+
+out_done:
+ context->runtime_us = OSNOISE_TIME_INIT_VAL;
+ context->period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_put_runtime_period - restore original values and cleanup data
+ */
+void osnoise_put_runtime_period(struct osnoise_context *context)
+{
+ osnoise_restore_runtime_period(context);
+
+ if (context->orig_runtime_us != OSNOISE_TIME_INIT_VAL)
+ context->orig_runtime_us = OSNOISE_TIME_INIT_VAL;
+
+ if (context->orig_period_us != OSNOISE_TIME_INIT_VAL)
+ context->orig_period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_get_timerlat_period_us - read and save the original "timerlat_period_us"
+ */
+static long long
+osnoise_get_timerlat_period_us(struct osnoise_context *context)
+{
+ long long timerlat_period_us;
+
+ if (context->timerlat_period_us != OSNOISE_TIME_INIT_VAL)
+ return context->timerlat_period_us;
+
+ if (context->orig_timerlat_period_us != OSNOISE_TIME_INIT_VAL)
+ return context->orig_timerlat_period_us;
+
+ timerlat_period_us = osnoise_read_ll_config("osnoise/timerlat_period_us");
+ if (timerlat_period_us < 0)
+ goto out_err;
+
+ context->orig_timerlat_period_us = timerlat_period_us;
+ return timerlat_period_us;
+
+out_err:
+ return OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_set_timerlat_period_us - set "timerlat_period_us"
+ */
+int osnoise_set_timerlat_period_us(struct osnoise_context *context, long long timerlat_period_us)
+{
+ long long curr_timerlat_period_us = osnoise_get_timerlat_period_us(context);
+ int retval;
+
+ if (curr_timerlat_period_us == OSNOISE_TIME_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/timerlat_period_us", timerlat_period_us);
+ if (retval < 0)
+ return -1;
+
+ context->timerlat_period_us = timerlat_period_us;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_timerlat_period_us - restore "timerlat_period_us"
+ */
+void osnoise_restore_timerlat_period_us(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL)
+ return;
+
+ if (context->orig_timerlat_period_us == context->timerlat_period_us)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/timerlat_period_us", context->orig_timerlat_period_us);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise timerlat_period_us\n");
+
+out_done:
+ context->timerlat_period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_put_timerlat_period_us - restore original values and cleanup data
+ */
+void osnoise_put_timerlat_period_us(struct osnoise_context *context)
+{
+ osnoise_restore_timerlat_period_us(context);
+
+ if (context->orig_timerlat_period_us == OSNOISE_TIME_INIT_VAL)
+ return;
+
+ context->orig_timerlat_period_us = OSNOISE_TIME_INIT_VAL;
+}
+
+/*
+ * osnoise_get_stop_us - read and save the original "stop_tracing_us"
+ */
+static long long
+osnoise_get_stop_us(struct osnoise_context *context)
+{
+ long long stop_us;
+
+ if (context->stop_us != OSNOISE_OPTION_INIT_VAL)
+ return context->stop_us;
+
+ if (context->orig_stop_us != OSNOISE_OPTION_INIT_VAL)
+ return context->orig_stop_us;
+
+ stop_us = osnoise_read_ll_config("osnoise/stop_tracing_us");
+ if (stop_us < 0)
+ goto out_err;
+
+ context->orig_stop_us = stop_us;
+ return stop_us;
+
+out_err:
+ return OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_set_stop_us - set "stop_tracing_us"
+ */
+int osnoise_set_stop_us(struct osnoise_context *context, long long stop_us)
+{
+ long long curr_stop_us = osnoise_get_stop_us(context);
+ int retval;
+
+ if (curr_stop_us == OSNOISE_OPTION_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_us", stop_us);
+ if (retval < 0)
+ return -1;
+
+ context->stop_us = stop_us;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_stop_us - restore the original "stop_tracing_us"
+ */
+void osnoise_restore_stop_us(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ if (context->orig_stop_us == context->stop_us)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_us", context->orig_stop_us);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise stop_us\n");
+
+out_done:
+ context->stop_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_put_stop_us - restore original values and cleanup data
+ */
+void osnoise_put_stop_us(struct osnoise_context *context)
+{
+ osnoise_restore_stop_us(context);
+
+ if (context->orig_stop_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ context->orig_stop_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_get_stop_total_us - read and save the original "stop_tracing_total_us"
+ */
+static long long
+osnoise_get_stop_total_us(struct osnoise_context *context)
+{
+ long long stop_total_us;
+
+ if (context->stop_total_us != OSNOISE_OPTION_INIT_VAL)
+ return context->stop_total_us;
+
+ if (context->orig_stop_total_us != OSNOISE_OPTION_INIT_VAL)
+ return context->orig_stop_total_us;
+
+ stop_total_us = osnoise_read_ll_config("osnoise/stop_tracing_total_us");
+ if (stop_total_us < 0)
+ goto out_err;
+
+ context->orig_stop_total_us = stop_total_us;
+ return stop_total_us;
+
+out_err:
+ return OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_set_stop_total_us - set "stop_tracing_total_us"
+ */
+int osnoise_set_stop_total_us(struct osnoise_context *context, long long stop_total_us)
+{
+ long long curr_stop_total_us = osnoise_get_stop_total_us(context);
+ int retval;
+
+ if (curr_stop_total_us == OSNOISE_OPTION_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us", stop_total_us);
+ if (retval < 0)
+ return -1;
+
+ context->stop_total_us = stop_total_us;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_stop_total_us - restore the original "stop_tracing_total_us"
+ */
+void osnoise_restore_stop_total_us(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ if (context->orig_stop_total_us == context->stop_total_us)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/stop_tracing_total_us",
+ context->orig_stop_total_us);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise stop_total_us\n");
+
+out_done:
+ context->stop_total_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_put_stop_total_us - restore original values and cleanup data
+ */
+void osnoise_put_stop_total_us(struct osnoise_context *context)
+{
+ osnoise_restore_stop_total_us(context);
+
+ if (context->orig_stop_total_us == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_get_print_stack - read and save the original "print_stack"
+ */
+static long long
+osnoise_get_print_stack(struct osnoise_context *context)
+{
+ long long print_stack;
+
+ if (context->print_stack != OSNOISE_OPTION_INIT_VAL)
+ return context->print_stack;
+
+ if (context->orig_print_stack != OSNOISE_OPTION_INIT_VAL)
+ return context->orig_print_stack;
+
+ print_stack = osnoise_read_ll_config("osnoise/print_stack");
+ if (print_stack < 0)
+ goto out_err;
+
+ context->orig_print_stack = print_stack;
+ return print_stack;
+
+out_err:
+ return OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_set_print_stack - set "print_stack"
+ */
+int osnoise_set_print_stack(struct osnoise_context *context, long long print_stack)
+{
+ long long curr_print_stack = osnoise_get_print_stack(context);
+ int retval;
+
+ if (curr_print_stack == OSNOISE_OPTION_INIT_VAL)
+ return -1;
+
+ retval = osnoise_write_ll_config("osnoise/print_stack", print_stack);
+ if (retval < 0)
+ return -1;
+
+ context->print_stack = print_stack;
+
+ return 0;
+}
+
+/*
+ * osnoise_restore_print_stack - restore the original "print_stack"
+ */
+void osnoise_restore_print_stack(struct osnoise_context *context)
+{
+ int retval;
+
+ if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ if (context->orig_print_stack == context->print_stack)
+ goto out_done;
+
+ retval = osnoise_write_ll_config("osnoise/print_stack", context->orig_print_stack);
+ if (retval < 0)
+ err_msg("Could not restore original osnoise print_stack\n");
+
+out_done:
+ context->print_stack = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * osnoise_put_print_stack - restore original values and cleanup data
+ */
+void osnoise_put_print_stack(struct osnoise_context *context)
+{
+ osnoise_restore_print_stack(context);
+
+ if (context->orig_print_stack == OSNOISE_OPTION_INIT_VAL)
+ return;
+
+ context->orig_print_stack = OSNOISE_OPTION_INIT_VAL;
+}
+
+/*
+ * enable_osnoise - enable osnoise tracer in the trace_instance
+ */
+int enable_osnoise(struct trace_instance *trace)
+{
+ return enable_tracer_by_name(trace->inst, "osnoise");
+}
+
+/*
+ * enable_timerlat - enable timerlat tracer in the trace_instance
+ */
+int enable_timerlat(struct trace_instance *trace)
+{
+ return enable_tracer_by_name(trace->inst, "timerlat");
+}
+
+enum {
+ FLAG_CONTEXT_NEWLY_CREATED = (1 << 0),
+ FLAG_CONTEXT_DELETED = (1 << 1),
+};
+
+/*
+ * osnoise_get_context - increase the usage of a context and return it
+ */
+int osnoise_get_context(struct osnoise_context *context)
+{
+ int ret;
+
+ if (context->flags & FLAG_CONTEXT_DELETED) {
+ ret = -1;
+ } else {
+ context->ref++;
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * osnoise_context_alloc - alloc an osnoise_context
+ *
+ * The osnoise context contains the information of the "osnoise/" configs.
+ * It is used to set and restore the config.
+ */
+struct osnoise_context *osnoise_context_alloc(void)
+{
+ struct osnoise_context *context;
+
+ context = calloc(1, sizeof(*context));
+ if (!context)
+ return NULL;
+
+ context->orig_stop_us = OSNOISE_OPTION_INIT_VAL;
+ context->stop_us = OSNOISE_OPTION_INIT_VAL;
+
+ context->orig_stop_total_us = OSNOISE_OPTION_INIT_VAL;
+ context->stop_total_us = OSNOISE_OPTION_INIT_VAL;
+
+ context->orig_print_stack = OSNOISE_OPTION_INIT_VAL;
+ context->print_stack = OSNOISE_OPTION_INIT_VAL;
+
+ osnoise_get_context(context);
+
+ return context;
+}
+
+/*
+ * osnoise_put_context - put the osnoise_put_context
+ *
+ * If there is no other user for the context, the original data
+ * is restored.
+ */
+void osnoise_put_context(struct osnoise_context *context)
+{
+ if (--context->ref < 1)
+ context->flags |= FLAG_CONTEXT_DELETED;
+
+ if (!(context->flags & FLAG_CONTEXT_DELETED))
+ return;
+
+ osnoise_put_cpus(context);
+ osnoise_put_runtime_period(context);
+ osnoise_put_stop_us(context);
+ osnoise_put_stop_total_us(context);
+ osnoise_put_timerlat_period_us(context);
+ osnoise_put_print_stack(context);
+
+ free(context);
+}
+
+/*
+ * osnoise_destroy_tool - disable trace, restore configs and free data
+ */
+void osnoise_destroy_tool(struct osnoise_tool *top)
+{
+ trace_instance_destroy(&top->trace);
+
+ if (top->context)
+ osnoise_put_context(top->context);
+
+ free(top);
+}
+
+/*
+ * osnoise_init_tool - init an osnoise tool
+ *
+ * It allocs data, create a context to store data and
+ * creates a new trace instance for the tool.
+ */
+struct osnoise_tool *osnoise_init_tool(char *tool_name)
+{
+ struct osnoise_tool *top;
+ int retval;
+
+ top = calloc(1, sizeof(*top));
+ if (!top)
+ return NULL;
+
+ top->context = osnoise_context_alloc();
+ if (!top->context)
+ goto out_err;
+
+ retval = trace_instance_init(&top->trace, tool_name);
+ if (retval)
+ goto out_err;
+
+ return top;
+out_err:
+ osnoise_destroy_tool(top);
+ return NULL;
+}
+
+/*
+ * osnoise_init_trace_tool - init a tracer instance to trace osnoise events
+ */
+struct osnoise_tool *osnoise_init_trace_tool(char *tracer)
+{
+ struct osnoise_tool *trace;
+ int retval;
+
+ trace = osnoise_init_tool("osnoise_trace");
+ if (!trace)
+ return NULL;
+
+ retval = tracefs_event_enable(trace->trace.inst, "osnoise", NULL);
+ if (retval < 0 && !errno) {
+ err_msg("Could not find osnoise events\n");
+ goto out_err;
+ }
+
+ retval = enable_tracer_by_name(trace->trace.inst, tracer);
+ if (retval) {
+ err_msg("Could not enable osnoiser tracer for tracing\n");
+ goto out_err;
+ }
+
+ return trace;
+out_err:
+ osnoise_destroy_tool(trace);
+ return NULL;
+}
+
+static void osnoise_usage(void)
+{
+ int i;
+
+ static const char *msg[] = {
+ "",
+ "osnoise version " VERSION,
+ "",
+ " usage: [rtla] osnoise [MODE] ...",
+ "",
+ " modes:",
+ " top - prints the summary from osnoise tracer",
+ " hist - prints a histogram of osnoise samples",
+ "",
+ "if no MODE is given, the top mode is called, passing the arguments",
+ NULL,
+ };
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+int osnoise_main(int argc, char *argv[])
+{
+ if (argc == 0)
+ goto usage;
+
+ /*
+ * if osnoise was called without any argument, run the
+ * default cmdline.
+ */
+ if (argc == 1) {
+ osnoise_top_main(argc, argv);
+ exit(0);
+ }
+
+ if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
+ osnoise_usage();
+ exit(0);
+ } else if (strncmp(argv[1], "-", 1) == 0) {
+ /* the user skipped the tool, call the default one */
+ osnoise_top_main(argc, argv);
+ exit(0);
+ } else if (strcmp(argv[1], "top") == 0) {
+ osnoise_top_main(argc-1, &argv[1]);
+ exit(0);
+ } else if (strcmp(argv[1], "hist") == 0) {
+ osnoise_hist_main(argc-1, &argv[1]);
+ exit(0);
+ }
+
+usage:
+ osnoise_usage();
+ exit(1);
+}
diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h
new file mode 100644
index 000000000000..9e4b2e2a4559
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise.h
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "trace.h"
+
+/*
+ * osnoise_context - read, store, write, restore osnoise configs.
+ */
+struct osnoise_context {
+ int flags;
+ int ref;
+
+ char *curr_cpus;
+ char *orig_cpus;
+
+ /* 0 as init value */
+ unsigned long long orig_runtime_us;
+ unsigned long long runtime_us;
+
+ /* 0 as init value */
+ unsigned long long orig_period_us;
+ unsigned long long period_us;
+
+ /* 0 as init value */
+ long long orig_timerlat_period_us;
+ long long timerlat_period_us;
+
+ /* -1 as init value because 0 is disabled */
+ long long orig_stop_us;
+ long long stop_us;
+
+ /* -1 as init value because 0 is disabled */
+ long long orig_stop_total_us;
+ long long stop_total_us;
+
+ /* -1 as init value because 0 is disabled */
+ long long orig_print_stack;
+ long long print_stack;
+};
+
+/*
+ * *_INIT_VALs are also invalid values, they are used to
+ * communicate errors.
+ */
+#define OSNOISE_OPTION_INIT_VAL (-1)
+#define OSNOISE_TIME_INIT_VAL (0)
+
+struct osnoise_context *osnoise_context_alloc(void);
+int osnoise_get_context(struct osnoise_context *context);
+void osnoise_put_context(struct osnoise_context *context);
+
+int osnoise_set_cpus(struct osnoise_context *context, char *cpus);
+void osnoise_restore_cpus(struct osnoise_context *context);
+
+int osnoise_set_runtime_period(struct osnoise_context *context,
+ unsigned long long runtime,
+ unsigned long long period);
+void osnoise_restore_runtime_period(struct osnoise_context *context);
+
+int osnoise_set_stop_us(struct osnoise_context *context,
+ long long stop_us);
+void osnoise_restore_stop_us(struct osnoise_context *context);
+
+int osnoise_set_stop_total_us(struct osnoise_context *context,
+ long long stop_total_us);
+void osnoise_restore_stop_total_us(struct osnoise_context *context);
+
+int osnoise_set_timerlat_period_us(struct osnoise_context *context,
+ long long timerlat_period_us);
+void osnoise_restore_timerlat_period_us(struct osnoise_context *context);
+
+void osnoise_restore_print_stack(struct osnoise_context *context);
+int osnoise_set_print_stack(struct osnoise_context *context,
+ long long print_stack);
+
+/*
+ * osnoise_tool - osnoise based tool definition.
+ */
+struct osnoise_tool {
+ struct trace_instance trace;
+ struct osnoise_context *context;
+ void *data;
+ void *params;
+ time_t start_time;
+};
+
+void osnoise_destroy_tool(struct osnoise_tool *top);
+struct osnoise_tool *osnoise_init_tool(char *tool_name);
+struct osnoise_tool *osnoise_init_trace_tool(char *tracer);
+
+int osnoise_hist_main(int argc, char *argv[]);
+int osnoise_top_main(int argc, char **argv);
+int osnoise_main(int argc, char **argv);
diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c
new file mode 100644
index 000000000000..180fcbe423cd
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise_hist.c
@@ -0,0 +1,801 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "utils.h"
+#include "osnoise.h"
+
+struct osnoise_hist_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ unsigned long long period;
+ long long stop_us;
+ long long stop_total_us;
+ int sleep_time;
+ int duration;
+ int set_sched;
+ int output_divisor;
+ struct sched_attr sched_param;
+
+ char no_header;
+ char no_summary;
+ char no_index;
+ char with_zeros;
+ int bucket_size;
+ int entries;
+};
+
+struct osnoise_hist_cpu {
+ int *samples;
+ int count;
+
+ unsigned long long min_sample;
+ unsigned long long sum_sample;
+ unsigned long long max_sample;
+
+};
+
+struct osnoise_hist_data {
+ struct tracefs_hist *trace_hist;
+ struct osnoise_hist_cpu *hist;
+ int entries;
+ int bucket_size;
+ int nr_cpus;
+};
+
+/*
+ * osnoise_free_histogram - free runtime data
+ */
+static void
+osnoise_free_histogram(struct osnoise_hist_data *data)
+{
+ int cpu;
+
+ /* one histogram for IRQ and one for thread, per CPU */
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (data->hist[cpu].samples)
+ free(data->hist[cpu].samples);
+ }
+
+ /* one set of histograms per CPU */
+ if (data->hist)
+ free(data->hist);
+
+ free(data);
+}
+
+/*
+ * osnoise_alloc_histogram - alloc runtime data
+ */
+static struct osnoise_hist_data
+*osnoise_alloc_histogram(int nr_cpus, int entries, int bucket_size)
+{
+ struct osnoise_hist_data *data;
+ int cpu;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->entries = entries;
+ data->bucket_size = bucket_size;
+ data->nr_cpus = nr_cpus;
+
+ data->hist = calloc(1, sizeof(*data->hist) * nr_cpus);
+ if (!data->hist)
+ goto cleanup;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->hist[cpu].samples = calloc(1, sizeof(*data->hist->samples) * (entries + 1));
+ if (!data->hist[cpu].samples)
+ goto cleanup;
+ }
+
+ /* set the min to max */
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ data->hist[cpu].min_sample = ~0;
+
+ return data;
+
+cleanup:
+ osnoise_free_histogram(data);
+ return NULL;
+}
+
+static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu,
+ unsigned long long duration, int count)
+{
+ struct osnoise_hist_params *params = tool->params;
+ struct osnoise_hist_data *data = tool->data;
+ int entries = data->entries;
+ int bucket;
+ int *hist;
+
+ if (params->output_divisor)
+ duration = duration / params->output_divisor;
+
+ if (data->bucket_size)
+ bucket = duration / data->bucket_size;
+
+ hist = data->hist[cpu].samples;
+ data->hist[cpu].count += count;
+ update_min(&data->hist[cpu].min_sample, &duration);
+ update_sum(&data->hist[cpu].sum_sample, &duration);
+ update_max(&data->hist[cpu].max_sample, &duration);
+
+ if (bucket < entries)
+ hist[bucket] += count;
+ else
+ hist[entries] += count;
+}
+
+/*
+ * osnoise_destroy_trace_hist - disable events used to collect histogram
+ */
+static void osnoise_destroy_trace_hist(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_data *data = tool->data;
+
+ tracefs_hist_pause(tool->trace.inst, data->trace_hist);
+ tracefs_hist_destroy(tool->trace.inst, data->trace_hist);
+}
+
+/*
+ * osnoise_init_trace_hist - enable events used to collect histogram
+ */
+static int osnoise_init_trace_hist(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_params *params = tool->params;
+ struct osnoise_hist_data *data = tool->data;
+ int bucket_size;
+ char buff[128];
+ int retval = 0;
+
+ /*
+ * Set the size of the bucket.
+ */
+ bucket_size = params->output_divisor * params->bucket_size;
+ snprintf(buff, sizeof(buff), "duration.buckets=%d", bucket_size);
+
+ data->trace_hist = tracefs_hist_alloc(tool->trace.tep, "osnoise", "sample_threshold",
+ buff, TRACEFS_HIST_KEY_NORMAL);
+ if (!data->trace_hist)
+ return 1;
+
+ retval = tracefs_hist_add_key(data->trace_hist, "cpu", 0);
+ if (retval)
+ goto out_err;
+
+ retval = tracefs_hist_start(tool->trace.inst, data->trace_hist);
+ if (retval)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ osnoise_destroy_trace_hist(tool);
+ return 1;
+}
+
+/*
+ * osnoise_read_trace_hist - parse histogram file and file osnoise histogram
+ */
+static void osnoise_read_trace_hist(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_data *data = tool->data;
+ long long cpu, counter, duration;
+ char *content, *position;
+
+ tracefs_hist_pause(tool->trace.inst, data->trace_hist);
+
+ content = tracefs_event_file_read(tool->trace.inst, "osnoise",
+ "sample_threshold",
+ "hist", NULL);
+ if (!content)
+ return;
+
+ position = content;
+ while (true) {
+ position = strstr(position, "duration: ~");
+ if (!position)
+ break;
+ position += strlen("duration: ~");
+ duration = get_llong_from_str(position);
+ if (duration == -1)
+ err_msg("error reading duration from histogram\n");
+
+ position = strstr(position, "cpu:");
+ if (!position)
+ break;
+ position += strlen("cpu: ");
+ cpu = get_llong_from_str(position);
+ if (cpu == -1)
+ err_msg("error reading cpu from histogram\n");
+
+ position = strstr(position, "hitcount:");
+ if (!position)
+ break;
+ position += strlen("hitcount: ");
+ counter = get_llong_from_str(position);
+ if (counter == -1)
+ err_msg("error reading counter from histogram\n");
+
+ osnoise_hist_update_multiple(tool, cpu, duration, counter);
+ }
+ free(content);
+}
+
+/*
+ * osnoise_hist_header - print the header of the tracer to the output
+ */
+static void osnoise_hist_header(struct osnoise_tool *tool)
+{
+ struct osnoise_hist_params *params = tool->params;
+ struct osnoise_hist_data *data = tool->data;
+ struct trace_seq *s = tool->trace.seq;
+ char duration[26];
+ int cpu;
+
+ if (params->no_header)
+ return;
+
+ get_duration(tool->start_time, duration, sizeof(duration));
+ trace_seq_printf(s, "# RTLA osnoise histogram\n");
+ trace_seq_printf(s, "# Time unit is %s (%s)\n",
+ params->output_divisor == 1 ? "nanoseconds" : "microseconds",
+ params->output_divisor == 1 ? "ns" : "us");
+
+ trace_seq_printf(s, "# Duration: %s\n", duration);
+
+ if (!params->no_index)
+ trace_seq_printf(s, "Index");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(s, " CPU-%03d", cpu);
+ }
+ trace_seq_printf(s, "\n");
+
+ trace_seq_do_printf(s);
+ trace_seq_reset(s);
+}
+
+/*
+ * osnoise_print_summary - print the summary of the hist data to the output
+ */
+static void
+osnoise_print_summary(struct osnoise_hist_params *params,
+ struct trace_instance *trace,
+ struct osnoise_hist_data *data)
+{
+ int cpu;
+
+ if (params->no_summary)
+ return;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "count:");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].count);
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "min: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].min_sample);
+
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "avg: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ if (data->hist[cpu].count)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].sum_sample / data->hist[cpu].count);
+ else
+ trace_seq_printf(trace->seq, " - ");
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "max: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].max_sample);
+
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * osnoise_print_stats - print data for all CPUs
+ */
+static void
+osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *tool)
+{
+ struct osnoise_hist_data *data = tool->data;
+ struct trace_instance *trace = &tool->trace;
+ int bucket, cpu;
+ int total;
+
+ osnoise_hist_header(tool);
+
+ for (bucket = 0; bucket < data->entries; bucket++) {
+ total = 0;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "%-6d",
+ bucket * data->bucket_size);
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ total += data->hist[cpu].samples[bucket];
+ trace_seq_printf(trace->seq, "%9d ", data->hist[cpu].samples[bucket]);
+ }
+
+ if (total == 0 && !params->with_zeros) {
+ trace_seq_reset(trace->seq);
+ continue;
+ }
+
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+ }
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "over: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].count)
+ continue;
+
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].samples[data->entries]);
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+
+ osnoise_print_summary(params, trace, data);
+}
+
+/*
+ * osnoise_hist_usage - prints osnoise hist usage message
+ */
+static void osnoise_hist_usage(char *usage)
+{
+ int i;
+
+ static const char * const msg[] = {
+ "",
+ " usage: rtla osnoise hist [-h] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority] [-b N] [-e N] [--no-header] [--no-summary] \\",
+ " [--no-index] [--with-zeros]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: osnoise period in us",
+ " -r/--runtime us: osnoise runtime in us",
+ " -s/--stop us: stop trace if a single sample is higher than the argument in us",
+ " -S/--stop-total us: stop trace if the total sample is higher than the argument in us",
+ " -c/--cpus cpu-list: list of cpus to run osnoise threads",
+ " -d/--duration time[s|m|h|d]: duration of the session",
+ " -D/--debug: print debug info",
+ " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]",
+ " -b/--bucket-size N: set the histogram bucket size (default 1)",
+ " -e/--entries N: set the number of entries of the histogram (default 256)",
+ " --no-header: do not print header",
+ " --no-summary: do not print summary",
+ " --no-index: do not print index",
+ " --with-zeros: print zero only entries",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period: set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla osnoise hist: a per-cpu histogram of the OS noise (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * osnoise_hist_parse_args - allocs, parse and fill the cmd line parameters
+ */
+static struct osnoise_hist_params
+*osnoise_hist_parse_args(int argc, char *argv[])
+{
+ struct osnoise_hist_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ /* display data in microseconds */
+ params->output_divisor = 1000;
+ params->bucket_size = 1;
+ params->entries = 256;
+
+ while (1) {
+ static struct option long_options[] = {
+ {"bucket-size", required_argument, 0, 'b'},
+ {"entries", required_argument, 0, 'e'},
+ {"cpus", required_argument, 0, 'c'},
+ {"debug", no_argument, 0, 'D'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"runtime", required_argument, 0, 'r'},
+ {"stop", required_argument, 0, 's'},
+ {"stop-total", required_argument, 0, 'S'},
+ {"trace", optional_argument, 0, 't'},
+ {"no-header", no_argument, 0, '0'},
+ {"no-summary", no_argument, 0, '1'},
+ {"no-index", no_argument, 0, '2'},
+ {"with-zeros", no_argument, 0, '3'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:b:d:e:Dhp:P:r:s:S:t::0123",
+ long_options, &option_index);
+
+ /* detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'b':
+ params->bucket_size = get_llong_from_str(optarg);
+ if ((params->bucket_size == 0) || (params->bucket_size >= 1000000))
+ osnoise_hist_usage("Bucket size needs to be > 0 and <= 1000000\n");
+ break;
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ osnoise_hist_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ osnoise_hist_usage("Invalid -D duration\n");
+ break;
+ case 'e':
+ params->entries = get_llong_from_str(optarg);
+ if ((params->entries < 10) || (params->entries > 9999999))
+ osnoise_hist_usage("Entries must be > 10 and < 9999999\n");
+ break;
+ case 'h':
+ case '?':
+ osnoise_hist_usage(NULL);
+ break;
+ case 'p':
+ params->period = get_llong_from_str(optarg);
+ if (params->period > 10000000)
+ osnoise_hist_usage("Period longer than 10 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ osnoise_hist_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 'r':
+ params->runtime = get_llong_from_str(optarg);
+ if (params->runtime < 100)
+ osnoise_hist_usage("Runtime shorter than 100 us\n");
+ break;
+ case 's':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'S':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+ if (optarg)
+ /* skip = */
+ params->trace_output = &optarg[1];
+ else
+ params->trace_output = "osnoise_trace.txt";
+ break;
+ case '0': /* no header */
+ params->no_header = 1;
+ break;
+ case '1': /* no summary */
+ params->no_summary = 1;
+ break;
+ case '2': /* no index */
+ params->no_index = 1;
+ break;
+ case '3': /* with zeros */
+ params->with_zeros = 1;
+ break;
+ default:
+ osnoise_hist_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("rtla needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (params->no_index && !params->with_zeros)
+ osnoise_hist_usage("no-index set and with-zeros not set - it does not make sense");
+
+ return params;
+}
+
+/*
+ * osnoise_hist_apply_config - apply the hist configs to the initialized tool
+ */
+static int
+osnoise_hist_apply_config(struct osnoise_tool *tool, struct osnoise_hist_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(tool->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->runtime || params->period) {
+ retval = osnoise_set_runtime_period(tool->context,
+ params->runtime,
+ params->period);
+ if (retval) {
+ err_msg("Failed to set runtime and/or period\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(tool->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * osnoise_init_hist - initialize a osnoise hist tool with parameters
+ */
+static struct osnoise_tool
+*osnoise_init_hist(struct osnoise_hist_params *params)
+{
+ struct osnoise_tool *tool;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ tool = osnoise_init_tool("osnoise_hist");
+ if (!tool)
+ return NULL;
+
+ tool->data = osnoise_alloc_histogram(nr_cpus, params->entries, params->bucket_size);
+ if (!tool->data)
+ goto out_err;
+
+ tool->params = params;
+
+ return tool;
+
+out_err:
+ osnoise_destroy_tool(tool);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_hist(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * osnoise_hist_set_signals - handles the signal to stop the tool
+ */
+static void
+osnoise_hist_set_signals(struct osnoise_hist_params *params)
+{
+ signal(SIGINT, stop_hist);
+ if (params->duration) {
+ signal(SIGALRM, stop_hist);
+ alarm(params->duration);
+ }
+}
+
+int osnoise_hist_main(int argc, char *argv[])
+{
+ struct osnoise_hist_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *tool;
+ int return_value = 1;
+ int retval;
+
+ params = osnoise_hist_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ tool = osnoise_init_hist(params);
+ if (!tool) {
+ err_msg("Could not init osnoise hist\n");
+ goto out_exit;
+ }
+
+ retval = osnoise_hist_apply_config(tool, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_destroy;
+ }
+
+ trace = &tool->trace;
+
+ retval = enable_osnoise(trace);
+ if (retval) {
+ err_msg("Failed to enable osnoise tracer\n");
+ goto out_destroy;
+ }
+
+ retval = osnoise_init_trace_hist(tool);
+ if (retval)
+ goto out_destroy;
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("osnoise/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_hist;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("osnoise");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_hist;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ tool->start_time = time(NULL);
+ osnoise_hist_set_signals(params);
+
+ while (!stop_tracing) {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_hist;
+ }
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+ };
+
+ osnoise_read_trace_hist(tool);
+
+ osnoise_print_stats(params, tool);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("rtla timelat hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_hist:
+ osnoise_free_histogram(tool->data);
+out_destroy:
+ osnoise_destroy_tool(tool);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+ free(params);
+out_exit:
+ exit(return_value);
+}
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
new file mode 100644
index 000000000000..332b2ac205fc
--- /dev/null
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "osnoise.h"
+#include "utils.h"
+
+/*
+ * osnoise top parameters
+ */
+struct osnoise_top_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ unsigned long long period;
+ long long stop_us;
+ long long stop_total_us;
+ int sleep_time;
+ int duration;
+ int quiet;
+ int set_sched;
+ struct sched_attr sched_param;
+};
+
+struct osnoise_top_cpu {
+ unsigned long long sum_runtime;
+ unsigned long long sum_noise;
+ unsigned long long max_noise;
+ unsigned long long max_sample;
+
+ unsigned long long hw_count;
+ unsigned long long nmi_count;
+ unsigned long long irq_count;
+ unsigned long long softirq_count;
+ unsigned long long thread_count;
+
+ int sum_cycles;
+};
+
+struct osnoise_top_data {
+ struct osnoise_top_cpu *cpu_data;
+ int nr_cpus;
+};
+
+/*
+ * osnoise_free_top - free runtime data
+ */
+static void
+osnoise_free_top(struct osnoise_top_data *data)
+{
+ free(data->cpu_data);
+ free(data);
+}
+
+/*
+ * osnoise_alloc_histogram - alloc runtime data
+ */
+static struct osnoise_top_data *osnoise_alloc_top(int nr_cpus)
+{
+ struct osnoise_top_data *data;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->nr_cpus = nr_cpus;
+
+ /* one set of histograms per CPU */
+ data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus);
+ if (!data->cpu_data)
+ goto cleanup;
+
+ return data;
+
+cleanup:
+ osnoise_free_top(data);
+ return NULL;
+}
+
+/*
+ * osnoise_top_handler - this is the handler for osnoise tracer events
+ */
+static int
+osnoise_top_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ struct trace_instance *trace = context;
+ struct osnoise_tool *tool;
+ unsigned long long val;
+ struct osnoise_top_cpu *cpu_data;
+ struct osnoise_top_data *data;
+ int cpu = record->cpu;
+
+ tool = container_of(trace, struct osnoise_tool, trace);
+
+ data = tool->data;
+ cpu_data = &data->cpu_data[cpu];
+
+ cpu_data->sum_cycles++;
+
+ tep_get_field_val(s, event, "runtime", record, &val, 1);
+ update_sum(&cpu_data->sum_runtime, &val);
+
+ tep_get_field_val(s, event, "noise", record, &val, 1);
+ update_max(&cpu_data->max_noise, &val);
+ update_sum(&cpu_data->sum_noise, &val);
+
+ tep_get_field_val(s, event, "max_sample", record, &val, 1);
+ update_max(&cpu_data->max_sample, &val);
+
+ tep_get_field_val(s, event, "hw_count", record, &val, 1);
+ update_sum(&cpu_data->hw_count, &val);
+
+ tep_get_field_val(s, event, "nmi_count", record, &val, 1);
+ update_sum(&cpu_data->nmi_count, &val);
+
+ tep_get_field_val(s, event, "irq_count", record, &val, 1);
+ update_sum(&cpu_data->irq_count, &val);
+
+ tep_get_field_val(s, event, "softirq_count", record, &val, 1);
+ update_sum(&cpu_data->softirq_count, &val);
+
+ tep_get_field_val(s, event, "thread_count", record, &val, 1);
+ update_sum(&cpu_data->thread_count, &val);
+
+ return 0;
+}
+
+/*
+ * osnoise_top_header - print the header of the tool output
+ */
+static void osnoise_top_header(struct osnoise_tool *top)
+{
+ struct trace_seq *s = top->trace.seq;
+ char duration[26];
+
+ get_duration(top->start_time, duration, sizeof(duration));
+
+ trace_seq_printf(s, "\033[2;37;40m");
+ trace_seq_printf(s, " Operating System Noise");
+ trace_seq_printf(s, " ");
+ trace_seq_printf(s, " ");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+
+ trace_seq_printf(s, "duration: %9s | time is in us\n", duration);
+
+ trace_seq_printf(s, "\033[2;30;47m");
+ trace_seq_printf(s, "CPU Period Runtime ");
+ trace_seq_printf(s, " Noise ");
+ trace_seq_printf(s, " %% CPU Aval ");
+ trace_seq_printf(s, " Max Noise Max Single ");
+ trace_seq_printf(s, " HW NMI IRQ Softirq Thread");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+}
+
+/*
+ * clear_terminal - clears the output terminal
+ */
+static void clear_terminal(struct trace_seq *seq)
+{
+ if (!config_debug)
+ trace_seq_printf(seq, "\033c");
+}
+
+/*
+ * osnoise_top_print - prints the output of a given CPU
+ */
+static void osnoise_top_print(struct osnoise_tool *tool, int cpu)
+{
+ struct trace_seq *s = tool->trace.seq;
+ struct osnoise_top_cpu *cpu_data;
+ struct osnoise_top_data *data;
+ int percentage;
+ int decimal;
+
+ data = tool->data;
+ cpu_data = &data->cpu_data[cpu];
+
+ if (!cpu_data->sum_runtime)
+ return;
+
+ percentage = ((cpu_data->sum_runtime - cpu_data->sum_noise) * 10000000)
+ / cpu_data->sum_runtime;
+ decimal = percentage % 100000;
+ percentage = percentage / 100000;
+
+ trace_seq_printf(s, "%3d #%-6d %12llu ", cpu, cpu_data->sum_cycles, cpu_data->sum_runtime);
+ trace_seq_printf(s, "%12llu ", cpu_data->sum_noise);
+ trace_seq_printf(s, " %3d.%05d", percentage, decimal);
+ trace_seq_printf(s, "%12llu %12llu", cpu_data->max_noise, cpu_data->max_sample);
+
+ trace_seq_printf(s, "%12llu ", cpu_data->hw_count);
+ trace_seq_printf(s, "%12llu ", cpu_data->nmi_count);
+ trace_seq_printf(s, "%12llu ", cpu_data->irq_count);
+ trace_seq_printf(s, "%12llu ", cpu_data->softirq_count);
+ trace_seq_printf(s, "%12llu\n", cpu_data->thread_count);
+}
+
+/*
+ * osnoise_print_stats - print data for all cpus
+ */
+static void
+osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top)
+{
+ struct trace_instance *trace = &top->trace;
+ static int nr_cpus = -1;
+ int i;
+
+ if (nr_cpus == -1)
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ if (!params->quiet)
+ clear_terminal(trace->seq);
+
+ osnoise_top_header(top);
+
+ for (i = 0; i < nr_cpus; i++) {
+ if (params->cpus && !params->monitored_cpus[i])
+ continue;
+ osnoise_top_print(top, i);
+ }
+
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * osnoise_top_usage - prints osnoise top usage message
+ */
+void osnoise_top_usage(char *usage)
+{
+ int i;
+
+ static const char * const msg[] = {
+ " usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-p us] [-r us] [-s us] [-S us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: osnoise period in us",
+ " -r/--runtime us: osnoise runtime in us",
+ " -s/--stop us: stop trace if a single sample is higher than the argument in us",
+ " -S/--stop-total us: stop trace if the total sample is higher than the argument in us",
+ " -c/--cpus cpu-list: list of cpus to run osnoise threads",
+ " -d/--duration time[s|m|h|d]: duration of the session",
+ " -D/--debug: print debug info",
+ " -t/--trace[=file]: save the stopped trace to [file|osnoise_trace.txt]",
+ " -q/--quiet print only a summary at the end",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * osnoise_top_parse_args - allocs, parse and fill the cmd line parameters
+ */
+struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
+{
+ struct osnoise_top_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ while (1) {
+ static struct option long_options[] = {
+ {"cpus", required_argument, 0, 'c'},
+ {"debug", no_argument, 0, 'D'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"quiet", no_argument, 0, 'q'},
+ {"runtime", required_argument, 0, 'r'},
+ {"stop", required_argument, 0, 's'},
+ {"stop-total", required_argument, 0, 'S'},
+ {"trace", optional_argument, 0, 't'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:d:Dhp:P:qr:s:S:t::",
+ long_options, &option_index);
+
+ /* Detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ osnoise_top_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ osnoise_top_usage("Invalid -D duration\n");
+ break;
+ case 'h':
+ case '?':
+ osnoise_top_usage(NULL);
+ break;
+ case 'p':
+ params->period = get_llong_from_str(optarg);
+ if (params->period > 10000000)
+ osnoise_top_usage("Period longer than 10 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ osnoise_top_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 'q':
+ params->quiet = 1;
+ break;
+ case 'r':
+ params->runtime = get_llong_from_str(optarg);
+ if (params->runtime < 100)
+ osnoise_top_usage("Runtime shorter than 100 us\n");
+ break;
+ case 's':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'S':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+ if (optarg)
+ /* skip = */
+ params->trace_output = &optarg[1];
+ else
+ params->trace_output = "osnoise_trace.txt";
+ break;
+ default:
+ osnoise_top_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("osnoise needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return params;
+}
+
+/*
+ * osnoise_top_apply_config - apply the top configs to the initialized tool
+ */
+static int
+osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(tool->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->runtime || params->period) {
+ retval = osnoise_set_runtime_period(tool->context,
+ params->runtime,
+ params->period);
+ if (retval) {
+ err_msg("Failed to set runtime and/or period\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(tool->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * osnoise_init_top - initialize a osnoise top tool with parameters
+ */
+struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params)
+{
+ struct osnoise_tool *tool;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ tool = osnoise_init_tool("osnoise_top");
+ if (!tool)
+ return NULL;
+
+ tool->data = osnoise_alloc_top(nr_cpus);
+ if (!tool->data)
+ goto out_err;
+
+ tool->params = params;
+
+ tep_register_event_handler(tool->trace.tep, -1, "ftrace", "osnoise",
+ osnoise_top_handler, NULL);
+
+ return tool;
+
+out_err:
+ osnoise_free_top(tool->data);
+ osnoise_destroy_tool(tool);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_top(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * osnoise_top_set_signals - handles the signal to stop the tool
+ */
+static void osnoise_top_set_signals(struct osnoise_top_params *params)
+{
+ signal(SIGINT, stop_top);
+ if (params->duration) {
+ signal(SIGALRM, stop_top);
+ alarm(params->duration);
+ }
+}
+
+int osnoise_top_main(int argc, char **argv)
+{
+ struct osnoise_top_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *tool;
+ int return_value = 1;
+ int retval;
+
+ params = osnoise_top_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ tool = osnoise_init_top(params);
+ if (!tool) {
+ err_msg("Could not init osnoise top\n");
+ goto out_exit;
+ }
+
+ retval = osnoise_top_apply_config(tool, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_top;
+ }
+
+ trace = &tool->trace;
+
+ retval = enable_osnoise(trace);
+ if (retval) {
+ err_msg("Failed to enable osnoise tracer\n");
+ goto out_top;
+ }
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("osnoise/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_top;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("osnoise");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_top;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ tool->start_time = time(NULL);
+ osnoise_top_set_signals(params);
+
+ do {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_top;
+ }
+
+ if (!params->quiet)
+ osnoise_print_stats(params, tool);
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+
+ } while (!stop_tracing);
+
+ osnoise_print_stats(params, tool);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("osnoise hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_top:
+ osnoise_free_top(tool->data);
+ osnoise_destroy_tool(tool);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+out_exit:
+ exit(return_value);
+}
diff --git a/tools/tracing/rtla/src/rtla.c b/tools/tracing/rtla/src/rtla.c
new file mode 100644
index 000000000000..09bd21b8af81
--- /dev/null
+++ b/tools/tracing/rtla/src/rtla.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "osnoise.h"
+#include "timerlat.h"
+
+/*
+ * rtla_usage - print rtla usage
+ */
+static void rtla_usage(void)
+{
+ int i;
+
+ static const char *msg[] = {
+ "",
+ "rtla version " VERSION,
+ "",
+ " usage: rtla COMMAND ...",
+ "",
+ " commands:",
+ " osnoise - gives information about the operating system noise (osnoise)",
+ " timerlat - measures the timer irq and thread latency",
+ "",
+ NULL,
+ };
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * run_command - try to run a rtla tool command
+ *
+ * It returns 0 if it fails. The tool's main will generally not
+ * return as they should call exit().
+ */
+int run_command(int argc, char **argv, int start_position)
+{
+ if (strcmp(argv[start_position], "osnoise") == 0) {
+ osnoise_main(argc-start_position, &argv[start_position]);
+ goto ran;
+ } else if (strcmp(argv[start_position], "timerlat") == 0) {
+ timerlat_main(argc-start_position, &argv[start_position]);
+ goto ran;
+ }
+
+ return 0;
+ran:
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ int retval;
+
+ /* is it an alias? */
+ retval = run_command(argc, argv, 0);
+ if (retval)
+ exit(0);
+
+ if (argc < 2)
+ goto usage;
+
+ if (strcmp(argv[1], "-h") == 0) {
+ rtla_usage();
+ exit(0);
+ } else if (strcmp(argv[1], "--help") == 0) {
+ rtla_usage();
+ exit(0);
+ }
+
+ retval = run_command(argc, argv, 1);
+ if (retval)
+ exit(0);
+
+usage:
+ rtla_usage();
+ exit(1);
+}
diff --git a/tools/tracing/rtla/src/timerlat.c b/tools/tracing/rtla/src/timerlat.c
new file mode 100644
index 000000000000..97abbf494fee
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+#include "timerlat.h"
+
+static void timerlat_usage(void)
+{
+ int i;
+
+ static const char * const msg[] = {
+ "",
+ "timerlat version " VERSION,
+ "",
+ " usage: [rtla] timerlat [MODE] ...",
+ "",
+ " modes:",
+ " top - prints the summary from timerlat tracer",
+ " hist - prints a histogram of timer latencies",
+ "",
+ "if no MODE is given, the top mode is called, passing the arguments",
+ NULL,
+ };
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+int timerlat_main(int argc, char *argv[])
+{
+ if (argc == 0)
+ goto usage;
+
+ /*
+ * if timerlat was called without any argument, run the
+ * default cmdline.
+ */
+ if (argc == 1) {
+ timerlat_top_main(argc, argv);
+ exit(0);
+ }
+
+ if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
+ timerlat_usage();
+ exit(0);
+ } else if (strncmp(argv[1], "-", 1) == 0) {
+ /* the user skipped the tool, call the default one */
+ timerlat_top_main(argc, argv);
+ exit(0);
+ } else if (strcmp(argv[1], "top") == 0) {
+ timerlat_top_main(argc-1, &argv[1]);
+ exit(0);
+ } else if (strcmp(argv[1], "hist") == 0) {
+ timerlat_hist_main(argc-1, &argv[1]);
+ exit(0);
+ }
+
+usage:
+ timerlat_usage();
+ exit(1);
+}
diff --git a/tools/tracing/rtla/src/timerlat.h b/tools/tracing/rtla/src/timerlat.h
new file mode 100644
index 000000000000..88561bfd14f3
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat.h
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+int timerlat_hist_main(int argc, char *argv[]);
+int timerlat_top_main(int argc, char *argv[]);
+int timerlat_main(int argc, char *argv[]);
diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c
new file mode 100644
index 000000000000..235f9620ef3d
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat_hist.c
@@ -0,0 +1,822 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "utils.h"
+#include "osnoise.h"
+#include "timerlat.h"
+
+struct timerlat_hist_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ long long stop_us;
+ long long stop_total_us;
+ long long timerlat_period_us;
+ long long print_stack;
+ int sleep_time;
+ int output_divisor;
+ int duration;
+ int set_sched;
+ struct sched_attr sched_param;
+
+ char no_irq;
+ char no_thread;
+ char no_header;
+ char no_summary;
+ char no_index;
+ char with_zeros;
+ int bucket_size;
+ int entries;
+};
+
+struct timerlat_hist_cpu {
+ int *irq;
+ int *thread;
+
+ int irq_count;
+ int thread_count;
+
+ unsigned long long min_irq;
+ unsigned long long sum_irq;
+ unsigned long long max_irq;
+
+ unsigned long long min_thread;
+ unsigned long long sum_thread;
+ unsigned long long max_thread;
+};
+
+struct timerlat_hist_data {
+ struct timerlat_hist_cpu *hist;
+ int entries;
+ int bucket_size;
+ int nr_cpus;
+};
+
+/*
+ * timerlat_free_histogram - free runtime data
+ */
+static void
+timerlat_free_histogram(struct timerlat_hist_data *data)
+{
+ int cpu;
+
+ /* one histogram for IRQ and one for thread, per CPU */
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (data->hist[cpu].irq)
+ free(data->hist[cpu].irq);
+
+ if (data->hist[cpu].thread)
+ free(data->hist[cpu].thread);
+ }
+
+ /* one set of histograms per CPU */
+ if (data->hist)
+ free(data->hist);
+
+ free(data);
+}
+
+/*
+ * timerlat_alloc_histogram - alloc runtime data
+ */
+static struct timerlat_hist_data
+*timerlat_alloc_histogram(int nr_cpus, int entries, int bucket_size)
+{
+ struct timerlat_hist_data *data;
+ int cpu;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->entries = entries;
+ data->bucket_size = bucket_size;
+ data->nr_cpus = nr_cpus;
+
+ /* one set of histograms per CPU */
+ data->hist = calloc(1, sizeof(*data->hist) * nr_cpus);
+ if (!data->hist)
+ goto cleanup;
+
+ /* one histogram for IRQ and one for thread, per cpu */
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->hist[cpu].irq = calloc(1, sizeof(*data->hist->irq) * (entries + 1));
+ if (!data->hist[cpu].irq)
+ goto cleanup;
+ data->hist[cpu].thread = calloc(1, sizeof(*data->hist->thread) * (entries + 1));
+ if (!data->hist[cpu].thread)
+ goto cleanup;
+ }
+
+ /* set the min to max */
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->hist[cpu].min_irq = ~0;
+ data->hist[cpu].min_thread = ~0;
+ }
+
+ return data;
+
+cleanup:
+ timerlat_free_histogram(data);
+ return NULL;
+}
+
+/*
+ * timerlat_hist_update - record a new timerlat occurent on cpu, updating data
+ */
+static void
+timerlat_hist_update(struct osnoise_tool *tool, int cpu,
+ unsigned long long thread,
+ unsigned long long latency)
+{
+ struct timerlat_hist_params *params = tool->params;
+ struct timerlat_hist_data *data = tool->data;
+ int entries = data->entries;
+ int bucket;
+ int *hist;
+
+ if (params->output_divisor)
+ latency = latency / params->output_divisor;
+
+ if (data->bucket_size)
+ bucket = latency / data->bucket_size;
+
+ if (!thread) {
+ hist = data->hist[cpu].irq;
+ data->hist[cpu].irq_count++;
+ update_min(&data->hist[cpu].min_irq, &latency);
+ update_sum(&data->hist[cpu].sum_irq, &latency);
+ update_max(&data->hist[cpu].max_irq, &latency);
+ } else {
+ hist = data->hist[cpu].thread;
+ data->hist[cpu].thread_count++;
+ update_min(&data->hist[cpu].min_thread, &latency);
+ update_sum(&data->hist[cpu].sum_thread, &latency);
+ update_max(&data->hist[cpu].max_thread, &latency);
+ }
+
+ if (bucket < entries)
+ hist[bucket]++;
+ else
+ hist[entries]++;
+}
+
+/*
+ * timerlat_hist_handler - this is the handler for timerlat tracer events
+ */
+static int
+timerlat_hist_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *data)
+{
+ struct trace_instance *trace = data;
+ unsigned long long thread, latency;
+ struct osnoise_tool *tool;
+ int cpu = record->cpu;
+
+ tool = container_of(trace, struct osnoise_tool, trace);
+
+ tep_get_field_val(s, event, "context", record, &thread, 1);
+ tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
+
+ timerlat_hist_update(tool, cpu, thread, latency);
+
+ return 0;
+}
+
+/*
+ * timerlat_hist_header - print the header of the tracer to the output
+ */
+static void timerlat_hist_header(struct osnoise_tool *tool)
+{
+ struct timerlat_hist_params *params = tool->params;
+ struct timerlat_hist_data *data = tool->data;
+ struct trace_seq *s = tool->trace.seq;
+ char duration[26];
+ int cpu;
+
+ if (params->no_header)
+ return;
+
+ get_duration(tool->start_time, duration, sizeof(duration));
+ trace_seq_printf(s, "# RTLA timerlat histogram\n");
+ trace_seq_printf(s, "# Time unit is %s (%s)\n",
+ params->output_divisor == 1 ? "nanoseconds" : "microseconds",
+ params->output_divisor == 1 ? "ns" : "us");
+
+ trace_seq_printf(s, "# Duration: %s\n", duration);
+
+ if (!params->no_index)
+ trace_seq_printf(s, "Index");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(s, " IRQ-%03d", cpu);
+
+ if (!params->no_thread)
+ trace_seq_printf(s, " Thr-%03d", cpu);
+ }
+ trace_seq_printf(s, "\n");
+
+
+ trace_seq_do_printf(s);
+ trace_seq_reset(s);
+}
+
+/*
+ * timerlat_print_summary - print the summary of the hist data to the output
+ */
+static void
+timerlat_print_summary(struct timerlat_hist_params *params,
+ struct trace_instance *trace,
+ struct timerlat_hist_data *data)
+{
+ int cpu;
+
+ if (params->no_summary)
+ return;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "count:");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].irq_count);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].thread_count);
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "min: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].min_irq);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].min_thread);
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "avg: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq) {
+ if (data->hist[cpu].irq_count)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].sum_irq / data->hist[cpu].irq_count);
+ else
+ trace_seq_printf(trace->seq, " - ");
+ }
+
+ if (!params->no_thread) {
+ if (data->hist[cpu].thread_count)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].sum_thread / data->hist[cpu].thread_count);
+ else
+ trace_seq_printf(trace->seq, " - ");
+ }
+ }
+ trace_seq_printf(trace->seq, "\n");
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "max: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].max_irq);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9llu ",
+ data->hist[cpu].max_thread);
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * timerlat_print_stats - print data for all CPUs
+ */
+static void
+timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *tool)
+{
+ struct timerlat_hist_data *data = tool->data;
+ struct trace_instance *trace = &tool->trace;
+ int bucket, cpu;
+ int total;
+
+ timerlat_hist_header(tool);
+
+ for (bucket = 0; bucket < data->entries; bucket++) {
+ total = 0;
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "%-6d",
+ bucket * data->bucket_size);
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq) {
+ total += data->hist[cpu].irq[bucket];
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].irq[bucket]);
+ }
+
+ if (!params->no_thread) {
+ total += data->hist[cpu].thread[bucket];
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].thread[bucket]);
+ }
+
+ }
+
+ if (total == 0 && !params->with_zeros) {
+ trace_seq_reset(trace->seq);
+ continue;
+ }
+
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+ }
+
+ if (!params->no_index)
+ trace_seq_printf(trace->seq, "over: ");
+
+ for (cpu = 0; cpu < data->nr_cpus; cpu++) {
+ if (params->cpus && !params->monitored_cpus[cpu])
+ continue;
+
+ if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count)
+ continue;
+
+ if (!params->no_irq)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].irq[data->entries]);
+
+ if (!params->no_thread)
+ trace_seq_printf(trace->seq, "%9d ",
+ data->hist[cpu].thread[data->entries]);
+ }
+ trace_seq_printf(trace->seq, "\n");
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+
+ timerlat_print_summary(params, trace, data);
+}
+
+/*
+ * timerlat_hist_usage - prints timerlat top usage message
+ */
+static void timerlat_hist_usage(char *usage)
+{
+ int i;
+
+ char *msg[] = {
+ "",
+ " usage: [rtla] timerlat hist [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority] [-e N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\",
+ " [--no-index] [--with-zeros]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: timerlat period in us",
+ " -i/--irq us: stop trace if the irq latency is higher than the argument in us",
+ " -T/--thread us: stop trace if the thread latency is higher than the argument in us",
+ " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us",
+ " -c/--cpus cpus: run the tracer only on the given cpus",
+ " -d/--duration time[m|h|d]: duration of the session in seconds",
+ " -D/--debug: print debug info",
+ " -T/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
+ " -n/--nano: display data in nanoseconds",
+ " -b/--bucket-size N: set the histogram bucket size (default 1)",
+ " -e/--entries N: set the number of entries of the histogram (default 256)",
+ " --no-irq: ignore IRQ latencies",
+ " --no-thread: ignore thread latencies",
+ " --no-header: do not print header",
+ " --no-summary: do not print summary",
+ " --no-index: do not print index",
+ " --with-zeros: print zero only entries",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla timerlat hist: a per-cpu histogram of the timer latency (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * timerlat_hist_parse_args - allocs, parse and fill the cmd line parameters
+ */
+static struct timerlat_hist_params
+*timerlat_hist_parse_args(int argc, char *argv[])
+{
+ struct timerlat_hist_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ /* display data in microseconds */
+ params->output_divisor = 1000;
+ params->bucket_size = 1;
+ params->entries = 256;
+
+ while (1) {
+ static struct option long_options[] = {
+ {"cpus", required_argument, 0, 'c'},
+ {"bucket-size", required_argument, 0, 'b'},
+ {"debug", no_argument, 0, 'D'},
+ {"entries", required_argument, 0, 'e'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"irq", required_argument, 0, 'i'},
+ {"nano", no_argument, 0, 'n'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"stack", required_argument, 0, 's'},
+ {"thread", required_argument, 0, 'T'},
+ {"trace", optional_argument, 0, 't'},
+ {"no-irq", no_argument, 0, '0'},
+ {"no-thread", no_argument, 0, '1'},
+ {"no-header", no_argument, 0, '2'},
+ {"no-summary", no_argument, 0, '3'},
+ {"no-index", no_argument, 0, '4'},
+ {"with-zeros", no_argument, 0, '5'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:b:d:e:Dhi:np:P:s:t::T:012345",
+ long_options, &option_index);
+
+ /* detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ timerlat_hist_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'b':
+ params->bucket_size = get_llong_from_str(optarg);
+ if ((params->bucket_size == 0) || (params->bucket_size >= 1000000))
+ timerlat_hist_usage("Bucket size needs to be > 0 and <= 1000000\n");
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ timerlat_hist_usage("Invalid -D duration\n");
+ break;
+ case 'e':
+ params->entries = get_llong_from_str(optarg);
+ if ((params->entries < 10) || (params->entries > 9999999))
+ timerlat_hist_usage("Entries must be > 10 and < 9999999\n");
+ break;
+ case 'h':
+ case '?':
+ timerlat_hist_usage(NULL);
+ break;
+ case 'i':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'n':
+ params->output_divisor = 1;
+ break;
+ case 'p':
+ params->timerlat_period_us = get_llong_from_str(optarg);
+ if (params->timerlat_period_us > 1000000)
+ timerlat_hist_usage("Period longer than 1 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ timerlat_hist_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 's':
+ params->print_stack = get_llong_from_str(optarg);
+ break;
+ case 'T':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+ if (optarg)
+ /* skip = */
+ params->trace_output = &optarg[1];
+ else
+ params->trace_output = "timerlat_trace.txt";
+ break;
+ case '0': /* no irq */
+ params->no_irq = 1;
+ break;
+ case '1': /* no thread */
+ params->no_thread = 1;
+ break;
+ case '2': /* no header */
+ params->no_header = 1;
+ break;
+ case '3': /* no summary */
+ params->no_summary = 1;
+ break;
+ case '4': /* no index */
+ params->no_index = 1;
+ break;
+ case '5': /* with zeros */
+ params->with_zeros = 1;
+ break;
+ default:
+ timerlat_hist_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("rtla needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (params->no_irq && params->no_thread)
+ timerlat_hist_usage("no-irq and no-thread set, there is nothing to do here");
+
+ if (params->no_index && !params->with_zeros)
+ timerlat_hist_usage("no-index set with with-zeros is not set - it does not make sense");
+
+ return params;
+}
+
+/*
+ * timerlat_hist_apply_config - apply the hist configs to the initialized tool
+ */
+static int
+timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(tool->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(tool->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(tool->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->timerlat_period_us) {
+ retval = osnoise_set_timerlat_period_us(tool->context, params->timerlat_period_us);
+ if (retval) {
+ err_msg("Failed to set timerlat period\n");
+ goto out_err;
+ }
+ }
+
+ if (params->print_stack) {
+ retval = osnoise_set_print_stack(tool->context, params->print_stack);
+ if (retval) {
+ err_msg("Failed to set print stack\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * timerlat_init_hist - initialize a timerlat hist tool with parameters
+ */
+static struct osnoise_tool
+*timerlat_init_hist(struct timerlat_hist_params *params)
+{
+ struct osnoise_tool *tool;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ tool = osnoise_init_tool("timerlat_hist");
+ if (!tool)
+ return NULL;
+
+ tool->data = timerlat_alloc_histogram(nr_cpus, params->entries, params->bucket_size);
+ if (!tool->data)
+ goto out_err;
+
+ tool->params = params;
+
+ tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
+ timerlat_hist_handler, tool);
+
+ return tool;
+
+out_err:
+ osnoise_destroy_tool(tool);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_hist(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * timerlat_hist_set_signals - handles the signal to stop the tool
+ */
+static void
+timerlat_hist_set_signals(struct timerlat_hist_params *params)
+{
+ signal(SIGINT, stop_hist);
+ if (params->duration) {
+ signal(SIGALRM, stop_hist);
+ alarm(params->duration);
+ }
+}
+
+int timerlat_hist_main(int argc, char *argv[])
+{
+ struct timerlat_hist_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *tool;
+ int return_value = 1;
+ int retval;
+
+ params = timerlat_hist_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ tool = timerlat_init_hist(params);
+ if (!tool) {
+ err_msg("Could not init osnoise hist\n");
+ goto out_exit;
+ }
+
+ retval = timerlat_hist_apply_config(tool, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_hist;
+ }
+
+ trace = &tool->trace;
+
+ retval = enable_timerlat(trace);
+ if (retval) {
+ err_msg("Failed to enable timerlat tracer\n");
+ goto out_hist;
+ }
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("timerlat/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_hist;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("timerlat");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_hist;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ tool->start_time = time(NULL);
+ timerlat_hist_set_signals(params);
+
+ while (!stop_tracing) {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_hist;
+ }
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+ };
+
+ timerlat_print_stats(params, tool);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("rtla timelat hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_hist:
+ timerlat_free_histogram(tool->data);
+ osnoise_destroy_tool(tool);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+ free(params);
+out_exit:
+ exit(return_value);
+}
diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c
new file mode 100644
index 000000000000..1ebd5291539c
--- /dev/null
+++ b/tools/tracing/rtla/src/timerlat_top.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "utils.h"
+#include "osnoise.h"
+#include "timerlat.h"
+
+struct timerlat_top_params {
+ char *cpus;
+ char *monitored_cpus;
+ char *trace_output;
+ unsigned long long runtime;
+ long long stop_us;
+ long long stop_total_us;
+ long long timerlat_period_us;
+ long long print_stack;
+ int sleep_time;
+ int output_divisor;
+ int duration;
+ int quiet;
+ int set_sched;
+ struct sched_attr sched_param;
+};
+
+struct timerlat_top_cpu {
+ int irq_count;
+ int thread_count;
+
+ unsigned long long cur_irq;
+ unsigned long long min_irq;
+ unsigned long long sum_irq;
+ unsigned long long max_irq;
+
+ unsigned long long cur_thread;
+ unsigned long long min_thread;
+ unsigned long long sum_thread;
+ unsigned long long max_thread;
+};
+
+struct timerlat_top_data {
+ struct timerlat_top_cpu *cpu_data;
+ int nr_cpus;
+};
+
+/*
+ * timerlat_free_top - free runtime data
+ */
+static void
+timerlat_free_top(struct timerlat_top_data *data)
+{
+ free(data->cpu_data);
+ free(data);
+}
+
+/*
+ * timerlat_alloc_histogram - alloc runtime data
+ */
+static struct timerlat_top_data *timerlat_alloc_top(int nr_cpus)
+{
+ struct timerlat_top_data *data;
+ int cpu;
+
+ data = calloc(1, sizeof(*data));
+ if (!data)
+ return NULL;
+
+ data->nr_cpus = nr_cpus;
+
+ /* one set of histograms per CPU */
+ data->cpu_data = calloc(1, sizeof(*data->cpu_data) * nr_cpus);
+ if (!data->cpu_data)
+ goto cleanup;
+
+ /* set the min to max */
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ data->cpu_data[cpu].min_irq = ~0;
+ data->cpu_data[cpu].min_thread = ~0;
+ }
+
+ return data;
+
+cleanup:
+ timerlat_free_top(data);
+ return NULL;
+}
+
+/*
+ * timerlat_hist_update - record a new timerlat occurent on cpu, updating data
+ */
+static void
+timerlat_top_update(struct osnoise_tool *tool, int cpu,
+ unsigned long long thread,
+ unsigned long long latency)
+{
+ struct timerlat_top_data *data = tool->data;
+ struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu];
+
+ if (!thread) {
+ cpu_data->irq_count++;
+ cpu_data->cur_irq = latency;
+ update_min(&cpu_data->min_irq, &latency);
+ update_sum(&cpu_data->sum_irq, &latency);
+ update_max(&cpu_data->max_irq, &latency);
+ } else {
+ cpu_data->thread_count++;
+ cpu_data->cur_thread = latency;
+ update_min(&cpu_data->min_thread, &latency);
+ update_sum(&cpu_data->sum_thread, &latency);
+ update_max(&cpu_data->max_thread, &latency);
+ }
+}
+
+/*
+ * timerlat_top_handler - this is the handler for timerlat tracer events
+ */
+static int
+timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ struct trace_instance *trace = context;
+ unsigned long long latency, thread;
+ struct osnoise_tool *top;
+ int cpu = record->cpu;
+
+ top = container_of(trace, struct osnoise_tool, trace);
+
+ tep_get_field_val(s, event, "context", record, &thread, 1);
+ tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
+
+ timerlat_top_update(top, cpu, thread, latency);
+
+ return 0;
+}
+
+/*
+ * timerlat_top_header - print the header of the tool output
+ */
+static void timerlat_top_header(struct osnoise_tool *top)
+{
+ struct timerlat_top_params *params = top->params;
+ struct trace_seq *s = top->trace.seq;
+ char duration[26];
+
+ get_duration(top->start_time, duration, sizeof(duration));
+
+ trace_seq_printf(s, "\033[2;37;40m");
+ trace_seq_printf(s, " Timer Latency ");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+
+ trace_seq_printf(s, "%-6s | IRQ Timer Latency (%s) | Thread Timer Latency (%s)\n", duration,
+ params->output_divisor == 1 ? "ns" : "us",
+ params->output_divisor == 1 ? "ns" : "us");
+
+ trace_seq_printf(s, "\033[2;30;47m");
+ trace_seq_printf(s, "CPU COUNT | cur min avg max | cur min avg max");
+ trace_seq_printf(s, "\033[0;0;0m");
+ trace_seq_printf(s, "\n");
+}
+
+/*
+ * timerlat_top_print - prints the output of a given CPU
+ */
+static void timerlat_top_print(struct osnoise_tool *top, int cpu)
+{
+
+ struct timerlat_top_params *params = top->params;
+ struct timerlat_top_data *data = top->data;
+ struct timerlat_top_cpu *cpu_data = &data->cpu_data[cpu];
+ int divisor = params->output_divisor;
+ struct trace_seq *s = top->trace.seq;
+
+ if (divisor == 0)
+ return;
+
+ /*
+ * Skip if no data is available: is this cpu offline?
+ */
+ if (!cpu_data->irq_count && !cpu_data->thread_count)
+ return;
+
+ /*
+ * Unless trace is being lost, IRQ counter is always the max.
+ */
+ trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count);
+
+ if (!cpu_data->irq_count) {
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - |");
+ } else {
+ trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor);
+ trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor);
+ trace_seq_printf(s, "%9llu ", (cpu_data->sum_irq / cpu_data->irq_count) / divisor);
+ trace_seq_printf(s, "%9llu |", cpu_data->max_irq / divisor);
+ }
+
+ if (!cpu_data->thread_count) {
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " - ");
+ trace_seq_printf(s, " -\n");
+ } else {
+ trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor);
+ trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor);
+ trace_seq_printf(s, "%9llu ",
+ (cpu_data->sum_thread / cpu_data->thread_count) / divisor);
+ trace_seq_printf(s, "%9llu\n", cpu_data->max_thread / divisor);
+ }
+}
+
+/*
+ * clear_terminal - clears the output terminal
+ */
+static void clear_terminal(struct trace_seq *seq)
+{
+ if (!config_debug)
+ trace_seq_printf(seq, "\033c");
+}
+
+/*
+ * timerlat_print_stats - print data for all cpus
+ */
+static void
+timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *top)
+{
+ struct trace_instance *trace = &top->trace;
+ static int nr_cpus = -1;
+ int i;
+
+ if (nr_cpus == -1)
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ if (!params->quiet)
+ clear_terminal(trace->seq);
+
+ timerlat_top_header(top);
+
+ for (i = 0; i < nr_cpus; i++) {
+ if (params->cpus && !params->monitored_cpus[i])
+ continue;
+ timerlat_top_print(top, i);
+ }
+
+ trace_seq_do_printf(trace->seq);
+ trace_seq_reset(trace->seq);
+}
+
+/*
+ * timerlat_top_usage - prints timerlat top usage message
+ */
+static void timerlat_top_usage(char *usage)
+{
+ int i;
+
+ static const char *const msg[] = {
+ "",
+ " usage: rtla timerlat [top] [-h] [-q] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] [-t[=file]] \\",
+ " [-c cpu-list] [-P priority]",
+ "",
+ " -h/--help: print this menu",
+ " -p/--period us: timerlat period in us",
+ " -i/--irq us: stop trace if the irq latency is higher than the argument in us",
+ " -T/--thread us: stop trace if the thread latency is higher than the argument in us",
+ " -s/--stack us: save the stack trace at the IRQ if a thread latency is higher than the argument in us",
+ " -c/--cpus cpus: run the tracer only on the given cpus",
+ " -d/--duration time[m|h|d]: duration of the session in seconds",
+ " -D/--debug: print debug info",
+ " -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
+ " -n/--nano: display data in nanoseconds",
+ " -q/--quiet print only a summary at the end",
+ " -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
+ " o:prio - use SCHED_OTHER with prio",
+ " r:prio - use SCHED_RR with prio",
+ " f:prio - use SCHED_FIFO with prio",
+ " d:runtime[us|ms|s]:period[us|ms|s] - use SCHED_DEADLINE with runtime and period",
+ " in nanoseconds",
+ NULL,
+ };
+
+ if (usage)
+ fprintf(stderr, "%s\n", usage);
+
+ fprintf(stderr, "rtla timerlat top: a per-cpu summary of the timer latency (version %s)\n",
+ VERSION);
+
+ for (i = 0; msg[i]; i++)
+ fprintf(stderr, "%s\n", msg[i]);
+ exit(1);
+}
+
+/*
+ * timerlat_top_parse_args - allocs, parse and fill the cmd line parameters
+ */
+static struct timerlat_top_params
+*timerlat_top_parse_args(int argc, char **argv)
+{
+ struct timerlat_top_params *params;
+ int retval;
+ int c;
+
+ params = calloc(1, sizeof(*params));
+ if (!params)
+ exit(1);
+
+ /* display data in microseconds */
+ params->output_divisor = 1000;
+
+ while (1) {
+ static struct option long_options[] = {
+ {"cpus", required_argument, 0, 'c'},
+ {"debug", no_argument, 0, 'D'},
+ {"duration", required_argument, 0, 'd'},
+ {"help", no_argument, 0, 'h'},
+ {"irq", required_argument, 0, 'i'},
+ {"nano", no_argument, 0, 'n'},
+ {"period", required_argument, 0, 'p'},
+ {"priority", required_argument, 0, 'P'},
+ {"quiet", no_argument, 0, 'q'},
+ {"stack", required_argument, 0, 's'},
+ {"thread", required_argument, 0, 'T'},
+ {"trace", optional_argument, 0, 't'},
+ {0, 0, 0, 0}
+ };
+
+ /* getopt_long stores the option index here. */
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "c:d:Dhi:np:P:qs:t::T:",
+ long_options, &option_index);
+
+ /* detect the end of the options. */
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'c':
+ retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ if (retval)
+ timerlat_top_usage("\nInvalid -c cpu list\n");
+ params->cpus = optarg;
+ break;
+ case 'D':
+ config_debug = 1;
+ break;
+ case 'd':
+ params->duration = parse_seconds_duration(optarg);
+ if (!params->duration)
+ timerlat_top_usage("Invalid -D duration\n");
+ break;
+ case 'h':
+ case '?':
+ timerlat_top_usage(NULL);
+ break;
+ case 'i':
+ params->stop_us = get_llong_from_str(optarg);
+ break;
+ case 'n':
+ params->output_divisor = 1;
+ break;
+ case 'p':
+ params->timerlat_period_us = get_llong_from_str(optarg);
+ if (params->timerlat_period_us > 1000000)
+ timerlat_top_usage("Period longer than 1 s\n");
+ break;
+ case 'P':
+ retval = parse_prio(optarg, &params->sched_param);
+ if (retval == -1)
+ timerlat_top_usage("Invalid -P priority");
+ params->set_sched = 1;
+ break;
+ case 'q':
+ params->quiet = 1;
+ break;
+ case 's':
+ params->print_stack = get_llong_from_str(optarg);
+ break;
+ case 'T':
+ params->stop_total_us = get_llong_from_str(optarg);
+ break;
+ case 't':
+ if (optarg)
+ /* skip = */
+ params->trace_output = &optarg[1];
+ else
+ params->trace_output = "timerlat_trace.txt";
+ break;
+ default:
+ timerlat_top_usage("Invalid option");
+ }
+ }
+
+ if (geteuid()) {
+ err_msg("rtla needs root permission\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return params;
+}
+
+/*
+ * timerlat_top_apply_config - apply the top configs to the initialized tool
+ */
+static int
+timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params *params)
+{
+ int retval;
+
+ if (!params->sleep_time)
+ params->sleep_time = 1;
+
+ if (params->cpus) {
+ retval = osnoise_set_cpus(top->context, params->cpus);
+ if (retval) {
+ err_msg("Failed to apply CPUs config\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_us) {
+ retval = osnoise_set_stop_us(top->context, params->stop_us);
+ if (retval) {
+ err_msg("Failed to set stop us\n");
+ goto out_err;
+ }
+ }
+
+ if (params->stop_total_us) {
+ retval = osnoise_set_stop_total_us(top->context, params->stop_total_us);
+ if (retval) {
+ err_msg("Failed to set stop total us\n");
+ goto out_err;
+ }
+ }
+
+
+ if (params->timerlat_period_us) {
+ retval = osnoise_set_timerlat_period_us(top->context, params->timerlat_period_us);
+ if (retval) {
+ err_msg("Failed to set timerlat period\n");
+ goto out_err;
+ }
+ }
+
+
+ if (params->print_stack) {
+ retval = osnoise_set_print_stack(top->context, params->print_stack);
+ if (retval) {
+ err_msg("Failed to set print stack\n");
+ goto out_err;
+ }
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+/*
+ * timerlat_init_top - initialize a timerlat top tool with parameters
+ */
+static struct osnoise_tool
+*timerlat_init_top(struct timerlat_top_params *params)
+{
+ struct osnoise_tool *top;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ top = osnoise_init_tool("timerlat_top");
+ if (!top)
+ return NULL;
+
+ top->data = timerlat_alloc_top(nr_cpus);
+ if (!top->data)
+ goto out_err;
+
+ top->params = params;
+
+ tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat",
+ timerlat_top_handler, top);
+
+ return top;
+
+out_err:
+ osnoise_destroy_tool(top);
+ return NULL;
+}
+
+static int stop_tracing;
+static void stop_top(int sig)
+{
+ stop_tracing = 1;
+}
+
+/*
+ * timerlat_top_set_signals - handles the signal to stop the tool
+ */
+static void
+timerlat_top_set_signals(struct timerlat_top_params *params)
+{
+ signal(SIGINT, stop_top);
+ if (params->duration) {
+ signal(SIGALRM, stop_top);
+ alarm(params->duration);
+ }
+}
+
+int timerlat_top_main(int argc, char *argv[])
+{
+ struct timerlat_top_params *params;
+ struct trace_instance *trace;
+ struct osnoise_tool *record;
+ struct osnoise_tool *top;
+ int return_value = 1;
+ int retval;
+
+ params = timerlat_top_parse_args(argc, argv);
+ if (!params)
+ exit(1);
+
+ top = timerlat_init_top(params);
+ if (!top) {
+ err_msg("Could not init osnoise top\n");
+ goto out_exit;
+ }
+
+ retval = timerlat_top_apply_config(top, params);
+ if (retval) {
+ err_msg("Could not apply config\n");
+ goto out_top;
+ }
+
+ trace = &top->trace;
+
+ retval = enable_timerlat(trace);
+ if (retval) {
+ err_msg("Failed to enable timerlat tracer\n");
+ goto out_top;
+ }
+
+ if (params->set_sched) {
+ retval = set_comm_sched_attr("timerlat/", &params->sched_param);
+ if (retval) {
+ err_msg("Failed to set sched parameters\n");
+ goto out_top;
+ }
+ }
+
+ trace_instance_start(trace);
+
+ if (params->trace_output) {
+ record = osnoise_init_trace_tool("timerlat");
+ if (!record) {
+ err_msg("Failed to enable the trace instance\n");
+ goto out_top;
+ }
+ trace_instance_start(&record->trace);
+ }
+
+ top->start_time = time(NULL);
+ timerlat_top_set_signals(params);
+
+ while (!stop_tracing) {
+ sleep(params->sleep_time);
+
+ retval = tracefs_iterate_raw_events(trace->tep,
+ trace->inst,
+ NULL,
+ 0,
+ collect_registered_events,
+ trace);
+ if (retval < 0) {
+ err_msg("Error iterating on events\n");
+ goto out_top;
+ }
+
+ if (!params->quiet)
+ timerlat_print_stats(params, top);
+
+ if (!tracefs_trace_is_on(trace->inst))
+ break;
+
+ };
+
+ timerlat_print_stats(params, top);
+
+ return_value = 0;
+
+ if (!tracefs_trace_is_on(trace->inst)) {
+ printf("rtla timelat hit stop tracing\n");
+ if (params->trace_output) {
+ printf(" Saving trace to %s\n", params->trace_output);
+ save_trace_to_file(record->trace.inst, params->trace_output);
+ }
+ }
+
+out_top:
+ timerlat_free_top(top->data);
+ osnoise_destroy_tool(top);
+ if (params->trace_output)
+ osnoise_destroy_tool(record);
+ free(params);
+out_exit:
+ exit(return_value);
+}
diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c
new file mode 100644
index 000000000000..107a0c6387f7
--- /dev/null
+++ b/tools/tracing/rtla/src/trace.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/sendfile.h>
+#include <tracefs.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "trace.h"
+#include "utils.h"
+
+/*
+ * enable_tracer_by_name - enable a tracer on the given instance
+ */
+int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name)
+{
+ enum tracefs_tracers tracer;
+ int retval;
+
+ tracer = TRACEFS_TRACER_CUSTOM;
+
+ debug_msg("enabling %s tracer\n", tracer_name);
+
+ retval = tracefs_tracer_set(inst, tracer, tracer_name);
+ if (retval < 0) {
+ if (errno == ENODEV)
+ err_msg("tracer %s not found!\n", tracer_name);
+
+ err_msg("failed to enable the tracer %s\n", tracer_name);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * disable_tracer - set nop tracer to the insta
+ */
+void disable_tracer(struct tracefs_instance *inst)
+{
+ enum tracefs_tracers t = TRACEFS_TRACER_NOP;
+ int retval;
+
+ retval = tracefs_tracer_set(inst, t);
+ if (retval < 0)
+ err_msg("oops, error disabling tracer\n");
+}
+
+/*
+ * create_instance - create a trace instance with *instance_name
+ */
+struct tracefs_instance *create_instance(char *instance_name)
+{
+ return tracefs_instance_create(instance_name);
+}
+
+/*
+ * destroy_instance - remove a trace instance and free the data
+ */
+void destroy_instance(struct tracefs_instance *inst)
+{
+ tracefs_instance_destroy(inst);
+ tracefs_instance_free(inst);
+}
+
+/*
+ * save_trace_to_file - save the trace output of the instance to the file
+ */
+int save_trace_to_file(struct tracefs_instance *inst, const char *filename)
+{
+ const char *file = "trace";
+ mode_t mode = 0644;
+ char buffer[4096];
+ int out_fd, in_fd;
+ int retval = -1;
+
+ in_fd = tracefs_instance_file_open(inst, file, O_RDONLY);
+ if (in_fd < 0) {
+ err_msg("Failed to open trace file\n");
+ return -1;
+ }
+
+ out_fd = creat(filename, mode);
+ if (out_fd < 0) {
+ err_msg("Failed to create output file %s\n", filename);
+ goto out_close_in;
+ }
+
+ do {
+ retval = read(in_fd, buffer, sizeof(buffer));
+ if (retval <= 0)
+ goto out_close;
+
+ retval = write(out_fd, buffer, retval);
+ if (retval < 0)
+ goto out_close;
+ } while (retval > 0);
+
+ retval = 0;
+out_close:
+ close(out_fd);
+out_close_in:
+ close(in_fd);
+ return retval;
+}
+
+/*
+ * collect_registered_events - call the existing callback function for the event
+ *
+ * If an event has a registered callback function, call it.
+ * Otherwise, ignore the event.
+ */
+int
+collect_registered_events(struct tep_event *event, struct tep_record *record,
+ int cpu, void *context)
+{
+ struct trace_instance *trace = context;
+ struct trace_seq *s = trace->seq;
+
+ if (!event->handler)
+ return 0;
+
+ event->handler(s, record, event, context);
+
+ return 0;
+}
+
+/*
+ * trace_instance_destroy - destroy and free a rtla trace instance
+ */
+void trace_instance_destroy(struct trace_instance *trace)
+{
+ if (trace->inst) {
+ disable_tracer(trace->inst);
+ destroy_instance(trace->inst);
+ }
+
+ if (trace->seq)
+ free(trace->seq);
+
+ if (trace->tep)
+ tep_free(trace->tep);
+}
+
+/*
+ * trace_instance_init - create an rtla trace instance
+ *
+ * It is more than the tracefs instance, as it contains other
+ * things required for the tracing, such as the local events and
+ * a seq file.
+ *
+ * Note that the trace instance is returned disabled. This allows
+ * the tool to apply some other configs, like setting priority
+ * to the kernel threads, before starting generating trace entries.
+ */
+int trace_instance_init(struct trace_instance *trace, char *tool_name)
+{
+ trace->seq = calloc(1, sizeof(*trace->seq));
+ if (!trace->seq)
+ goto out_err;
+
+ trace_seq_init(trace->seq);
+
+ trace->inst = create_instance(tool_name);
+ if (!trace->inst)
+ goto out_err;
+
+ trace->tep = tracefs_local_events(NULL);
+ if (!trace->tep)
+ goto out_err;
+
+ /*
+ * Let the main enable the record after setting some other
+ * things such as the priority of the tracer's threads.
+ */
+ tracefs_trace_off(trace->inst);
+
+ return 0;
+
+out_err:
+ trace_instance_destroy(trace);
+ return 1;
+}
+
+/*
+ * trace_instance_start - start tracing a given rtla instance
+ */
+int trace_instance_start(struct trace_instance *trace)
+{
+ return tracefs_trace_on(trace->inst);
+}
diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h
new file mode 100644
index 000000000000..0ea1df0ad9a7
--- /dev/null
+++ b/tools/tracing/rtla/src/trace.h
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <tracefs.h>
+#include <stddef.h>
+
+struct trace_instance {
+ struct tracefs_instance *inst;
+ struct tep_handle *tep;
+ struct trace_seq *seq;
+};
+
+int trace_instance_init(struct trace_instance *trace, char *tool_name);
+int trace_instance_start(struct trace_instance *trace);
+void trace_instance_destroy(struct trace_instance *trace);
+
+struct trace_seq *get_trace_seq(void);
+int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name);
+void disable_tracer(struct tracefs_instance *inst);
+
+int enable_osnoise(struct trace_instance *trace);
+int enable_timerlat(struct trace_instance *trace);
+
+struct tracefs_instance *create_instance(char *instance_name);
+void destroy_instance(struct tracefs_instance *inst);
+
+int save_trace_to_file(struct tracefs_instance *inst, const char *filename);
+int collect_registered_events(struct tep_event *tep, struct tep_record *record,
+ int cpu, void *context);
diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c
new file mode 100644
index 000000000000..1c9f0eea6166
--- /dev/null
+++ b/tools/tracing/rtla/src/utils.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
+ */
+
+#include <proc/readproc.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+
+#include "utils.h"
+
+#define MAX_MSG_LENGTH 1024
+int config_debug;
+
+/*
+ * err_msg - print an error message to the stderr
+ */
+void err_msg(const char *fmt, ...)
+{
+ char message[MAX_MSG_LENGTH];
+ va_list ap;
+
+ va_start(ap, fmt);
+ vsnprintf(message, sizeof(message), fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr, "%s", message);
+}
+
+/*
+ * debug_msg - print a debug message to stderr if debug is set
+ */
+void debug_msg(const char *fmt, ...)
+{
+ char message[MAX_MSG_LENGTH];
+ va_list ap;
+
+ if (!config_debug)
+ return;
+
+ va_start(ap, fmt);
+ vsnprintf(message, sizeof(message), fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr, "%s", message);
+}
+
+/*
+ * get_llong_from_str - get a long long int from a string
+ */
+long long get_llong_from_str(char *start)
+{
+ long long value;
+ char *end;
+
+ errno = 0;
+ value = strtoll(start, &end, 10);
+ if (errno || start == end)
+ return -1;
+
+ return value;
+}
+
+/*
+ * get_duration - fill output with a human readable duration since start_time
+ */
+void get_duration(time_t start_time, char *output, int output_size)
+{
+ time_t now = time(NULL);
+ struct tm *tm_info;
+ time_t duration;
+
+ duration = difftime(now, start_time);
+ tm_info = localtime(&duration);
+
+ snprintf(output, output_size, "%3d %02d:%02d:%02d",
+ tm_info->tm_yday,
+ tm_info->tm_hour - 1,
+ tm_info->tm_min,
+ tm_info->tm_sec);
+}
+
+/*
+ * parse_cpu_list - parse a cpu_list filling a char vector with cpus set
+ *
+ * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set the char
+ * in the monitored_cpus.
+ *
+ * XXX: convert to a bitmask.
+ */
+int parse_cpu_list(char *cpu_list, char **monitored_cpus)
+{
+ char *mon_cpus;
+ const char *p;
+ int end_cpu;
+ int nr_cpus;
+ int cpu;
+ int i;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ mon_cpus = malloc(nr_cpus * sizeof(char));
+ memset(mon_cpus, 0, (nr_cpus * sizeof(char)));
+
+ for (p = cpu_list; *p; ) {
+ cpu = atoi(p);
+ if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
+ goto err;
+
+ while (isdigit(*p))
+ p++;
+ if (*p == '-') {
+ p++;
+ end_cpu = atoi(p);
+ if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
+ goto err;
+ while (isdigit(*p))
+ p++;
+ } else
+ end_cpu = cpu;
+
+ if (cpu == end_cpu) {
+ debug_msg("cpu_list: adding cpu %d\n", cpu);
+ mon_cpus[cpu] = 1;
+ } else {
+ for (i = cpu; i <= end_cpu; i++) {
+ debug_msg("cpu_list: adding cpu %d\n", i);
+ mon_cpus[i] = 1;
+ }
+ }
+
+ if (*p == ',')
+ p++;
+ }
+
+ *monitored_cpus = mon_cpus;
+
+ return 0;
+
+err:
+ debug_msg("Error parsing the cpu list %s", cpu_list);
+ return 1;
+}
+
+/*
+ * parse_duration - parse duration with s/m/h/d suffix converting it to seconds
+ */
+long parse_seconds_duration(char *val)
+{
+ char *end;
+ long t;
+
+ t = strtol(val, &end, 10);
+
+ if (end) {
+ switch (*end) {
+ case 's':
+ case 'S':
+ break;
+ case 'm':
+ case 'M':
+ t *= 60;
+ break;
+ case 'h':
+ case 'H':
+ t *= 60 * 60;
+ break;
+
+ case 'd':
+ case 'D':
+ t *= 24 * 60 * 60;
+ break;
+ }
+ }
+
+ return t;
+}
+
+/*
+ * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
+ */
+long parse_ns_duration(char *val)
+{
+ char *end;
+ long t;
+
+ t = strtol(val, &end, 10);
+
+ if (end) {
+ if (!strncmp(end, "ns", 2)) {
+ return t;
+ } else if (!strncmp(end, "us", 2)) {
+ t *= 1000;
+ return t;
+ } else if (!strncmp(end, "ms", 2)) {
+ t *= 1000 * 1000;
+ return t;
+ } else if (!strncmp(end, "s", 1)) {
+ t *= 1000 * 1000 * 1000;
+ return t;
+ }
+ return -1;
+ }
+
+ return t;
+}
+
+/*
+ * This is a set of helper functions to use SCHED_DEADLINE.
+ */
+#ifdef __x86_64__
+# define __NR_sched_setattr 314
+# define __NR_sched_getattr 315
+#elif __i386__
+# define __NR_sched_setattr 351
+# define __NR_sched_getattr 352
+#elif __arm__
+# define __NR_sched_setattr 380
+# define __NR_sched_getattr 381
+#elif __aarch64__
+# define __NR_sched_setattr 274
+# define __NR_sched_getattr 275
+#elif __powerpc__
+# define __NR_sched_setattr 355
+# define __NR_sched_getattr 356
+#elif __s390x__
+# define __NR_sched_setattr 345
+# define __NR_sched_getattr 346
+#endif
+
+#define SCHED_DEADLINE 6
+
+static inline int sched_setattr(pid_t pid, const struct sched_attr *attr,
+ unsigned int flags) {
+ return syscall(__NR_sched_setattr, pid, attr, flags);
+}
+
+static inline int sched_getattr(pid_t pid, struct sched_attr *attr,
+ unsigned int size, unsigned int flags)
+{
+ return syscall(__NR_sched_getattr, pid, attr, size, flags);
+}
+
+int __set_sched_attr(int pid, struct sched_attr *attr)
+{
+ int flags = 0;
+ int retval;
+
+ retval = sched_setattr(pid, attr, flags);
+ if (retval < 0) {
+ err_msg("boost_with_deadline failed to boost pid %d: %s\n",
+ pid, strerror(errno));
+ return 1;
+ }
+
+ return 0;
+}
+/*
+ * set_comm_sched_attr - set sched params to threads starting with char *comm
+ *
+ * This function uses procps to list the currently running threads and then
+ * set the sched_attr *attr to the threads that start with char *comm. It is
+ * mainly used to set the priority to the kernel threads created by the
+ * tracers.
+ */
+int set_comm_sched_attr(const char *comm, struct sched_attr *attr)
+{
+ int flags = PROC_FILLCOM | PROC_FILLSTAT;
+ PROCTAB *ptp;
+ proc_t task;
+ int retval;
+
+ ptp = openproc(flags);
+ if (!ptp) {
+ err_msg("error openproc()\n");
+ return -ENOENT;
+ }
+
+ memset(&task, 0, sizeof(task));
+
+ while (readproc(ptp, &task)) {
+ retval = strncmp(comm, task.cmd, strlen(comm));
+ if (retval)
+ continue;
+ retval = __set_sched_attr(task.tid, attr);
+ if (retval)
+ goto out_err;
+ }
+
+ closeproc(ptp);
+ return 0;
+
+out_err:
+ closeproc(ptp);
+ return 1;
+}
+
+#define INVALID_VAL (~0L)
+static long get_long_ns_after_colon(char *start)
+{
+ long val = INVALID_VAL;
+
+ /* find the ":" */
+ start = strstr(start, ":");
+ if (!start)
+ return -1;
+
+ /* skip ":" */
+ start++;
+ val = parse_ns_duration(start);
+
+ return val;
+}
+
+static long get_long_after_colon(char *start)
+{
+ long val = INVALID_VAL;
+
+ /* find the ":" */
+ start = strstr(start, ":");
+ if (!start)
+ return -1;
+
+ /* skip ":" */
+ start++;
+ val = get_llong_from_str(start);
+
+ return val;
+}
+
+/*
+ * parse priority in the format:
+ * SCHED_OTHER:
+ * o:<prio>
+ * O:<prio>
+ * SCHED_RR:
+ * r:<prio>
+ * R:<prio>
+ * SCHED_FIFO:
+ * f:<prio>
+ * F:<prio>
+ * SCHED_DEADLINE:
+ * d:runtime:period
+ * D:runtime:period
+ */
+int parse_prio(char *arg, struct sched_attr *sched_param)
+{
+ long prio;
+ long runtime;
+ long period;
+
+ memset(sched_param, 0, sizeof(*sched_param));
+ sched_param->size = sizeof(*sched_param);
+
+ switch (arg[0]) {
+ case 'd':
+ case 'D':
+ /* d:runtime:period */
+ if (strlen(arg) < 4)
+ return -1;
+
+ runtime = get_long_ns_after_colon(arg);
+ if (runtime == INVALID_VAL)
+ return -1;
+
+ period = get_long_ns_after_colon(&arg[2]);
+ if (period == INVALID_VAL)
+ return -1;
+
+ if (runtime > period)
+ return -1;
+
+ sched_param->sched_policy = SCHED_DEADLINE;
+ sched_param->sched_runtime = runtime;
+ sched_param->sched_deadline = period;
+ sched_param->sched_period = period;
+ break;
+ case 'f':
+ case 'F':
+ /* f:prio */
+ prio = get_long_after_colon(arg);
+ if (prio == INVALID_VAL)
+ return -1;
+
+ if (prio < sched_get_priority_min(SCHED_FIFO))
+ return -1;
+ if (prio > sched_get_priority_max(SCHED_FIFO))
+ return -1;
+
+ sched_param->sched_policy = SCHED_FIFO;
+ sched_param->sched_priority = prio;
+ break;
+ case 'r':
+ case 'R':
+ /* r:prio */
+ prio = get_long_after_colon(arg);
+ if (prio == INVALID_VAL)
+ return -1;
+
+ if (prio < sched_get_priority_min(SCHED_RR))
+ return -1;
+ if (prio > sched_get_priority_max(SCHED_RR))
+ return -1;
+
+ sched_param->sched_policy = SCHED_RR;
+ sched_param->sched_priority = prio;
+ break;
+ case 'o':
+ case 'O':
+ /* o:prio */
+ prio = get_long_after_colon(arg);
+ if (prio == INVALID_VAL)
+ return -1;
+
+ if (prio < sched_get_priority_min(SCHED_OTHER))
+ return -1;
+ if (prio > sched_get_priority_max(SCHED_OTHER))
+ return -1;
+
+ sched_param->sched_policy = SCHED_OTHER;
+ sched_param->sched_priority = prio;
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h
new file mode 100644
index 000000000000..9aa962319ca2
--- /dev/null
+++ b/tools/tracing/rtla/src/utils.h
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <time.h>
+
+/*
+ * '18446744073709551615\0'
+ */
+#define BUFF_U64_STR_SIZE 24
+
+#define container_of(ptr, type, member)({ \
+ const typeof(((type *)0)->member) *__mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)) ; })
+
+extern int config_debug;
+void debug_msg(const char *fmt, ...);
+void err_msg(const char *fmt, ...);
+
+long parse_seconds_duration(char *val);
+void get_duration(time_t start_time, char *output, int output_size);
+
+int parse_cpu_list(char *cpu_list, char **monitored_cpus);
+long long get_llong_from_str(char *start);
+
+static inline void
+update_min(unsigned long long *a, unsigned long long *b)
+{
+ if (*a > *b)
+ *a = *b;
+}
+
+static inline void
+update_max(unsigned long long *a, unsigned long long *b)
+{
+ if (*a < *b)
+ *a = *b;
+}
+
+static inline void
+update_sum(unsigned long long *a, unsigned long long *b)
+{
+ *a += *b;
+}
+
+struct sched_attr {
+ uint32_t size;
+ uint32_t sched_policy;
+ uint64_t sched_flags;
+ int32_t sched_nice;
+ uint32_t sched_priority;
+ uint64_t sched_runtime;
+ uint64_t sched_deadline;
+ uint64_t sched_period;
+};
+
+int parse_prio(char *arg, struct sched_attr *sched_param);
+int set_comm_sched_attr(const char *comm, struct sched_attr *attr);