Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc |   2
-rw-r--r--  tools/testing/selftests/kvm/.gitignore |   1
-rw-r--r--  tools/testing/selftests/kvm/Makefile |   1
-rw-r--r--  tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c | 169
-rw-r--r--  tools/testing/selftests/kvm/aarch64/debug-exceptions.c | 149
-rw-r--r--  tools/testing/selftests/kvm/aarch64/psci_test.c |  10
-rw-r--r--  tools/testing/selftests/kvm/dirty_log_test.c |   8
-rw-r--r--  tools/testing/selftests/kvm/include/kvm_util_base.h |   4
-rw-r--r--  tools/testing/selftests/kvm/include/test_util.h |   6
-rw-r--r--  tools/testing/selftests/kvm/include/x86_64/processor.h |   4
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c |  44
-rw-r--r--  tools/testing/selftests/kvm/lib/x86_64/processor.c |  40
-rw-r--r--  tools/testing/selftests/kvm/lib/x86_64/svm.c |  14
-rw-r--r--  tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c | 122
-rw-r--r--  tools/testing/selftests/kvm/x86_64/hyperv_features.c |  13
-rw-r--r--  tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c |  19
-rwxr-xr-x  tools/testing/selftests/memory-hotplug/mem-on-off-test.sh |  34
-rwxr-xr-x  tools/testing/selftests/net/fib_nexthops.sh |   5
-rw-r--r--  tools/testing/selftests/netfilter/Makefile |   2
-rwxr-xr-x  tools/testing/selftests/netfilter/nft_fib.sh |   1
-rwxr-xr-x  tools/testing/selftests/netfilter/rpath.sh | 147
-rw-r--r--  tools/testing/selftests/proc/.gitignore |   1
-rw-r--r--  tools/testing/selftests/proc/Makefile |   1
-rw-r--r--  tools/testing/selftests/proc/proc-empty-vm.c | 386
-rw-r--r--  tools/testing/selftests/proc/proc-pid-vm.c |  56
-rw-r--r--  tools/testing/selftests/vm/hmm-tests.c |  49
-rw-r--r--  tools/testing/selftests/vm/userfaultfd.c |  22
27 files changed, 1139 insertions(+), 171 deletions(-)
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
index 3145b0f1835c..8d26d5505808 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -85,7 +85,7 @@ run_enable_disable() {
echo $check_disable > $EVENT_ENABLE
done
sleep $SLEEP_TIME
- echo " make sure it's still works"
+ echo " make sure it still works"
test_event_enabled $check_enable_star
reset_ftrace_filter
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 45d9aee1c0d8..2f0d705db9db 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
+/aarch64/aarch32_id_regs
/aarch64/arch_timer
/aarch64/debug-exceptions
/aarch64/get-reg-list
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index fde3ae8cfa4c..0172eb6cb6ee 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -147,6 +147,7 @@ TEST_GEN_PROGS_x86_64 += system_counter_offset_test
# Compiled outputs used by test targets
TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
+TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
new file mode 100644
index 000000000000..6f9c1f19c7f6
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * aarch32_id_regs - Test for ID register behavior on AArch64-only systems
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ
+ * and WI from userspace.
+ */
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define BAD_ID_REG_VAL 0x1badc0deul
+
+#define GUEST_ASSERT_REG_RAZ(reg) GUEST_ASSERT_EQ(read_sysreg_s(reg), 0)
+
+static void guest_main(void)
+{
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1);
+ GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3));
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1);
+ GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7));
+
+ GUEST_DONE();
+}
+
+static void test_guest_raz(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
+
+static uint64_t raz_wi_reg_ids[] = {
+ KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1),
+};
+
+static void test_user_raz_wi(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) {
+ uint64_t reg_id = raz_wi_reg_ids[i];
+ uint64_t val;
+
+ vcpu_get_reg(vcpu, reg_id, &val);
+ ASSERT_EQ(val, 0);
+
+ /*
+ * Expect the ioctl to succeed with no effect on the register
+ * value.
+ */
+ vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+
+ vcpu_get_reg(vcpu, reg_id, &val);
+ ASSERT_EQ(val, 0);
+ }
+}
+
+static uint64_t raz_invariant_reg_ids[] = {
+ KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1),
+ KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)),
+ KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1),
+ KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)),
+};
+
+static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
+{
+ int i, r;
+
+ for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) {
+ uint64_t reg_id = raz_invariant_reg_ids[i];
+ uint64_t val;
+
+ vcpu_get_reg(vcpu, reg_id, &val);
+ ASSERT_EQ(val, 0);
+
+ r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+ vcpu_get_reg(vcpu, reg_id, &val);
+ ASSERT_EQ(val, 0);
+ }
+}
+
+
+
+static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
+{
+ uint64_t val, el0;
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
+
+ el0 = (val & ARM64_FEATURE_MASK(ID_AA64PFR0_EL0)) >> ID_AA64PFR0_EL0_SHIFT;
+ return el0 == ID_AA64PFR0_ELx_64BIT_ONLY;
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ TEST_REQUIRE(vcpu_aarch64_only(vcpu));
+
+ ucall_init(vm, NULL);
+
+ test_user_raz_wi(vcpu);
+ test_user_raz_invariant(vcpu);
+ test_guest_raz(vcpu);
+
+ ucall_uninit(vm);
+ kvm_vm_free(vm);
+}
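The new aarch32_id_regs test drives the RAZ/WI checks through the selftest wrappers (vcpu_get_reg()/vcpu_set_reg()). As a rough illustration of what those wrappers boil down to, the following standalone sketch probes a single register through the raw KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls; the vcpu fd and the 64-bit register ID are assumed to come from the usual KVM setup and are not part of this patch.

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: returns 0 if the register reads as zero and ignores writes. */
static int check_raz_wi(int vcpu_fd, uint64_t reg_id)
{
	uint64_t val = 0x1badc0deull;
	struct kvm_one_reg reg = { .id = reg_id, .addr = (uintptr_t)&val };

	/* RAZ: the register must read back as zero. */
	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) || val != 0)
		return -1;

	/* WI: a write is accepted but has no effect... */
	val = 0x1badc0deull;
	if (ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg))
		return -1;

	/* ...so a subsequent read still returns zero. */
	val = 0x1badc0deull;
	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) || val != 0)
		return -1;

	return 0;
}
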
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 2ee35cf9801e..947bd201435c 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -22,6 +22,7 @@
#define SPSR_SS (1 << 21)
extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start;
+extern unsigned char iter_ss_begin, iter_ss_end;
static volatile uint64_t sw_bp_addr, hw_bp_addr;
static volatile uint64_t wp_addr, wp_data_addr;
static volatile uint64_t svc_addr;
@@ -238,6 +239,46 @@ static void guest_svc_handler(struct ex_regs *regs)
svc_addr = regs->pc;
}
+enum single_step_op {
+ SINGLE_STEP_ENABLE = 0,
+ SINGLE_STEP_DISABLE = 1,
+};
+
+static void guest_code_ss(int test_cnt)
+{
+ uint64_t i;
+ uint64_t bvr, wvr, w_bvr, w_wvr;
+
+ for (i = 0; i < test_cnt; i++) {
+ /* Bits [1:0] of dbg{b,w}vr are RES0 */
+ w_bvr = i << 2;
+ w_wvr = i << 2;
+
+ /* Enable Single Step execution */
+ GUEST_SYNC(SINGLE_STEP_ENABLE);
+
+ /*
+ * The userspace will verify that the pc is as expected during
+ * single step execution between iter_ss_begin and iter_ss_end.
+ */
+ asm volatile("iter_ss_begin:nop\n");
+
+ write_sysreg(w_bvr, dbgbvr0_el1);
+ write_sysreg(w_wvr, dbgwvr0_el1);
+ bvr = read_sysreg(dbgbvr0_el1);
+ wvr = read_sysreg(dbgwvr0_el1);
+
+ asm volatile("iter_ss_end:\n");
+
+ /* Disable Single Step execution */
+ GUEST_SYNC(SINGLE_STEP_DISABLE);
+
+ GUEST_ASSERT(bvr == w_bvr);
+ GUEST_ASSERT(wvr == w_wvr);
+ }
+ GUEST_DONE();
+}
+
static int debug_version(struct kvm_vcpu *vcpu)
{
uint64_t id_aa64dfr0;
@@ -246,7 +287,7 @@ static int debug_version(struct kvm_vcpu *vcpu)
return id_aa64dfr0 & 0xf;
}
-int main(int argc, char *argv[])
+static void test_guest_debug_exceptions(void)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -259,9 +300,6 @@ int main(int argc, char *argv[])
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vcpu);
- __TEST_REQUIRE(debug_version(vcpu) >= 6,
- "Armv8 debug architecture not supported.");
-
vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
ESR_EC_BRK_INS, guest_sw_bp_handler);
vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
@@ -294,5 +332,108 @@ int main(int argc, char *argv[])
done:
kvm_vm_free(vm);
+}
+
+void test_single_step_from_userspace(int test_cnt)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ struct kvm_run *run;
+ uint64_t pc, cmd;
+ uint64_t test_pc = 0;
+ bool ss_enable = false;
+ struct kvm_guest_debug debug = {};
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss);
+ ucall_init(vm, NULL);
+ run = vcpu->run;
+ vcpu_args_set(vcpu, 1, test_cnt);
+
+ while (1) {
+ vcpu_run(vcpu);
+ if (run->exit_reason != KVM_EXIT_DEBUG) {
+ cmd = get_ucall(vcpu, &uc);
+ if (cmd == UCALL_ABORT) {
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ } else if (cmd == UCALL_DONE) {
+ break;
+ }
+
+ TEST_ASSERT(cmd == UCALL_SYNC,
+ "Unexpected ucall cmd 0x%lx", cmd);
+
+ if (uc.args[1] == SINGLE_STEP_ENABLE) {
+ debug.control = KVM_GUESTDBG_ENABLE |
+ KVM_GUESTDBG_SINGLESTEP;
+ ss_enable = true;
+ } else {
+ debug.control = SINGLE_STEP_DISABLE;
+ ss_enable = false;
+ }
+
+ vcpu_guest_debug_set(vcpu, &debug);
+ continue;
+ }
+
+ TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG");
+
+ /* Check if the current pc is expected. */
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc);
+ TEST_ASSERT(!test_pc || pc == test_pc,
+ "Unexpected pc 0x%lx (expected 0x%lx)",
+ pc, test_pc);
+
+ /*
+ * If the current pc is between iter_ss_begin and
+ * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should
+ * be the current pc + 4.
+ */
+ if ((pc >= (uint64_t)&iter_ss_begin) &&
+ (pc < (uint64_t)&iter_ss_end))
+ test_pc = pc + 4;
+ else
+ test_pc = 0;
+ }
+
+ kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("Usage: %s [-h] [-i iterations of the single step test]\n", name);
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int opt;
+ int ss_iteration = 10000;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(debug_version(vcpu) >= 6,
+ "Armv8 debug architecture not supported.");
+ kvm_vm_free(vm);
+
+ while ((opt = getopt(argc, argv, "i:")) != -1) {
+ switch (opt) {
+ case 'i':
+ ss_iteration = atoi(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ test_guest_debug_exceptions();
+ test_single_step_from_userspace(ss_iteration);
+
return 0;
}
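For reference, the single-step plumbing the new test exercises via vcpu_guest_debug_set() is plain KVM UAPI. A minimal, hypothetical sketch of the userspace side, assuming a raw vcpu fd and an mmap'ed kvm_run, with error handling trimmed:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Single-step the vCPU for 'steps' KVM_EXIT_DEBUG exits, then turn it off. */
static int single_step(int vcpu_fd, struct kvm_run *run, int steps)
{
	struct kvm_guest_debug dbg = {
		.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
	};

	if (ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg))
		return -1;

	while (steps--) {
		if (ioctl(vcpu_fd, KVM_RUN, NULL))
			return -1;
		if (run->exit_reason != KVM_EXIT_DEBUG)
			return -1;	/* guest exited for some other reason */
	}

	/* control == 0 disables guest debug entirely. */
	memset(&dbg, 0, sizeof(dbg));
	return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
}
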
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index f7621f6e938e..e0b9e81a3e09 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -1,12 +1,14 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the
- * CPU_ON PSCI call matches what the caller requested.
+ * psci_test - Tests relating to KVM's PSCI implementation.
*
* Copyright (c) 2021 Google LLC.
*
- * This is a regression test for a race between KVM servicing the PSCI call and
- * userspace reading the vCPUs registers.
+ * This test includes:
+ * - A regression test for a race between KVM servicing the PSCI CPU_ON call
+ * and userspace reading the targeted vCPU's registers.
+ * - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated
+ * KVM_SYSTEM_EVENT_SUSPEND UAPI.
*/
#define _GNU_SOURCE
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 9c883c94d478..b5234d6efbe1 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -17,6 +17,7 @@
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/atomic.h>
+#include <asm/barrier.h>
#include "kvm_util.h"
#include "test_util.h"
@@ -264,7 +265,8 @@ static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
static bool dirty_ring_supported(void)
{
- return kvm_has_cap(KVM_CAP_DIRTY_LOG_RING);
+ return (kvm_has_cap(KVM_CAP_DIRTY_LOG_RING) ||
+ kvm_has_cap(KVM_CAP_DIRTY_LOG_RING_ACQ_REL));
}
static void dirty_ring_create_vm_done(struct kvm_vm *vm)
@@ -279,12 +281,12 @@ static void dirty_ring_create_vm_done(struct kvm_vm *vm)
static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn)
{
- return gfn->flags == KVM_DIRTY_GFN_F_DIRTY;
+ return smp_load_acquire(&gfn->flags) == KVM_DIRTY_GFN_F_DIRTY;
}
static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
{
- gfn->flags = KVM_DIRTY_GFN_F_RESET;
+ smp_store_release(&gfn->flags, KVM_DIRTY_GFN_F_RESET);
}
static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
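The dirty_log_test change pairs an acquire load with KVM's release store on the per-GFN flags word, so the harvester cannot observe KVM_DIRTY_GFN_F_DIRTY before the preceding writes to the ring entry. As a portable illustration, with C11 atomics standing in for the kernel-style smp_load_acquire()/smp_store_release() helpers (names hypothetical):

#include <stdatomic.h>
#include <stdint.h>

/* Consumer side: only claim the entry once the DIRTY flag is visible; the
 * acquire load also makes the entry's payload written before it visible. */
static inline int gfn_is_dirtied(_Atomic uint32_t *flags, uint32_t dirty)
{
	return atomic_load_explicit(flags, memory_order_acquire) == dirty;
}

/* Hand the entry back: release ordering ensures all prior accesses to the
 * entry are complete before the producer can observe the RESET flag. */
static inline void gfn_set_collected(_Atomic uint32_t *flags, uint32_t reset)
{
	atomic_store_explicit(flags, reset, memory_order_release);
}
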
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 24fde97f6121..e42a09cd24a0 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -175,6 +175,10 @@ extern const struct vm_guest_mode_params vm_guest_mode_params[];
int open_path_or_exit(const char *path, int flags);
int open_kvm_dev_path_or_exit(void);
+
+bool get_kvm_intel_param_bool(const char *param);
+bool get_kvm_amd_param_bool(const char *param);
+
unsigned int kvm_check_cap(long cap);
static inline bool kvm_has_cap(long cap)
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 5c5a88180b6c..befc754ce9b3 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -63,8 +63,10 @@ void test_assert(bool exp, const char *exp_str,
#a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
} while (0)
-#define TEST_FAIL(fmt, ...) \
- TEST_ASSERT(false, fmt, ##__VA_ARGS__)
+#define TEST_FAIL(fmt, ...) do { \
+ TEST_ASSERT(false, fmt, ##__VA_ARGS__); \
+ __builtin_unreachable(); \
+} while (0)
size_t parse_size(const char *size);
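The TEST_FAIL() change adds a __builtin_unreachable() hint because test_assert() is not annotated noreturn, so the compiler otherwise assumes a TEST_FAIL() call can fall through. A reduced, hypothetical example of the warning this silences:

#include <stdio.h>
#include <stdlib.h>

/* Aborts at run time, but the compiler cannot see that (no noreturn). */
static void report_failure(const char *msg)
{
	fprintf(stderr, "FAIL: %s\n", msg);
	exit(1);
}

#define fail(msg) do {				\
	report_failure(msg);			\
	__builtin_unreachable();		\
} while (0)

/* Without the unreachable hint, gcc/clang may warn that control reaches
 * the end of this non-void function on the fail() path. */
static int lookup(int key)
{
	if (key == 0)
		return 42;

	fail("unknown key");
}
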
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 0cbc71b7af50..e8ca0d8a6a7e 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -825,6 +825,8 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
}
+bool kvm_is_tdp_enabled(void);
+
uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
uint64_t vaddr);
void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
@@ -855,6 +857,8 @@ enum pg_level {
#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t nr_bytes, int level);
/*
* Basic CPU control in CR0
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 9889fe0d8919..f1cb1627161f 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -50,6 +50,45 @@ int open_kvm_dev_path_or_exit(void)
return _open_kvm_dev_path_or_exit(O_RDONLY);
}
+static bool get_module_param_bool(const char *module_name, const char *param)
+{
+ const int path_size = 128;
+ char path[path_size];
+ char value;
+ ssize_t r;
+ int fd;
+
+ r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
+ module_name, param);
+ TEST_ASSERT(r < path_size,
+ "Failed to construct sysfs path in %d bytes.", path_size);
+
+ fd = open_path_or_exit(path, O_RDONLY);
+
+ r = read(fd, &value, 1);
+ TEST_ASSERT(r == 1, "read(%s) failed", path);
+
+ r = close(fd);
+ TEST_ASSERT(!r, "close(%s) failed", path);
+
+ if (value == 'Y')
+ return true;
+ else if (value == 'N')
+ return false;
+
+ TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
+}
+
+bool get_kvm_intel_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm_intel", param);
+}
+
+bool get_kvm_amd_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm_amd", param);
+}
+
/*
* Capability
*
@@ -82,7 +121,10 @@ unsigned int kvm_check_cap(long cap)
void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
{
- vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
+ if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
+ vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
+ else
+ vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
vm->dirty_ring_size = ring_size;
}
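With the new helpers in place, a test can gate itself directly on a kvm_intel/kvm_amd module parameter. A hypothetical usage snippet (not part of the patch), mirroring how kvm_is_tdp_enabled() is built further down:

#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"

int main(void)
{
	/* Hypothetical gate: skip the test unless TDP is enabled. */
	if (is_intel_cpu())
		TEST_REQUIRE(get_kvm_intel_param_bool("ept"));
	else
		TEST_REQUIRE(get_kvm_amd_param_bool("npt"));

	/* ... actual test body would go here ... */
	return 0;
}
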
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 2e6e61bbe81b..39c4409ef56a 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -111,6 +111,14 @@ static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
}
}
+bool kvm_is_tdp_enabled(void)
+{
+ if (is_intel_cpu())
+ return get_kvm_intel_param_bool("ept");
+ else
+ return get_kvm_amd_param_bool("npt");
+}
+
void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
@@ -214,6 +222,25 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
__virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
}
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t nr_bytes, int level)
+{
+ uint64_t pg_size = PG_LEVEL_SIZE(level);
+ uint64_t nr_pages = nr_bytes / pg_size;
+ int i;
+
+ TEST_ASSERT(nr_bytes % pg_size == 0,
+ "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
+ nr_bytes, pg_size);
+
+ for (i = 0; i < nr_pages; i++) {
+ __virt_pg_map(vm, vaddr, paddr, level);
+
+ vaddr += pg_size;
+ paddr += pg_size;
+ }
+}
+
static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm,
struct kvm_vcpu *vcpu,
uint64_t vaddr)
@@ -1294,20 +1321,9 @@ done:
/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
bool vm_is_unrestricted_guest(struct kvm_vm *vm)
{
- char val = 'N';
- size_t count;
- FILE *f;
-
/* Ensure that a KVM vendor-specific module is loaded. */
if (vm == NULL)
close(open_kvm_dev_path_or_exit());
- f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
- if (f) {
- count = fread(&val, sizeof(char), 1, f);
- TEST_ASSERT(count == 1, "Unable to read from param file.");
- fclose(f);
- }
-
- return val == 'Y';
+ return get_kvm_intel_param_bool("unrestricted_guest");
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 6d445886e16c..5495a92dfd5a 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -60,18 +60,6 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
seg->base = base;
}
-/*
- * Avoid using memset to clear the vmcb, since libc may not be
- * available in L1 (and, even if it is, features that libc memset may
- * want to use, like AVX, may not be enabled).
- */
-static void clear_vmcb(struct vmcb *vmcb)
-{
- int n = sizeof(*vmcb) / sizeof(u32);
-
- asm volatile ("rep stosl" : "+c"(n), "+D"(vmcb) : "a"(0) : "memory");
-}
-
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
{
struct vmcb *vmcb = svm->vmcb;
@@ -88,7 +76,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
wrmsr(MSR_EFER, efer | EFER_SVME);
wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
- clear_vmcb(vmcb);
+ memset(vmcb, 0, sizeof(*vmcb));
asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
index e0004bd26536..32f7e09ef67c 100644
--- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
@@ -17,84 +17,70 @@
/* VMCALL and VMMCALL are both 3-byte opcodes. */
#define HYPERCALL_INSN_SIZE 3
-static bool ud_expected;
+static bool quirk_disabled;
static void guest_ud_handler(struct ex_regs *regs)
{
- GUEST_ASSERT(ud_expected);
- GUEST_DONE();
+ regs->rax = -EFAULT;
+ regs->rip += HYPERCALL_INSN_SIZE;
}
-extern uint8_t svm_hypercall_insn[HYPERCALL_INSN_SIZE];
-static uint64_t svm_do_sched_yield(uint8_t apic_id)
-{
- uint64_t ret;
+static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 };
+static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
- asm volatile("mov %1, %%rax\n\t"
- "mov %2, %%rbx\n\t"
- "svm_hypercall_insn:\n\t"
- "vmmcall\n\t"
- "mov %%rax, %0\n\t"
- : "=r"(ret)
- : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id)
- : "rax", "rbx", "memory");
-
- return ret;
-}
-
-extern uint8_t vmx_hypercall_insn[HYPERCALL_INSN_SIZE];
-static uint64_t vmx_do_sched_yield(uint8_t apic_id)
+extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
+static uint64_t do_sched_yield(uint8_t apic_id)
{
uint64_t ret;
- asm volatile("mov %1, %%rax\n\t"
- "mov %2, %%rbx\n\t"
- "vmx_hypercall_insn:\n\t"
- "vmcall\n\t"
- "mov %%rax, %0\n\t"
- : "=r"(ret)
- : "r"((uint64_t)KVM_HC_SCHED_YIELD), "r"((uint64_t)apic_id)
- : "rax", "rbx", "memory");
+ asm volatile("hypercall_insn:\n\t"
+ ".byte 0xcc,0xcc,0xcc\n\t"
+ : "=a"(ret)
+ : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
+ : "memory");
return ret;
}
static void guest_main(void)
{
- uint8_t *native_hypercall_insn, *hypercall_insn;
- uint8_t apic_id;
-
- apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+ const uint8_t *native_hypercall_insn;
+ const uint8_t *other_hypercall_insn;
+ uint64_t ret;
if (is_intel_cpu()) {
- native_hypercall_insn = vmx_hypercall_insn;
- hypercall_insn = svm_hypercall_insn;
- svm_do_sched_yield(apic_id);
+ native_hypercall_insn = vmx_vmcall;
+ other_hypercall_insn = svm_vmmcall;
} else if (is_amd_cpu()) {
- native_hypercall_insn = svm_hypercall_insn;
- hypercall_insn = vmx_hypercall_insn;
- vmx_do_sched_yield(apic_id);
+ native_hypercall_insn = svm_vmmcall;
+ other_hypercall_insn = vmx_vmcall;
} else {
GUEST_ASSERT(0);
/* unreachable */
return;
}
+ memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
+
+ ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
+
/*
- * The hypercall didn't #UD (guest_ud_handler() signals "done" if a #UD
- * occurs). Verify that a #UD is NOT expected and that KVM patched in
- * the native hypercall.
+ * If the quirk is disabled, verify that guest_ud_handler() "returned"
+ * -EFAULT and that KVM did NOT patch the hypercall. If the quirk is
+ * enabled, verify that the hypercall succeeded and that KVM patched in
+ * the "right" hypercall.
*/
- GUEST_ASSERT(!ud_expected);
- GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn, HYPERCALL_INSN_SIZE));
- GUEST_DONE();
-}
+ if (quirk_disabled) {
+ GUEST_ASSERT(ret == (uint64_t)-EFAULT);
+ GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ } else {
+ GUEST_ASSERT(!ret);
+ GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ }
-static void setup_ud_vector(struct kvm_vcpu *vcpu)
-{
- vm_init_descriptor_tables(vcpu->vm);
- vcpu_init_descriptor_tables(vcpu);
- vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
+ GUEST_DONE();
}
static void enter_guest(struct kvm_vcpu *vcpu)
@@ -117,35 +103,23 @@ static void enter_guest(struct kvm_vcpu *vcpu)
}
}
-static void test_fix_hypercall(void)
+static void test_fix_hypercall(bool disable_quirk)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
vm = vm_create_with_one_vcpu(&vcpu, guest_main);
- setup_ud_vector(vcpu);
-
- ud_expected = false;
- sync_global_to_guest(vm, ud_expected);
-
- virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
-
- enter_guest(vcpu);
-}
-static void test_fix_hypercall_disabled(void)
-{
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
-
- vm = vm_create_with_one_vcpu(&vcpu, guest_main);
- setup_ud_vector(vcpu);
+ vm_init_descriptor_tables(vcpu->vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
- KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+ if (disable_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+ KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
- ud_expected = true;
- sync_global_to_guest(vm, ud_expected);
+ quirk_disabled = disable_quirk;
+ sync_global_to_guest(vm, quirk_disabled);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
@@ -156,6 +130,6 @@ int main(void)
{
TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
- test_fix_hypercall();
- test_fix_hypercall_disabled();
+ test_fix_hypercall(false);
+ test_fix_hypercall(true);
}
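The test toggles the quirk through vm_enable_cap(); under the hood that is a plain KVM_ENABLE_CAP on the VM fd. A minimal sketch, assuming the KVM_CAP_DISABLE_QUIRKS2 check for KVM_X86_QUIRK_FIX_HYPERCALL_INSN has already passed:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Disable KVM's hypercall-patching quirk so an "alien" VMCALL/VMMCALL is
 * forwarded to the guest as a #UD instead of being rewritten. */
static int disable_fix_hypercall_quirk(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_DISABLE_QUIRKS2,
		.args = { KVM_X86_QUIRK_FIX_HYPERCALL_INSN },
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}
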
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 79ab0152d281..05b32e550a80 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -26,7 +26,8 @@ static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address,
: "=a" (*hv_status),
"+c" (control), "+d" (input_address),
KVM_ASM_SAFE_OUTPUTS(vector)
- : [output_address] "r"(output_address)
+ : [output_address] "r"(output_address),
+ "a" (-EFAULT)
: "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
return vector;
}
@@ -81,13 +82,13 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
}
vector = hypercall(hcall->control, input, output, &res);
- if (hcall->ud_expected)
+ if (hcall->ud_expected) {
GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector);
- else
+ } else {
GUEST_ASSERT_2(!vector, hcall->control, vector);
+ GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res);
+ }
- GUEST_ASSERT_2(!hcall->ud_expected || res == hcall->expect,
- hcall->expect, res);
GUEST_DONE();
}
@@ -507,7 +508,7 @@ static void guest_test_hcalls_access(void)
switch (stage) {
case 0:
feat->eax |= HV_MSR_HYPERCALL_AVAILABLE;
- hcall->control = 0xdeadbeef;
+ hcall->control = 0xbeef;
hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index e19933ea34ca..59ffe7fd354f 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -112,6 +112,7 @@ void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
+ uint64_t nr_bytes;
void *hva;
int r;
@@ -134,10 +135,24 @@ void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
HPAGE_GPA, HPAGE_SLOT,
HPAGE_SLOT_NPAGES, 0);
- virt_map(vm, HPAGE_GVA, HPAGE_GPA, HPAGE_SLOT_NPAGES);
+ nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
+
+ /*
+ * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
+ * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
+ * whenever KVM is shadowing the guest page tables).
+ *
+ * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
+ * pages irrespective of the guest page size, so map with 4KiB pages
+ * to test that that is the case.
+ */
+ if (kvm_is_tdp_enabled())
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
+ else
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
hva = addr_gpa2hva(vm, HPAGE_GPA);
- memset(hva, RETURN_OPCODE, HPAGE_SLOT_NPAGES * PAGE_SIZE);
+ memset(hva, RETURN_OPCODE, nr_bytes);
check_2m_page_count(vm, 0);
check_split_count(vm, 0);
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
index 46a97f318f58..74ee5067a8ce 100755
--- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
+++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
@@ -134,6 +134,16 @@ offline_memory_expect_fail()
return 0
}
+online_all_offline_memory()
+{
+ for memory in `hotpluggable_offline_memory`; do
+ if ! online_memory_expect_success $memory; then
+ echo "$FUNCNAME $memory: unexpected fail" >&2
+ retval=1
+ fi
+ done
+}
+
error=-12
priority=0
# Run with default of ratio=2 for Kselftest run
@@ -197,8 +207,11 @@ echo -e "\t trying to offline $target out of $hotpluggable_num memory block(s):"
for memory in `hotpluggable_online_memory`; do
if [ "$target" -gt 0 ]; then
echo "online->offline memory$memory"
- if offline_memory_expect_success $memory; then
+ if offline_memory_expect_success $memory &>/dev/null; then
target=$(($target - 1))
+ echo "-> Success"
+ else
+ echo "-> Failure"
fi
fi
done
@@ -257,7 +270,7 @@ prerequisite_extra
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
for memory in `hotpluggable_online_memory`; do
if [ $((RANDOM % 100)) -lt $ratio ]; then
- offline_memory_expect_success $memory
+ offline_memory_expect_success $memory &>/dev/null
fi
done
@@ -266,16 +279,16 @@ done
#
echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
for memory in `hotpluggable_offline_memory`; do
- online_memory_expect_fail $memory
+ if ! online_memory_expect_fail $memory; then
+ retval=1
+ fi
done
#
# Online all hot-pluggable memory
#
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
-for memory in `hotpluggable_offline_memory`; do
- online_memory_expect_success $memory
-done
+online_all_offline_memory
#
# Test memory hot-remove error handling (online => offline)
@@ -283,11 +296,18 @@ done
echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
for memory in `hotpluggable_online_memory`; do
if [ $((RANDOM % 100)) -lt $ratio ]; then
- offline_memory_expect_fail $memory
+ if ! offline_memory_expect_fail $memory; then
+ retval=1
+ fi
fi
done
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
/sbin/modprobe -q -r memory-notifier-error-inject
+#
+# Restore memory before exit
+#
+online_all_offline_memory
+
exit $retval
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index d5a0dd548989..ee5e98204d3d 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1223,6 +1223,11 @@ ipv4_fcnal()
log_test $rc 0 "Delete nexthop route warning"
run_cmd "$IP route delete 172.16.101.1/32 nhid 12"
run_cmd "$IP nexthop del id 12"
+
+ run_cmd "$IP nexthop add id 21 via 172.16.1.6 dev veth1"
+ run_cmd "$IP ro add 172.16.101.0/24 nhid 21"
+ run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1"
+ log_test $? 2 "Delete multipath route with only nh id based entry"
}
ipv4_grp_fcnal()
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 600e3a19d5e2..4504ee07be08 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -6,7 +6,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
- conntrack_vrf.sh nft_synproxy.sh
+ conntrack_vrf.sh nft_synproxy.sh rpath.sh
CFLAGS += $(shell pkg-config --cflags libmnl 2>/dev/null || echo "-I/usr/include/libmnl")
LDLIBS = -lmnl
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
index fd76b69635a4..dff476e45e77 100755
--- a/tools/testing/selftests/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -188,6 +188,7 @@ test_ping() {
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
sleep 3
diff --git a/tools/testing/selftests/netfilter/rpath.sh b/tools/testing/selftests/netfilter/rpath.sh
new file mode 100755
index 000000000000..2d8da7bd8ab7
--- /dev/null
+++ b/tools/testing/selftests/netfilter/rpath.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+
+# search for legacy iptables (it uses the xtables extensions)
+if iptables-legacy --version >/dev/null 2>&1; then
+ iptables='iptables-legacy'
+elif iptables --version >/dev/null 2>&1; then
+ iptables='iptables'
+else
+ iptables=''
+fi
+
+if ip6tables-legacy --version >/dev/null 2>&1; then
+ ip6tables='ip6tables-legacy'
+elif ip6tables --version >/dev/null 2>&1; then
+ ip6tables='ip6tables'
+else
+ ip6tables=''
+fi
+
+if nft --version >/dev/null 2>&1; then
+ nft='nft'
+else
+ nft=''
+fi
+
+if [ -z "$iptables$ip6tables$nft" ]; then
+ echo "SKIP: Test needs iptables, ip6tables or nft"
+ exit $ksft_skip
+fi
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+trap "ip netns del $ns1; ip netns del $ns2" EXIT
+
+# create two netns, disable rp_filter in ns2 and
+# keep IPv6 address when moving into VRF
+ip netns add "$ns1"
+ip netns add "$ns2"
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
+
+# a standard connection between the netns, should not trigger rp filter
+ip -net "$ns1" link add v0 type veth peer name v0 netns "$ns2"
+ip -net "$ns1" link set v0 up; ip -net "$ns2" link set v0 up
+ip -net "$ns1" a a 192.168.23.2/24 dev v0
+ip -net "$ns2" a a 192.168.23.1/24 dev v0
+ip -net "$ns1" a a fec0:23::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:23::1/64 dev v0 nodad
+
+# rp filter testing: ns1 sends packets via v0 which ns2 would route back via d0
+ip -net "$ns2" link add d0 type dummy
+ip -net "$ns2" link set d0 up
+ip -net "$ns1" a a 192.168.42.2/24 dev v0
+ip -net "$ns2" a a 192.168.42.1/24 dev d0
+ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
+
+# firewall matches to test
+ip netns exec "$ns2" "$iptables" -t raw -A PREROUTING -s 192.168.0.0/16 -m rpfilter
+ip netns exec "$ns2" "$ip6tables" -t raw -A PREROUTING -s fec0::/16 -m rpfilter
+ip netns exec "$ns2" nft -f - <<EOF
+table inet t {
+ chain c {
+ type filter hook prerouting priority raw;
+ ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter
+ ip6 saddr fec0::/16 fib saddr . iif oif exists counter
+ }
+}
+EOF
+
+die() {
+ echo "FAIL: $*"
+ #ip netns exec "$ns2" "$iptables" -t raw -vS
+ #ip netns exec "$ns2" "$ip6tables" -t raw -vS
+ #ip netns exec "$ns2" nft list ruleset
+ exit 1
+}
+
+# check rule counters, return true if rule did not match
+ipt_zero_rule() { # (command)
+ [ -n "$1" ] || return 0
+ ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0"
+}
+nft_zero_rule() { # (family)
+ [ -n "$nft" ] || return 0
+ ip netns exec "$ns2" "$nft" list chain inet t c | \
+ grep -q "$1 saddr .* counter packets 0 bytes 0"
+}
+
+netns_ping() { # (netns, args...)
+ local netns="$1"
+ shift
+ ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null
+}
+
+testrun() {
+ # clear counters first
+ [ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z
+ [ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z
+ if [ -n "$nft" ]; then
+ (
+ echo "delete table inet t";
+ ip netns exec "$ns2" nft -s list table inet t;
+ ) | ip netns exec "$ns2" nft -f -
+ fi
+
+ # test 1: martian traffic should fail rpfilter matches
+ netns_ping "$ns1" -I v0 192.168.42.1 && \
+ die "martian ping 192.168.42.1 succeeded"
+ netns_ping "$ns1" -I v0 fec0:42::1 && \
+ die "martian ping fec0:42::1 succeeded"
+
+ ipt_zero_rule "$iptables" || die "iptables matched martian"
+ ipt_zero_rule "$ip6tables" || die "ip6tables matched martian"
+ nft_zero_rule ip || die "nft IPv4 matched martian"
+ nft_zero_rule ip6 || die "nft IPv6 matched martian"
+
+ # test 2: rpfilter match should pass for regular traffic
+ netns_ping "$ns1" 192.168.23.1 || \
+ die "regular ping 192.168.23.1 failed"
+ netns_ping "$ns1" fec0:23::1 || \
+ die "regular ping fec0:23::1 failed"
+
+ ipt_zero_rule "$iptables" && die "iptables match not effective"
+ ipt_zero_rule "$ip6tables" && die "ip6tables match not effective"
+ nft_zero_rule ip && die "nft IPv4 match not effective"
+ nft_zero_rule ip6 && die "nft IPv6 match not effective"
+
+}
+
+testrun
+
+# repeat test with vrf device in $ns2
+ip -net "$ns2" link add vrf0 type vrf table 10
+ip -net "$ns2" link set vrf0 up
+ip -net "$ns2" link set v0 master vrf0
+
+testrun
+
+echo "PASS: netfilter reverse path match works as intended"
+exit 0
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index c4e6a34f9657..a156ac5dd2c6 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -5,6 +5,7 @@
/proc-fsconfig-hidepid
/proc-loadavg-001
/proc-multiple-procfs
+/proc-empty-vm
/proc-pid-vm
/proc-self-map-files-001
/proc-self-map-files-002
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index 219fc6113847..cd95369254c0 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -8,6 +8,7 @@ TEST_GEN_PROGS += fd-001-lookup
TEST_GEN_PROGS += fd-002-posix-eq
TEST_GEN_PROGS += fd-003-kthread
TEST_GEN_PROGS += proc-loadavg-001
+TEST_GEN_PROGS += proc-empty-vm
TEST_GEN_PROGS += proc-pid-vm
TEST_GEN_PROGS += proc-self-map-files-001
TEST_GEN_PROGS += proc-self-map-files-002
diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c
new file mode 100644
index 000000000000..d95b1cb43d9d
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-empty-vm.c
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Create a process without mappings by unmapping everything at once and
+ * holding it with ptrace(2). See what happens to
+ *
+ * /proc/${pid}/maps
+ * /proc/${pid}/numa_maps
+ * /proc/${pid}/smaps
+ * /proc/${pid}/smaps_rollup
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/*
+ * 0: vsyscall VMA doesn't exist vsyscall=none
+ * 1: vsyscall VMA is --xp vsyscall=xonly
+ * 2: vsyscall VMA is r-xp vsyscall=emulate
+ */
+static int g_vsyscall;
+static const char *g_proc_pid_maps_vsyscall;
+static const char *g_proc_pid_smaps_vsyscall;
+
+static const char proc_pid_maps_vsyscall_0[] = "";
+static const char proc_pid_maps_vsyscall_1[] =
+"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n";
+static const char proc_pid_maps_vsyscall_2[] =
+"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n";
+
+static const char proc_pid_smaps_vsyscall_0[] = "";
+
+static const char proc_pid_smaps_vsyscall_1[] =
+"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"
+"Size: 4 kB\n"
+"KernelPageSize: 4 kB\n"
+"MMUPageSize: 4 kB\n"
+"Rss: 0 kB\n"
+"Pss: 0 kB\n"
+"Pss_Dirty: 0 kB\n"
+"Shared_Clean: 0 kB\n"
+"Shared_Dirty: 0 kB\n"
+"Private_Clean: 0 kB\n"
+"Private_Dirty: 0 kB\n"
+"Referenced: 0 kB\n"
+"Anonymous: 0 kB\n"
+"LazyFree: 0 kB\n"
+"AnonHugePages: 0 kB\n"
+"ShmemPmdMapped: 0 kB\n"
+"FilePmdMapped: 0 kB\n"
+"Shared_Hugetlb: 0 kB\n"
+"Private_Hugetlb: 0 kB\n"
+"Swap: 0 kB\n"
+"SwapPss: 0 kB\n"
+"Locked: 0 kB\n"
+"THPeligible: 0\n"
+/*
+ * "ProtectionKey:" field is conditional. It is possible to check it as well,
+ * but I don't have such machine.
+ */
+;
+
+static const char proc_pid_smaps_vsyscall_2[] =
+"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"
+"Size: 4 kB\n"
+"KernelPageSize: 4 kB\n"
+"MMUPageSize: 4 kB\n"
+"Rss: 0 kB\n"
+"Pss: 0 kB\n"
+"Pss_Dirty: 0 kB\n"
+"Shared_Clean: 0 kB\n"
+"Shared_Dirty: 0 kB\n"
+"Private_Clean: 0 kB\n"
+"Private_Dirty: 0 kB\n"
+"Referenced: 0 kB\n"
+"Anonymous: 0 kB\n"
+"LazyFree: 0 kB\n"
+"AnonHugePages: 0 kB\n"
+"ShmemPmdMapped: 0 kB\n"
+"FilePmdMapped: 0 kB\n"
+"Shared_Hugetlb: 0 kB\n"
+"Private_Hugetlb: 0 kB\n"
+"Swap: 0 kB\n"
+"SwapPss: 0 kB\n"
+"Locked: 0 kB\n"
+"THPeligible: 0\n"
+/*
+ * "ProtectionKey:" field is conditional. It is possible to check it as well,
+ * but I'm too tired.
+ */
+;
+
+static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
+{
+ _exit(EXIT_FAILURE);
+}
+
+static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___)
+{
+ _exit(g_vsyscall);
+}
+
+/*
+ * vsyscall page can't be unmapped, probe it directly.
+ */
+static void vsyscall(void)
+{
+ pid_t pid;
+ int wstatus;
+
+ pid = fork();
+ if (pid < 0) {
+ fprintf(stderr, "fork, errno %d\n", errno);
+ exit(1);
+ }
+ if (pid == 0) {
+ setrlimit(RLIMIT_CORE, &(struct rlimit){});
+
+ /* Hide "segfault at ffffffffff600000" messages. */
+ struct sigaction act = {};
+ act.sa_flags = SA_SIGINFO;
+ act.sa_sigaction = sigaction_SIGSEGV_vsyscall;
+ sigaction(SIGSEGV, &act, NULL);
+
+ g_vsyscall = 0;
+ /* gettimeofday(NULL, NULL); */
+ asm volatile (
+ "call %P0"
+ :
+ : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL)
+ : "rax", "rcx", "r11"
+ );
+
+ g_vsyscall = 1;
+ *(volatile int *)0xffffffffff600000UL;
+
+ g_vsyscall = 2;
+ exit(g_vsyscall);
+ }
+ waitpid(pid, &wstatus, 0);
+ if (WIFEXITED(wstatus)) {
+ g_vsyscall = WEXITSTATUS(wstatus);
+ } else {
+ fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus);
+ exit(1);
+ }
+}
+
+static int test_proc_pid_maps(pid_t pid)
+{
+ char buf[4096];
+ snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
+ int fd = open(buf, O_RDONLY);
+ if (fd == -1) {
+ perror("open /proc/${pid}/maps");
+ return EXIT_FAILURE;
+ } else {
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (g_vsyscall == 0) {
+ assert(rv == 0);
+ } else {
+ size_t len = strlen(g_proc_pid_maps_vsyscall);
+ assert(rv == len);
+ assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
+ }
+ return EXIT_SUCCESS;
+ }
+}
+
+static int test_proc_pid_numa_maps(pid_t pid)
+{
+ char buf[4096];
+ snprintf(buf, sizeof(buf), "/proc/%u/numa_maps", pid);
+ int fd = open(buf, O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT) {
+ /*
+ * /proc/${pid}/numa_maps is under CONFIG_NUMA,
+ * it doesn't necessarily exist.
+ */
+ return EXIT_SUCCESS;
+ }
+ perror("open /proc/${pid}/numa_maps");
+ return EXIT_FAILURE;
+ } else {
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ close(fd);
+ assert(rv == 0);
+ return EXIT_SUCCESS;
+ }
+}
+
+static int test_proc_pid_smaps(pid_t pid)
+{
+ char buf[4096];
+ snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
+ int fd = open(buf, O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT) {
+ /*
+ * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR,
+ * it doesn't necessarily exist.
+ */
+ return EXIT_SUCCESS;
+ }
+ perror("open /proc/${pid}/smaps");
+ return EXIT_FAILURE;
+ } else {
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (g_vsyscall == 0) {
+ assert(rv == 0);
+ } else {
+ size_t len = strlen(g_proc_pid_maps_vsyscall);
+ /* TODO "ProtectionKey:" */
+ assert(rv > len);
+ assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
+ }
+ return EXIT_SUCCESS;
+ }
+}
+
+static const char g_smaps_rollup[] =
+"00000000-00000000 ---p 00000000 00:00 0 [rollup]\n"
+"Rss: 0 kB\n"
+"Pss: 0 kB\n"
+"Pss_Dirty: 0 kB\n"
+"Pss_Anon: 0 kB\n"
+"Pss_File: 0 kB\n"
+"Pss_Shmem: 0 kB\n"
+"Shared_Clean: 0 kB\n"
+"Shared_Dirty: 0 kB\n"
+"Private_Clean: 0 kB\n"
+"Private_Dirty: 0 kB\n"
+"Referenced: 0 kB\n"
+"Anonymous: 0 kB\n"
+"LazyFree: 0 kB\n"
+"AnonHugePages: 0 kB\n"
+"ShmemPmdMapped: 0 kB\n"
+"FilePmdMapped: 0 kB\n"
+"Shared_Hugetlb: 0 kB\n"
+"Private_Hugetlb: 0 kB\n"
+"Swap: 0 kB\n"
+"SwapPss: 0 kB\n"
+"Locked: 0 kB\n"
+;
+
+static int test_proc_pid_smaps_rollup(pid_t pid)
+{
+ char buf[4096];
+ snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
+ int fd = open(buf, O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT) {
+ /*
+ * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR,
+ * it doesn't necessarily exist.
+ */
+ return EXIT_SUCCESS;
+ }
+ perror("open /proc/${pid}/smaps_rollup");
+ return EXIT_FAILURE;
+ } else {
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ close(fd);
+ assert(rv == sizeof(g_smaps_rollup) - 1);
+ assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0);
+ return EXIT_SUCCESS;
+ }
+}
+
+int main(void)
+{
+ int rv = EXIT_SUCCESS;
+
+ vsyscall();
+
+ switch (g_vsyscall) {
+ case 0:
+ g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_0;
+ g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0;
+ break;
+ case 1:
+ g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_1;
+ g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1;
+ break;
+ case 2:
+ g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_2;
+ g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2;
+ break;
+ default:
+ abort();
+ }
+
+ pid_t pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ return EXIT_FAILURE;
+ } else if (pid == 0) {
+ rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
+ if (rv != 0) {
+ if (errno == EPERM) {
+ fprintf(stderr,
+"Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n"
+ );
+ kill(getppid(), SIGTERM);
+ return EXIT_FAILURE;
+ }
+ perror("ptrace PTRACE_TRACEME");
+ return EXIT_FAILURE;
+ }
+
+ /*
+ * Hide "segfault at ..." messages. Signal handler won't run.
+ */
+ struct sigaction act = {};
+ act.sa_flags = SA_SIGINFO;
+ act.sa_sigaction = sigaction_SIGSEGV;
+ sigaction(SIGSEGV, &act, NULL);
+
+#ifdef __amd64__
+ munmap(NULL, ((size_t)1 << 47) - 4096);
+#else
+#error "implement 'unmap everything'"
+#endif
+ return EXIT_FAILURE;
+ } else {
+ /*
+ * TODO find reliable way to signal parent that munmap(2) completed.
+ * Child can't do it directly because it effectively doesn't exist
+ * anymore. Looking at child's VM files isn't 100% reliable either:
+ * due to a bug they may not become empty or empty-like.
+ */
+ sleep(1);
+
+ if (rv == EXIT_SUCCESS) {
+ rv = test_proc_pid_maps(pid);
+ }
+ if (rv == EXIT_SUCCESS) {
+ rv = test_proc_pid_numa_maps(pid);
+ }
+ if (rv == EXIT_SUCCESS) {
+ rv = test_proc_pid_smaps(pid);
+ }
+ if (rv == EXIT_SUCCESS) {
+ rv = test_proc_pid_smaps_rollup(pid);
+ }
+ /*
+ * TODO test /proc/${pid}/statm, task_statm()
+ * ->start_code, ->end_code aren't updated by munmap().
+ * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything.
+ */
+
+ /* Cut the rope. */
+ int wstatus;
+ waitpid(pid, &wstatus, 0);
+ assert(WIFSTOPPED(wstatus));
+ assert(WSTOPSIG(wstatus) == SIGSEGV);
+ }
+
+ return rv;
+}
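The core trick of proc-empty-vm.c is that a child under PTRACE_TRACEME can unmap its entire address space; the very next user access faults, and the resulting SIGSEGV leaves the child in a ptrace-stop with an empty mm that the parent can inspect through /proc. A stripped-down, hypothetical illustration of just that mechanism (x86-64 only, no /proc checks):

#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int wstatus;
	pid_t pid = fork();

	if (pid == 0) {
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		/* Drop every mapping; returning from munmap() already faults. */
		munmap(NULL, ((size_t)1 << 47) - 4096);
		return 1;	/* not reached */
	}

	/* The SIGSEGV puts the traced child into signal-delivery-stop. */
	waitpid(pid, &wstatus, 0);
	printf("child stopped: %d, sig %d\n",
	       WIFSTOPPED(wstatus), WSTOPSIG(wstatus));

	/* ... inspect /proc/<pid>/maps, smaps, ... while the child is held ... */

	kill(pid, SIGKILL);
	waitpid(pid, NULL, 0);
	return 0;
}
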
diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c
index e5962f4794f5..69551bfa215c 100644
--- a/tools/testing/selftests/proc/proc-pid-vm.c
+++ b/tools/testing/selftests/proc/proc-pid-vm.c
@@ -213,22 +213,22 @@ static int make_exe(const uint8_t *payload, size_t len)
/*
* 0: vsyscall VMA doesn't exist vsyscall=none
- * 1: vsyscall VMA is r-xp vsyscall=emulate
- * 2: vsyscall VMA is --xp vsyscall=xonly
+ * 1: vsyscall VMA is --xp vsyscall=xonly
+ * 2: vsyscall VMA is r-xp vsyscall=emulate
*/
-static int g_vsyscall;
+static volatile int g_vsyscall;
static const char *str_vsyscall;
static const char str_vsyscall_0[] = "";
static const char str_vsyscall_1[] =
-"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n";
-static const char str_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n";
+static const char str_vsyscall_2[] =
+"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n";
#ifdef __x86_64__
static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
{
- _exit(1);
+ _exit(g_vsyscall);
}
/*
@@ -255,6 +255,7 @@ static void vsyscall(void)
act.sa_sigaction = sigaction_SIGSEGV;
(void)sigaction(SIGSEGV, &act, NULL);
+ g_vsyscall = 0;
/* gettimeofday(NULL, NULL); */
asm volatile (
"call %P0"
@@ -262,45 +263,20 @@ static void vsyscall(void)
: "i" (0xffffffffff600000), "D" (NULL), "S" (NULL)
: "rax", "rcx", "r11"
);
- exit(0);
- }
- waitpid(pid, &wstatus, 0);
- if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) {
- /* vsyscall page exists and is executable. */
- } else {
- /* vsyscall page doesn't exist. */
- g_vsyscall = 0;
- return;
- }
-
- pid = fork();
- if (pid < 0) {
- fprintf(stderr, "fork, errno %d\n", errno);
- exit(1);
- }
- if (pid == 0) {
- struct rlimit rlim = {0, 0};
- (void)setrlimit(RLIMIT_CORE, &rlim);
-
- /* Hide "segfault at ffffffffff600000" messages. */
- struct sigaction act;
- memset(&act, 0, sizeof(struct sigaction));
- act.sa_flags = SA_SIGINFO;
- act.sa_sigaction = sigaction_SIGSEGV;
- (void)sigaction(SIGSEGV, &act, NULL);
+ g_vsyscall = 1;
*(volatile int *)0xffffffffff600000UL;
- exit(0);
+
+ g_vsyscall = 2;
+ exit(g_vsyscall);
}
waitpid(pid, &wstatus, 0);
- if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) {
- /* vsyscall page is readable and executable. */
- g_vsyscall = 1;
- return;
+ if (WIFEXITED(wstatus)) {
+ g_vsyscall = WEXITSTATUS(wstatus);
+ } else {
+ fprintf(stderr, "error: wstatus %08x\n", wstatus);
+ exit(1);
}
-
- /* vsyscall page is executable but unreadable. */
- g_vsyscall = 2;
}
int main(void)
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 7d722265dcd7..4adaad1b822f 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1054,6 +1054,55 @@ TEST_F(hmm, migrate_fault)
hmm_buffer_free(buffer);
}
+TEST_F(hmm, migrate_release)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Release device memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_RELEASE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
/*
* Migrate anonymous shared memory to device private memory.
*/
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 74babdbc02e5..297f250c1d95 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -774,7 +774,27 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
continue_range(uffd, msg->arg.pagefault.address, page_size);
stats->minor_faults++;
} else {
- /* Missing page faults */
+ /*
+ * Missing page faults.
+ *
+ * Here we force a write check for each of the missing mode
+ * faults. It's guaranteed because the only threads that
+ * will trigger uffd faults are the locking threads, and
+ * their first instruction to touch the missing page will
+ * always be pthread_mutex_lock().
+ *
+ * Note that here we relied on an NPTL glibc impl detail to
+ * always read the lock type at the entry of the lock op
+ * (pthread_mutex_t.__data.__type, offset 0x10) before
+ * doing any locking operations to guarantee that. It's
+ * actually not good to rely on this impl detail because
+ * logically a pthread-compatible lib can implement the
+ * locks without types and we can fail when linking with
+ * them. However since we used to find bugs with this
+ * strict check we still keep it around. Hopefully this
+ * could be a good hint when it fails again. If one day
+ * it'll break on some other impl of glibc we'll revisit.
+ */
if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
err("unexpected write fault");