-rw-r--r--  arch/arm64/include/asm/kvm_asm.h | 2
-rw-r--r--  arch/arm64/include/asm/kvm_host.h | 5
-rw-r--r--  arch/arm64/include/asm/kvm_nested.h | 25
-rw-r--r--  arch/arm64/include/asm/kvm_pkvm.h | 1
-rw-r--r--  arch/arm64/include/asm/traps.h | 1
-rw-r--r--  arch/arm64/kernel/traps.c | 15
-rw-r--r--  arch/arm64/kvm/arm.c | 14
-rw-r--r--  arch/arm64/kvm/at.c | 376
-rw-r--r--  arch/arm64/kvm/handle_exit.c | 3
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 4
-rw-r--r--  arch/arm64/kvm/hyp/include/nvhe/trap_handler.h | 3
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/Makefile | 1
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/ffa.c | 217
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/hyp-main.c | 14
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/pkvm.c | 177
-rw-r--r--  arch/arm64/kvm/hyp/nvhe/setup.c | 12
-rw-r--r--  arch/arm64/kvm/inject_fault.c | 27
-rw-r--r--  arch/arm64/kvm/nested.c | 2
-rw-r--r--  arch/arm64/kvm/pkvm.c | 76
-rw-r--r--  arch/arm64/kvm/vgic/vgic-init.c | 14
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 7
-rw-r--r--  include/kvm/arm_vgic.h | 1
-rw-r--r--  include/linux/arm_ffa.h | 1
-rw-r--r--  include/linux/kvm_host.h | 11
-rw-r--r--  tools/testing/selftests/kvm/arm64/external_aborts.c | 42
-rw-r--r--  tools/testing/selftests/kvm/include/arm64/processor.h | 1
-rw-r--r--  tools/testing/selftests/kvm/lib/arm64/processor.c | 13
-rw-r--r--  virt/kvm/kvm_main.c | 43
28 files changed, 817 insertions(+), 291 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index bec227f9500a..9da54d4ee49e 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -81,6 +81,8 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
__KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs,
__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs,
+ __KVM_HOST_SMCCC_FUNC___pkvm_reserve_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_unreserve_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2b07f0a27a7d..8ede884f091d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -252,7 +252,8 @@ struct kvm_protected_vm {
pkvm_handle_t handle;
struct kvm_hyp_memcache teardown_mc;
struct kvm_hyp_memcache stage2_teardown_mc;
- bool enabled;
+ bool is_protected;
+ bool is_created;
};
struct kvm_mpidr_data {
@@ -1441,7 +1442,7 @@ struct kvm *kvm_arch_alloc_vm(void);
#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
-#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.enabled)
+#define kvm_vm_is_protected(kvm) (is_protected_kvm_enabled() && (kvm)->arch.pkvm.is_protected)
#define vcpu_is_protected(vcpu) kvm_vm_is_protected((vcpu)->kvm)
diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index 7fd76f41c296..2be6c3de74e3 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -265,7 +265,7 @@ static inline u64 decode_range_tlbi(u64 val, u64 *range, u16 *asid)
return base;
}
-static inline unsigned int ps_to_output_size(unsigned int ps)
+static inline unsigned int ps_to_output_size(unsigned int ps, bool pa52bit)
{
switch (ps) {
case 0: return 32;
@@ -273,7 +273,10 @@ static inline unsigned int ps_to_output_size(unsigned int ps)
case 2: return 40;
case 3: return 42;
case 4: return 44;
- case 5:
+ case 5: return 48;
+ case 6: if (pa52bit)
+ return 52;
+ fallthrough;
default:
return 48;
}
@@ -285,13 +288,28 @@ enum trans_regime {
TR_EL2,
};
+struct s1_walk_info;
+
+struct s1_walk_context {
+ struct s1_walk_info *wi;
+ u64 table_ipa;
+ int level;
+};
+
+struct s1_walk_filter {
+ int (*fn)(struct s1_walk_context *, void *);
+ void *priv;
+};
+
struct s1_walk_info {
+ struct s1_walk_filter *filter;
u64 baddr;
enum trans_regime regime;
unsigned int max_oa_bits;
unsigned int pgshift;
unsigned int txsz;
int sl;
+ u8 sh;
bool as_el0;
bool hpd;
bool e0poe;
@@ -299,6 +317,7 @@ struct s1_walk_info {
bool pan;
bool be;
bool s2;
+ bool pa52bit;
};
struct s1_walk_result {
@@ -334,6 +353,8 @@ struct s1_walk_result {
int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
struct s1_walk_result *wr, u64 va);
+int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa,
+ int *level);
/* VNCR management */
int kvm_vcpu_allocate_vncr_tlb(struct kvm_vcpu *vcpu);
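/*
 * Illustration only (not part of the patch): the PS/IPS decoding that the
 * extended ps_to_output_size() above implements, as a standalone C sketch
 * with a tiny test harness. Encodings 0..6 map to 32/36/40/42/44/48/52 bits,
 * with 52 only honoured when the walk uses a 52-bit PA scheme; reserved
 * values saturate at 48 bits. The name ps_to_bits() is made up here.
 */
#include <stdbool.h>
#include <stdio.h>

static unsigned int ps_to_bits(unsigned int ps, bool pa52bit)
{
        switch (ps) {
        case 0: return 32;
        case 1: return 36;
        case 2: return 40;
        case 3: return 42;
        case 4: return 44;
        case 5: return 48;
        case 6: return pa52bit ? 52 : 48;
        default: return 48;
        }
}

int main(void)
{
        for (unsigned int ps = 0; ps <= 7; ps++)
                printf("PS=%u -> %u bits (52-bit PA) / %u bits (otherwise)\n",
                       ps, ps_to_bits(ps, true), ps_to_bits(ps, false));
        return 0;
}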
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 35f9d9478004..0aecd4ac5f45 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -18,6 +18,7 @@
int pkvm_init_host_vm(struct kvm *kvm);
int pkvm_create_hyp_vm(struct kvm *kvm);
+bool pkvm_hyp_vm_is_created(struct kvm *kvm);
void pkvm_destroy_hyp_vm(struct kvm *kvm);
int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index e3e8944a71c3..e92e4a0e48fc 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -36,6 +36,7 @@ int kasan_brk_handler(struct pt_regs *regs, unsigned long esr);
int ubsan_brk_handler(struct pt_regs *regs, unsigned long esr);
int early_brk64(unsigned long addr, unsigned long esr, struct pt_regs *regs);
+void dump_kernel_instr(unsigned long kaddr);
/*
* Move regs->pc to next instruction and do necessary setup before it
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index f528b6041f6a..83e6d1409e1f 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -149,19 +149,18 @@ pstate_check_t * const aarch32_opcode_cond_checks[16] = {
int show_unhandled_signals = 0;
-static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
+void dump_kernel_instr(unsigned long kaddr)
{
- unsigned long addr = instruction_pointer(regs);
char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
int i;
- if (user_mode(regs))
+ if (!is_ttbr1_addr(kaddr))
return;
for (i = -4; i < 1; i++) {
unsigned int val, bad;
- bad = aarch64_insn_read(&((u32 *)addr)[i], &val);
+ bad = aarch64_insn_read(&((u32 *)kaddr)[i], &val);
if (!bad)
p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
@@ -169,7 +168,7 @@ static void dump_kernel_instr(const char *lvl, struct pt_regs *regs)
p += sprintf(p, i == 0 ? "(????????) " : "???????? ");
}
- printk("%sCode: %s\n", lvl, str);
+ printk(KERN_EMERG "Code: %s\n", str);
}
#define S_SMP " SMP"
@@ -178,6 +177,7 @@ static int __die(const char *str, long err, struct pt_regs *regs)
{
static int die_counter;
int ret;
+ unsigned long addr = instruction_pointer(regs);
pr_emerg("Internal error: %s: %016lx [#%d] " S_SMP "\n",
str, err, ++die_counter);
@@ -190,7 +190,10 @@ static int __die(const char *str, long err, struct pt_regs *regs)
print_modules();
show_regs(regs);
- dump_kernel_instr(KERN_EMERG, regs);
+ if (user_mode(regs))
+ return ret;
+
+ dump_kernel_instr(addr);
return ret;
}
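/*
 * Illustration only (not part of the patch): the "Code:" window format that
 * dump_kernel_instr() keeps after the refactor - four 32-bit words before the
 * target address plus the target word itself in parentheses. This standalone
 * sketch reads from a local array instead of aarch64_insn_read(); the names
 * dump_instr_window() and target_idx are made up.
 */
#include <stdint.h>
#include <stdio.h>

static void dump_instr_window(const uint32_t *insn, int target_idx)
{
        char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;

        for (int i = -4; i < 1; i++)
                p += sprintf(p, i == 0 ? "(%08x) " : "%08x ",
                             insn[target_idx + i]);

        printf("Code: %s\n", str);
}

int main(void)
{
        uint32_t text[] = { 0xd503201f, 0xd503201f, 0xaa1e03e9,
                            0xd503201f, 0xd4210000, 0xd65f03c0 };

        dump_instr_window(text, 4);     /* pretend index 4 faulted */
        return 0;
}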
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 49485d08ece2..a59b4046617c 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -170,10 +170,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
return ret;
- ret = pkvm_init_host_vm(kvm);
- if (ret)
- goto err_unshare_kvm;
-
if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL_ACCOUNT)) {
ret = -ENOMEM;
goto err_unshare_kvm;
@@ -184,6 +180,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto err_free_cpumask;
+ if (is_protected_kvm_enabled()) {
+ /*
+ * If any failures occur after this is successful, make sure to
+ * call __pkvm_unreserve_vm to unreserve the VM in hyp.
+ */
+ ret = pkvm_init_host_vm(kvm);
+ if (ret)
+ goto err_free_cpumask;
+ }
+
kvm_vgic_early_init(kvm);
kvm_timer_init_vm(kvm);
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index d71ca4ddc9d1..20bb9af125b1 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -28,9 +28,57 @@ static int get_ia_size(struct s1_walk_info *wi)
/* Return true if the IPA is out of the OA range */
static bool check_output_size(u64 ipa, struct s1_walk_info *wi)
{
+ if (wi->pa52bit)
+ return wi->max_oa_bits < 52 && (ipa & GENMASK_ULL(51, wi->max_oa_bits));
return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits));
}
+static bool has_52bit_pa(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, u64 tcr)
+{
+ switch (BIT(wi->pgshift)) {
+ case SZ_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR0_EL1, PARANGE, 52))
+ return false;
+ return ((wi->regime == TR_EL2 ?
+ FIELD_GET(TCR_EL2_PS_MASK, tcr) :
+ FIELD_GET(TCR_IPS_MASK, tcr)) == 0b0110);
+ case SZ_16K:
+ if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT))
+ return false;
+ break;
+ case SZ_4K:
+ if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT))
+ return false;
+ break;
+ }
+
+ return (tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS));
+}
+
+static u64 desc_to_oa(struct s1_walk_info *wi, u64 desc)
+{
+ u64 addr;
+
+ if (!wi->pa52bit)
+ return desc & GENMASK_ULL(47, wi->pgshift);
+
+ switch (BIT(wi->pgshift)) {
+ case SZ_4K:
+ case SZ_16K:
+ addr = desc & GENMASK_ULL(49, wi->pgshift);
+ addr |= FIELD_GET(KVM_PTE_ADDR_51_50_LPA2, desc) << 50;
+ break;
+ case SZ_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ addr = desc & GENMASK_ULL(47, wi->pgshift);
+ addr |= FIELD_GET(KVM_PTE_ADDR_51_48, desc) << 48;
+ break;
+ }
+
+ return addr;
+}
+
/* Return the translation regime that applies to an AT instruction */
static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op)
{
@@ -50,21 +98,26 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o
}
}
+static u64 effective_tcr2(struct kvm_vcpu *vcpu, enum trans_regime regime)
+{
+ if (regime == TR_EL10) {
+ if (vcpu_has_nv(vcpu) &&
+ !(__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En))
+ return 0;
+
+ return vcpu_read_sys_reg(vcpu, TCR2_EL1);
+ }
+
+ return vcpu_read_sys_reg(vcpu, TCR2_EL2);
+}
+
static bool s1pie_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime)
{
if (!kvm_has_s1pie(vcpu->kvm))
return false;
- switch (regime) {
- case TR_EL2:
- case TR_EL20:
- return vcpu_read_sys_reg(vcpu, TCR2_EL2) & TCR2_EL2_PIE;
- case TR_EL10:
- return (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) &&
- (__vcpu_sys_reg(vcpu, TCR2_EL1) & TCR2_EL1_PIE);
- default:
- BUG();
- }
+ /* Abuse TCR2_EL1_PIE and use it for EL2 as well */
+ return effective_tcr2(vcpu, regime) & TCR2_EL1_PIE;
}
static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
@@ -76,23 +129,11 @@ static void compute_s1poe(struct kvm_vcpu *vcpu, struct s1_walk_info *wi)
return;
}
- switch (wi->regime) {
- case TR_EL2:
- case TR_EL20:
- val = vcpu_read_sys_reg(vcpu, TCR2_EL2);
- wi->poe = val & TCR2_EL2_POE;
- wi->e0poe = (wi->regime == TR_EL20) && (val & TCR2_EL2_E0POE);
- break;
- case TR_EL10:
- if (__vcpu_sys_reg(vcpu, HCRX_EL2) & HCRX_EL2_TCR2En) {
- wi->poe = wi->e0poe = false;
- return;
- }
+ val = effective_tcr2(vcpu, wi->regime);
- val = __vcpu_sys_reg(vcpu, TCR2_EL1);
- wi->poe = val & TCR2_EL1_POE;
- wi->e0poe = val & TCR2_EL1_E0POE;
- }
+ /* Abuse TCR2_EL1_* for EL2 */
+ wi->poe = val & TCR2_EL1_POE;
+ wi->e0poe = (wi->regime != TR_EL2) && (val & TCR2_EL1_E0POE);
}
static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
@@ -102,14 +143,16 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
unsigned int stride, x;
bool va55, tbi, lva;
- hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
-
va55 = va & BIT(55);
- if (wi->regime == TR_EL2 && va55)
- goto addrsz;
-
- wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
+ if (vcpu_has_nv(vcpu)) {
+ hcr = __vcpu_sys_reg(vcpu, HCR_EL2);
+ wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC));
+ } else {
+ WARN_ON_ONCE(wi->regime != TR_EL10);
+ wi->s2 = false;
+ hcr = 0;
+ }
switch (wi->regime) {
case TR_EL10:
@@ -131,6 +174,46 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
BUG();
}
+ /* Someone was silly enough to encode TG0/TG1 differently */
+ if (va55 && wi->regime != TR_EL2) {
+ wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
+ tg = FIELD_GET(TCR_TG1_MASK, tcr);
+
+ switch (tg << TCR_TG1_SHIFT) {
+ case TCR_TG1_4K:
+ wi->pgshift = 12; break;
+ case TCR_TG1_16K:
+ wi->pgshift = 14; break;
+ case TCR_TG1_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ wi->pgshift = 16; break;
+ }
+ } else {
+ wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
+ tg = FIELD_GET(TCR_TG0_MASK, tcr);
+
+ switch (tg << TCR_TG0_SHIFT) {
+ case TCR_TG0_4K:
+ wi->pgshift = 12; break;
+ case TCR_TG0_16K:
+ wi->pgshift = 14; break;
+ case TCR_TG0_64K:
+ default: /* IMPDEF: treat any other value as 64k */
+ wi->pgshift = 16; break;
+ }
+ }
+
+ wi->pa52bit = has_52bit_pa(vcpu, wi, tcr);
+
+ ia_bits = get_ia_size(wi);
+
+ /* AArch64.S1StartLevel() */
+ stride = wi->pgshift - 3;
+ wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
+
+ if (wi->regime == TR_EL2 && va55)
+ goto addrsz;
+
tbi = (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_TBI, tcr) :
(va55 ?
@@ -140,6 +223,12 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
if (!tbi && (u64)sign_extend64(va, 55) != va)
goto addrsz;
+ wi->sh = (wi->regime == TR_EL2 ?
+ FIELD_GET(TCR_EL2_SH0_MASK, tcr) :
+ (va55 ?
+ FIELD_GET(TCR_SH1_MASK, tcr) :
+ FIELD_GET(TCR_SH0_MASK, tcr)));
+
va = (u64)sign_extend64(va, 55);
/* Let's put the MMU disabled case aside immediately */
@@ -194,53 +283,20 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
/* R_BVXDG */
wi->hpd |= (wi->poe || wi->e0poe);
- /* Someone was silly enough to encode TG0/TG1 differently */
- if (va55) {
- wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
- tg = FIELD_GET(TCR_TG1_MASK, tcr);
-
- switch (tg << TCR_TG1_SHIFT) {
- case TCR_TG1_4K:
- wi->pgshift = 12; break;
- case TCR_TG1_16K:
- wi->pgshift = 14; break;
- case TCR_TG1_64K:
- default: /* IMPDEF: treat any other value as 64k */
- wi->pgshift = 16; break;
- }
- } else {
- wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
- tg = FIELD_GET(TCR_TG0_MASK, tcr);
-
- switch (tg << TCR_TG0_SHIFT) {
- case TCR_TG0_4K:
- wi->pgshift = 12; break;
- case TCR_TG0_16K:
- wi->pgshift = 14; break;
- case TCR_TG0_64K:
- default: /* IMPDEF: treat any other value as 64k */
- wi->pgshift = 16; break;
- }
- }
-
/* R_PLCGL, R_YXNYW */
if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) {
if (wi->txsz > 39)
- goto transfault_l0;
+ goto transfault;
} else {
if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47))
- goto transfault_l0;
+ goto transfault;
}
/* R_GTJBY, R_SXWGM */
switch (BIT(wi->pgshift)) {
case SZ_4K:
- lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT);
- lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
- break;
case SZ_16K:
- lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT);
- lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS);
+ lva = wi->pa52bit;
break;
case SZ_64K:
lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52);
@@ -248,38 +304,42 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
}
if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16))
- goto transfault_l0;
-
- ia_bits = get_ia_size(wi);
+ goto transfault;
/* R_YYVYV, I_THCZK */
if ((!va55 && va > GENMASK(ia_bits - 1, 0)) ||
(va55 && va < GENMASK(63, ia_bits)))
- goto transfault_l0;
+ goto transfault;
/* I_ZFSYQ */
if (wi->regime != TR_EL2 &&
(tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK)))
- goto transfault_l0;
+ goto transfault;
/* R_BNDVG and following statements */
if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) &&
wi->as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0)))
- goto transfault_l0;
-
- /* AArch64.S1StartLevel() */
- stride = wi->pgshift - 3;
- wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride);
+ goto transfault;
ps = (wi->regime == TR_EL2 ?
FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr));
- wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps));
+ wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps, wi->pa52bit));
/* Compute minimal alignment */
x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift);
wi->baddr = ttbr & TTBRx_EL1_BADDR;
+ if (wi->pa52bit) {
+ /*
+ * Force the alignment on 64 bytes for top-level tables
+ * smaller than 8 entries, since TTBR.BADDR[5:2] are used to
+ * store bits [51:48] of the first level of lookup.
+ */
+ x = max(x, 6);
+
+ wi->baddr |= FIELD_GET(GENMASK_ULL(5, 2), ttbr) << 48;
+ }
/* R_VPBBF */
if (check_output_size(wi->baddr, wi))
@@ -289,12 +349,17 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
return 0;
-addrsz: /* Address Size Fault level 0 */
+addrsz:
+ /*
+ * Address Size Fault at level 0 to indicate that it comes from the
+ * TTBR; yes, this is an oddity.
+ */
fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false);
return -EFAULT;
-transfault_l0: /* Translation Fault level 0 */
- fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false);
+transfault:
+ /* Translation Fault on start level */
+ fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(wi->sl), false);
return -EFAULT;
}
@@ -339,6 +404,17 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
ipa = kvm_s2_trans_output(&s2_trans);
}
+ if (wi->filter) {
+ ret = wi->filter->fn(&(struct s1_walk_context)
+ {
+ .wi = wi,
+ .table_ipa = baddr,
+ .level = level,
+ }, wi->filter->priv);
+ if (ret)
+ return ret;
+ }
+
ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc));
if (ret) {
fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false);
@@ -369,7 +445,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc);
}
- baddr = desc & GENMASK_ULL(47, wi->pgshift);
+ baddr = desc_to_oa(wi, desc);
/* Check for out-of-range OA */
if (check_output_size(baddr, wi))
@@ -386,11 +462,11 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
switch (BIT(wi->pgshift)) {
case SZ_4K:
- valid_block = level == 1 || level == 2;
+ valid_block = level == 1 || level == 2 || (wi->pa52bit && level == 0);
break;
case SZ_16K:
case SZ_64K:
- valid_block = level == 2;
+ valid_block = level == 2 || (wi->pa52bit && level == 1);
break;
}
@@ -398,7 +474,8 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
goto transfault;
}
- if (check_output_size(desc & GENMASK(47, va_bottom), wi))
+ baddr = desc_to_oa(wi, desc);
+ if (check_output_size(baddr & GENMASK(52, va_bottom), wi))
goto addrsz;
if (!(desc & PTE_AF)) {
@@ -411,7 +488,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
wr->failed = false;
wr->level = level;
wr->desc = desc;
- wr->pa = desc & GENMASK(47, va_bottom);
+ wr->pa = baddr & GENMASK(52, va_bottom);
wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0);
wr->nG = (wi->regime != TR_EL2) && (desc & PTE_NG);
@@ -640,21 +717,36 @@ static u8 combine_s1_s2_attr(u8 s1, u8 s2)
#define ATTR_OSH 0b10
#define ATTR_ISH 0b11
-static u8 compute_sh(u8 attr, u64 desc)
+static u8 compute_final_sh(u8 attr, u8 sh)
{
- u8 sh;
-
/* Any form of device, as well as NC has SH[1:0]=0b10 */
if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
return ATTR_OSH;
- sh = FIELD_GET(PTE_SHARED, desc);
if (sh == ATTR_RSV) /* Reserved, mapped to NSH */
sh = ATTR_NSH;
return sh;
}
+static u8 compute_s1_sh(struct s1_walk_info *wi, struct s1_walk_result *wr,
+ u8 attr)
+{
+ u8 sh;
+
+ /*
+ * non-52bit and LPA have their basic shareability described in the
+ * descriptor. LPA2 gets it from the corresponding field in TCR,
+ * conveniently recorded in the walk info.
+ */
+ if (!wi->pa52bit || BIT(wi->pgshift) == SZ_64K)
+ sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_SH, wr->desc);
+ else
+ sh = wi->sh;
+
+ return compute_final_sh(attr, sh);
+}
+
static u8 combine_sh(u8 s1_sh, u8 s2_sh)
{
if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
@@ -668,7 +760,7 @@ static u8 combine_sh(u8 s1_sh, u8 s2_sh)
static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
struct kvm_s2_trans *tr)
{
- u8 s1_parattr, s2_memattr, final_attr;
+ u8 s1_parattr, s2_memattr, final_attr, s2_sh;
u64 par;
/* If S2 has failed to translate, report the damage */
@@ -741,17 +833,19 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
!MEMATTR_IS_DEVICE(final_attr))
final_attr = MEMATTR(NC, NC);
+ s2_sh = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_SH, tr->desc);
+
par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
par |= tr->output & GENMASK(47, 12);
par |= FIELD_PREP(SYS_PAR_EL1_SH,
combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
- compute_sh(final_attr, tr->desc)));
+ compute_final_sh(final_attr, s2_sh)));
return par;
}
-static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
- enum trans_regime regime)
+static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
+ struct s1_walk_result *wr)
{
u64 par;
@@ -764,9 +858,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
} else if (wr->level == S1_MMU_DISABLED) {
/* MMU off or HCR_EL2.DC == 1 */
par = SYS_PAR_EL1_NSE;
- par |= wr->pa & GENMASK_ULL(47, 12);
+ par |= wr->pa & SYS_PAR_EL1_PA;
- if (regime == TR_EL10 &&
+ if (wi->regime == TR_EL10 && vcpu_has_nv(vcpu) &&
(__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) {
par |= FIELD_PREP(SYS_PAR_EL1_ATTR,
MEMATTR(WbRaWa, WbRaWa));
@@ -781,14 +875,14 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
par = SYS_PAR_EL1_NSE;
- mair = (regime == TR_EL10 ?
+ mair = (wi->regime == TR_EL10 ?
vcpu_read_sys_reg(vcpu, MAIR_EL1) :
vcpu_read_sys_reg(vcpu, MAIR_EL2));
mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8;
mair &= 0xff;
- sctlr = (regime == TR_EL10 ?
+ sctlr = (wi->regime == TR_EL10 ?
vcpu_read_sys_reg(vcpu, SCTLR_EL1) :
vcpu_read_sys_reg(vcpu, SCTLR_EL2));
@@ -797,9 +891,9 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr,
mair = MEMATTR(NC, NC);
par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair);
- par |= wr->pa & GENMASK_ULL(47, 12);
+ par |= wr->pa & SYS_PAR_EL1_PA;
- sh = compute_sh(mair, wr->desc);
+ sh = compute_s1_sh(wi, wr, mair);
par |= FIELD_PREP(SYS_PAR_EL1_SH, sh);
}
@@ -873,7 +967,7 @@ static void compute_s1_direct_permissions(struct kvm_vcpu *vcpu,
wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL2) & SCTLR_ELx_WXN);
break;
case TR_EL10:
- wxn = (__vcpu_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
+ wxn = (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_WXN);
break;
}
@@ -1186,7 +1280,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false);
compute_par:
- return compute_par_s1(vcpu, &wr, wi.regime);
+ return compute_par_s1(vcpu, &wi, &wr);
}
/*
@@ -1202,7 +1296,7 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
{
struct mmu_config config;
struct kvm_s2_mmu *mmu;
- bool fail;
+ bool fail, mmu_cs;
u64 par;
par = SYS_PAR_EL1_F;
@@ -1218,8 +1312,13 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
* If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already
* the right one (as we trapped from vEL2). If not, save the
* full MMU context.
+ *
+ * We are also guaranteed to be in the correct context if
+ * we're not in a nested VM.
*/
- if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))
+ mmu_cs = (vcpu_has_nv(vcpu) &&
+ !(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)));
+ if (!mmu_cs)
goto skip_mmu_switch;
/*
@@ -1287,7 +1386,7 @@ skip_mmu_switch:
write_sysreg_hcr(HCR_HOST_VHE_FLAGS);
- if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
+ if (mmu_cs)
__mmu_config_restore(&config);
return par;
@@ -1470,3 +1569,68 @@ int __kvm_translate_va(struct kvm_vcpu *vcpu, struct s1_walk_info *wi,
return 0;
}
+
+struct desc_match {
+ u64 ipa;
+ int level;
+};
+
+static int match_s1_desc(struct s1_walk_context *ctxt, void *priv)
+{
+ struct desc_match *dm = priv;
+ u64 ipa = dm->ipa;
+
+ /* Use S1 granule alignment */
+ ipa &= GENMASK(51, ctxt->wi->pgshift);
+
+ /* Not the IPA we're looking for? Continue. */
+ if (ipa != ctxt->table_ipa)
+ return 0;
+
+ /* Note the level and interrupt the walk */
+ dm->level = ctxt->level;
+ return -EINTR;
+}
+
+int __kvm_find_s1_desc_level(struct kvm_vcpu *vcpu, u64 va, u64 ipa, int *level)
+{
+ struct desc_match dm = {
+ .ipa = ipa,
+ };
+ struct s1_walk_info wi = {
+ .filter = &(struct s1_walk_filter){
+ .fn = match_s1_desc,
+ .priv = &dm,
+ },
+ .regime = TR_EL10,
+ .as_el0 = false,
+ .pan = false,
+ };
+ struct s1_walk_result wr = {};
+ int ret;
+
+ ret = setup_s1_walk(vcpu, &wi, &wr, va);
+ if (ret)
+ return ret;
+
+ /* We really expect the S1 MMU to be on here... */
+ if (WARN_ON_ONCE(wr.level == S1_MMU_DISABLED)) {
+ *level = 0;
+ return 0;
+ }
+
+ /* Walk the guest's PT, looking for a match along the way */
+ ret = walk_s1(vcpu, &wi, &wr, va);
+ switch (ret) {
+ case -EINTR:
+ /* We interrupted the walk on a match, return the level */
+ *level = dm.level;
+ return 0;
+ case 0:
+ /* The walk completed, we failed to find the entry */
+ return -ENOENT;
+ default:
+ /* Any other error... */
+ return ret;
+ }
+}
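/*
 * Illustration only (not part of the patch): how desc_to_oa() above assembles
 * a 52-bit output address, as a standalone sketch. Bit positions follow the
 * Arm ARM descriptor formats (mirrored by the kernel's KVM_PTE_ADDR_51_48 and
 * KVM_PTE_ADDR_51_50_LPA2 masks): with the 64KB granule (FEAT_LPA) OA[51:48]
 * sit in descriptor bits [15:12]; with 4KB/16KB (FEAT_LPA2) OA[51:50] sit in
 * bits [9:8] and the contiguous field extends up to bit 49. The function name
 * is made up.
 */
#include <stdint.h>
#include <stdio.h>

#define GENMASK_ULL(h, l) \
        ((~0ULL << (l)) & (~0ULL >> (63 - (h))))

static uint64_t desc_to_oa_52(uint64_t desc, unsigned int pgshift)
{
        uint64_t oa;

        if (pgshift == 16) {                    /* 64KB granule */
                oa = desc & GENMASK_ULL(47, pgshift);
                oa |= ((desc & GENMASK_ULL(15, 12)) >> 12) << 48;
        } else {                                /* 4KB or 16KB granule */
                oa = desc & GENMASK_ULL(49, pgshift);
                oa |= ((desc & GENMASK_ULL(9, 8)) >> 8) << 50;
        }
        return oa;
}

int main(void)
{
        /* 64KB leaf: OA[47:16] plus OA[51:48] = 0xa folded into bits [15:12] */
        uint64_t desc = 0x0000ffffffff0000ULL | (0xaULL << 12);

        printf("OA = 0x%llx\n", (unsigned long long)desc_to_oa_52(desc, 16));
        return 0;
}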
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index a598072f36d2..d449e15680e4 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -559,6 +559,9 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
/* Dump the nVHE hypervisor backtrace */
kvm_nvhe_dump_backtrace(hyp_offset);
+ /* Dump the faulting instruction */
+ dump_kernel_instr(panic_addr + kaslr_offset());
+
/*
* Hyp has panicked and we're going to handle that by panicking the
* kernel. The kernel offset will be revealed in the panic so we're
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index ce31d3b73603..184ad7a39950 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -29,7 +29,7 @@ struct pkvm_hyp_vcpu {
};
/*
- * Holds the relevant data for running a protected vm.
+ * Holds the relevant data for running a vm in protected mode.
*/
struct pkvm_hyp_vm {
struct kvm kvm;
@@ -67,6 +67,8 @@ static inline bool pkvm_hyp_vm_is_protected(struct pkvm_hyp_vm *hyp_vm)
void pkvm_hyp_vm_table_init(void *tbl);
+int __pkvm_reserve_vm(void);
+void __pkvm_unreserve_vm(pkvm_handle_t handle);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
unsigned long pgd_hva);
int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
index 1e6d995968a1..ba5382c12787 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h
@@ -12,7 +12,8 @@
#include <asm/kvm_host.h>
#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r]
-#define DECLARE_REG(type, name, ctxt, reg) \
+#define DECLARE_REG(type, name, ctxt, reg) \
+ __always_unused int ___check_reg_ ## reg; \
type name = (type)cpu_reg(ctxt, (reg))
#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */
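/*
 * Illustration only (not part of the patch): the extra __always_unused dummy
 * declaration added to DECLARE_REG() turns an accidental reuse of the same
 * register index into a compile-time redeclaration error. The kernel's
 * __always_unused boils down to __attribute__((unused)); regs[] and the
 * variable names below are made up for the sketch.
 */
#include <stdio.h>

static unsigned long regs[8] = { 0, 111, 222, 333 };

#define DECLARE_REG(type, name, reg)                            \
        __attribute__((unused)) int ___check_reg_ ## reg;       \
        type name = (type)regs[reg]

int main(void)
{
        DECLARE_REG(unsigned long, handle, 1);
        DECLARE_REG(unsigned long, hva, 2);
        /*
         * DECLARE_REG(unsigned long, oops, 2);
         * would not compile: redeclaration of '___check_reg_2' with no linkage
         */

        printf("handle=%lu hva=%lu\n", handle, hva);
        return 0;
}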
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 0b0a68b663d4..a244ec25f8c5 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -27,6 +27,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
+hyp-obj-y += ../../../kernel/smccc-call.o
hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
hyp-obj-y += $(lib-objs)
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 3369dd0c4009..4e16f9b96f63 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -71,36 +71,68 @@ static u32 hyp_ffa_version;
static bool has_version_negotiated;
static hyp_spinlock_t version_lock;
-static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno)
+static void ffa_to_smccc_error(struct arm_smccc_1_2_regs *res, u64 ffa_errno)
{
- *res = (struct arm_smccc_res) {
+ *res = (struct arm_smccc_1_2_regs) {
.a0 = FFA_ERROR,
.a2 = ffa_errno,
};
}
-static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop)
+static void ffa_to_smccc_res_prop(struct arm_smccc_1_2_regs *res, int ret, u64 prop)
{
if (ret == FFA_RET_SUCCESS) {
- *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS,
- .a2 = prop };
+ *res = (struct arm_smccc_1_2_regs) { .a0 = FFA_SUCCESS,
+ .a2 = prop };
} else {
ffa_to_smccc_error(res, ret);
}
}
-static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret)
+static void ffa_to_smccc_res(struct arm_smccc_1_2_regs *res, int ret)
{
ffa_to_smccc_res_prop(res, ret, 0);
}
static void ffa_set_retval(struct kvm_cpu_context *ctxt,
- struct arm_smccc_res *res)
+ struct arm_smccc_1_2_regs *res)
{
cpu_reg(ctxt, 0) = res->a0;
cpu_reg(ctxt, 1) = res->a1;
cpu_reg(ctxt, 2) = res->a2;
cpu_reg(ctxt, 3) = res->a3;
+ cpu_reg(ctxt, 4) = res->a4;
+ cpu_reg(ctxt, 5) = res->a5;
+ cpu_reg(ctxt, 6) = res->a6;
+ cpu_reg(ctxt, 7) = res->a7;
+
+ /*
+ * DEN0028C 2.6: SMC32/HVC32 call from aarch64 must preserve x8-x30.
+ *
+ * In FF-A 1.2, we cannot rely on the function ID sent by the caller to
+ * detect 32-bit calls because the CPU cycle management interfaces (e.g.
+ * FFA_MSG_WAIT, FFA_RUN) are 32-bit only but can have 64-bit responses.
+ *
+ * FFA-1.3 introduces 64-bit variants of the CPU cycle management
+ * interfaces. Moreover, FF-A 1.3 clarifies that SMC32 direct requests
+ * complete with SMC32 direct reponses which *should* allow us use the
+ * function ID sent by the caller to determine whether to return x8-x17.
+ *
+ * Note that we also cannot rely on function IDs in the response.
+ *
+ * Given the above, assume SMC64 and send back x0-x17 unconditionally
+ * as the passthrough code (__kvm_hyp_host_forward_smc) does the same.
+ */
+ cpu_reg(ctxt, 8) = res->a8;
+ cpu_reg(ctxt, 9) = res->a9;
+ cpu_reg(ctxt, 10) = res->a10;
+ cpu_reg(ctxt, 11) = res->a11;
+ cpu_reg(ctxt, 12) = res->a12;
+ cpu_reg(ctxt, 13) = res->a13;
+ cpu_reg(ctxt, 14) = res->a14;
+ cpu_reg(ctxt, 15) = res->a15;
+ cpu_reg(ctxt, 16) = res->a16;
+ cpu_reg(ctxt, 17) = res->a17;
}
static bool is_ffa_call(u64 func_id)
@@ -113,82 +145,92 @@ static bool is_ffa_call(u64 func_id)
static int ffa_map_hyp_buffers(u64 ffa_page_count)
{
- struct arm_smccc_res res;
+ struct arm_smccc_1_2_regs res;
- arm_smccc_1_1_smc(FFA_FN64_RXTX_MAP,
- hyp_virt_to_phys(hyp_buffers.tx),
- hyp_virt_to_phys(hyp_buffers.rx),
- ffa_page_count,
- 0, 0, 0, 0,
- &res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_FN64_RXTX_MAP,
+ .a1 = hyp_virt_to_phys(hyp_buffers.tx),
+ .a2 = hyp_virt_to_phys(hyp_buffers.rx),
+ .a3 = ffa_page_count,
+ }, &res);
return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2;
}
static int ffa_unmap_hyp_buffers(void)
{
- struct arm_smccc_res res;
+ struct arm_smccc_1_2_regs res;
- arm_smccc_1_1_smc(FFA_RXTX_UNMAP,
- HOST_FFA_ID,
- 0, 0, 0, 0, 0, 0,
- &res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_RXTX_UNMAP,
+ .a1 = HOST_FFA_ID,
+ }, &res);
return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2;
}
-static void ffa_mem_frag_tx(struct arm_smccc_res *res, u32 handle_lo,
+static void ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
u32 handle_hi, u32 fraglen, u32 endpoint_id)
{
- arm_smccc_1_1_smc(FFA_MEM_FRAG_TX,
- handle_lo, handle_hi, fraglen, endpoint_id,
- 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_MEM_FRAG_TX,
+ .a1 = handle_lo,
+ .a2 = handle_hi,
+ .a3 = fraglen,
+ .a4 = endpoint_id,
+ }, res);
}
-static void ffa_mem_frag_rx(struct arm_smccc_res *res, u32 handle_lo,
+static void ffa_mem_frag_rx(struct arm_smccc_1_2_regs *res, u32 handle_lo,
u32 handle_hi, u32 fragoff)
{
- arm_smccc_1_1_smc(FFA_MEM_FRAG_RX,
- handle_lo, handle_hi, fragoff, HOST_FFA_ID,
- 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_MEM_FRAG_RX,
+ .a1 = handle_lo,
+ .a2 = handle_hi,
+ .a3 = fragoff,
+ .a4 = HOST_FFA_ID,
+ }, res);
}
-static void ffa_mem_xfer(struct arm_smccc_res *res, u64 func_id, u32 len,
+static void ffa_mem_xfer(struct arm_smccc_1_2_regs *res, u64 func_id, u32 len,
u32 fraglen)
{
- arm_smccc_1_1_smc(func_id, len, fraglen,
- 0, 0, 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = func_id,
+ .a1 = len,
+ .a2 = fraglen,
+ }, res);
}
-static void ffa_mem_reclaim(struct arm_smccc_res *res, u32 handle_lo,
+static void ffa_mem_reclaim(struct arm_smccc_1_2_regs *res, u32 handle_lo,
u32 handle_hi, u32 flags)
{
- arm_smccc_1_1_smc(FFA_MEM_RECLAIM,
- handle_lo, handle_hi, flags,
- 0, 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_MEM_RECLAIM,
+ .a1 = handle_lo,
+ .a2 = handle_hi,
+ .a3 = flags,
+ }, res);
}
-static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len)
+static void ffa_retrieve_req(struct arm_smccc_1_2_regs *res, u32 len)
{
- arm_smccc_1_1_smc(FFA_FN64_MEM_RETRIEVE_REQ,
- len, len,
- 0, 0, 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_FN64_MEM_RETRIEVE_REQ,
+ .a1 = len,
+ .a2 = len,
+ }, res);
}
-static void ffa_rx_release(struct arm_smccc_res *res)
+static void ffa_rx_release(struct arm_smccc_1_2_regs *res)
{
- arm_smccc_1_1_smc(FFA_RX_RELEASE,
- 0, 0,
- 0, 0, 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_RX_RELEASE,
+ }, res);
}
-static void do_ffa_rxtx_map(struct arm_smccc_res *res,
+static void do_ffa_rxtx_map(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(phys_addr_t, tx, ctxt, 1);
@@ -267,7 +309,7 @@ err_unmap:
goto out_unlock;
}
-static void do_ffa_rxtx_unmap(struct arm_smccc_res *res,
+static void do_ffa_rxtx_unmap(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, id, ctxt, 1);
@@ -368,7 +410,7 @@ static int ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges,
return ret;
}
-static void do_ffa_mem_frag_tx(struct arm_smccc_res *res,
+static void do_ffa_mem_frag_tx(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, handle_lo, ctxt, 1);
@@ -427,7 +469,7 @@ out:
}
static void __do_ffa_mem_xfer(const u64 func_id,
- struct arm_smccc_res *res,
+ struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, len, ctxt, 1);
@@ -521,7 +563,7 @@ err_unshare:
__do_ffa_mem_xfer((fid), (res), (ctxt)); \
} while (0);
-static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
+static void do_ffa_mem_reclaim(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, handle_lo, ctxt, 1);
@@ -628,13 +670,26 @@ static bool ffa_call_supported(u64 func_id)
case FFA_RXTX_MAP:
case FFA_MEM_DONATE:
case FFA_MEM_RETRIEVE_REQ:
+ /* Optional notification interfaces added in FF-A 1.1 */
+ case FFA_NOTIFICATION_BITMAP_CREATE:
+ case FFA_NOTIFICATION_BITMAP_DESTROY:
+ case FFA_NOTIFICATION_BIND:
+ case FFA_NOTIFICATION_UNBIND:
+ case FFA_NOTIFICATION_SET:
+ case FFA_NOTIFICATION_GET:
+ case FFA_NOTIFICATION_INFO_GET:
+ /* Optional interfaces added in FF-A 1.2 */
+ case FFA_MSG_SEND_DIRECT_REQ2: /* Optional per 7.5.1 */
+ case FFA_MSG_SEND_DIRECT_RESP2: /* Optional per 7.5.1 */
+ case FFA_CONSOLE_LOG: /* Optional per 13.1: not in Table 13.1 */
+ case FFA_PARTITION_INFO_GET_REGS: /* Optional for virtual instances per 13.1 */
return false;
}
return true;
}
-static bool do_ffa_features(struct arm_smccc_res *res,
+static bool do_ffa_features(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, id, ctxt, 1);
@@ -666,21 +721,25 @@ out_handled:
static int hyp_ffa_post_init(void)
{
size_t min_rxtx_sz;
- struct arm_smccc_res res;
+ struct arm_smccc_1_2_regs res;
- arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
+ .a0 = FFA_ID_GET,
+ }, &res);
if (res.a0 != FFA_SUCCESS)
return -EOPNOTSUPP;
if (res.a2 != HOST_FFA_ID)
return -EINVAL;
- arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP,
- 0, 0, 0, 0, 0, 0, &res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs){
+ .a0 = FFA_FEATURES,
+ .a1 = FFA_FN64_RXTX_MAP,
+ }, &res);
if (res.a0 != FFA_SUCCESS)
return -EOPNOTSUPP;
- switch (res.a2) {
+ switch (res.a2 & FFA_FEAT_RXTX_MIN_SZ_MASK) {
case FFA_FEAT_RXTX_MIN_SZ_4K:
min_rxtx_sz = SZ_4K;
break;
@@ -700,7 +759,7 @@ static int hyp_ffa_post_init(void)
return 0;
}
-static void do_ffa_version(struct arm_smccc_res *res,
+static void do_ffa_version(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, ffa_req_version, ctxt, 1);
@@ -712,7 +771,10 @@ static void do_ffa_version(struct arm_smccc_res *res,
hyp_spin_lock(&version_lock);
if (has_version_negotiated) {
- res->a0 = hyp_ffa_version;
+ if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version))
+ res->a0 = FFA_RET_NOT_SUPPORTED;
+ else
+ res->a0 = hyp_ffa_version;
goto unlock;
}
@@ -721,9 +783,10 @@ static void do_ffa_version(struct arm_smccc_res *res,
* first if TEE supports it.
*/
if (FFA_MINOR_VERSION(ffa_req_version) < FFA_MINOR_VERSION(hyp_ffa_version)) {
- arm_smccc_1_1_smc(FFA_VERSION, ffa_req_version, 0,
- 0, 0, 0, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_VERSION,
+ .a1 = ffa_req_version,
+ }, res);
if (res->a0 == FFA_RET_NOT_SUPPORTED)
goto unlock;
@@ -740,7 +803,7 @@ unlock:
hyp_spin_unlock(&version_lock);
}
-static void do_ffa_part_get(struct arm_smccc_res *res,
+static void do_ffa_part_get(struct arm_smccc_1_2_regs *res,
struct kvm_cpu_context *ctxt)
{
DECLARE_REG(u32, uuid0, ctxt, 1);
@@ -756,9 +819,14 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
goto out_unlock;
}
- arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1,
- uuid2, uuid3, flags, 0, 0,
- res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_PARTITION_INFO_GET,
+ .a1 = uuid0,
+ .a2 = uuid1,
+ .a3 = uuid2,
+ .a4 = uuid3,
+ .a5 = flags,
+ }, res);
if (res->a0 != FFA_SUCCESS)
goto out_unlock;
@@ -791,7 +859,7 @@ out_unlock:
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
{
- struct arm_smccc_res res;
+ struct arm_smccc_1_2_regs res;
/*
* There's no way we can tell what a non-standard SMC call might
@@ -860,13 +928,16 @@ out_handled:
int hyp_ffa_init(void *pages)
{
- struct arm_smccc_res res;
+ struct arm_smccc_1_2_regs res;
void *tx, *rx;
if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_2)
return 0;
- arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_1, 0, 0, 0, 0, 0, 0, &res);
+ arm_smccc_1_2_smc(&(struct arm_smccc_1_2_regs) {
+ .a0 = FFA_VERSION,
+ .a1 = FFA_VERSION_1_2,
+ }, &res);
if (res.a0 == FFA_RET_NOT_SUPPORTED)
return 0;
@@ -886,10 +957,10 @@ int hyp_ffa_init(void *pages)
if (FFA_MAJOR_VERSION(res.a0) != 1)
return -EOPNOTSUPP;
- if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_1))
+ if (FFA_MINOR_VERSION(res.a0) < FFA_MINOR_VERSION(FFA_VERSION_1_2))
hyp_ffa_version = res.a0;
else
- hyp_ffa_version = FFA_VERSION_1_1;
+ hyp_ffa_version = FFA_VERSION_1_2;
tx = pages;
pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE;
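/*
 * Illustration only (not part of the patch): the FFA_FEATURES(FFA_FN64_RXTX_MAP)
 * property decode that hyp_ffa_post_init() now masks with
 * FFA_FEAT_RXTX_MIN_SZ_MASK, since the rest of w2 may carry other property
 * bits. Encodings are the ones from include/linux/arm_ffa.h; the function
 * name and the sample value are made up.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define FFA_FEAT_RXTX_MIN_SZ_4K         0
#define FFA_FEAT_RXTX_MIN_SZ_64K        1
#define FFA_FEAT_RXTX_MIN_SZ_16K        2
#define FFA_FEAT_RXTX_MIN_SZ_MASK       0x3u    /* GENMASK(1, 0) */

static size_t rxtx_min_sz(uint64_t a2)
{
        switch (a2 & FFA_FEAT_RXTX_MIN_SZ_MASK) {
        case FFA_FEAT_RXTX_MIN_SZ_4K:   return 4096;
        case FFA_FEAT_RXTX_MIN_SZ_16K:  return 16384;
        case FFA_FEAT_RXTX_MIN_SZ_64K:  return 65536;
        default:                        return 0;       /* reserved encoding */
        }
}

int main(void)
{
        uint64_t a2 = 0x20 | FFA_FEAT_RXTX_MIN_SZ_16K;  /* extra bits set above [1:0] */

        printf("min RX/TX buffer size: %zu bytes\n", rxtx_min_sz(a2));
        return 0;
}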
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 3206b2c07f82..29430c031095 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -546,6 +546,18 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize();
}
+static void handle___pkvm_reserve_vm(struct kvm_cpu_context *host_ctxt)
+{
+ cpu_reg(host_ctxt, 1) = __pkvm_reserve_vm();
+}
+
+static void handle___pkvm_unreserve_vm(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+
+ __pkvm_unreserve_vm(handle);
+}
+
static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1);
@@ -606,6 +618,8 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
+ HANDLE_FUNC(__pkvm_reserve_vm),
+ HANDLE_FUNC(__pkvm_unreserve_vm),
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
HANDLE_FUNC(__pkvm_teardown_vm),
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 338505cb0171..05774aed09cb 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -23,8 +23,8 @@ unsigned int kvm_arm_vmid_bits;
unsigned int kvm_host_sve_max_vl;
/*
- * The currently loaded hyp vCPU for each physical CPU. Used only when
- * protected KVM is enabled, but for both protected and non-protected VMs.
+ * The currently loaded hyp vCPU for each physical CPU. Used in protected mode
+ * for both protected and non-protected VMs.
*/
static DEFINE_PER_CPU(struct pkvm_hyp_vcpu *, loaded_hyp_vcpu);
@@ -135,7 +135,7 @@ static int pkvm_check_pvm_cpu_features(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- /* Protected KVM does not support AArch32 guests. */
+ /* No AArch32 support for protected guests. */
if (kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL0, AARCH32) ||
kvm_has_feat(kvm, ID_AA64PFR0_EL1, EL1, AARCH32))
return -EINVAL;
@@ -192,6 +192,11 @@ static int pkvm_vcpu_init_traps(struct pkvm_hyp_vcpu *hyp_vcpu)
*/
#define HANDLE_OFFSET 0x1000
+/*
+ * Marks a reserved but not yet used entry in the VM table.
+ */
+#define RESERVED_ENTRY ((void *)0xa110ca7ed)
+
static unsigned int vm_handle_to_idx(pkvm_handle_t handle)
{
return handle - HANDLE_OFFSET;
@@ -210,8 +215,8 @@ static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
DEFINE_HYP_SPINLOCK(vm_table_lock);
/*
- * The table of VM entries for protected VMs in hyp.
- * Allocated at hyp initialization and setup.
+ * A table that tracks all VMs in protected mode.
+ * Allocated during hyp initialization and setup.
*/
static struct pkvm_hyp_vm **vm_table;
@@ -231,6 +236,10 @@ static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
if (unlikely(idx >= KVM_MAX_PVMS))
return NULL;
+ /* A reserved entry doesn't represent an initialized VM. */
+ if (unlikely(vm_table[idx] == RESERVED_ENTRY))
+ return NULL;
+
return vm_table[idx];
}
@@ -401,14 +410,26 @@ static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
}
static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
- unsigned int nr_vcpus)
+ unsigned int nr_vcpus, pkvm_handle_t handle)
{
+ struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
+ int idx = vm_handle_to_idx(handle);
+
+ hyp_vm->kvm.arch.pkvm.handle = handle;
+
hyp_vm->host_kvm = host_kvm;
hyp_vm->kvm.created_vcpus = nr_vcpus;
- hyp_vm->kvm.arch.mmu.vtcr = host_mmu.arch.mmu.vtcr;
- hyp_vm->kvm.arch.pkvm.enabled = READ_ONCE(host_kvm->arch.pkvm.enabled);
+ hyp_vm->kvm.arch.pkvm.is_protected = READ_ONCE(host_kvm->arch.pkvm.is_protected);
+ hyp_vm->kvm.arch.pkvm.is_created = true;
hyp_vm->kvm.arch.flags = 0;
pkvm_init_features_from_host(hyp_vm, host_kvm);
+
+ /* VMID 0 is reserved for the host */
+ atomic64_set(&mmu->vmid.id, idx + 1);
+
+ mmu->vtcr = host_mmu.arch.mmu.vtcr;
+ mmu->arch = &hyp_vm->kvm.arch;
+ mmu->pgt = &hyp_vm->pgt;
}
static int pkvm_vcpu_init_sve(struct pkvm_hyp_vcpu *hyp_vcpu, struct kvm_vcpu *host_vcpu)
@@ -480,7 +501,7 @@ done:
return ret;
}
-static int find_free_vm_table_entry(struct kvm *host_kvm)
+static int find_free_vm_table_entry(void)
{
int i;
@@ -493,15 +514,13 @@ static int find_free_vm_table_entry(struct kvm *host_kvm)
}
/*
- * Allocate a VM table entry and insert a pointer to the new vm.
+ * Reserve a VM table entry.
*
- * Return a unique handle to the protected VM on success,
+ * Return a unique handle to the VM on success,
* negative error code on failure.
*/
-static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
- struct pkvm_hyp_vm *hyp_vm)
+static int allocate_vm_table_entry(void)
{
- struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
int idx;
hyp_assert_lock_held(&vm_table_lock);
@@ -514,20 +533,57 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
if (unlikely(!vm_table))
return -EINVAL;
- idx = find_free_vm_table_entry(host_kvm);
- if (idx < 0)
+ idx = find_free_vm_table_entry();
+ if (unlikely(idx < 0))
return idx;
- hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx);
+ vm_table[idx] = RESERVED_ENTRY;
- /* VMID 0 is reserved for the host */
- atomic64_set(&mmu->vmid.id, idx + 1);
+ return idx;
+}
- mmu->arch = &hyp_vm->kvm.arch;
- mmu->pgt = &hyp_vm->pgt;
+static int __insert_vm_table_entry(pkvm_handle_t handle,
+ struct pkvm_hyp_vm *hyp_vm)
+{
+ unsigned int idx;
+
+ hyp_assert_lock_held(&vm_table_lock);
+
+ /*
+ * Initializing protected state might have failed, yet a malicious
+ * host could trigger this function. Thus, ensure that 'vm_table'
+ * exists.
+ */
+ if (unlikely(!vm_table))
+ return -EINVAL;
+
+ idx = vm_handle_to_idx(handle);
+ if (unlikely(idx >= KVM_MAX_PVMS))
+ return -EINVAL;
+
+ if (unlikely(vm_table[idx] != RESERVED_ENTRY))
+ return -EINVAL;
vm_table[idx] = hyp_vm;
- return hyp_vm->kvm.arch.pkvm.handle;
+
+ return 0;
+}
+
+/*
+ * Insert a pointer to the initialized VM into the VM table.
+ *
+ * Return 0 on success, or negative error code on failure.
+ */
+static int insert_vm_table_entry(pkvm_handle_t handle,
+ struct pkvm_hyp_vm *hyp_vm)
+{
+ int ret;
+
+ hyp_spin_lock(&vm_table_lock);
+ ret = __insert_vm_table_entry(handle, hyp_vm);
+ hyp_spin_unlock(&vm_table_lock);
+
+ return ret;
}
/*
@@ -594,10 +650,45 @@ static void unmap_donated_memory_noclear(void *va, size_t size)
}
/*
- * Initialize the hypervisor copy of the protected VM state using the
- * memory donated by the host.
+ * Reserves an entry in the hypervisor for a new VM in protected mode.
*
- * Unmaps the donated memory from the host at stage 2.
+ * Return a unique handle to the VM on success, negative error code on failure.
+ */
+int __pkvm_reserve_vm(void)
+{
+ int ret;
+
+ hyp_spin_lock(&vm_table_lock);
+ ret = allocate_vm_table_entry();
+ hyp_spin_unlock(&vm_table_lock);
+
+ if (ret < 0)
+ return ret;
+
+ return idx_to_vm_handle(ret);
+}
+
+/*
+ * Removes a reserved entry, but only if it hasn't been used yet.
+ * Otherwise, the VM needs to be destroyed.
+ */
+void __pkvm_unreserve_vm(pkvm_handle_t handle)
+{
+ unsigned int idx = vm_handle_to_idx(handle);
+
+ if (unlikely(!vm_table))
+ return;
+
+ hyp_spin_lock(&vm_table_lock);
+ if (likely(idx < KVM_MAX_PVMS && vm_table[idx] == RESERVED_ENTRY))
+ remove_vm_table_entry(handle);
+ hyp_spin_unlock(&vm_table_lock);
+}
+
+/*
+ * Initialize the hypervisor copy of the VM state using host-donated memory.
+ *
+ * Unmap the donated memory from the host at stage 2.
*
* host_kvm: A pointer to the host's struct kvm.
* vm_hva: The host va of the area being donated for the VM state.
@@ -606,8 +697,7 @@ static void unmap_donated_memory_noclear(void *va, size_t size)
* the VM. Must be page aligned. Its size is implied by the VM's
* VTCR.
*
- * Return a unique handle to the protected VM on success,
- * negative error code on failure.
+ * Return 0 on success, negative error code on failure.
*/
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
unsigned long pgd_hva)
@@ -615,6 +705,7 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
struct pkvm_hyp_vm *hyp_vm = NULL;
size_t vm_size, pgd_size;
unsigned int nr_vcpus;
+ pkvm_handle_t handle;
void *pgd = NULL;
int ret;
@@ -628,6 +719,12 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
goto err_unpin_kvm;
}
+ handle = READ_ONCE(host_kvm->arch.pkvm.handle);
+ if (unlikely(handle < HANDLE_OFFSET)) {
+ ret = -EINVAL;
+ goto err_unpin_kvm;
+ }
+
vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.mmu.vtcr);
@@ -641,24 +738,19 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
if (!pgd)
goto err_remove_mappings;
- init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus);
-
- hyp_spin_lock(&vm_table_lock);
- ret = insert_vm_table_entry(host_kvm, hyp_vm);
- if (ret < 0)
- goto err_unlock;
+ init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus, handle);
ret = kvm_guest_prepare_stage2(hyp_vm, pgd);
if (ret)
- goto err_remove_vm_table_entry;
- hyp_spin_unlock(&vm_table_lock);
+ goto err_remove_mappings;
- return hyp_vm->kvm.arch.pkvm.handle;
+ /* Must be called last since this publishes the VM. */
+ ret = insert_vm_table_entry(handle, hyp_vm);
+ if (ret)
+ goto err_remove_mappings;
+
+ return 0;
-err_remove_vm_table_entry:
- remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle);
-err_unlock:
- hyp_spin_unlock(&vm_table_lock);
err_remove_mappings:
unmap_donated_memory(hyp_vm, vm_size);
unmap_donated_memory(pgd, pgd_size);
@@ -668,10 +760,9 @@ err_unpin_kvm:
}
/*
- * Initialize the hypervisor copy of the protected vCPU state using the
- * memory donated by the host.
+ * Initialize the hypervisor copy of the vCPU state using host-donated memory.
*
- * handle: The handle for the protected vm.
+ * handle: The hypervisor handle for the vm.
* host_vcpu: A pointer to the corresponding host vcpu.
* vcpu_hva: The host va of the area being donated for the vcpu state.
* Must be page aligned. The size of the area must be equal to
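/*
 * Illustration only (not part of the patch): the vm_table slot lifecycle that
 * the reservation scheme above introduces - NULL (free) -> RESERVED_ENTRY
 * (__pkvm_reserve_vm) -> hyp VM pointer (published by __pkvm_init_vm), with
 * unreserve only legal while the slot is still in the reserved state. Locking
 * and error codes are simplified; the helper names are made up.
 */
#include <stdint.h>
#include <stdio.h>

#define HANDLE_OFFSET   0x1000
#define MAX_VMS         8
#define RESERVED_ENTRY  ((void *)(uintptr_t)0xa110ca7edULL)

static void *vm_table[MAX_VMS];

static int reserve_vm(void)
{
        for (int i = 0; i < MAX_VMS; i++) {
                if (!vm_table[i]) {
                        vm_table[i] = RESERVED_ENTRY;
                        return i + HANDLE_OFFSET;       /* handle */
                }
        }
        return -1;
}

static int publish_vm(int handle, void *hyp_vm)
{
        int idx = handle - HANDLE_OFFSET;

        if (idx < 0 || idx >= MAX_VMS || vm_table[idx] != RESERVED_ENTRY)
                return -1;
        vm_table[idx] = hyp_vm;
        return 0;
}

static void unreserve_vm(int handle)
{
        int idx = handle - HANDLE_OFFSET;

        if (idx >= 0 && idx < MAX_VMS && vm_table[idx] == RESERVED_ENTRY)
                vm_table[idx] = NULL;
}

int main(void)
{
        int dummy_vm = 42;
        int handle = reserve_vm();

        printf("reserved handle 0x%x\n", (unsigned int)handle);
        publish_vm(handle, &dummy_vm);
        unreserve_vm(handle);           /* no-op: slot already published */
        printf("slot still published: %d\n", vm_table[0] == (void *)&dummy_vm);
        return 0;
}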
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index a48d3f5a5afb..90bd014e952f 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -192,6 +192,7 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum pkvm_page_state state;
struct hyp_page *page;
phys_addr_t phys;
+ enum kvm_pgtable_prot prot;
if (!kvm_pte_valid(ctx->old))
return 0;
@@ -210,11 +211,18 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
* configured in the hypervisor stage-1, and make sure to propagate them
* to the hyp_vmemmap state.
*/
- state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old));
+ prot = kvm_pgtable_hyp_pte_prot(ctx->old);
+ state = pkvm_getstate(prot);
switch (state) {
case PKVM_PAGE_OWNED:
set_hyp_state(page, PKVM_PAGE_OWNED);
- return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
+ /* hyp text is RO in the host stage-2 so it can be inspected on panic. */
+ if (prot == PAGE_HYP_EXEC) {
+ set_host_state(page, PKVM_NOPAGE);
+ return host_stage2_idmap_locked(phys, PAGE_SIZE, KVM_PGTABLE_PROT_R);
+ } else {
+ return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
+ }
case PKVM_PAGE_SHARED_OWNED:
set_hyp_state(page, PKVM_PAGE_SHARED_OWNED);
set_host_state(page, PKVM_PAGE_SHARED_BORROWED);
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 6745f38b64f9..dfcd66c65517 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -106,7 +106,30 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
{
unsigned long cpsr = *vcpu_cpsr(vcpu);
bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
- u64 esr = 0;
+ u64 esr = 0, fsc;
+ int level;
+
+ /*
+ * If injecting an abort from a failed S1PTW, rewalk the S1 PTs to
+ * find the failing level. If we can't find it, assume the error was
+ * transient and restart without changing the state.
+ */
+ if (kvm_vcpu_abt_iss1tw(vcpu)) {
+ u64 hpfar = kvm_vcpu_get_fault_ipa(vcpu);
+ int ret;
+
+ if (hpfar == INVALID_GPA)
+ return;
+
+ ret = __kvm_find_s1_desc_level(vcpu, addr, hpfar, &level);
+ if (ret)
+ return;
+
+ WARN_ON_ONCE(level < -1 || level > 3);
+ fsc = ESR_ELx_FSC_SEA_TTW(level);
+ } else {
+ fsc = ESR_ELx_FSC_EXTABT;
+ }
/* This delight is brought to you by FEAT_DoubleFault2. */
if (effective_sctlr2_ease(vcpu))
@@ -133,7 +156,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr
if (!is_iabt)
esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT;
- esr |= ESR_ELx_FSC_EXTABT;
+ esr |= fsc;
vcpu_write_sys_reg(vcpu, addr, exception_far_elx(vcpu));
vcpu_write_sys_reg(vcpu, esr, exception_esr_elx(vcpu));
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 77db81bae86f..cb36974e010a 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -349,7 +349,7 @@ static void vtcr_to_walk_info(u64 vtcr, struct s2_walk_info *wi)
wi->sl = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
/* Global limit for now, should eventually be per-VM */
wi->max_oa_bits = min(get_kvm_ipa_limit(),
- ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr)));
+ ps_to_output_size(FIELD_GET(VTCR_EL2_PS_MASK, vtcr), false));
}
int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 61827cf6fea4..d7a0f69a9982 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -85,16 +85,23 @@ void __init kvm_hyp_reserve(void)
hyp_mem_base);
}
-static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm)
+static void __pkvm_destroy_hyp_vm(struct kvm *kvm)
{
- if (host_kvm->arch.pkvm.handle) {
+ if (pkvm_hyp_vm_is_created(kvm)) {
WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
- host_kvm->arch.pkvm.handle));
+ kvm->arch.pkvm.handle));
+ } else if (kvm->arch.pkvm.handle) {
+ /*
+ * The VM could have been reserved but hyp initialization has
+ * failed. Make sure to unreserve it.
+ */
+ kvm_call_hyp_nvhe(__pkvm_unreserve_vm, kvm->arch.pkvm.handle);
}
- host_kvm->arch.pkvm.handle = 0;
- free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
- free_hyp_memcache(&host_kvm->arch.pkvm.stage2_teardown_mc);
+ kvm->arch.pkvm.handle = 0;
+ kvm->arch.pkvm.is_created = false;
+ free_hyp_memcache(&kvm->arch.pkvm.teardown_mc);
+ free_hyp_memcache(&kvm->arch.pkvm.stage2_teardown_mc);
}
static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
@@ -129,16 +136,16 @@ static int __pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
*
* Return 0 on success, negative error code on failure.
*/
-static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
+static int __pkvm_create_hyp_vm(struct kvm *kvm)
{
size_t pgd_sz, hyp_vm_sz;
void *pgd, *hyp_vm;
int ret;
- if (host_kvm->created_vcpus < 1)
+ if (kvm->created_vcpus < 1)
return -EINVAL;
- pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr);
+ pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.mmu.vtcr);
/*
* The PGD pages will be reclaimed using a hyp_memcache which implies
@@ -152,7 +159,7 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
/* Allocate memory to donate to hyp for vm and vcpu pointers. */
hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
size_mul(sizeof(void *),
- host_kvm->created_vcpus)));
+ kvm->created_vcpus)));
hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
if (!hyp_vm) {
ret = -ENOMEM;
@@ -160,12 +167,12 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
}
/* Donate the VM memory to hyp and let hyp initialize it. */
- ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
- if (ret < 0)
+ ret = kvm_call_hyp_nvhe(__pkvm_init_vm, kvm, hyp_vm, pgd);
+ if (ret)
goto free_vm;
- host_kvm->arch.pkvm.handle = ret;
- host_kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
+ kvm->arch.pkvm.is_created = true;
+ kvm->arch.pkvm.stage2_teardown_mc.flags |= HYP_MEMCACHE_ACCOUNT_STAGE2;
kvm_account_pgtable_pages(pgd, pgd_sz / PAGE_SIZE);
return 0;
@@ -176,14 +183,19 @@ free_pgd:
return ret;
}
-int pkvm_create_hyp_vm(struct kvm *host_kvm)
+bool pkvm_hyp_vm_is_created(struct kvm *kvm)
+{
+ return READ_ONCE(kvm->arch.pkvm.is_created);
+}
+
+int pkvm_create_hyp_vm(struct kvm *kvm)
{
int ret = 0;
- mutex_lock(&host_kvm->arch.config_lock);
- if (!host_kvm->arch.pkvm.handle)
- ret = __pkvm_create_hyp_vm(host_kvm);
- mutex_unlock(&host_kvm->arch.config_lock);
+ mutex_lock(&kvm->arch.config_lock);
+ if (!pkvm_hyp_vm_is_created(kvm))
+ ret = __pkvm_create_hyp_vm(kvm);
+ mutex_unlock(&kvm->arch.config_lock);
return ret;
}
@@ -200,15 +212,31 @@ int pkvm_create_hyp_vcpu(struct kvm_vcpu *vcpu)
return ret;
}
-void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
+void pkvm_destroy_hyp_vm(struct kvm *kvm)
{
- mutex_lock(&host_kvm->arch.config_lock);
- __pkvm_destroy_hyp_vm(host_kvm);
- mutex_unlock(&host_kvm->arch.config_lock);
+ mutex_lock(&kvm->arch.config_lock);
+ __pkvm_destroy_hyp_vm(kvm);
+ mutex_unlock(&kvm->arch.config_lock);
}
-int pkvm_init_host_vm(struct kvm *host_kvm)
+int pkvm_init_host_vm(struct kvm *kvm)
{
+ int ret;
+
+ if (pkvm_hyp_vm_is_created(kvm))
+ return -EINVAL;
+
+ /* VM is already reserved, no need to proceed. */
+ if (kvm->arch.pkvm.handle)
+ return 0;
+
+ /* Reserve the VM in hyp and obtain a hyp handle for the VM. */
+ ret = kvm_call_hyp_nvhe(__pkvm_reserve_vm);
+ if (ret < 0)
+ return ret;
+
+ kvm->arch.pkvm.handle = ret;
+
return 0;
}
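/*
 * Illustration only (not part of the patch): the host-side teardown decision
 * after the change - a created hyp VM is torn down, while a handle that was
 * only reserved (hyp-side init never completed) is merely unreserved. The
 * hypercalls are stubbed with printfs and the struct is trimmed down; names
 * loosely mirror the patch but the bodies are made up.
 */
#include <stdbool.h>
#include <stdio.h>

struct pkvm_state {
        unsigned int handle;
        bool is_created;
};

static void teardown_vm(unsigned int h)  { printf("__pkvm_teardown_vm  %#x\n", h); }
static void unreserve_vm(unsigned int h) { printf("__pkvm_unreserve_vm %#x\n", h); }

static void destroy_hyp_vm(struct pkvm_state *pkvm)
{
        if (pkvm->is_created)
                teardown_vm(pkvm->handle);
        else if (pkvm->handle)
                unreserve_vm(pkvm->handle);

        pkvm->handle = 0;
        pkvm->is_created = false;
}

int main(void)
{
        struct pkvm_state reserved_only = { .handle = 0x1000 };
        struct pkvm_state created = { .handle = 0x1001, .is_created = true };

        destroy_hyp_vm(&reserved_only);
        destroy_hyp_vm(&created);
        return 0;
}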
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 1e680ad6e863..ccccb5c04ac1 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -554,7 +554,6 @@ int vgic_lazy_init(struct kvm *kvm)
* Also map the virtual CPU interface into the VM.
* v2 calls vgic_init() if not already done.
* v3 and derivatives return an error if the VGIC is not initialized.
- * vgic_ready() returns true if this function has succeeded.
*/
int kvm_vgic_map_resources(struct kvm *kvm)
{
@@ -563,12 +562,12 @@ int kvm_vgic_map_resources(struct kvm *kvm)
gpa_t dist_base;
int ret = 0;
- if (likely(vgic_ready(kvm)))
+ if (likely(smp_load_acquire(&dist->ready)))
return 0;
mutex_lock(&kvm->slots_lock);
mutex_lock(&kvm->arch.config_lock);
- if (vgic_ready(kvm))
+ if (dist->ready)
goto out;
if (!irqchip_in_kernel(kvm))
@@ -594,14 +593,7 @@ int kvm_vgic_map_resources(struct kvm *kvm)
goto out_slots;
}
- /*
- * kvm_io_bus_register_dev() guarantees all readers see the new MMIO
- * registration before returning through synchronize_srcu(), which also
- * implies a full memory barrier. As such, marking the distributor as
- * 'ready' here is guaranteed to be ordered after all vCPUs having seen
- * a completely configured distributor.
- */
- dist->ready = true;
+ smp_store_release(&dist->ready, true);
goto out_slots;
out:
mutex_unlock(&kvm->arch.config_lock);
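
The rewritten fast path relies on an acquire/release pairing instead of the removed synchronize_srcu() argument: every store that configures the distributor is ordered before the release store of 'ready', and the lockless load-acquire on entry guarantees that a reader observing 'ready' also observes the configuration. A self-contained sketch of the same pattern with a hypothetical structure (not the vgic code itself):

#include <linux/compiler.h>
#include <linux/mutex.h>
#include <asm/barrier.h>

struct demo_dist {
	struct mutex	lock;		/* assumed initialised elsewhere */
	int		nr_spis;	/* configuration published before 'ready' */
	bool		ready;
};

static int demo_map_resources(struct demo_dist *d)
{
	/* Lockless fast path: pairs with the store-release below. */
	if (likely(smp_load_acquire(&d->ready)))
		return 0;

	mutex_lock(&d->lock);
	if (d->ready)			/* plain re-check, now under the lock */
		goto out;

	d->nr_spis = 32;		/* all configuration stores... */
	smp_store_release(&d->ready, true);	/* ...ordered before the flag */
out:
	mutex_unlock(&d->lock);
	return 0;
}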
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index aa157fe5b7b3..0bdf9405969a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5785,6 +5785,13 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
+ /*
+ * Ensure that any updates to kvm->buses[] observed by the
+ * previous instruction (emulated or otherwise) are also
+ * visible to the instruction KVM is about to emulate.
+ */
+ smp_rmb();
+
if (!kvm_emulate_instruction(vcpu, 0))
return 0;
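
The smp_rmb() added here only has an effect against a matching write-side ordering; the generic contract is the usual write-barrier/read-barrier pairing, sketched below with hypothetical demo_* symbols (this shows the barrier semantics, not the kvm->buses[] update path itself):

#include <linux/compiler.h>
#include <asm/barrier.h>

static int demo_data;
static int demo_flag;

/* Writer: make the data visible before the flag that advertises it. */
static void demo_publish(int v)
{
	demo_data = v;
	smp_wmb();			/* order data store before flag store */
	WRITE_ONCE(demo_flag, 1);
}

/* Reader: smp_rmb() orders the flag load before the dependent data load. */
static int demo_consume(void)
{
	if (!READ_ONCE(demo_flag))
		return -1;
	smp_rmb();			/* pairs with smp_wmb() in demo_publish() */
	return demo_data;
}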
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 9a6340d9c91e..dd9af8a401d3 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -407,7 +407,6 @@ u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu);
#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel))
#define vgic_initialized(k) ((k)->arch.vgic.initialized)
-#define vgic_ready(k) ((k)->arch.vgic.ready)
#define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \
((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS))
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index e1634897e159..cd7ee4df9045 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -128,6 +128,7 @@
#define FFA_FEAT_RXTX_MIN_SZ_4K 0
#define FFA_FEAT_RXTX_MIN_SZ_64K 1
#define FFA_FEAT_RXTX_MIN_SZ_16K 2
+#define FFA_FEAT_RXTX_MIN_SZ_MASK GENMASK(1, 0)
/* FFA Bus/Device/Driver related */
struct ffa_device {
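
With the new mask, the minimum RX/TX buffer size field can be extracted with the standard bitfield helpers. A sketch; which FFA_FEATURES result register actually carries the field is an assumption here, so treat feat_resp simply as "the word containing bits [1:0]":

#include <linux/arm_ffa.h>
#include <linux/bitfield.h>
#include <linux/sizes.h>

static unsigned long ffa_rxtx_min_granule(unsigned long feat_resp)
{
	switch (FIELD_GET(FFA_FEAT_RXTX_MIN_SZ_MASK, feat_resp)) {
	case FFA_FEAT_RXTX_MIN_SZ_64K:
		return SZ_64K;
	case FFA_FEAT_RXTX_MIN_SZ_16K:
		return SZ_16K;
	case FFA_FEAT_RXTX_MIN_SZ_4K:
	default:
		return SZ_4K;
	}
}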
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 15656b7fba6c..103be35caf0d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -206,6 +206,7 @@ struct kvm_io_range {
struct kvm_io_bus {
int dev_count;
int ioeventfd_count;
+ struct rcu_head rcu;
struct kvm_io_range range[];
};
@@ -966,11 +967,15 @@ static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
}
+/*
+ * Get a bus reference under the update-side lock. No long-term SRCU reader
+ * references are permitted, to avoid stale reads vs concurrent IO
+ * registrations.
+ */
static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
{
- return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
- lockdep_is_held(&kvm->slots_lock) ||
- !refcount_read(&kvm->users_count));
+ return rcu_dereference_protected(kvm->buses[idx],
+ lockdep_is_held(&kvm->slots_lock));
}
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
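
The tightened kvm_get_bus() is an instance of the standard update-side RCU accessor: rcu_dereference_protected() skips read-side marking but lets lockdep verify that the caller really holds the lock serialising writers. A generic sketch with a hypothetical structure:

#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct demo_cfg {
	int val;
};

struct demo {
	struct mutex		lock;
	struct demo_cfg __rcu	*cfg;
};

/* Update-side accessor: only legal with demo->lock held (lockdep-checked). */
static struct demo_cfg *demo_get_cfg_locked(struct demo *d)
{
	return rcu_dereference_protected(d->cfg, lockdep_is_held(&d->lock));
}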
diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c
index 062bf84cced1..592b26ded779 100644
--- a/tools/testing/selftests/kvm/arm64/external_aborts.c
+++ b/tools/testing/selftests/kvm/arm64/external_aborts.c
@@ -250,6 +250,47 @@ static void test_serror(void)
kvm_vm_free(vm);
}
+static void expect_sea_s1ptw_handler(struct ex_regs *regs)
+{
+ u64 esr = read_sysreg(esr_el1);
+
+ GUEST_ASSERT_EQ(regs->pc, expected_abort_pc);
+ GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR);
+ GUEST_ASSERT_EQ((esr & ESR_ELx_FSC), ESR_ELx_FSC_SEA_TTW(3));
+
+ GUEST_DONE();
+}
+
+static noinline void test_s1ptw_abort_guest(void)
+{
+ extern char test_s1ptw_abort_insn;
+
+ WRITE_ONCE(expected_abort_pc, (u64)&test_s1ptw_abort_insn);
+
+ asm volatile("test_s1ptw_abort_insn:\n\t"
+ "ldr x0, [%0]\n\t"
+ : : "r" (MMIO_ADDR) : "x0", "memory");
+
+ GUEST_FAIL("Load on S1PTW abort should not retire");
+}
+
+static void test_s1ptw_abort(void)
+{
+ struct kvm_vcpu *vcpu;
+ u64 *ptep, bad_pa;
+ struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_s1ptw_abort_guest,
+ expect_sea_s1ptw_handler);
+
+ ptep = virt_get_pte_hva_at_level(vm, MMIO_ADDR, 2);
+ bad_pa = BIT(vm->pa_bits) - vm->page_size;
+
+ *ptep &= ~GENMASK(47, 12);
+ *ptep |= bad_pa;
+
+ vcpu_run_expect_done(vcpu);
+ kvm_vm_free(vm);
+}
+
static void test_serror_emulated_guest(void)
{
GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A));
@@ -327,4 +368,5 @@ int main(void)
test_serror_masked();
test_serror_emulated();
test_mmio_ease();
+ test_s1ptw_abort();
}
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 255fed769a8a..e3e916b1d9c4 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -175,6 +175,7 @@ void vm_install_exception_handler(struct kvm_vm *vm,
void vm_install_sync_handler(struct kvm_vm *vm,
int vector, int ec, handler_fn handler);
+uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level);
uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
static inline void cpu_relax(void)
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index eb115123d741..bd7480a93f96 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -185,7 +185,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
_virt_pg_map(vm, vaddr, paddr, attr_idx);
}
-uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
+uint64_t *virt_get_pte_hva_at_level(struct kvm_vm *vm, vm_vaddr_t gva, int level)
{
uint64_t *ptep;
@@ -195,17 +195,23 @@ uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 0)
+ return ptep;
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 1)
+ break;
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8;
if (!ptep)
goto unmapped_gva;
+ if (level == 2)
+ break;
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8;
@@ -223,6 +229,11 @@ unmapped_gva:
exit(EXIT_FAILURE);
}
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return virt_get_pte_hva_at_level(vm, gva, 3);
+}
+
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint64_t *ptep = virt_get_pte_hva(vm, gva);
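
The level-aware walker lets a test grab a descriptor above the leaf level. A short usage sketch; the GVA and the validity check are illustrative, assuming a layout where the address is mapped through a level-2 table entry as in the abort test above:

#include "kvm_util.h"
#include "processor.h"

static void demo_check_level2_desc(struct kvm_vm *vm, vm_vaddr_t gva)
{
	uint64_t *ptep = virt_get_pte_hva_at_level(vm, gva, 2);

	/* Bit 0 is the 'valid' bit of an arm64 table/block descriptor. */
	TEST_ASSERT(*ptep & 1, "Descriptor for GVA 0x%lx is not valid",
		    (unsigned long)gva);
}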
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6c07dd423458..bcef324ccbf2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1103,6 +1103,14 @@ void __weak kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
}
+/* Called only on cleanup and destruction paths when there are no users. */
+static inline struct kvm_io_bus *kvm_get_bus_for_destruction(struct kvm *kvm,
+ enum kvm_bus idx)
+{
+ return rcu_dereference_protected(kvm->buses[idx],
+ !refcount_read(&kvm->users_count));
+}
+
static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
{
struct kvm *kvm = kvm_arch_alloc_vm();
@@ -1228,7 +1236,7 @@ out_err_no_disable:
out_err_no_arch_destroy_vm:
WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
for (i = 0; i < KVM_NR_BUSES; i++)
- kfree(kvm_get_bus(kvm, i));
+ kfree(kvm_get_bus_for_destruction(kvm, i));
kvm_free_irq_routing(kvm);
out_err_no_irq_routing:
cleanup_srcu_struct(&kvm->irq_srcu);
@@ -1276,7 +1284,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
- struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
+ struct kvm_io_bus *bus = kvm_get_bus_for_destruction(kvm, i);
if (bus)
kvm_io_bus_destroy(bus);
@@ -1312,6 +1320,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_free_memslots(kvm, &kvm->__memslots[i][1]);
}
cleanup_srcu_struct(&kvm->irq_srcu);
+ srcu_barrier(&kvm->srcu);
cleanup_srcu_struct(&kvm->srcu);
#ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
xa_destroy(&kvm->mem_attr_array);
@@ -5843,6 +5852,18 @@ static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
return -EOPNOTSUPP;
}
+static struct kvm_io_bus *kvm_get_bus_srcu(struct kvm *kvm, enum kvm_bus idx)
+{
+ /*
+ * Ensure that any updates to kvm->buses[] observed by the previous vCPU
+ * machine instruction are also visible to the vCPU machine instruction
+ * that triggered this call.
+ */
+ smp_mb__after_srcu_read_lock();
+
+ return srcu_dereference(kvm->buses[idx], &kvm->srcu);
+}
+
int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
@@ -5855,7 +5876,7 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
.len = len,
};
- bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx);
if (!bus)
return -ENOMEM;
r = __kvm_io_bus_write(vcpu, bus, &range, val);
@@ -5874,7 +5895,7 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
.len = len,
};
- bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx);
if (!bus)
return -ENOMEM;
@@ -5924,7 +5945,7 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
.len = len,
};
- bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+ bus = kvm_get_bus_srcu(vcpu->kvm, bus_idx);
if (!bus)
return -ENOMEM;
r = __kvm_io_bus_read(vcpu, bus, &range, val);
@@ -5932,6 +5953,13 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
}
EXPORT_SYMBOL_GPL(kvm_io_bus_read);
+static void __free_bus(struct rcu_head *rcu)
+{
+ struct kvm_io_bus *bus = container_of(rcu, struct kvm_io_bus, rcu);
+
+ kfree(bus);
+}
+
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, struct kvm_io_device *dev)
{
@@ -5970,8 +5998,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
memcpy(new_bus->range + i + 1, bus->range + i,
(bus->dev_count - i) * sizeof(struct kvm_io_range));
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
- synchronize_srcu_expedited(&kvm->srcu);
- kfree(bus);
+ call_srcu(&kvm->srcu, &bus->rcu, __free_bus);
return 0;
}
@@ -6033,7 +6060,7 @@ struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
srcu_idx = srcu_read_lock(&kvm->srcu);
- bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+ bus = kvm_get_bus_srcu(kvm, bus_idx);
if (!bus)
goto out_unlock;
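
Taken together, the kvm->buses[] changes replace "publish, then synchronously wait" with "publish, then free via call_srcu()", which is why kvm_destroy_vm() now needs srcu_barrier() before cleanup_srcu_struct(). A generic sketch of that deferred-free pattern with a hypothetical object:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/srcu.h>

struct demo_obj {
	int		payload;
	struct rcu_head	rcu;
};

static void demo_free_cb(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct demo_obj, rcu));
}

/* Publish @new, then free @old once current SRCU readers have drained. */
static void demo_replace(struct srcu_struct *ss, struct demo_obj __rcu **slot,
			 struct demo_obj *new, struct demo_obj *old)
{
	rcu_assign_pointer(*slot, new);
	call_srcu(ss, &old->rcu, demo_free_cb);
}

/* Teardown: flush in-flight callbacks before the srcu_struct disappears. */
static void demo_teardown(struct srcu_struct *ss)
{
	srcu_barrier(ss);
	cleanup_srcu_struct(ss);
}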