Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:
 "ARM64:

   - Eager page splitting optimization for dirty logging, optionally
     allowing for a VM to avoid the cost of hugepage splitting in the
     stage-2 fault path.

   - Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact
     with services that live in the Secure world. pKVM intervenes on
     FF-A calls to guarantee the host doesn't misuse memory donated to
     the hyp or a pKVM guest.

   - Support for running the split hypervisor with VHE enabled, known as
     'hVHE' mode. This is extremely useful for testing the split
     hypervisor on VHE-only systems, and paves the way for new use cases
     that depend on having two TTBRs available at EL2.

   - Generalized framework for configurable ID registers from userspace.
     KVM/arm64 currently prevents arbitrary CPU feature set
     configuration from userspace, but the intent is to relax this
     limitation and allow userspace to select a feature set consistent
     with the CPU.

   - Enable the use of Branch Target Identification (FEAT_BTI) in the
     hypervisor.

   - Use a separate set of pointer authentication keys for the
     hypervisor when running in protected mode, as the host is untrusted
     at runtime.

   - Ensure timer IRQs are consistently released in the init failure
     paths.

   - Avoid trapping CTR_EL0 on systems with Enhanced Virtualization
     Traps (FEAT_EVT), as it is a register commonly read from userspace.

   - Erratum workaround for the upcoming AmpereOne part, which has
     broken hardware A/D state management.

  RISC-V:

   - Redirect AMO load/store misaligned traps to KVM guest

   - Trap-n-emulate AIA in-kernel irqchip for KVM guest

   - Svnapot support for KVM Guest

  s390:

   - New uvdevice secret API

   - CMM selftest and fixes

   - fix racy access to target CPU for diag 9c

  x86:

   - Fix missing/incorrect #GP checks on ENCLS

   - Use standard mmu_notifier hooks for handling APIC access page

   - Drop now unnecessary TR/TSS load after VM-Exit on AMD

   - Print more descriptive information about the status of SEV and
     SEV-ES during module load

   - Add a test for splitting and reconstituting hugepages during and
     after dirty logging

   - Add support for CPU pinning in demand paging test

   - Add support for AMD PerfMonV2, with a variety of cleanups and minor
     fixes included along the way

   - Add a "nx_huge_pages=never" option to effectively avoid creating NX
     hugepage recovery threads (because nx_huge_pages=off can be toggled
     at runtime)

   - Move handling of PAT out of MTRR code and dedup SVM+VMX code

   - Fix output of PIC poll command emulation when there's an interrupt

   - Add a maintainer's handbook to document KVM x86 processes,
     preferred coding style, testing expectations, etc.

   - Misc cleanups, fixes and comments

  Generic:

   - Miscellaneous bugfixes and cleanups

  Selftests:

   - Generate dependency files so that partial rebuilds work as
     expected"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (153 commits)
  Documentation/process: Add a maintainer handbook for KVM x86
  Documentation/process: Add a label for the tip tree handbook's coding style
  KVM: arm64: Fix misuse of KVM_ARM_VCPU_POWER_OFF bit index
  RISC-V: KVM: Remove unneeded semicolon
  RISC-V: KVM: Allow Svnapot extension for Guest/VM
  riscv: kvm: define vcpu_sbi_ext_pmu in header
  RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip
  RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC
  RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip
  RISC-V: KVM: Add in-kernel emulation of AIA APLIC
  RISC-V: KVM: Implement device interface for AIA irqchip
  RISC-V: KVM: Skeletal in-kernel AIA irqchip support
  RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero
  RISC-V: KVM: Add APLIC related defines
  RISC-V: KVM: Add IMSIC related defines
  RISC-V: KVM: Implement guest external interrupt line management
  KVM: x86: Remove PRIx* definitions as they are solely for user space
  s390/uv: Update query for secret-UVCs
  s390/uv: replace scnprintf with sysfs_emit
  s390/uvdevice: Add 'Lock Secret Store' UVC
  ...
This commit is contained in:
Linus Torvalds
2023-07-03 15:32:22 -07:00
125 changed files with 8009 additions and 1012 deletions

View File

@@ -186,15 +186,10 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
r = kvm_io_bus_unregister_dev(kvm,
zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev);
kvm_iodevice_destructor(&dev->dev);
/*
* On failure, unregister destroys all devices on the
* bus _except_ the target device, i.e. coalesced_zones
* has been modified. Bail after destroying the target
* device, there's no need to restart the walk as there
* aren't any zones left.
* bus, including the target device. There's no need
* to restart the walk as there aren't any zones left.
*/
if (r)
break;

View File

@@ -889,9 +889,9 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
unlock_fail:
mutex_unlock(&kvm->slots_lock);
kfree(p);
fail:
kfree(p);
eventfd_ctx_put(eventfd);
return ret;
@@ -901,7 +901,7 @@ static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_ioeventfd *args)
{
struct _ioeventfd *p, *tmp;
struct _ioeventfd *p;
struct eventfd_ctx *eventfd;
struct kvm_io_bus *bus;
int ret = -ENOENT;
@@ -915,8 +915,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
mutex_lock(&kvm->slots_lock);
list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
list_for_each_entry(p, &kvm->ioeventfds, list) {
if (p->bus_idx != bus_idx ||
p->eventfd != eventfd ||
p->addr != args->addr ||
@@ -931,7 +930,6 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
bus = kvm_get_bus(kvm, bus_idx);
if (bus)
bus->ioeventfd_count--;
ioeventfd_release(p);
ret = 0;
break;
}

View File

@@ -154,11 +154,6 @@ static unsigned long long kvm_active_vms;
static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask);
__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
unsigned long start, unsigned long end)
{
}
__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
{
}
@@ -521,18 +516,6 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn)
return container_of(mn, struct kvm, mmu_notifier);
}
static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int idx;
idx = srcu_read_lock(&kvm->srcu);
kvm_arch_mmu_notifier_invalidate_range(kvm, start, end);
srcu_read_unlock(&kvm->srcu, idx);
}
typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range);
typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start,
@@ -910,7 +893,6 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
}
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.invalidate_range = kvm_mmu_notifier_invalidate_range,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
@@ -3891,7 +3873,10 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
static int vcpu_get_pid(void *data, u64 *val)
{
struct kvm_vcpu *vcpu = data;
*val = pid_nr(rcu_access_pointer(vcpu->pid));
rcu_read_lock();
*val = pid_nr(rcu_dereference(vcpu->pid));
rcu_read_unlock();
return 0;
}
@@ -3993,7 +3978,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
if (r < 0)
goto kvm_put_xa_release;
if (KVM_BUG_ON(!!xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) {
r = -EINVAL;
goto kvm_put_xa_release;
}
@@ -4623,7 +4608,7 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
return -EINVAL;
}
static bool kvm_are_all_memslots_empty(struct kvm *kvm)
bool kvm_are_all_memslots_empty(struct kvm *kvm)
{
int i;
@@ -4636,6 +4621,7 @@ static bool kvm_are_all_memslots_empty(struct kvm *kvm)
return true;
}
EXPORT_SYMBOL_GPL(kvm_are_all_memslots_empty);
static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
struct kvm_enable_cap *cap)
@@ -5315,6 +5301,12 @@ static void hardware_disable_all(void)
}
#endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
static void kvm_iodevice_destructor(struct kvm_io_device *dev)
{
if (dev->ops->destructor)
dev->ops->destructor(dev);
}
static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
{
int i;
@@ -5538,7 +5530,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev)
{
int i, j;
int i;
struct kvm_io_bus *new_bus, *bus;
lockdep_assert_held(&kvm->slots_lock);
@@ -5568,18 +5560,19 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
/* Destroy the old bus _after_ installing the (null) bus. */
/*
* If NULL bus is installed, destroy the old bus, including all the
* attached devices. Otherwise, destroy the caller's device only.
*/
if (!new_bus) {
pr_err("kvm: failed to shrink bus, removing it completely\n");
for (j = 0; j < bus->dev_count; j++) {
if (j == i)
continue;
kvm_iodevice_destructor(bus->range[j].dev);
}
kvm_io_bus_destroy(bus);
return -ENOMEM;
}
kvm_iodevice_destructor(dev);
kfree(bus);
return new_bus ? 0 : -ENOMEM;
return 0;
}
struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,