summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-20 10:44:05 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-20 10:44:05 -0800
commit6a447b0e3151893f6d4a889956553c06d2e775c6 (patch)
tree0f0c149c03dd8c2e9a5fbe01d6de528b2724893e /tools
parentf4a2f7866faaf89ea1595b136e01fcb336b46aab (diff)
parentd45f89f7437d0f2c8275b4434096164db106384d (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "Much x86 work was pushed out to 5.12, but ARM more than made up for it. ARM: - PSCI relay at EL2 when "protected KVM" is enabled - New exception injection code - Simplification of AArch32 system register handling - Fix PMU accesses when no PMU is enabled - Expose CSV3 on non-Meltdown hosts - Cache hierarchy discovery fixes - PV steal-time cleanups - Allow function pointers at EL2 - Various host EL2 entry cleanups - Simplification of the EL2 vector allocation s390: - memcg accouting for s390 specific parts of kvm and gmap - selftest for diag318 - new kvm_stat for when async_pf falls back to sync x86: - Tracepoints for the new pagetable code from 5.10 - Catch VFIO and KVM irqfd events before userspace - Reporting dirty pages to userspace with a ring buffer - SEV-ES host support - Nested VMX support for wait-for-SIPI activity state - New feature flag (AVX512 FP16) - New system ioctl to report Hyper-V-compatible paravirtualization features Generic: - Selftest improvements" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (171 commits) KVM: SVM: fix 32-bit compilation KVM: SVM: Add AP_JUMP_TABLE support in prep for AP booting KVM: SVM: Provide support to launch and run an SEV-ES guest KVM: SVM: Provide an updated VMRUN invocation for SEV-ES guests KVM: SVM: Provide support for SEV-ES vCPU loading KVM: SVM: Provide support for SEV-ES vCPU creation/loading KVM: SVM: Update ASID allocation to support SEV-ES guests KVM: SVM: Set the encryption mask for the SVM host save area KVM: SVM: Add NMI support for an SEV-ES guest KVM: SVM: Guest FPU state save/restore not needed for SEV-ES guest KVM: SVM: Do not report support for SMM for an SEV-ES guest KVM: x86: Update __get_sregs() / __set_sregs() to support SEV-ES KVM: SVM: Add support for CR8 write traps for an SEV-ES guest KVM: SVM: Add support for CR4 write traps for an SEV-ES guest KVM: SVM: Add support for CR0 write traps for an SEV-ES guest KVM: SVM: Add support for EFER write traps for an SEV-ES guest KVM: SVM: Support string IO operations for an SEV-ES guest KVM: SVM: Support MMIO for an SEV-ES guest KVM: SVM: Create trace events for VMGEXIT MSR protocol processing KVM: SVM: Create trace events for VMGEXIT processing ...
Diffstat (limited to 'tools')
-rw-r--r--tools/testing/selftests/kvm/.gitignore6
-rw-r--r--tools/testing/selftests/kvm/Makefile5
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c39
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c55
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c344
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h38
-rw-r--r--tools/testing/selftests/kvm/include/perf_test_util.h4
-rw-r--r--tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h13
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h17
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h4
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c17
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c158
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h4
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c82
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c22
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c32
-rw-r--r--tools/testing/selftests/kvm/s390x/sync_regs_test.c16
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/debug_regs.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c87
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_sregs_test.c92
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/user_msr_test.c248
-rw-r--r--tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c770
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c15
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c21
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c1
36 files changed, 1643 insertions, 465 deletions
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 7a2c242b7152..ce8f4ad39684 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -14,17 +14,17 @@
/x86_64/set_sregs_test
/x86_64/smm_test
/x86_64/state_test
-/x86_64/user_msr_test
-/x86_64/vmx_preemption_timer_test
/x86_64/svm_vmcall_test
/x86_64/sync_regs_test
+/x86_64/tsc_msrs_test
+/x86_64/userspace_msr_exit_test
/x86_64/vmx_apic_access_test
/x86_64/vmx_close_while_nested_test
/x86_64/vmx_dirty_log_test
+/x86_64/vmx_preemption_timer_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
/x86_64/xss_msr_test
-/clear_dirty_log_test
/demand_paging_test
/dirty_log_test
/dirty_log_perf_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 3d14ef77755e..c7ca4faba272 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -36,7 +36,7 @@ endif
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
-LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
+LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
@@ -50,6 +50,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
@@ -58,7 +59,6 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
-TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
@@ -70,6 +70,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
TEST_GEN_PROGS_aarch64 += set_memory_region_test
TEST_GEN_PROGS_aarch64 += steal_time
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 33218a395d9f..486932164cf2 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -42,12 +42,16 @@
#define for_each_reg(i) \
for ((i) = 0; (i) < reg_list->n; ++(i))
+#define for_each_reg_filtered(i) \
+ for_each_reg(i) \
+ if (!filter_reg(reg_list->reg[i]))
+
#define for_each_missing_reg(i) \
for ((i) = 0; (i) < blessed_n; ++(i)) \
if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))
#define for_each_new_reg(i) \
- for ((i) = 0; (i) < reg_list->n; ++(i)) \
+ for_each_reg_filtered(i) \
if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
@@ -57,6 +61,18 @@ static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
static __u64 *blessed_reg, blessed_n;
+static bool filter_reg(__u64 reg)
+{
+ /*
+ * DEMUX register presence depends on the host's CLIDR_EL1.
+ * This means there's no set of them that we can bless.
+ */
+ if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+ return true;
+
+ return false;
+}
+
static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
{
int i;
@@ -325,7 +341,7 @@ int main(int ac, char **av)
struct kvm_vcpu_init init = { .target = -1, };
int new_regs = 0, missing_regs = 0, i;
int failed_get = 0, failed_set = 0, failed_reject = 0;
- bool print_list = false, fixup_core_regs = false;
+ bool print_list = false, print_filtered = false, fixup_core_regs = false;
struct kvm_vm *vm;
__u64 *vec_regs;
@@ -336,8 +352,10 @@ int main(int ac, char **av)
fixup_core_regs = true;
else if (strcmp(av[i], "--list") == 0)
print_list = true;
+ else if (strcmp(av[i], "--list-filtered") == 0)
+ print_filtered = true;
else
- fprintf(stderr, "Ignoring unknown option: %s\n", av[i]);
+ TEST_FAIL("Unknown option: %s\n", av[i]);
}
vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
@@ -350,10 +368,14 @@ int main(int ac, char **av)
if (fixup_core_regs)
core_reg_fixup();
- if (print_list) {
+ if (print_list || print_filtered) {
putchar('\n');
- for_each_reg(i)
- print_reg(reg_list->reg[i]);
+ for_each_reg(i) {
+ __u64 id = reg_list->reg[i];
+ if ((print_list && !filter_reg(id)) ||
+ (print_filtered && filter_reg(id)))
+ print_reg(id);
+ }
putchar('\n');
return 0;
}
@@ -458,6 +480,8 @@ int main(int ac, char **av)
/*
* The current blessed list was primed with the output of kernel version
* v4.15 with --core-reg-fixup and then later updated with new registers.
+ *
+ * The blessed list is up to date with kernel version v5.10-rc5
*/
static __u64 base_regs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@@ -736,9 +760,6 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
- KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0,
- KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1,
- KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2,
};
static __u64 base_regs_n = ARRAY_SIZE(base_regs);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 85c9b8f73142..9c6a7be31e03 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -27,6 +27,7 @@
#define TEST_HOST_LOOP_N 2UL
/* Host variables */
+static u64 dirty_log_manual_caps;
static bool host_quit;
static uint64_t iteration;
static uint64_t vcpu_last_completed_iteration[MAX_VCPUS];
@@ -88,10 +89,6 @@ static void *vcpu_worker(void *data)
return NULL;
}
-#ifdef USE_CLEAR_DIRTY_LOG
-static u64 dirty_log_manual_caps;
-#endif
-
static void run_test(enum vm_guest_mode mode, unsigned long iterations,
uint64_t phys_offset, int wr_fract)
{
@@ -106,10 +103,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
struct timespec get_dirty_log_total = (struct timespec){0};
struct timespec vcpu_dirty_total = (struct timespec){0};
struct timespec avg;
-#ifdef USE_CLEAR_DIRTY_LOG
struct kvm_enable_cap cap = {};
struct timespec clear_dirty_log_total = (struct timespec){0};
-#endif
vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
@@ -120,11 +115,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
bmap = bitmap_alloc(host_num_pages);
-#ifdef USE_CLEAR_DIRTY_LOG
- cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
- cap.args[0] = dirty_log_manual_caps;
- vm_enable_cap(vm, &cap);
-#endif
+ if (dirty_log_manual_caps) {
+ cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
+ cap.args[0] = dirty_log_manual_caps;
+ vm_enable_cap(vm, &cap);
+ }
vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
TEST_ASSERT(vcpu_threads, "Memory allocation failed");
@@ -190,17 +185,17 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n",
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
-#ifdef USE_CLEAR_DIRTY_LOG
- clock_gettime(CLOCK_MONOTONIC, &start);
- kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
- host_num_pages);
+ if (dirty_log_manual_caps) {
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
+ host_num_pages);
- ts_diff = timespec_diff_now(start);
- clear_dirty_log_total = timespec_add(clear_dirty_log_total,
- ts_diff);
- pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
- iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
-#endif
+ ts_diff = timespec_diff_now(start);
+ clear_dirty_log_total = timespec_add(clear_dirty_log_total,
+ ts_diff);
+ pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
+ iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+ }
}
/* Tell the vcpu thread to quit */
@@ -220,12 +215,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
iterations, get_dirty_log_total.tv_sec,
get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
-#ifdef USE_CLEAR_DIRTY_LOG
- avg = timespec_div(clear_dirty_log_total, iterations);
- pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
- iterations, clear_dirty_log_total.tv_sec,
- clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
-#endif
+ if (dirty_log_manual_caps) {
+ avg = timespec_div(clear_dirty_log_total, iterations);
+ pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+ iterations, clear_dirty_log_total.tv_sec,
+ clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+ }
free(bmap);
free(vcpu_threads);
@@ -284,16 +279,10 @@ int main(int argc, char *argv[])
int opt, i;
int wr_fract = 1;
-#ifdef USE_CLEAR_DIRTY_LOG
dirty_log_manual_caps =
kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
- if (!dirty_log_manual_caps) {
- print_skip("KVM_CLEAR_DIRTY_LOG not available");
- exit(KSFT_SKIP);
- }
dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
KVM_DIRTY_LOG_INITIALLY_SET);
-#endif
#ifdef __x86_64__
guest_mode_init(VM_MODE_PXXV48_4K, true, true);
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 54da9cc20db4..471baecb7772 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -12,8 +12,13 @@
#include <unistd.h>
#include <time.h>
#include <pthread.h>
+#include <semaphore.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
+#include <asm/barrier.h>
#include "test_util.h"
#include "kvm_util.h"
@@ -57,6 +62,10 @@
# define test_and_clear_bit_le test_and_clear_bit
#endif
+#define TEST_DIRTY_RING_COUNT 65536
+
+#define SIG_IPI SIGUSR1
+
/*
* Guest/Host shared variables. Ensure addr_gva2hva() and/or
* sync_global_to/from_guest() are used when accessing from
@@ -128,6 +137,31 @@ static uint64_t host_dirty_count;
static uint64_t host_clear_count;
static uint64_t host_track_next_count;
+/* Whether dirty ring reset is requested, or finished */
+static sem_t dirty_ring_vcpu_stop;
+static sem_t dirty_ring_vcpu_cont;
+/*
+ * This is updated by the vcpu thread to tell the host whether it's a
+ * ring-full event. It should only be read until a sem_wait() of
+ * dirty_ring_vcpu_stop and before vcpu continues to run.
+ */
+static bool dirty_ring_vcpu_ring_full;
+/*
+ * This is only used for verifying the dirty pages. Dirty ring has a very
+ * tricky case when the ring just got full, kvm will do userspace exit due to
+ * ring full. When that happens, the very last PFN is set but actually the
+ * data is not changed (the guest WRITE is not really applied yet), because
+ * we found that the dirty ring is full, refused to continue the vcpu, and
+ * recorded the dirty gfn with the old contents.
+ *
+ * For this specific case, it's safe to skip checking this pfn for this
+ * bit, because it's a redundant bit, and when the write happens later the bit
+ * will be set again. We use this variable to always keep track of the latest
+ * dirty gfn we've collected, so that if a mismatch of data found later in the
+ * verifying process, we let it pass.
+ */
+static uint64_t dirty_ring_last_page;
+
enum log_mode_t {
/* Only use KVM_GET_DIRTY_LOG for logging */
LOG_MODE_DIRTY_LOG = 0,
@@ -135,6 +169,9 @@ enum log_mode_t {
/* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
LOG_MODE_CLEAR_LOG = 1,
+ /* Use dirty ring for logging */
+ LOG_MODE_DIRTY_RING = 2,
+
LOG_MODE_NUM,
/* Run all supported modes */
@@ -145,6 +182,26 @@ enum log_mode_t {
static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
/* Logging mode for current run */
static enum log_mode_t host_log_mode;
+static pthread_t vcpu_thread;
+static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
+
+static void vcpu_kick(void)
+{
+ pthread_kill(vcpu_thread, SIG_IPI);
+}
+
+/*
+ * In our test we do signal tricks, let's use a better version of
+ * sem_wait to avoid signal interrupts
+ */
+static void sem_wait_until(sem_t *sem)
+{
+ int ret;
+
+ do
+ ret = sem_wait(sem);
+ while (ret == -1 && errno == EINTR);
+}
static bool clear_log_supported(void)
{
@@ -178,6 +235,152 @@ static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
}
+static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+ TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
+ "vcpu run failed: errno=%d", err);
+
+ TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+ "Invalid guest sync status: exit_reason=%s\n",
+ exit_reason_str(run->exit_reason));
+}
+
+static bool dirty_ring_supported(void)
+{
+ return kvm_check_cap(KVM_CAP_DIRTY_LOG_RING);
+}
+
+static void dirty_ring_create_vm_done(struct kvm_vm *vm)
+{
+ /*
+ * Switch to dirty ring mode after VM creation but before any
+ * of the vcpu creation.
+ */
+ vm_enable_dirty_ring(vm, test_dirty_ring_count *
+ sizeof(struct kvm_dirty_gfn));
+}
+
+static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn)
+{
+ return gfn->flags == KVM_DIRTY_GFN_F_DIRTY;
+}
+
+static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
+{
+ gfn->flags = KVM_DIRTY_GFN_F_RESET;
+}
+
+static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
+ int slot, void *bitmap,
+ uint32_t num_pages, uint32_t *fetch_index)
+{
+ struct kvm_dirty_gfn *cur;
+ uint32_t count = 0;
+
+ while (true) {
+ cur = &dirty_gfns[*fetch_index % test_dirty_ring_count];
+ if (!dirty_gfn_is_dirtied(cur))
+ break;
+ TEST_ASSERT(cur->slot == slot, "Slot number didn't match: "
+ "%u != %u", cur->slot, slot);
+ TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
+ "0x%llx >= 0x%x", cur->offset, num_pages);
+ //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
+ set_bit_le(cur->offset, bitmap);
+ dirty_ring_last_page = cur->offset;
+ dirty_gfn_set_collected(cur);
+ (*fetch_index)++;
+ count++;
+ }
+
+ return count;
+}
+
+static void dirty_ring_wait_vcpu(void)
+{
+ /* This makes sure that hardware PML cache flushed */
+ vcpu_kick();
+ sem_wait_until(&dirty_ring_vcpu_stop);
+}
+
+static void dirty_ring_continue_vcpu(void)
+{
+ pr_info("Notifying vcpu to continue\n");
+ sem_post(&dirty_ring_vcpu_cont);
+}
+
+static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot,
+ void *bitmap, uint32_t num_pages)
+{
+ /* We only have one vcpu */
+ static uint32_t fetch_index = 0;
+ uint32_t count = 0, cleared;
+ bool continued_vcpu = false;
+
+ dirty_ring_wait_vcpu();
+
+ if (!dirty_ring_vcpu_ring_full) {
+ /*
+ * This is not a ring-full event, it's safe to allow
+ * vcpu to continue
+ */
+ dirty_ring_continue_vcpu();
+ continued_vcpu = true;
+ }
+
+ /* Only have one vcpu */
+ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID),
+ slot, bitmap, num_pages, &fetch_index);
+
+ cleared = kvm_vm_reset_dirty_ring(vm);
+
+ /* Cleared pages should be the same as collected */
+ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
+ "with collected (%u)", cleared, count);
+
+ if (!continued_vcpu) {
+ TEST_ASSERT(dirty_ring_vcpu_ring_full,
+ "Didn't continue vcpu even without ring full");
+ dirty_ring_continue_vcpu();
+ }
+
+ pr_info("Iteration %ld collected %u pages\n", iteration, count);
+}
+
+static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+ /* A ucall-sync or ring-full event is allowed */
+ if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
+ /* We should allow this to continue */
+ ;
+ } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
+ (ret == -1 && err == EINTR)) {
+ /* Update the flag first before pause */
+ WRITE_ONCE(dirty_ring_vcpu_ring_full,
+ run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
+ sem_post(&dirty_ring_vcpu_stop);
+ pr_info("vcpu stops because %s...\n",
+ dirty_ring_vcpu_ring_full ?
+ "dirty ring is full" : "vcpu is kicked out");
+ sem_wait_until(&dirty_ring_vcpu_cont);
+ pr_info("vcpu continues now.\n");
+ } else {
+ TEST_ASSERT(false, "Invalid guest sync status: "
+ "exit_reason=%s\n",
+ exit_reason_str(run->exit_reason));
+ }
+}
+
+static void dirty_ring_before_vcpu_join(void)
+{
+ /* Kick another round of vcpu just to make sure it will quit */
+ sem_post(&dirty_ring_vcpu_cont);
+}
+
struct log_mode {
const char *name;
/* Return true if this mode is supported, otherwise false */
@@ -187,16 +390,29 @@ struct log_mode {
/* Hook to collect the dirty pages into the bitmap provided */
void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
void *bitmap, uint32_t num_pages);
+ /* Hook to call when after each vcpu run */
+ void (*after_vcpu_run)(struct kvm_vm *vm, int ret, int err);
+ void (*before_vcpu_join) (void);
} log_modes[LOG_MODE_NUM] = {
{
.name = "dirty-log",
.collect_dirty_pages = dirty_log_collect_dirty_pages,
+ .after_vcpu_run = default_after_vcpu_run,
},
{
.name = "clear-log",
.supported = clear_log_supported,
.create_vm_done = clear_log_create_vm_done,
.collect_dirty_pages = clear_log_collect_dirty_pages,
+ .after_vcpu_run = default_after_vcpu_run,
+ },
+ {
+ .name = "dirty-ring",
+ .supported = dirty_ring_supported,
+ .create_vm_done = dirty_ring_create_vm_done,
+ .collect_dirty_pages = dirty_ring_collect_dirty_pages,
+ .before_vcpu_join = dirty_ring_before_vcpu_join,
+ .after_vcpu_run = dirty_ring_after_vcpu_run,
},
};
@@ -247,6 +463,22 @@ static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
}
+static void log_mode_after_vcpu_run(struct kvm_vm *vm, int ret, int err)
+{
+ struct log_mode *mode = &log_modes[host_log_mode];
+
+ if (mode->after_vcpu_run)
+ mode->after_vcpu_run(vm, ret, err);
+}
+
+static void log_mode_before_vcpu_join(void)
+{
+ struct log_mode *mode = &log_modes[host_log_mode];
+
+ if (mode->before_vcpu_join)
+ mode->before_vcpu_join();
+}
+
static void generate_random_array(uint64_t *guest_array, uint64_t size)
{
uint64_t i;
@@ -257,29 +489,44 @@ static void generate_random_array(uint64_t *guest_array, uint64_t size)
static void *vcpu_worker(void *data)
{
- int ret;
+ int ret, vcpu_fd;
struct kvm_vm *vm = data;
uint64_t *guest_array;
uint64_t pages_count = 0;
- struct kvm_run *run;
+ struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
+ + sizeof(sigset_t));
+ sigset_t *sigset = (sigset_t *) &sigmask->sigset;
+
+ vcpu_fd = vcpu_get_fd(vm, VCPU_ID);
+
+ /*
+ * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the
+ * ioctl to return with -EINTR, but it is still pending and we need
+ * to accept it with the sigwait.
+ */
+ sigmask->len = 8;
+ pthread_sigmask(0, NULL, sigset);
+ vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask);
+ sigaddset(sigset, SIG_IPI);
+ pthread_sigmask(SIG_BLOCK, sigset, NULL);
- run = vcpu_state(vm, VCPU_ID);
+ sigemptyset(sigset);
+ sigaddset(sigset, SIG_IPI);
guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
- generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
while (!READ_ONCE(host_quit)) {
+ /* Clear any existing kick signals */
+ generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+ pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
- ret = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
- if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
- pages_count += TEST_PAGES_PER_LOOP;
- generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
- } else {
- TEST_FAIL("Invalid guest sync status: "
- "exit_reason=%s\n",
- exit_reason_str(run->exit_reason));
+ ret = ioctl(vcpu_fd, KVM_RUN, NULL);
+ if (ret == -1 && errno == EINTR) {
+ int sig = -1;
+ sigwait(sigset, &sig);
+ assert(sig == SIG_IPI);
}
+ log_mode_after_vcpu_run(vm, ret, errno);
}
pr_info("Dirtied %"PRIu64" pages\n", pages_count);
@@ -292,6 +539,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
uint64_t step = vm_num_host_pages(mode, 1);
uint64_t page;
uint64_t *value_ptr;
+ uint64_t min_iter = 0;
for (page = 0; page < host_num_pages; page += step) {
value_ptr = host_test_mem + page * host_page_size;
@@ -306,14 +554,64 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
}
if (test_and_clear_bit_le(page, bmap)) {
+ bool matched;
+
host_dirty_count++;
+
/*
* If the bit is set, the value written onto
* the corresponding page should be either the
* previous iteration number or the current one.
*/
- TEST_ASSERT(*value_ptr == iteration ||
- *value_ptr == iteration - 1,
+ matched = (*value_ptr == iteration ||
+ *value_ptr == iteration - 1);
+
+ if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
+ if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
+ /*
+ * Short answer: this case is special
+ * only for dirty ring test where the
+ * page is the last page before a kvm
+ * dirty ring full in iteration N-2.
+ *
+ * Long answer: Assuming ring size R,
+ * one possible condition is:
+ *
+ * main thr vcpu thr
+ * -------- --------
+ * iter=1
+ * write 1 to page 0~(R-1)
+ * full, vmexit
+ * collect 0~(R-1)
+ * kick vcpu
+ * write 1 to (R-1)~(2R-2)
+ * full, vmexit
+ * iter=2
+ * collect (R-1)~(2R-2)
+ * kick vcpu
+ * write 1 to (2R-2)
+ * (NOTE!!! "1" cached in cpu reg)
+ * write 2 to (2R-1)~(3R-3)
+ * full, vmexit
+ * iter=3
+ * collect (2R-2)~(3R-3)
+ * (here if we read value on page
+ * "2R-2" is 1, while iter=3!!!)
+ *
+ * This however can only happen once per iteration.
+ */
+ min_iter = iteration - 1;
+ continue;
+ } else if (page == dirty_ring_last_page) {
+ /*
+ * Please refer to comments in
+ * dirty_ring_last_page.
+ */
+ continue;
+