summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-10 16:42:49 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-10 16:42:49 -0700
commit519f526d391b0ef775aeb04c4b6f632ea6b3ee50 (patch)
tree36985d7882734c136fc3c9a48e9d9abf9e97c1f1 /arch/powerpc/kvm
parent06ab838c2024db468855118087db16d8fa905ddc (diff)
parentba60c41ae392b473a1897faa0b8739fcb8759d69 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more kvm updates from Paolo Bonzini: "ARM: - Full debug support for arm64 - Active state switching for timer interrupts - Lazy FP/SIMD save/restore for arm64 - Generic ARMv8 target PPC: - Book3S: A few bug fixes - Book3S: Allow micro-threading on POWER8 x86: - Compiler warnings Generic: - Adaptive polling for guest halt" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits) kvm: irqchip: fix memory leak kvm: move new trace event outside #ifdef CONFIG_KVM_ASYNC_PF KVM: trace kvm_halt_poll_ns grow/shrink KVM: dynamic halt-polling KVM: make halt_poll_ns per-vCPU Silence compiler warning in arch/x86/kvm/emulate.c kvm: compile process_smi_save_seg_64() only for x86_64 KVM: x86: avoid uninitialized variable warning KVM: PPC: Book3S: Fix typo in top comment about locking KVM: PPC: Book3S: Fix size of the PSPB register KVM: PPC: Book3S HV: Exit on H_DOORBELL if HOST_IPI is set KVM: PPC: Book3S HV: Fix race in starting secondary threads KVM: PPC: Book3S: correct width in XER handling KVM: PPC: Book3S HV: Fix preempted vcore stolen time calculation KVM: PPC: Book3S HV: Fix preempted vcore list locking KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD KVM: PPC: Book3S HV: Fix bug in dirty page tracking KVM: PPC: Book3S HV: Fix race in reading change bit when removing HPTE KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8 KVM: PPC: Book3S HV: Make use of unused threads when running guests ...
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--arch/powerpc/kvm/Kconfig8
-rw-r--r--arch/powerpc/kvm/book3s.c3
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c8
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c1
-rw-r--r--arch/powerpc/kvm/book3s_hv.c664
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c32
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c161
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S137
-rw-r--r--arch/powerpc/kvm/book3s_paired_singles.c2
-rw-r--r--arch/powerpc/kvm/book3s_segment.S4
-rw-r--r--arch/powerpc/kvm/book3s_xics.c2
-rw-r--r--arch/powerpc/kvm/booke.c1
-rw-r--r--arch/powerpc/kvm/e500_mmu.c2
-rw-r--r--arch/powerpc/kvm/powerpc.c2
17 files changed, 899 insertions, 134 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 3caec2c42105..c2024ac9d4e8 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -74,14 +74,14 @@ config KVM_BOOK3S_64
If unsure, say N.
config KVM_BOOK3S_64_HV
- tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
+ tristate "KVM for POWER7 and later using hypervisor mode in host"
depends on KVM_BOOK3S_64 && PPC_POWERNV
select KVM_BOOK3S_HV_POSSIBLE
select MMU_NOTIFIER
select CMA
---help---
Support running unmodified book3s_64 guest kernels in
- virtual machines on POWER7 and PPC970 processors that have
+ virtual machines on POWER7 and newer processors that have
hypervisor mode available to the host.
If you say Y here, KVM will use the hardware virtualization
@@ -89,8 +89,8 @@ config KVM_BOOK3S_64_HV
guest operating systems will run at full hardware speed
using supervisor and user modes. However, this also means
that KVM is not usable under PowerVM (pHyp), is only usable
- on POWER7 (or later) processors and PPC970-family processors,
- and cannot emulate a different processor from the host processor.
+ on POWER7 or later processors, and cannot emulate a
+ different processor from the host processor.
If unsure, say N.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 6d6398f4d632..d75bf325f54a 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -240,7 +240,8 @@ void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
}
-int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
+ unsigned int priority)
{
int deliver = 1;
int vec = 0;
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 2035d16a9262..d5c9bfeb0c9c 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -26,6 +26,7 @@
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
+#include "book3s.h"
/* #define DEBUG_MMU */
/* #define DEBUG_SR */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index b982d925c710..79ad35abd196 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -28,6 +28,7 @@
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
#include "trace_pr.h"
+#include "book3s.h"
#define PTE_SIZE 12
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index dab68b7af3f2..1f9c0a17f445 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -761,6 +761,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
/* Harvest R and C */
rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+ if (rcbits & HPTE_R_C)
+ kvmppc_update_rmap_change(rmapp, psize);
if (rcbits & ~rev[i].guest_rpte) {
rev[i].guest_rpte = ptel | rcbits;
note_hpte_modification(kvm, &rev[i]);
@@ -927,8 +929,12 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
retry:
lock_rmap(rmapp);
if (*rmapp & KVMPPC_RMAP_CHANGED) {
- *rmapp &= ~KVMPPC_RMAP_CHANGED;
+ long change_order = (*rmapp & KVMPPC_RMAP_CHG_ORDER)
+ >> KVMPPC_RMAP_CHG_SHIFT;
+ *rmapp &= ~(KVMPPC_RMAP_CHANGED | KVMPPC_RMAP_CHG_ORDER);
npages_dirty = 1;
+ if (change_order > PAGE_SHIFT)
+ npages_dirty = 1ul << (change_order - PAGE_SHIFT);
}
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
unlock_rmap(rmapp);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 5a2bc4b0dfe5..2afdb9c0937d 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -23,6 +23,7 @@
#include <asm/reg.h>
#include <asm/switch_to.h>
#include <asm/time.h>
+#include "book3s.h"
#define OP_19_XOP_RFID 18
#define OP_19_XOP_RFI 50
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a9f753fb73a8..9754e6815e52 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -81,6 +81,12 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
#define MPP_BUFFER_ORDER 3
#endif
+static int dynamic_mt_modes = 6;
+module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
+static int target_smt_mode;
+module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -114,7 +120,7 @@ static bool kvmppc_ipi_thread(int cpu)
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
- int cpu = vcpu->cpu;
+ int cpu;
wait_queue_head_t *wqp;
wqp = kvm_arch_vcpu_wq(vcpu);
@@ -123,10 +129,11 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
++vcpu->stat.halt_wakeup;
}
- if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid))
+ if (kvmppc_ipi_thread(vcpu->arch.thread_cpu))
return;
/* CPU points to the first thread of the core */
+ cpu = vcpu->cpu;
if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
smp_send_reschedule(cpu);
}
@@ -164,6 +171,27 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
* they should never fail.)
*/
+static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc->stoltb_lock, flags);
+ vc->preempt_tb = mftb();
+ spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
+static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&vc->stoltb_lock, flags);
+ if (vc->preempt_tb != TB_NIL) {
+ vc->stolen_tb += mftb() - vc->preempt_tb;
+ vc->preempt_tb = TB_NIL;
+ }
+ spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -175,14 +203,9 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
* vcpu, and once it is set to this vcpu, only this task
* ever sets it to NULL.
*/
- if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
- spin_lock_irqsave(&vc->stoltb_lock, flags);
- if (vc->preempt_tb != TB_NIL) {
- vc->stolen_tb += mftb() - vc->preempt_tb;
- vc->preempt_tb = TB_NIL;
- }
- spin_unlock_irqrestore(&vc->stoltb_lock, flags);
- }
+ if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+ kvmppc_core_end_stolen(vc);
+
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
vcpu->arch.busy_preempt != TB_NIL) {
@@ -197,11 +220,9 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
- if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
- spin_lock_irqsave(&vc->stoltb_lock, flags);
- vc->preempt_tb = mftb();
- spin_unlock_irqrestore(&vc->stoltb_lock, flags);
- }
+ if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+ kvmppc_core_start_stolen(vc);
+
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
vcpu->arch.busy_preempt = mftb();
@@ -214,12 +235,12 @@ static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
kvmppc_end_cede(vcpu);
}
-void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
+static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
vcpu->arch.pvr = pvr;
}
-int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
+static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
unsigned long pcr = 0;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
@@ -259,7 +280,7 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
return 0;
}
-void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
int r;
@@ -292,7 +313,7 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
vcpu->arch.last_inst);
}
-struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
+static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
int r;
struct kvm_vcpu *v, *ret = NULL;
@@ -641,7 +662,8 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
spin_lock(&vcore->lock);
if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
- vcore->vcore_state != VCORE_INACTIVE)
+ vcore->vcore_state != VCORE_INACTIVE &&
+ vcore->runner)
target = vcore->runner;
spin_unlock(&vcore->lock);
@@ -1431,6 +1453,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
vcore->lpcr = kvm->arch.lpcr;
vcore->first_vcpuid = core * threads_per_subcore;
vcore->kvm = kvm;
+ INIT_LIST_HEAD(&vcore->preempt_list);
vcore->mpp_buffer_is_valid = false;
@@ -1655,6 +1678,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
spin_unlock(&vcore->lock);
vcpu->arch.vcore = vcore;
vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
+ vcpu->arch.thread_cpu = -1;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);
@@ -1749,6 +1773,7 @@ static int kvmppc_grab_hwthread(int cpu)
/* Ensure the thread won't go into the kernel if it wakes */
tpaca->kvm_hstate.kvm_vcpu = NULL;
+ tpaca->kvm_hstate.kvm_vcore = NULL;
tpaca->kvm_hstate.napping = 0;
smp_wmb();
tpaca->kvm_hstate.hwthread_req = 1;
@@ -1780,26 +1805,32 @@ static void kvmppc_release_hwthread(int cpu)
tpaca = &paca[cpu];
tpaca->kvm_hstate.hwthread_req = 0;
tpaca->kvm_hstate.kvm_vcpu = NULL;
+ tpaca->kvm_hstate.kvm_vcore = NULL;
+ tpaca->kvm_hstate.kvm_split_mode = NULL;
}
-static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
int cpu;
struct paca_struct *tpaca;
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ struct kvmppc_vcore *mvc = vc->master_vcore;
- if (vcpu->arch.timer_running) {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
- vcpu->arch.timer_running = 0;
+ cpu = vc->pcpu;
+ if (vcpu) {
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
+ cpu += vcpu->arch.ptid;
+ vcpu->cpu = mvc->pcpu;
+ vcpu->arch.thread_cpu = cpu;
}
- cpu = vc->pcpu + vcpu->arch.ptid;
tpaca = &paca[cpu];
- tpaca->kvm_hstate.kvm_vcore = vc;
- tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
- vcpu->cpu = vc->pcpu;
- /* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */
- smp_wmb();
tpaca->kvm_hstate.kvm_vcpu = vcpu;
+ tpaca->kvm_hstate.ptid = cpu - mvc->pcpu;
+ /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
+ smp_wmb();
+ tpaca->kvm_hstate.kvm_vcore = mvc;
if (cpu != smp_processor_id())
kvmppc_ipi_thread(cpu);
}
@@ -1812,12 +1843,12 @@ static void kvmppc_wait_for_nap(void)
for (loops = 0; loops < 1000000; ++loops) {
/*
* Check if all threads are finished.
- * We set the vcpu pointer when starting a thread
+ * We set the vcore pointer when starting a thread
* and the thread clears it when finished, so we look
- * for any threads that still have a non-NULL vcpu ptr.
+ * for any threads that still have a non-NULL vcore ptr.
*/
for (i = 1; i < threads_per_subcore; ++i)
- if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+ if (paca[cpu + i].kvm_hstate.kvm_vcore)
break;
if (i == threads_per_subcore) {
HMT_medium();
@@ -1827,7 +1858,7 @@ static void kvmppc_wait_for_nap(void)
}
HMT_medium();
for (i = 1; i < threads_per_subcore; ++i)
- if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+ if (paca[cpu + i].kvm_hstate.kvm_vcore)
pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
}
@@ -1890,6 +1921,278 @@ static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
}
+/*
+ * A list of virtual cores for each physical CPU.
+ * These are vcores that could run but their runner VCPU tasks are
+ * (or may be) preempted.
+ */
+struct preempted_vcore_list {
+ struct list_head list;
+ spinlock_t lock;
+};
+
+static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
+
+static void init_vcore_lists(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
+ spin_lock_init(&lp->lock);
+ INIT_LIST_HEAD(&lp->list);
+ }
+}
+
+static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
+{
+ struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+
+ vc->vcore_state = VCORE_PREEMPT;
+ vc->pcpu = smp_processor_id();
+ if (vc->num_threads < threads_per_subcore) {
+ spin_lock(&lp->lock);
+ list_add_tail(&vc->preempt_list, &lp->list);
+ spin_unlock(&lp->lock);
+ }
+
+ /* Start accumulating stolen time */
+ kvmppc_core_start_stolen(vc);
+}
+
+static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
+{
+ struct preempted_vcore_list *lp;
+
+ kvmppc_core_end_stolen(vc);
+ if (!list_empty(&vc->preempt_list)) {
+ lp = &per_cpu(preempted_vcores, vc->pcpu);
+ spin_lock(&lp->lock);
+ list_del_init(&vc->preempt_list);
+ spin_unlock(&lp->lock);
+ }
+ vc->vcore_state = VCORE_INACTIVE;
+}
+
+/*
+ * This stores information about the virtual cores currently
+ * assigned to a physical core.
+ */
+struct core_info {
+ int n_subcores;
+ int max_subcore_threads;
+ int total_threads;
+ int subcore_threads[MAX_SUBCORES];
+ struct kvm *subcore_vm[MAX_SUBCORES];
+ struct list_head vcs[MAX_SUBCORES];
+};
+
+/*
+ * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
+ * respectively in 2-way micro-threading (split-core) mode.
+ */
+static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
+
+static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
+{
+ int sub;
+
+ memset(cip, 0, sizeof(*cip));
+ cip->n_subcores = 1;
+ cip->max_subcore_threads = vc->num_threads;
+ cip->total_threads = vc->num_threads;
+ cip->subcore_threads[0] = vc->num_threads;
+ cip->subcore_vm[0] = vc->kvm;
+ for (sub = 0; sub < MAX_SUBCORES; ++sub)
+ INIT_LIST_HEAD(&cip->vcs[sub]);
+ list_add_tail(&vc->preempt_list, &cip->vcs[0]);
+}
+
+static bool subcore_config_ok(int n_subcores, int n_threads)
+{
+ /* Can only dynamically split if unsplit to begin with */
+ if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
+ return false;
+ if (n_subcores > MAX_SUBCORES)
+ return false;
+ if (n_subcores > 1) {
+ if (!(dynamic_mt_modes & 2))
+ n_subcores = 4;
+ if (n_subcores > 2 && !(dynamic_mt_modes & 4))
+ return false;
+ }
+
+ return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
+}
+
+static void init_master_vcore(struct kvmppc_vcore *vc)
+{
+ vc->master_vcore = vc;
+ vc->entry_exit_map = 0;
+ vc->in_guest = 0;
+ vc->napping_threads = 0;
+ vc->conferring_threads = 0;
+}
+
+/*
+ * See if the existing subcores can be split into 3 (or fewer) subcores
+ * of at most two threads each, so we can fit in another vcore. This
+ * assumes there are at most two subcores and at most 6 threads in total.
+ */
+static bool can_split_piggybacked_subcores(struct core_info *cip)
+{
+ int sub, new_sub;
+ int large_sub = -1;
+ int thr;
+ int n_subcores = cip->n_subcores;
+ struct kvmppc_vcore *vc, *vcnext;
+ struct kvmppc_vcore *master_vc = NULL;
+
+ for (sub = 0; sub < cip->n_subcores; ++sub) {
+ if (cip->subcore_threads[sub] <= 2)
+ continue;
+ if (large_sub >= 0)
+ return false;
+ large_sub = sub;
+ vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+ preempt_list);
+ if (vc->num_threads > 2)
+ return false;
+ n_subcores += (cip->subcore_threads[sub] - 1) >> 1;
+ }
+ if (n_subcores > 3 || large_sub < 0)
+ return false;
+
+ /*
+ * Seems feasible, so go through and move vcores to new subcores.
+ * Note that when we have two or more vcores in one subcore,
+ * all those vcores must have only one thread each.
+ */
+ new_sub = cip->n_subcores;
+ thr = 0;
+ sub = large_sub;
+ list_for_each_entry_safe(vc, vcnext, &cip->vcs[sub], preempt_list) {
+ if (thr >= 2) {
+ list_del(&vc->preempt_list);
+ list_add_tail(&vc->preempt_list, &cip->vcs[new_sub]);
+ /* vc->num_threads must be 1 */
+ if (++cip->subcore_threads[new_sub] == 1) {
+ cip->subcore_vm[new_sub] = vc->kvm;
+ init_master_vcore(vc);
+ master_vc = vc;
+ ++cip->n_subcores;
+ } else {
+ vc->master_vcore = master_vc;
+ ++new_sub;
+ }
+ }
+ thr += vc->num_threads;
+ }
+ cip->subcore_threads[large_sub] = 2;
+ cip->max_subcore_threads = 2;
+
+ return true;
+}
+
+static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
+{
+ int n_threads = vc->num_threads;
+ int sub;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return false;
+
+ if (n_threads < cip->max_subcore_threads)
+ n_threads = cip->max_subcore_threads;
+ if (subcore_config_ok(cip->n_subcores + 1, n_threads)) {
+ cip->max_subcore_threads = n_threads;
+ } else if (cip->n_subcores <= 2 && cip->total_threads <= 6 &&
+ vc->num_threads <= 2) {
+ /*
+ * We may be able to fit another subcore in by
+ * splitting an existing subcore with 3 or 4
+ * threads into two 2-thread subcores, or one
+ * with 5 or 6 threads into three subcores.
+ * We can only do this if those subcores have
+ * piggybacked virtual cores.
+ */
+ if (!can_split_piggybacked_subcores(cip))
+ return false;
+ } else {
+ return false;
+ }
+
+ sub = cip->n_subcores;
+ ++cip->n_subcores;
+ cip->total_threads += vc->num_threads;
+ cip->subcore_threads[sub] = vc->num_threads;
+ cip->subcore_vm[sub] = vc->kvm;
+ init_master_vcore(vc);
+ list_del(&vc->preempt_list);
+ list_add_tail(&vc->preempt_list, &cip->vcs[sub]);
+
+ return true;
+}
+
+static bool can_piggyback_subcore(struct kvmppc_vcore *pvc,
+ struct core_info *cip, int sub)
+{
+ struct kvmppc_vcore *vc;
+ int n_thr;
+
+ vc = list_first_entry(&cip->vcs[sub], struct kvmppc_vcore,
+ preempt_list);
+
+ /* require same VM and same per-core reg values */
+ if (pvc->kvm != vc->kvm ||
+ pvc->tb_offset != vc->tb_offset ||
+ pvc->pcr != vc->pcr ||
+ pvc->lpcr != vc->lpcr)
+ return false;
+
+ /* P8 guest with > 1 thread per core would see wrong TIR value */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+ (vc->num_threads > 1 || pvc->num_threads > 1))
+ return false;
+
+ n_thr = cip->subcore_threads[sub] + pvc->num_threads;
+ if (n_thr > cip->max_subcore_threads) {
+ if (!subcore_config_ok(cip->n_subcores, n_thr))
+ return false;
+ cip->max_subcore_threads = n_thr;
+ }
+
+ cip->total_threads += pvc->num_threads;
+ cip->subcore_threads[sub] = n_thr;
+ pvc->master_vcore = vc;
+ list_del(&pvc->preempt_list);
+ list_add_tail(&pvc->preempt_list, &cip->vcs[sub]);
+
+ return true;
+}
+
+/*
+ * Work out whether it is possible to piggyback the execution of
+ * vcore *pvc onto the execution of the other vcores described in *cip.
+ */
+static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
+ int target_threads)
+{
+ int sub;
+
+ if (cip->total_threads + pvc->num_threads > target_threads)
+ return false;
+ for (sub = 0; sub < cip->n_subcores; ++sub)
+ if (cip->subcore_threads[sub] &&
+ can_piggyback_subcore(pvc, cip, sub))
+ return true;
+
+ if (can_dynamic_split(pvc, cip))
+ return true;
+
+ return false;
+}
+
static void prepare_threads(struct kvmppc_vcore *vc)
{
struct kvm_vcpu *vcpu, *vnext;
@@ -1909,12 +2212,45 @@ static void prepare_threads(struct kvmppc_vcore *vc)
}
}
-static void post_guest_process(struct kvmppc_vcore *vc)
+static void collect_piggybacks(struct core_info *cip, int target_threads)
+{
+ struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+ struct kvmppc_vcore *pvc, *vcnext;
+
+ spin_lock(&lp->lock);
+ list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
+ if (!spin_trylock(&pvc->lock))
+ continue;
+ prepare_threads(pvc);
+ if (!pvc->n_runnable) {
+ list_del_init(&pvc->preempt_list);
+ if (pvc->runner == NULL) {
+ pvc->vcore_state = VCORE_INACTIVE;
+ kvmppc_core_end_stolen(pvc);
+ }
+ spin_unlock(&pvc->lock);
+ continue;
+ }
+ if (!can_piggyback(pvc, cip, target_threads)) {
+ spin_unlock(&pvc->lock);
+ continue;
+ }
+ kvmppc_core_end_stolen(pvc);
+ pvc->vcore_state = VCORE_PIGGYBACK;
+ if (cip->total_threads >= target_threads)
+ break;
+ }
+ spin_unlock(&lp->lock);
+}
+
+static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
{
+ int still_running = 0;
u64 now;
long ret;
struct kvm_vcpu *vcpu, *vnext;
+ spin_lock(&vc->lock);
now = get_tb();
list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
@@ -1933,17 +2269,36 @@ static void post_guest_process(struct kvmppc_vcore *vc)
vcpu->arch.ret = ret;
vcpu->arch.trap = 0;
- if (vcpu->arch.ceded) {
- if (!is_kvmppc_resume_guest(ret))
- kvmppc_end_cede(vcpu);
- else
+ if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
+ if (vcpu->arch.pending_exceptions)
+ kvmppc_core_prepare_to_enter(vcpu);
+ if (vcpu->arch.ceded)
kvmppc_set_timer(vcpu);
- }
- if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
+ else
+ ++still_running;
+ } else {
kvmppc_remove_runnable(vc, vcpu);
wake_up(&vcpu->arch.cpu_run);
}
}
+ list_del_init(&vc->preempt_list);
+ if (!is_master) {
+ if (still_running > 0) {
+ kvmppc_vcore_preempt(vc);
+ } else if (vc->runner) {
+ vc->vcore_state = VCORE_PREEMPT;
+ kvmppc_core_start_stolen(vc);
+ } else {
+ vc->vcore_state = VCORE_INACTIVE;
+ }
+ if (vc->n_runnable > 0 && vc->runner == NULL) {
+ /* make sure there's a candidate runner awake */
+ vcpu = list_first_entry(&vc->runnable_threads,
+ struct kvm_vcpu, arch.run_list);
+ wake_up(&vcpu->arch.cpu_run);
+ }
+ }
+ spin_unlock(&vc->lock);
}
/*
@@ -1955,6 +2310,15 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
struct kvm_vcpu *vcpu, *vnext;
int i;
int srcu_idx;
+ struct core_info core_info;
+ struct kvmppc_vcore *pvc, *vcnext;
+ struct kvm_split_mode split_info, *sip;
+ int split, subcore_size, active;
+ int sub;
+ bool thr0_done;
+ unsigned long cmd_bit, stat_bit;
+ int pcpu, thr;
+ int target_threads;
/*
* Remove from the list any threads that have a signal pending
@@ -1969,11 +2333,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
/*
* Initialize *vc.
*/
- vc->entry_exit_map = 0;
+ init_master_vcore(vc);
vc->preempt_tb = TB_NIL;
- vc->in_guest = 0;
- vc->napping_threads = 0;
- vc->conferring_threads = 0;
/*
* Make sure we are running on primary threads, and that secondary
@@ -1991,24 +2352,120 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
goto out;
}
+ /*
+ * See if we could run any other vcores on the physical core
+ * along with this one.
+ */
+ init_core_info(&core_info, vc);
+ pcpu = smp_processor_id();
+ target_threads = threads_per_subcore;
+ if (target_smt_mode && target_smt_mode < target_threads)
+ target_threads = target_smt_mode;
+ if (vc->num_threads < target_threads)
+ collect_piggybacks(&core_info, target_threads);
+
+ /* Decide on micro-threading (split-core) mode */
+ subcore_size = threads_per_subcore;
+ cmd_bit = stat_bit = 0;
+ split = core_info.n_subcores;
+ sip = NULL;
+ if (split > 1) {
+ /* threads_per_subcore must be MAX_SMT_THREADS (8) here */
+ if (split == 2 && (dynamic_mt_modes & 2)) {
+ cmd_bit = HID0_POWER8_1TO2LPAR;
+ stat_bit = HID0_POWER8_2LPARMODE;
+ } else {
+ split = 4;
+ cmd_bit = HID0_POWER8_1TO4LPAR;
+ stat_bit = HID0_POWER8_4LPARMODE;
+ }
+ subcore_size = MAX_SMT_THREADS / split;
+ sip = &split_info;
+ memset(&split_info, 0, sizeof(split_info));
+ split_info.rpr = mfspr(SPRN_RPR);
+ split_info.pmmar = mfspr(SPRN_PMMAR);
+ split_info.ldbar = mfspr(SPRN_LDBAR);
+ split_info.subcore_size = subcore_size;
+ for (sub = 0; sub < core_info.n_subcores; ++sub)
+ split_info.master_vcs[sub] =
+ list_first_entry(&core_info.vcs[sub],
+ struct kvmppc_vcore, preempt_list);
+ /* order writes to split_info before kvm_split_mode pointer */
+ smp_wmb();
+ }
+ pcpu = smp_processor_id();
+ for (thr = 0; thr < threads_per_subcore; ++thr)
+ paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
+
+ /* Initiate micro-threading (split-core) if required */
+ if (cmd_bit) {
+ unsigned long hid0 = mfspr(SPRN_HID0);
+
+ hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
+ mb();
+ mtspr(SPRN_HID0, hid0);
+ isync();
+ for (;;) {
+ hid0 = mfspr(SPRN_HID0);
+ if (hid0 & stat_bit)
+ break;
+ cpu_relax();
+ }
+ }
- vc->pcpu = smp_processor_id();
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
- kvmppc_start_thread(vcpu);
- kvmppc_create_dtl_entry(vcpu, vc);
- trace_kvm_guest_enter(vcpu);
+ /* Start all the threads */
+ active = 0;
+ for (sub = 0; sub < core_info.n_subcores; ++sub) {
+ thr = subcore_thread_map[sub];
+ thr0_done = false;
+ active |= 1 << thr;
+ list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list) {
+ pvc->pcpu = pcpu + thr;
+ list_for_each_entry(vcpu, &pvc->runnable_threads,
+ arch.run_list) {
+ kvmppc_start_thread(vcpu, pvc);
+ kvmppc_create_dtl_entry(vcpu, pvc);
+ trace_kvm_guest_enter(vcpu);
+ if (!vcpu->arch.ptid)
+ thr0_done = true;
+ active |= 1 << (thr + vcpu->arch.ptid);
+ }
+ /*
+ * We need to start the first thread of each subcore
+ * even if it doesn't have a vcpu.
+ */
+ if (pvc->master_vcore == pvc && !thr0_done)
+ kvmppc_start_thread(NULL, pvc);
+ thr += pvc->num_threads;
+ }
}
- /* Set this explicitly in case thread 0 doesn't have a vcpu */
- get_paca()->kvm_hstate.kvm_vcore = vc;
- get_paca()->kvm_hstate.ptid = 0;
+ /*
+ * Ensure that split_info.do_nap is set after setting
+ * the vcore pointer in the PACA of the secondaries.
+ */
+ smp_mb();
+ if (cmd_bit)
+ split_info.do_nap = 1; /* ask secondaries to nap when done */
+
+ /*
+ * When doing micro-threading, poke the inactive threads as well.
+ * This gets them to the nap instruction after kvm_do_nap,
+ * which reduces the time taken to unsplit later.
+ */
+ if (split > 1)
+ for (thr = 1; thr < threads_per_subcore; ++thr)
+ if (!(active & (1 << thr)))
+ kvmppc_ipi_thread(pcpu + thr);
vc->vcore_state = VCORE_RUNNING;
preempt_disable();
trace_kvmppc_run_core(vc, 0);
- spin_unlock(&vc->lock);
+ for (sub = 0; sub < core_info.n_subcores; ++sub)
+ list_for_each_entry(pvc, &core_info.vcs[sub], preempt_list)
+ spin_unlock(&pvc->lock);
kvm_guest_enter();
@@ -2019,32 +2476,58 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
__kvmppc_vcore_entry();
- spin_lock(&vc->lock);
-
if (vc->mpp_buffer)
kvmppc_start_saving_l2_cache(vc);
- /* disable sending of IPIs on virtual external irqs */
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
- vcpu->cpu = -1;
- /* wait for secondary threads to finish writing their state to memory */
- kvmppc_wait_for_nap();
- for (i = 0; i < threads_per_subcore; ++i)
- kvmppc_release_hwthread(vc->pcpu + i);
+ srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+
+ spin_lock(&vc->lock);
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
vc->vcore_state = VCORE_EXITING;
- spin_unlock(&vc->lock);
- srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+ /* wait for secondary threads to finish writing their state to memory */
+ kvmppc_wait_for_nap();
+
+ /* Return to whole-core mode if we split the core earlier */
+ if (split > 1) {
+ unsigned long hid0 = mfspr(SPRN_HID0);
+ unsigned lo