]> git.hungrycats.org Git - linux/commitdiff
kvm/x86: Avoid async PF preempting the kernel incorrectly
authorBoqun Feng <boqun.feng@gmail.com>
Tue, 3 Oct 2017 13:36:51 +0000 (21:36 +0800)
committerBen Hutchings <ben@decadent.org.uk>
Sun, 26 Nov 2017 13:50:41 +0000 (13:50 +0000)
commit a2b7861bb33b2538420bb5d8554153484d3f961f upstream.

Currently, in PREEMPT_COUNT=n kernel, kvm_async_pf_task_wait() could call
schedule() to reschedule in some cases.  This could result in
accidentally ending the current RCU read-side critical section early,
causing random memory corruption in the guest, or otherwise preempting
the currently running task inside between preempt_disable and
preempt_enable.

The difficulty to handle this well is because we don't know whether an
async PF delivered in a preemptible section or RCU read-side critical section
for PREEMPT_COUNT=n, since preempt_disable()/enable() and rcu_read_lock/unlock()
are both no-ops in that case.

To cure this, we treat any async PF interrupting a kernel context as one
that cannot be preempted, preventing kvm_async_pf_task_wait() from choosing
the schedule() path in that case.

To do so, a second parameter for kvm_async_pf_task_wait() is introduced,
so that we know whether it's called from a context interrupting the
kernel, and the parameter is set properly in all the callsites.

Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
[bwh: Backported to 3.16:
 - Use user_mode_vm() as equivalent to upstream user_mode()
 - Adjust filename, context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
arch/x86/include/asm/kvm_para.h
arch/x86/kernel/kvm.c
arch/x86/kvm/svm.c

index e62cf897f7819bc9795ab096a15689b3543e07b2..9b06bd31db6a1a5c2d65644b059a7e5403692d0f 100644 (file)
@@ -95,7 +95,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 bool kvm_para_available(void);
 unsigned int kvm_arch_para_features(void);
 void __init kvm_guest_init(void);
-void kvm_async_pf_task_wait(u32 token);
+void kvm_async_pf_task_wait(u32 token, int interrupt_kernel);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
 extern void kvm_disable_steal_time(void);
@@ -110,7 +110,7 @@ static inline void kvm_spinlock_init(void)
 
 #else /* CONFIG_KVM_GUEST */
 #define kvm_guest_init() do {} while (0)
-#define kvm_async_pf_task_wait(T) do {} while(0)
+#define kvm_async_pf_task_wait(T, I) do {} while(0)
 #define kvm_async_pf_task_wake(T) do {} while(0)
 
 static inline bool kvm_para_available(void)
index 3ec44283b71d265338e7ebf7b036fb2165fbdbe7..26e265e3e8d770531ff7d4e5d57113c64d1f8bfc 100644 (file)
@@ -116,7 +116,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
        return NULL;
 }
 
-void kvm_async_pf_task_wait(u32 token)
+/*
+ * @interrupt_kernel: Is this called from a routine which interrupts the kernel
+ *                   (other than user space)?
+ */
+void kvm_async_pf_task_wait(u32 token, int interrupt_kernel)
 {
        u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
        struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
@@ -139,8 +143,10 @@ void kvm_async_pf_task_wait(u32 token)
 
        n.token = token;
        n.cpu = smp_processor_id();
-       n.halted = is_idle_task(current) || preempt_count() > 1 ||
-                  rcu_preempt_depth();
+       n.halted = is_idle_task(current) ||
+                  (IS_ENABLED(CONFIG_PREEMPT_COUNT)
+                   ? preempt_count() > 1 || rcu_preempt_depth()
+                   : interrupt_kernel);
        init_waitqueue_head(&n.wq);
        hlist_add_head(&n.link, &b->list);
        spin_unlock(&b->lock);
@@ -269,7 +275,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
                /* page is swapped out by the host. */
                prev_state = exception_enter();
                exit_idle();
-               kvm_async_pf_task_wait((u32)read_cr2());
+               kvm_async_pf_task_wait((u32)read_cr2(), !user_mode_vm(regs));
                exception_exit(prev_state);
                break;
        case KVM_PV_REASON_PAGE_READY:
index 99601569acae91e8e57e212addef1bb854ae55d4..2003a07de9ad89041bf01403cfc3e1861bff9941 100644 (file)
@@ -1718,7 +1718,7 @@ static int pf_interception(struct vcpu_svm *svm)
        case KVM_PV_REASON_PAGE_NOT_PRESENT:
                svm->apf_reason = 0;
                local_irq_disable();
-               kvm_async_pf_task_wait(fault_address);
+               kvm_async_pf_task_wait(fault_address, 0);
                local_irq_enable();
                break;
        case KVM_PV_REASON_PAGE_READY: