KVM: Emulate hlt in the kernel

By sleeping in the kernel when hlt is executed, we simplify the in-kernel
guest interrupt path considerably.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
diff --git a/drivers/kvm/i8259.c b/drivers/kvm/i8259.c
index 40ad104..ee6030d 100644
--- a/drivers/kvm/i8259.c
+++ b/drivers/kvm/i8259.c
@@ -413,8 +413,11 @@
 static void pic_irq_request(void *opaque, int level)
 {
 	struct kvm *kvm = opaque;
+	struct kvm_vcpu *vcpu = kvm->vcpus[0];
 
 	pic_irqchip(kvm)->output = level;
+	if (vcpu)
+		kvm_vcpu_kick(vcpu);
 }
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm)
diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c
index 5265f82..e09cd65 100644
--- a/drivers/kvm/irq.c
+++ b/drivers/kvm/irq.c
@@ -70,6 +70,10 @@
 {
 	int ipi_pcpu = vcpu->cpu;
 
+	if (waitqueue_active(&vcpu->wq)) {
+		wake_up_interruptible(&vcpu->wq);
+		++vcpu->stat.halt_wakeup;
+	}
 	if (vcpu->guest_mode)
 		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
 }
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 8d07a99..bb506b7 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -231,6 +231,7 @@
 	u32 signal_exits;
 	u32 irq_window_exits;
 	u32 halt_exits;
+	u32 halt_wakeup;
 	u32 request_irq_exits;
 	u32 irq_exits;
 	u32 light_exits;
@@ -353,6 +354,7 @@
 	gva_t mmio_fault_cr2;
 	struct kvm_pio_request pio;
 	void *pio_data;
+	wait_queue_head_t wq;
 
 	int sigset_active;
 	sigset_t sigset;
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index ffbdadd8..4384364 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -76,6 +76,7 @@
 	{ "signal_exits", STAT_OFFSET(signal_exits) },
 	{ "irq_window", STAT_OFFSET(irq_window_exits) },
 	{ "halt_exits", STAT_OFFSET(halt_exits) },
+	{ "halt_wakeup", STAT_OFFSET(halt_wakeup) },
 	{ "request_irq", STAT_OFFSET(request_irq_exits) },
 	{ "irq_exits", STAT_OFFSET(irq_exits) },
 	{ "light_exits", STAT_OFFSET(light_exits) },
@@ -248,6 +249,7 @@
 	vcpu->mmu.root_hpa = INVALID_PAGE;
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
+	init_waitqueue_head(&vcpu->wq);
 
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page) {
@@ -1307,15 +1309,41 @@
 }
 EXPORT_SYMBOL_GPL(emulate_instruction);
 
+/*
+ * The vCPU has executed a HLT instruction with in-kernel mode enabled.
+ */
+static void kvm_vcpu_kernel_halt(struct kvm_vcpu *vcpu)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(&vcpu->wq, &wait);
+
+	/*
+	 * We will block until either an interrupt or a signal wakes us up
+	 */
+	while(!(irqchip_in_kernel(vcpu->kvm) && kvm_cpu_has_interrupt(vcpu))
+	      && !vcpu->irq_summary
+	      && !signal_pending(current)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		vcpu_put(vcpu);
+		schedule();
+		vcpu_load(vcpu);
+	}
+
+	remove_wait_queue(&vcpu->wq, &wait);
+	set_current_state(TASK_RUNNING);
+}
+
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->irq_summary ||
-		(irqchip_in_kernel(vcpu->kvm) && kvm_cpu_has_interrupt(vcpu)))
-		return 1;
-
-	vcpu->run->exit_reason = KVM_EXIT_HLT;
 	++vcpu->stat.halt_exits;
-	return 0;
+	if (irqchip_in_kernel(vcpu->kvm)) {
+		kvm_vcpu_kernel_halt(vcpu);
+		return 1;
+	} else {
+		vcpu->run->exit_reason = KVM_EXIT_HLT;
+		return 0;
+	}
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
@@ -2916,6 +2944,7 @@
 
 		switch (ext) {
 		case KVM_CAP_IRQCHIP:
+		case KVM_CAP_HLT:
 			r = 1;
 			break;
 		default:
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index d576451..a347b61 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1398,9 +1398,12 @@
 static void post_kvm_run_save(struct vcpu_svm *svm,
 			      struct kvm_run *kvm_run)
 {
-	kvm_run->ready_for_interrupt_injection
-		= (svm->vcpu.interrupt_window_open &&
-		   svm->vcpu.irq_summary == 0);
+	if (irqchip_in_kernel(svm->vcpu.kvm))
+		kvm_run->ready_for_interrupt_injection = 1;
+	else
+		kvm_run->ready_for_interrupt_injection =
+					 (svm->vcpu.interrupt_window_open &&
+					  svm->vcpu.irq_summary == 0);
 	kvm_run->if_flag = (svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = get_cr8(&svm->vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(&svm->vcpu);
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index c4cc17c..7ec8cf8 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1961,8 +1961,12 @@
 	kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
 	kvm_run->cr8 = get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
-	kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
-						  vcpu->irq_summary == 0);
+	if (irqchip_in_kernel(vcpu->kvm))
+		kvm_run->ready_for_interrupt_injection = 1;
+	else
+		kvm_run->ready_for_interrupt_injection =
+					(vcpu->interrupt_window_open &&
+					 vcpu->irq_summary == 0);
 }
 
 static int handle_interrupt_window(struct kvm_vcpu *vcpu,
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 997bb3e..b0a13d1 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -283,6 +283,7 @@
  * Extension capability list.
  */
 #define KVM_CAP_IRQCHIP	  0
+#define KVM_CAP_HLT	  1
 
 /*
  * ioctls for VM fds