Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus

* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus: (27 commits)
  lguest: use __PAGE_KERNEL instead of _PAGE_KERNEL
  lguest: Use explicit includes rateher than indirect
  lguest: get rid of lg variable assignments
  lguest: change gpte_addr header
  lguest: move changed bitmap to lg_cpu
  lguest: move last_pages to lg_cpu
  lguest: change last_guest to last_cpu
  lguest: change spte_addr header
  lguest: per-vcpu lguest pgdir management
  lguest: make pending notifications per-vcpu
  lguest: makes special fields be per-vcpu
  lguest: per-vcpu lguest task management
  lguest: replace lguest_arch with lg_cpu_arch.
  lguest: make registers per-vcpu
  lguest: make emulate_insn receive a vcpu struct.
  lguest: map_switcher_in_guest() per-vcpu
  lguest: per-vcpu interrupt processing.
  lguest: per-vcpu lguest timers
  lguest: make hypercalls use the vcpu struct
  lguest: make write() operation smp aware
  ...

Manual conflict resolved (maybe even correctly, who knows) in
drivers/lguest/x86/core.c
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-01-31 09:35:32 +1100
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-01-31 09:35:32 +1100
commit: d145c7253c8cb2ed8a75a8839621b0bb8f778820 (patch)
tree: fac21920d149a2cddfdfbde65066ff98935a9c57
parent: 44c3b59102e3ecc7a01e9811862633e670595e51 (diff)
parent: 84f12e39c856a8b1ab407f8216ecebaf4204b94d (diff)
13 files changed, 571 insertions, 455 deletions
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 9b0e322118b5..6c8a2386cd50 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -79,6 +79,9 @@ static void *guest_base;
 /* The maximum guest physical address allowed, and maximum possible. */
 static unsigned long guest_limit, guest_max;
 
+/* a per-cpu variable indicating whose vcpu is currently running */
+static unsigned int __thread cpu_id;
+
 /* This is our list of devices. */
 struct device_list
 {
@@ -153,6 +156,9 @@ struct virtqueue
 	void (*handle_output)(int fd, struct virtqueue *me);
 };
 
+/* Remember the arguments to the program so we can "reboot" */
+static char **main_args;
+
 /* Since guest is UP and we don't run at the same time, we don't need barriers.
  * But I include them in the code in case others copy it. */
 #define wmb()
@@ -554,7 +560,7 @@ static void wake_parent(int pipefd, int lguest_fd)
 			else
 				FD_CLR(-fd - 1, &devices.infds);
 		} else /* Send LHREQ_BREAK command. */
-			write(lguest_fd, args, sizeof(args));
+			pwrite(lguest_fd, args, sizeof(args), cpu_id);
 	}
 }
 
@@ -1489,7 +1495,9 @@ static void setup_block_file(const char *filename)
 
 	/* Create stack for thread and run it */
 	stack = malloc(32768);
-	if (clone(io_thread, stack + 32768, CLONE_VM, dev) == -1)
+	/* SIGCHLD - We dont "wait" for our cloned thread, so prevent it from
+	 * becoming a zombie. */
+	if (clone(io_thread, stack + 32768,  CLONE_VM | SIGCHLD, dev) == -1)
 		err(1, "Creating clone");
 
 	/* We don't need to keep the I/O thread's end of the pipes open. */
@@ -1499,7 +1507,21 @@ static void setup_block_file(const char *filename)
 	verbose("device %u: virtblock %llu sectors\n",
 		devices.device_num, cap);
 }
-/* That's the end of device setup. */
+/* That's the end of device setup. :*/
+
+/* Reboot */
+static void __attribute__((noreturn)) restart_guest(void)
+{
+	unsigned int i;
+
+	/* Closing pipes causes the waker thread and io_threads to die, and
+	 * closing /dev/lguest cleans up the Guest.  Since we don't track all
+	 * open fds, we simply close everything beyond stderr. */
+	for (i = 3; i < FD_SETSIZE; i++)
+		close(i);
+	execv(main_args[0], main_args);
+	err(1, "Could not exec %s", main_args[0]);
+}
 
 /*L:220 Finally we reach the core of the Launcher, which runs the Guest, serves
  * its input and output, and finally, lays it to rest. */
@@ -1511,7 +1533,8 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
 		int readval;
 
 		/* We read from the /dev/lguest device to run the Guest. */
-		readval = read(lguest_fd, &notify_addr, sizeof(notify_addr));
+		readval = pread(lguest_fd, &notify_addr,
+				sizeof(notify_addr), cpu_id);
 
 		/* One unsigned long means the Guest did HCALL_NOTIFY */
 		if (readval == sizeof(notify_addr)) {
@@ -1521,16 +1544,23 @@ static void __attribute__((noreturn)) run_guest(int lguest_fd)
 		/* ENOENT means the Guest died.  Reading tells us why. */
 		} else if (errno == ENOENT) {
 			char reason[1024] = { 0 };
-			read(lguest_fd, reason, sizeof(reason)-1);
+			pread(lguest_fd, reason, sizeof(reason)-1, cpu_id);
 			errx(1, "%s", reason);
+		/* ERESTART means that we need to reboot the guest */
+		} else if (errno == ERESTART) {
+			restart_guest();
 		/* EAGAIN means the Waker wanted us to look at some input.
 		 * Anything else means a bug or incompatible change. */
 		} else if (errno != EAGAIN)
 			err(1, "Running guest failed");
 
+		/* Only service input on thread for CPU 0. */
+		if (cpu_id != 0)
+			continue;
+
 		/* Service input, then unset the BREAK to release the Waker. */
 		handle_input(lguest_fd);
-		if (write(lguest_fd, args, sizeof(args)) < 0)
+		if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0)
 			err(1, "Resetting break");
 	}
 }
@@ -1571,6 +1601,12 @@ int main(int argc, char *argv[])
 	/* If they specify an initrd file to load. */
 	const char *initrd_name = NULL;
 
+	/* Save the args: we "reboot" by execing ourselves again. */
+	main_args = argv;
+	/* We don't "wait" for the children, so prevent them from becoming
+	 * zombies. */
+	signal(SIGCHLD, SIG_IGN);
+
 	/* First we initialize the device list.  Since console and network
 	 * device receive input from a file descriptor, we keep an fdset
 	 * (infds) and the maximum fd number (max_infd) with the head of the
@@ -1582,6 +1618,7 @@ int main(int argc, char *argv[])
 	devices.lastdev = &devices.dev;
 	devices.next_irq = 1;
 
+	cpu_id = 0;
 	/* We need to know how much memory so we can set up the device
 	 * descriptor and memory pages for the devices as we parse the command
 	 * line.  So we quickly look through the arguments to find the amount
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a63373759f08..5afdde4895dc 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -67,6 +67,7 @@
 #include <asm/mce.h>
 #include <asm/io.h>
 #include <asm/i387.h>
+#include <asm/reboot.h>		/* for struct machine_ops */
 
 /*G:010 Welcome to the Guest!
  *
@@ -813,7 +814,7 @@ static void lguest_safe_halt(void)
  * rather than virtual addresses, so we use __pa() here. */
 static void lguest_power_off(void)
 {
-	hcall(LHCALL_CRASH, __pa("Power down"), 0, 0);
+	hcall(LHCALL_SHUTDOWN, __pa("Power down"), LGUEST_SHUTDOWN_POWEROFF, 0);
 }
 
 /*
@@ -823,7 +824,7 @@ static void lguest_power_off(void)
  */
 static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
 {
-	hcall(LHCALL_CRASH, __pa(p), 0, 0);
+	hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0);
 	/* The hcall won't return, but to keep gcc happy, we're "done". */
 	return NOTIFY_DONE;
 }
@@ -927,6 +928,11 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
 	return insn_len;
 }
 
+static void lguest_restart(char *reason)
+{
+	hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0);
+}
+
 /*G:030 Once we get to lguest_init(), we know we're a Guest.  The pv_ops
  * structures in the kernel provide points for (almost) every routine we have
  * to override to avoid privileged instructions. */
@@ -1060,6 +1066,7 @@ __init void lguest_init(void)
 	 * the Guest routine to power off. */
 	pm_power_off = lguest_power_off;
 
+	machine_ops.restart = lguest_restart;
 	/* Now we're set up, call start_kernel() in init/main.c and we proceed
 	 * to boot as normal.  It never returns. */
 	start_kernel();
diff --git a/drivers/Makefile b/drivers/Makefile
index 9e1f808e43cf..0ee9a8a4095e 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -72,7 +72,7 @@ obj-$(CONFIG_ISDN)		+= isdn/
 obj-$(CONFIG_EDAC)		+= edac/
 obj-$(CONFIG_MCA)		+= mca/
 obj-$(CONFIG_EISA)		+= eisa/
-obj-$(CONFIG_LGUEST_GUEST)	+= lguest/
+obj-y				+= lguest/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
 obj-$(CONFIG_MMC)		+= mmc/
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index cb4c67025d52..7743d73768df 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -151,43 +151,43 @@ int lguest_address_ok(const struct lguest *lg,
 /* This routine copies memory from the Guest.  Here we can see how useful the
  * kill_lguest() routine we met in the Launcher can be: we return a random
  * value (all zeroes) instead of needing to return an error. */
-void __lgread(struct lguest *lg, void *b, unsigned long addr, unsigned bytes)
+void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes)
 {
-	if (!lguest_address_ok(lg, addr, bytes)
-	    || copy_from_user(b, lg->mem_base + addr, bytes) != 0) {
+	if (!lguest_address_ok(cpu->lg, addr, bytes)
+	    || copy_from_user(b, cpu->lg->mem_base + addr, bytes) != 0) {
 		/* copy_from_user should do this, but as we rely on it... */
 		memset(b, 0, bytes);
-		kill_guest(lg, "bad read address %#lx len %u", addr, bytes);
+		kill_guest(cpu, "bad read address %#lx len %u", addr, bytes);
 	}
 }
 
 /* This is the write (copy into guest) version. */
-void __lgwrite(struct lguest *lg, unsigned long addr, const void *b,
+void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b,
 	       unsigned bytes)
 {
-	if (!lguest_address_ok(lg, addr, bytes)
-	    || copy_to_user(lg->mem_base + addr, b, bytes) != 0)
-		kill_guest(lg, "bad write address %#lx len %u", addr, bytes);
+	if (!lguest_address_ok(cpu->lg, addr, bytes)
+	    || copy_to_user(cpu->lg->mem_base + addr, b, bytes) != 0)
+		kill_guest(cpu, "bad write address %#lx len %u", addr, bytes);
 }
 /*:*/
 
 /*H:030 Let's jump straight to the the main loop which runs the Guest.
  * Remember, this is called by the Launcher reading /dev/lguest, and we keep
  * going around and around until something interesting happens. */
-int run_guest(struct lguest *lg, unsigned long __user *user)
+int run_guest(struct lg_cpu *cpu, unsigned long __user *user)
 {
 	/* We stop running once the Guest is dead. */
-	while (!lg->dead) {
+	while (!cpu->lg->dead) {
 		/* First we run any hypercalls the Guest wants done. */
-		if (lg->hcall)
-			do_hypercalls(lg);
+		if (cpu->hcall)
+			do_hypercalls(cpu);
 
 		/* It's possible the Guest did a NOTIFY hypercall to the
 		 * Launcher, in which case we return from the read() now. */
-		if (lg->pending_notify) {
-			if (put_user(lg->pending_notify, user))
+		if (cpu->pending_notify) {
+			if (put_user(cpu->pending_notify, user))
 				return -EFAULT;
-			return sizeof(lg->pending_notify);
+			return sizeof(cpu->pending_notify);
 		}
 
 		/* Check for signals */
@@ -195,13 +195,13 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
 			return -ERESTARTSYS;
 
 		/* If Waker set break_out, return to Launcher. */
-		if (lg->break_out)
+		if (cpu->break_out)
 			return -EAGAIN;
 
 		/* Check if there are any interrupts which can be delivered
 		 * now: if so, this sets up the hander to be executed when we
 		 * next run the Guest. */
-		maybe_do_interrupt(lg);
+		maybe_do_interrupt(cpu);
 
 		/* All long-lived kernel loops need to check with this horrible
 		 * thing called the freezer.  If the Host is trying to suspend,
@@ -210,12 +210,12 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
 
 		/* Just make absolutely sure the Guest is still alive.  One of
 		 * those hypercalls could have been fatal, for example. */
-		if (lg->dead)
+		if (cpu->lg->dead)
 			break;
 
 		/* If the Guest asked to be stopped, we sleep.  The Guest's
 		 * clock timer or LHCALL_BREAK from the Waker will wake us. */
-		if (lg->halted) {
+		if (cpu->halted) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			schedule();
 			continue;
@@ -226,15 +226,17 @@ int run_guest(struct lguest *lg, unsigned long __user *user)
 		local_irq_disable();
 
 		/* Actually run the Guest until something happens. */
-		lguest_arch_run_guest(lg);
+		lguest_arch_run_guest(cpu);
 
 		/* Now we're ready to be interrupted or moved to other CPUs */
 		local_irq_enable();
 
 		/* Now we deal with whatever happened to the Guest. */
-		lguest_arch_handle_trap(lg);
+		lguest_arch_handle_trap(cpu);
 	}
 
+	if (cpu->lg->dead == ERR_PTR(-ERESTART))
+		return -ERESTART;
 	/* The Guest is dead => "No such file or directory" */
 	return -ENOENT;
 }
@@ -253,7 +255,7 @@ static int __init init(void)
 
 	/* Lguest can't run under Xen, VMI or itself.  It does Tricky Stuff. */
 	if (paravirt_enabled()) {
-		printk("lguest is afraid of %s\n", pv_info.name);
+		printk("lguest is afraid of being a guest\n");
 		return -EPERM;
 	}
 
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index b478affe8f91..0f2cb4fd7c69 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -23,13 +23,14 @@
 #include <linux/uaccess.h>
 #include <linux/syscalls.h>
 #include <linux/mm.h>
+#include <linux/ktime.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include "lg.h"
 
 /*H:120 This is the core hypercall routine: where the Guest gets what it wants.
  * Or gets killed.  Or, in the case of LHCALL_CRASH, both. */
-static void do_hcall(struct lguest *lg, struct hcall_args *args)
+static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
 {
 	switch (args->arg0) {
 	case LHCALL_FLUSH_ASYNC:
@@ -39,60 +40,62 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
 	case LHCALL_LGUEST_INIT:
 		/* You can't get here unless you're already initialized.  Don't
 		 * do that. */
-		kill_guest(lg, "already have lguest_data");
+		kill_guest(cpu, "already have lguest_data");
 		break;
-	case LHCALL_CRASH: {
-		/* Crash is such a trivial hypercall that we do it in four
+	case LHCALL_SHUTDOWN: {
+		/* Shutdown is such a trivial hypercall that we do it in four
 		 * lines right here. */
 		char msg[128];
 		/* If the lgread fails, it will call kill_guest() itself; the
 		 * kill_guest() with the message will be ignored. */
-		__lgread(lg, msg, args->arg1, sizeof(msg));
+		__lgread(cpu, msg, args->arg1, sizeof(msg));
 		msg[sizeof(msg)-1] = '\0';
-		kill_guest(lg, "CRASH: %s", msg);
+		kill_guest(cpu, "CRASH: %s", msg);
+		if (args->arg2 == LGUEST_SHUTDOWN_RESTART)
+			cpu->lg->dead = ERR_PTR(-ERESTART);
 		break;
 	}
 	case LHCALL_FLUSH_TLB:
 		/* FLUSH_TLB comes in two flavors, depending on the
 		 * argument: */
 		if (args->arg1)
-			guest_pagetable_clear_all(lg);
+			guest_pagetable_clear_all(cpu);
 		else
-			guest_pagetable_flush_user(lg);
+			guest_pagetable_flush_user(cpu);
 		break;
 
 	/* All these calls simply pass the arguments through to the right
 	 * routines. */
 	case LHCALL_NEW_PGTABLE:
-		guest_new_pagetable(lg, args->arg1);
+		guest_new_pagetable(cpu, args->arg1);
 		break;
 	case LHCALL_SET_STACK:
-		guest_set_stack(lg, args->arg1, args->arg2, args->arg3);
+		guest_set_stack(cpu, args->arg1, args->arg2, args->arg3);
 		break;
 	case LHCALL_SET_PTE:
-		guest_set_pte(lg, args->arg1, args->arg2, __pte(args->arg3));
+		guest_set_pte(cpu, args->arg1, args->arg2, __pte(args->arg3));
 		break;
 	case LHCALL_SET_PMD:
-		guest_set_pmd(lg, args->arg1, args->arg2);
+		guest_set_pmd(cpu->lg, args->arg1, args->arg2);
 		break;
 	case LHCALL_SET_CLOCKEVENT:
-		guest_set_clockevent(lg, args->arg1);
+		guest_set_clockevent(cpu, args->arg1);
 		break;
 	case LHCALL_TS:
 		/* This sets the TS flag, as we saw used in run_guest(). */
-		lg->ts = args->arg1;
+		cpu->ts = args->arg1;
 		break;
 	case LHCALL_HALT:
 		/* Similarly, this sets the halted flag for run_guest(). */
-		lg->halted = 1;
+		cpu->halted = 1;
 		break;
 	case LHCALL_NOTIFY:
-		lg->pending_notify = args->arg1;
+		cpu->pending_notify = args->arg1;
 		break;
 	default:
 		/* It should be an architecture-specific hypercall. */
-		if (lguest_arch_do_hcall(lg, args))
-			kill_guest(lg, "Bad hypercall %li\n", args->arg0);
+		if (lguest_arch_do_hcall(cpu, args))
+			kill_guest(cpu, "Bad hypercall %li\n", args->arg0);
 	}
 }
 /*:*/
@@ -104,13 +107,13 @@ static void do_hcall(struct lguest *lg, struct hcall_args *args)
  * Guest put them in the ring, but we also promise the Guest that they will
  * happen before any normal hypercall (which is why we check this before
  * checking for a normal hcall). */
-static void do_async_hcalls(struct lguest *lg)
+static void do_async_hcalls(struct lg_cpu *cpu)
 {
 	unsigned int i;
 	u8 st[LHCALL_RING_SIZE];
 
 	/* For simplicity, we copy the entire call status array in at once. */
-	if (copy_from_user(&st, &lg->lguest_data->hcall_status, sizeof(st)))
+	if (copy_from_user(&st, &cpu->lg->lguest_data->hcall_status, sizeof(st)))
 		return;
 
 	/* We process "struct lguest_data"s hcalls[] ring once. */
@@ -119,7 +122,7 @@ static void do_async_hcalls(struct lguest *lg)
 		/* We remember where we were up to from last time.  This makes
 		 * sure that the hypercalls are done in the order the Guest
 		 * places them in the ring. */
-		unsigned int n = lg->next_hcall;
+		unsigned int n = cpu->next_hcall;
 
 		/* 0xFF means there's no call here (yet). */
 		if (st[n] == 0xFF)
@@ -127,65 +130,65 @@ static void do_async_hcalls(struct lguest *lg)
 
 		/* OK, we have hypercall.  Increment the "next_hcall" cursor,
 		 * and wrap back to 0 if we reach the end. */
-		if (++lg->next_hcall == LHCALL_RING_SIZE)
-			lg->next_hcall = 0;
+		if (++cpu->next_hcall == LHCALL_RING_SIZE)
+			cpu->next_hcall = 0;
 
 		/* Copy the hypercall arguments into a local copy of
 		 * the hcall_args struct. */
-		if (copy_from_user(&args, &lg->lguest_data->hcalls[n],
+		if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n],
 				   sizeof(struct hcall_args))) {
-			kill_guest(lg, "Fetching async hypercalls");
+			kill_guest(cpu, "Fetching async hypercalls");
 			break;
 		}
 
 		/* Do the hypercall, same as a normal one. */
-		do_hcall(lg, &args);
+		do_hcall(cpu, &args);
 
 		/* Mark the hypercall done. */
-		if (put_user(0xFF, &lg->lguest_data->hcall_status[n])) {
-			kill_guest(lg, "Writing result for async hypercall");
+		if (put_user(0xFF, &cpu->lg->lguest_data->hcall_status[n])) {
+			kill_guest(cpu, "Writing result for async hypercall");
 			break;
 		}
 
 		/* Stop doing hypercalls if they want to notify the Launcher:
 		 * it needs to service this first. */
-		if (lg->pending_notify)
+		if (cpu->pending_notify)
 			break;
 	}
 }
 
 /* Last of all, we look at what happens first of all.  The very first time the
  * Guest makes a hypercall, we end up here to set things up: */
-static void initialize(struct lguest *lg)
+static void initialize(struct lg_cpu *cpu)
 {
 	/* You can't do anything until you're initialized.  The Guest knows the
 	 * rules, so we're unforgiving here. */
-	if (lg->hcall->arg0 != LHCALL_LGUEST_INIT) {
-		kill_guest(lg, "hypercall %li before INIT", lg->hcall->arg0);
+	if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) {
+		kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0);
 		return;
 	}
 
-	if (lguest_arch_init_hypercalls(lg))
-		kill_guest(lg, "bad guest page %p", lg->lguest_data);
+	if (lguest_arch_init_hypercalls(cpu))
+		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
 
 	/* The Guest tells us where we're not to deliver interrupts by putting
 	 * the range of addresses into "struct lguest_data". */
-	if (get_user(lg->noirq_start, &lg->lguest_data->noirq_start)
-	    || get_user(lg->noirq_end, &lg->lguest_data->noirq_end))
-		kill_guest(lg, "bad guest page %p", lg->lguest_data);
+	if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start)
+	    || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end))
+		kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
 
 	/* We write the current time into the Guest's data page once so it can
 	 * set its clock. */
-	write_timestamp(lg);
+	write_timestamp(cpu);
 
 	/* page_tables.c will also do some setup. */
-	page_table_guest_data_init(lg);
+	page_table_guest_data_init(cpu);
 
 	/* This is the one case where the above accesses might have been the
 	 * first write to a Guest page.  This may have caused a copy-on-write
 	 * fault, but the old page might be (read-only) in the Guest
 	 * pagetable. */
-	guest_pagetable_clear_all(lg);
+	guest_pagetable_clear_all(cpu);
 }
 
 /*H:100
@@ -194,27 +197,27 @@ static void initialize(struct lguest *lg)
  * Remember from the Guest, hypercalls come in two flavors: normal and
  * asynchronous.  This file handles both of types.
  */
-void do_hypercalls(struct lguest *lg)
+void do_hypercalls(struct lg_cpu *cpu)
 {
 	/* Not initialized yet?  This hypercall must do it. */
-	if (unlikely(!lg->lguest_data)) {
+	if (unlikely(!cpu->lg->lguest_data)) {
 		/* Set up the "struct lguest_data" */
-		initialize(lg);
+		initialize(cpu);
 		/* Hcall is done. */
-		lg->hcall = NULL;
+		cpu->hcall = NULL;
 		return;
 	}
 
 	/* The Guest has initialized.
 	 *
 	 * Look in the hypercall ring for the async hypercalls: */
-	do_async_hcalls(lg);
+	do_async_hcalls(cpu);
 
 	/* If we stopped reading the hypercall ring because the Guest did a
 	 * NOTIFY to the Launcher, we want to return now.  Otherwise we do
 	 * the hypercall. */
-	if (!lg->pending_notify) {
-		do_hcall(lg, lg->hcall);
+	if (!cpu->pending_notify) {
+		do_hcall(cpu, cpu->hcall);
 		/* Tricky point: we reset the hcall pointer to mark the
 		 * hypercall as "done".  We use the hcall pointer rather than
 		 * the trap number to indicate a hypercall is pending.
@@ -225,16 +228,17 @@ void do_hypercalls(struct lguest *lg)
 		 * Launcher, the run_guest() loop will exit without running the
 		 * Guest.  When it comes back it would try to re-run the
 		 * hypercall. */
-		lg->hcall = NULL;
+		cpu->hcall = NULL;
 	}
 }
 
 /* This routine supplies the Guest with time: it's used for wallclock time at
  * initial boot and as a rough time source if the TSC isn't available. */
-void write_timestamp(struct lguest *lg)
+void write_timestamp(struct lg_cpu *cpu)
 {
 	struct timespec now;
 	ktime_get_real_ts(&now);
-	if (copy_to_user(&lg->lguest_data->time, &now, sizeof(struct timespec)))
-		kill_guest(lg, "Writing timestamp");
+	if (copy_to_user(&cpu->lg->lguest_data->time,
+			 &now, sizeof(struct timespec)))
+		kill_guest(cpu, "Writing timestamp");
 }
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 2b66f79c208b..32e97c1858e5 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -41,11 +41,11 @@ static int idt_present(u32 lo, u32 hi)
 
 /* We need a helper to "push" a value onto the Guest's stack, since that's a
  * big part of what delivering an interrupt does. */
-static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
+static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val)
 {
 	/* Stack grows upwards: move stack then write value. */
 	*gstack -= 4;
-	lgwrite(lg, *gstack, u32, val);
+	lgwrite(cpu, *gstack, u32, val);
 }
 
 /*H:210 The set_guest_interrupt() routine actually delivers the interrupt or
@@ -60,7 +60,7 @@ static void push_guest_stack(struct lguest *lg, unsigned long *gstack, u32 val)
  * We set up the stack just like the CPU does for a real interrupt, so it's
  * identical for the Guest (and the standard "iret" instruction will undo
  * it). */
-static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
+static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err)
 {
 	unsigned long gstack, origstack;
 	u32 eflags, ss, irq_enable;
@@ -69,59 +69,59 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
 	/* There are two cases for interrupts: one where the Guest is already
 	 * in the kernel, and a more complex one where the Guest is in
 	 * userspace.  We check the privilege level to find out. */
-	if ((lg->regs->ss&0x3) != GUEST_PL) {
+	if ((cpu->regs->ss&0x3) != GUEST_PL) {
 		/* The Guest told us their kernel stack with the SET_STACK
 		 * hypercall: both the virtual address and the segment */
-		virtstack = lg->esp1;
-		ss = lg->ss1;
+		virtstack = cpu->esp1;
+		ss = cpu->ss1;
 
-		origstack = gstack = guest_pa(lg, virtstack);
+		origstack = gstack = guest_pa(cpu, virtstack);
 		/* We push the old stack segment and pointer onto the new
 		 * stack: when the Guest does an "iret" back from the interrupt
 		 * handler the CPU will notice they're dropping privilege
 		 * levels and expect these here. */
-		push_guest_stack(lg, &gstack, lg->regs->ss);
-		push_guest_stack(lg, &gstack, lg->regs->esp);
+		push_guest_stack(cpu, &gstack, cpu->regs->ss);
+		push_guest_stack(cpu, &gstack, cpu->regs->esp);
 	} else {
 		/* We're staying on the same Guest (kernel) stack. */
-		virtstack = lg->regs->esp;
-		ss = lg->regs->ss;
+		virtstack = cpu->regs->esp;
+		ss = cpu->regs->ss;
 
-		origstack = gstack = guest_pa(lg, virtstack);
+		origstack = gstack = guest_pa(cpu, virtstack);
 	}
 
 	/* Remember that we never let the Guest actually disable interrupts, so
 	 * the "Interrupt Flag" bit is always set.  We copy that bit from the
 	 * Guest's "irq_enabled" field into the eflags word: we saw the Guest
 	 * copy it back in "lguest_iret". */
-	eflags = lg->regs->eflags;
-	if (get_user(irq_enable, &lg->lguest_data->irq_enabled) == 0
+	eflags = cpu->regs->eflags;
+	if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0
 	    && !(irq_enable & X86_EFLAGS_IF))
 		eflags &= ~X86_EFLAGS_IF;
 
 	/* An interrupt is expected to push three things on the stack: the old
 	 * "eflags" word, the old code segment, and the old instruction
 	 * pointer. */
-	push_guest_stack(lg, &gstack, eflags);
-	push_guest_stack(lg, &gstack, lg->regs->cs);
-	push_guest_stack(lg, &gstack, lg->regs->eip);
+	push_guest_stack(cpu, &gstack, eflags);
+	push_guest_stack(cpu, &gstack, cpu->regs->cs);
+	push_guest_stack(cpu, &gstack, cpu->regs->eip);
 
 	/* For the six traps which supply an error code, we push that, too. */
 	if (has_err)
-		push_guest_stack(lg, &gstack, lg->regs->errcode);
+		push_guest_stack(cpu, &gstack, cpu->regs->errcode);
 
 	/* Now we've pushed all the old state, we change the stack, the code
 	 * segment and the address to execute. */
-	lg->regs->ss = ss;
-	lg->regs->esp = virtstack + (gstack - origstack);
-	lg->regs->cs = (__KERNEL_CS|GUEST_PL);
-	lg->regs->eip = idt_address(lo, hi);
+	cpu->regs->ss = ss;
+	cpu->regs->esp = virtstack + (gstack - origstack);
+	cpu->regs->cs = (__KERNEL_CS|GUEST_PL);
+	cpu->regs->eip = idt_address(lo, hi);
 
 	/* There are two kinds of interrupt handlers: 0xE is an "interrupt
 	 * gate" which expects interrupts to be disabled on entry. */
 	if (idt_type(lo, hi) == 0xE)
-		if (put_user(0, &lg->lguest_data->irq_enabled))
-			kill_guest(lg, "Disabling interrupts");
+		if (put_user(0, &cpu->lg->lguest_data->irq_enabled))
+			kill_guest(cpu, "Disabling interrupts");
 }
 
 /*H:205
@@ -129,23 +129,23 @@ static void set_guest_interrupt(struct lguest *lg, u32 lo, u32 hi, int has_err)
  *
  * maybe_do_interrupt() gets called before every entry to the Guest, to see if
  * we should divert the Guest to running an interrupt handler. */
-void maybe_do_interrupt(struct lguest *lg)
+void maybe_do_interrupt(struct lg_cpu *cpu)
 {
 	unsigned int irq;
 	DECLARE_BITMAP(blk, LGUEST_IRQS);
 	struct desc_struct *idt;
 
 	/* If the Guest hasn't even initialized yet, we can do nothing. */
-	if (!lg->lguest_data)
+	if (!cpu->lg->lguest_data)
 		return;
 
 	/* Take our "irqs_pending" array and remove any interrupts the Guest
 	 * wants blocked: the result ends up in "blk". */
-	if (copy_from_user(&blk, lg->lguest_data->blocked_interrupts,
+	if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts,
 			   sizeof(blk)))
 		return;
 
-	bitmap_andnot(blk, lg->irqs_pending, blk, LGUEST_IRQS);
+	bitmap_andnot(blk, cpu->irqs_pending, blk, LGUEST_IRQS);
 
 	/* Find the first interrupt. */
 	irq = find_first_bit(blk, LGUEST_IRQS);
@@ -155,19 +155,20 @@ void maybe_do_interrupt(struct lguest *lg)
 
 	/* They may be in the middle of an iret, where they asked us never to
 	 * deliver interrupts. */
-	if (lg->regs->eip >= lg->noirq_start && lg->regs->eip < lg->noirq_end)
+	if (cpu->regs->eip >= cpu->lg->noirq_start &&
+	   (cpu->regs->eip < cpu->lg->noirq_end))
 		return;
 
 	/* If they're halted, interrupts restart them. */
-	if (lg->halted) {
+	if (cpu->halted) {
 		/* Re-enable interrupts. */
-		if (put_user(X86_EFLAGS_IF, &lg->lguest_data->irq_enabled))
-			kill_guest(lg, "Re-enabling interrupts");
-		lg->halted = 0;
+		if (put_user(X86_EFLAGS_IF, &cpu->lg->lguest_data->irq_enabled))
+			kill_guest(cpu, "Re-enabling interrupts");
+		cpu->halted = 0;
 	} else {
 		/* Otherwise we check if they have interrupts disabled. */
 		u32 irq_enabled;
-		if (get_user(irq_enabled, &lg->lguest_data->irq_enabled))
+		if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
 			irq_enabled = 0;
 		if (!irq_enabled)
 			return;
@@ -176,15 +177,15 @@ void maybe_do_interrupt(struct lguest *lg)
 	/* Look at the IDT entry the Guest gave us for this interrupt.  The
 	 * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip
 	 * over them. */
-	idt = &lg->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
+	idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq];
 	/* If they don't have a handler (yet?), we just ignore it */
 	if (idt_present(idt->a, idt->b)) {
 		/* OK, mark it no longer pending and deliver it. */
-		clear_bit(irq, lg->irqs_pending);
+		clear_bit(irq, cpu->irqs_pending);
 		/* set_guest_interrupt() takes the interrupt descriptor and a
 		 * flag to say whether this interrupt pushes an error code onto
 		 * the stack as well: virtual interrupts never do. */
-		set_guest_interrupt(lg, idt->a, idt->b, 0);
+		set_guest_interrupt(cpu, idt->a, idt->b, 0);
 	}
 
 	/* Every time we deliver an interrupt, we update the timestamp in the
@@ -192,7 +193,7 @@ void maybe_do_interrupt(struct lguest *lg)
 	 * did this more often, but it can actually be quite slow: doing it
 	 * here is a compromise which means at least it gets updated every
 	 * timer interrupt. */
-	write_timestamp(lg);
+	write_timestamp(cpu);
 }
 /*:*/
 
@@ -245,19 +246,19 @@ static int has_err(unsigned int trap)
 }
 
 /* deliver_trap() returns true if it could deliver the trap. */
-int deliver_trap(struct lguest *lg, unsigned int num)
+int deliver_trap(struct lg_cpu *cpu, unsigned int num)
 {
 	/* Trap numbers are always 8 bit, but we set an impossible trap number
 	 * for traps inside the Switcher, so check that here. */
-	if (num >= ARRAY_SIZE(lg->arch.idt))
+	if (num >= ARRAY_SIZE(cpu->arch.idt))
 		return 0;
 
 	/* Early on the Guest hasn't set the IDT entries (or maybe it put a
 	 * bogus one in): if we fail here, the Guest will be killed. */
-	if (!idt_present(lg->arch.idt[num].a, lg->arch.idt[num].b))
+	if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b))
 		return 0;
-	set_guest_interrupt(lg, lg->arch.idt[num].a, lg->arch.idt[num].b,
-			    has_err(num));
+	set_guest_interrupt(cpu, cpu->arch.idt[num].a,
+			    cpu->arch.idt[num].b, has_err(num));
 	return 1;
 }
 
@@ -309,18 +310,18 @@ static int direct_trap(unsigned int num)
  * the Guest.
  *
  * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. */
-void pin_stack_pages(struct lguest *lg)
+void pin_stack_pages(struct lg_cpu *cpu)
 {
 	unsigned int i;
 
 	/* Depending on the CONFIG_4KSTACKS option, the Guest can have one or
 	 * two pages of stack space. */
-	for (i = 0; i < lg->stack_pages; i++)
+	for (i = 0; i < cpu->lg->stack_pages; i++)
 		/* The stack grows *upwards*, so the address we're given is the
 		 * start of the page after the kernel stack.  Subtract one to
 		 * get back onto the first stack page, and keep subtracting to
 		 * get to the rest of the stack pages. */
-		pin_page(lg, lg->esp1 - 1 - i * PAGE_SIZE);
+		pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE);
 }
 
 /* Direct traps also mean that we need to know whenever the Guest wants to use
@@ -331,21 +332,21 @@ void pin_stack_pages(struct lguest *lg)
  *
  * In Linux each process has its own kernel stack, so this happens a lot: we
  * change stacks on each context switch. */
-void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages)
+void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages)
 {
 	/* You are not allowed have a stack segment with privilege level 0: bad
 	 * Guest! */
 	if ((seg & 0x3) != GUEST_PL)
-		kill_guest(lg, "bad stack segment %i", seg);
+		kill_guest(cpu, "bad stack segment %i", seg);
 	/* We only expect one or two stack pages. */
 	if (pages > 2)
-		kill_guest(lg, "bad stack pages %u", pages);
+		kill_guest(cpu, "bad stack pages %u", pages);
 	/* Save where the stack is, and how many pages */
-	lg->ss1 = seg;
-	lg->esp1 = esp;
-	lg->stack_pages = pages;
+	cpu->ss1 = seg;
+	cpu->esp1 = esp;
+	cpu->lg->stack_pages = pages;
 	/* Make sure the new stack pages are mapped */
-	pin_stack_pages(lg);
+	pin_stack_pages(cpu);
 }
 
 /* All this reference to mapping stacks leads us neatly into the other complex
@@ -353,7 +354,7 @@ void guest_set_stack(struct lguest *lg, u32 seg, u32 esp, unsigned int pages)
author	Linus Torvalds <torvalds@linux-foundation.org>	2008-01-31 09:35:32 +1100
committer	Linus Torvalds <torvalds@linux-foundation.org>	2008-01-31 09:35:32 +1100
commit	d145c7253c8cb2ed8a75a8839621b0bb8f778820 (patch)
tree	fac21920d149a2cddfdfbde65066ff98935a9c57
parent	44c3b59102e3ecc7a01e9811862633e670595e51 (diff)
parent	84f12e39c856a8b1ab407f8216ecebaf4204b94d (diff)