37 files changed, 2296 insertions, 229 deletions
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 601f64fcc890..fdb8f3e10b6f 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -178,6 +178,20 @@ config IBM_BSR
 	  of threads across a large system which avoids bouncing a cacheline
 	  between several cores on a system
 
+config POWERNV_OP_PANEL
+	tristate "IBM POWERNV Operator Panel Display support"
+	depends on PPC_POWERNV
+	default m
+	help
+	  If you say Y here, a special character device node, /dev/op_panel,
+	  will be created which exposes the operator panel display on IBM
+	  Power Systems machines with FSPs.
+
+	  If you don't require access to the operator panel display from user
+	  space, say N.
+
+	  If unsure, say M here to build it as a module called powernv-op-panel.
+
 source "drivers/char/ipmi/Kconfig"
 
 config DS1620
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index d8a7579300d2..55d16bf3ccc5 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -60,3 +60,4 @@ js-rtc-y = rtc.o
 
 obj-$(CONFIG_TILE_SROM)		+= tile-srom.o
 obj-$(CONFIG_XILLYBUS)		+= xillybus/
+obj-$(CONFIG_POWERNV_OP_PANEL)	+= powernv-op-panel.o
diff --git a/drivers/char/powernv-op-panel.c b/drivers/char/powernv-op-panel.c
new file mode 100644
index 000000000000..a45dabcc8e10
--- /dev/null
+++ b/drivers/char/powernv-op-panel.c
@@ -0,0 +1,223 @@
+/*
+ * OPAL Operator Panel Display Driver
+ *
+ * Copyright 2016, Suraj Jitindar Singh, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/miscdevice.h>
+
+#include <asm/opal.h>
+
+/*
+ * This driver creates a character device (/dev/op_panel) which exposes the
+ * operator panel (character LCD display) on IBM Power Systems machines
+ * with FSPs.
+ * A character buffer written to the device will be displayed on the
+ * operator panel.
+ */
+
+static DEFINE_MUTEX(oppanel_mutex);
+
+static u32		num_lines, oppanel_size;
+static oppanel_line_t	*oppanel_lines;
+static char		*oppanel_data;
+
+static loff_t oppanel_llseek(struct file *filp, loff_t offset, int whence)
+{
+	return fixed_size_llseek(filp, offset, whence, oppanel_size);
+}
+
+static ssize_t oppanel_read(struct file *filp, char __user *userbuf, size_t len,
+			    loff_t *f_pos)
+{
+	return simple_read_from_buffer(userbuf, len, f_pos, oppanel_data,
+			oppanel_size);
+}
+
+static int __op_panel_update_display(void)
+{
+	struct opal_msg msg;
+	int token, rc;
+
+	/* An async OPAL call needs a token; -ERESTARTSYS is not logged. */
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		if (token != -ERESTARTSYS)
+			pr_debug("Couldn't get OPAL async token [token=%d]\n",
+				token);
+		return token;
+	}
+
+	/* Pass the line descriptors to firmware. */
+	rc = opal_write_oppanel_async(token, oppanel_lines, num_lines);
+	if (rc == OPAL_ASYNC_COMPLETION) {
+		/* Queued: wait for the response and adopt its status. */
+		rc = opal_async_wait_response(token, &msg);
+		if (rc) {
+			pr_debug("Failed to wait for async response [rc=%d]\n",
+				rc);
+			goto release;
+		}
+		rc = opal_get_async_rc(msg);
+		if (rc != OPAL_SUCCESS)
+			pr_debug("OPAL async call returned failed [rc=%d]\n",
+				rc);
+	} else if (rc != OPAL_SUCCESS) {
+		pr_debug("OPAL write op-panel call failed [rc=%d]\n", rc);
+	}
+
+release:
+	/* The token is handed back on every path that obtained one. */
+	opal_async_release_token(token);
+	return rc;
+}
+
+static ssize_t oppanel_write(struct file *filp, const char __user *userbuf,
+			     size_t len, loff_t *f_pos)
+{
+	const loff_t start = *f_pos;
+	ssize_t copied;
+	int rc;
+
+	/* Writing from offset 0 first blanks the whole buffer with spaces. */
+	if (start == 0)
+		memset(oppanel_data, ' ', oppanel_size);
+	else if (start >= oppanel_size)
+		return -EFBIG;
+
+	copied = simple_write_to_buffer(oppanel_data, oppanel_size, f_pos,
+			userbuf, len);
+	if (copied <= 0)
+		return copied;
+	rc = __op_panel_update_display();
+	if (rc == OPAL_SUCCESS)
+		return copied;
+	pr_err_ratelimited("OPAL call failed to write to op panel display [rc=%d]\n",
+		rc);
+	*f_pos = start;	/* undo the offset advance; buffer contents stay */
+	return -EIO;
+}
+
+static int oppanel_open(struct inode *inode, struct file *filp)
+{
+	/* Exclusive open: the mutex taken here is dropped in oppanel_release. */
+	if (mutex_trylock(&oppanel_mutex))
+		return 0;
+	pr_debug("Device Busy\n");
+	return -EBUSY;
+}
+
+static int oppanel_release(struct inode *inode, struct file *filp)
+{
+	mutex_unlock(&oppanel_mutex);
+	return 0;
+}
+
+static const struct file_operations oppanel_fops = {
+	.owner		= THIS_MODULE,
+	.llseek		= oppanel_llseek,
+	.read		= oppanel_read,
+	.write		= oppanel_write,
+	.open		= oppanel_open,
+	.release	= oppanel_release
+};
+
+static struct miscdevice oppanel_dev = {
+	.minor		= MISC_DYNAMIC_MINOR,
+	.name		= "op_panel",
+	.fops		= &oppanel_fops
+};
+
+static int oppanel_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	u32 line_len, i;
+	int rc;
+
+	rc = of_property_read_u32(np, "#length", &line_len);
+	if (rc) {
+		pr_err_ratelimited("Operator panel length property not found\n");
+		return rc;
+	}
+	rc = of_property_read_u32(np, "#lines", &num_lines);
+	if (rc) {
+		pr_err_ratelimited("Operator panel lines property not found\n");
+		return rc;
+	}
+	/* Refuse an empty panel or a size that would overflow the u32 total. */
+	if (!line_len || !num_lines || line_len > U32_MAX / num_lines)
+		return -EINVAL;
+	oppanel_size = line_len * num_lines;
+	pr_devel("Operator panel of size %u found with %u lines of length %u\n",
+			oppanel_size, num_lines, line_len);
+
+	oppanel_data = kcalloc(oppanel_size, sizeof(*oppanel_data), GFP_KERNEL);
+	if (!oppanel_data)
+		return -ENOMEM;
+	oppanel_lines = kcalloc(num_lines, sizeof(oppanel_line_t), GFP_KERNEL);
+	if (!oppanel_lines) {
+		rc = -ENOMEM;
+		goto free_oppanel_data;
+	}
+	/* Start blank; each descriptor holds one row's physical address. */
+	memset(oppanel_data, ' ', oppanel_size);
+	for (i = 0; i < num_lines; i++) {
+		oppanel_lines[i].line_len = cpu_to_be64(line_len);
+		oppanel_lines[i].line = cpu_to_be64(__pa(&oppanel_data[i *
+						line_len]));
+	}
+
+	rc = misc_register(&oppanel_dev);
+	if (rc) {
+		pr_err_ratelimited("Failed to register as misc device\n");
+		goto free_oppanel;
+	}
+	return 0;
+
+free_oppanel:
+	kfree(oppanel_lines);
+free_oppanel_data:
+	kfree(oppanel_data);
+	return rc;
+}
+
+static int oppanel_remove(struct platform_device *pdev)
+{
+	misc_deregister(&oppanel_dev);
+	kfree(oppanel_lines);
+	kfree(oppanel_data);
+	return 0;
+}
+
+static const struct of_device_id oppanel_match[] = {
+	{ .compatible = "ibm,opal-oppanel" },
+	{ },
+};
+
+static struct platform_driver oppanel_driver = {
+	.driver	= {
+		.name		= "powernv-op-panel",
+		.of_match_table	= oppanel_match,
+	},
+	.probe	= oppanel_probe,
+	.remove	= oppanel_remove,
+};
+
+module_platform_driver(oppanel_driver);
+
+MODULE_DEVICE_TABLE(of, oppanel_match);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("PowerNV Operator Panel LCD Display Driver");
+MODULE_AUTHOR("Suraj Jitindar Singh <sjitindarsingh@gmail.com>");
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index e12dc30d8864..f7ca891b5b59 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -20,7 +20,7 @@
 #include <asm/opal.h>
 #include <asm/runlatch.h>
 
-#define MAX_POWERNV_IDLE_STATES	8
+#define POWERNV_THRESHOLD_LATENCY_NS 200000
 
 struct cpuidle_driver powernv_idle_driver = {
 	.name             = "powernv_idle",
@@ -29,6 +29,9 @@ struct cpuidle_driver powernv_idle_driver = {
 
 static int max_idle_state;
 static struct cpuidle_state *cpuidle_state_table;
+
+static u64 stop_psscr_table[CPUIDLE_STATE_MAX];
+
 static u64 snooze_timeout;
 static bool snooze_timeout_en;
 
@@ -93,16 +96,27 @@ static int fastsleep_loop(struct cpuidle_device *dev,
 	return index;
 }
 #endif
+
+static int stop_loop(struct cpuidle_device *dev,
+		     struct cpuidle_driver *drv,
+		     int index)
+{
+	ppc64_runlatch_off();
+	power9_idle_stop(stop_psscr_table[index]);
+	ppc64_runlatch_on();
+	return index;
+}
+
 /*
  * States for dedicated partition case.
  */
-static struct cpuidle_state powernv_states[MAX_POWERNV_IDLE_STATES] = {
+static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = {
 	{ /* Snooze */
 		.name = "snooze",
 		.desc = "snooze",
 		.exit_latency = 0,
 		.target_residency = 0,
-		.enter = &snooze_loop },
+		.enter = snooze_loop },
 };
 
 static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
@@ -168,7 +182,11 @@ static int powernv_add_idle_states(void)
 	struct device_node *power_mgt;
 	int nr_idle_states = 1; /* Snooze */
 	int dt_idle_states;
-	u32 *latency_ns, *residency_ns, *flags;
+	u32 latency_ns[CPUIDLE_STATE_MAX];
+	u32 residency_ns[CPUIDLE_STATE_MAX];
+	u32 flags[CPUIDLE_STATE_MAX];
+	u64 psscr_val[CPUIDLE_STATE_MAX];
+	const char *names[CPUIDLE_STATE_MAX];
 	int i, rc;
 
 	/* Currently we have snooze statically defined */
@@ -186,26 +204,55 @@ static int powernv_add_idle_states(void)
 		goto out;
 	}
 
-	flags = kzalloc(sizeof(*flags) * dt_idle_states, GFP_KERNEL);
+	/*
+	 * Snooze always occupies the first slot, so at most
+	 * CPUIDLE_STATE_MAX - 1 device-tree idle states can be used.
+	 */
+	if (dt_idle_states > CPUIDLE_STATE_MAX - 1) {
+		pr_warn("cpuidle-powernv: discovered idle states more than allowed\n");
+		dt_idle_states = CPUIDLE_STATE_MAX - 1;
+	}
+
 	if (of_property_read_u32_array(power_mgt,
 			"ibm,cpu-idle-state-flags", flags, dt_idle_states)) {
 		pr_warn("cpuidle-powernv : missing ibm,cpu-idle-state-flags in DT\n");
-		goto out_free_flags;
+		goto out;
 	}
 
-	latency_ns = kzalloc(sizeof(*latency_ns) * dt_idle_states, GFP_KERNEL);
-	rc = of_property_read_u32_array(power_mgt,
-		"ibm,cpu-idle-state-latencies-ns", latency_ns, dt_idle_states);
-	if (rc) {
+	if (of_property_read_u32_array(power_mgt,
+		"ibm,cpu-idle-state-latencies-ns", latency_ns,
+		dt_idle_states)) {
 		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-latencies-ns in DT\n");
-		goto out_free_latency;
+		goto out;
+	}
+	if (of_property_read_string_array(power_mgt,
+		"ibm,cpu-idle-state-names", names, dt_idle_states) < 0) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-names in DT\n");
+		goto out;
 	}
 
-	residency_ns = kzalloc(sizeof(*residency_ns) * dt_idle_states, GFP_KERNEL);
+	/*
+	 * Stop-instruction states need their PSSCR values to select the stop
+	 * level. Only the first state's flags decide whether to read them.
+	 */
+	if ((flags[0] & (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP)) &&
+	    of_property_read_u64_array(power_mgt, "ibm,cpu-idle-state-psscr",
+				       psscr_val, dt_idle_states)) {
+		pr_warn("cpuidle-powernv: missing ibm,cpu-idle-state-psscr in DT\n");
+		goto out;
+	}
+
 	rc = of_property_read_u32_array(power_mgt,
 		"ibm,cpu-idle-state-residency-ns", residency_ns, dt_idle_states);
 
 	for (i = 0; i < dt_idle_states; i++) {
+		/*
+		 * If an idle state has exit latency beyond
+		 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
+		 * in cpu-idle.
+		 */
+		if (latency_ns[i] > POWERNV_THRESHOLD_LATENCY_NS)
+			continue;
 
 		/*
 		 * Cpuidle accepts exit_latency and target_residency in us.
@@ -217,7 +264,17 @@ static int powernv_add_idle_states(void)
 			strcpy(powernv_states[nr_idle_states].desc, "Nap");
 			powernv_states[nr_idle_states].flags = 0;
 			powernv_states[nr_idle_states].target_residency = 100;
-			powernv_states[nr_idle_states].enter = &nap_loop;
+			powernv_states[nr_idle_states].enter = nap_loop;
+		} else if ((flags[i] & OPAL_PM_STOP_INST_FAST) &&
+				!(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
+			/* strlcpy always NUL-terminates; strncpy may not. */
+			strlcpy(powernv_states[nr_idle_states].name,
+				names[i], CPUIDLE_NAME_LEN);
+			strlcpy(powernv_states[nr_idle_states].desc,
+				names[i], CPUIDLE_NAME_LEN);
+			powernv_states[nr_idle_states].flags = 0;
+			powernv_states[nr_idle_states].enter = stop_loop;
+			stop_psscr_table[nr_idle_states] = psscr_val[i];
 		}
 
 		/*
@@ -232,7 +289,17 @@ static int powernv_add_idle_states(void)
 			strcpy(powernv_states[nr_idle_states].desc, "FastSleep");
 			powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
 			powernv_states[nr_idle_states].target_residency = 300000;
-			powernv_states[nr_idle_states].enter = &fastsleep_loop;
+			powernv_states[nr_idle_states].enter = fastsleep_loop;
+		} else if ((flags[i] & OPAL_PM_STOP_INST_DEEP) &&
+				(flags[i] & OPAL_PM_TIMEBASE_STOP)) {
+			/* strlcpy always NUL-terminates; strncpy may not. */
+			strlcpy(powernv_states[nr_idle_states].name,
+				names[i], CPUIDLE_NAME_LEN);
+			strlcpy(powernv_states[nr_idle_states].desc,
+				names[i], CPUIDLE_NAME_LEN);
+			powernv_states[nr_idle_states].flags = CPUIDLE_FLAG_TIMER_STOP;
+			powernv_states[nr_idle_states].enter = stop_loop;
+			stop_psscr_table[nr_idle_states] = psscr_val[i];
 		}
 #endif
 		powernv_states[nr_idle_states].exit_latency =
@@ -245,12 +312,6 @@ static int powernv_add_idle_states(void)
 
 		nr_idle_states++;
 	}
-
-	kfree(residency_ns);
-out_free_latency:
-	kfree(latency_ns);
-out_free_flags:
-	kfree(flags);
 out:
 	return nr_idle_states;
 }
diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig
index 89d8208d9851..a83ead109d5f 100644
--- a/drivers/crypto/vmx/Kconfig
+++ b/drivers/crypto/vmx/Kconfig
@@ -1,7 +1,7 @@
 config CRYPTO_DEV_VMX_ENCRYPT
 	tristate "Encryption acceleration support on P8 CPU"
 	depends on CRYPTO_DEV_VMX
-	default y
+	default m
 	help
 	  Support for VMX cryptographic acceleration instructions on Power8 CPU.
 	  This module supports acceleration for AES and GHASH in hardware. If you
diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c
index f688c32fbcc7..31a98dc6f849 100644
--- a/drivers/crypto/vmx/vmx.c
+++ b/drivers/crypto/vmx/vmx.c
@@ -23,6 +23,7 @@
 #include <linux/moduleparam.h>
 #include <linux/types.h>
 #include <linux/err.h>
+#include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <asm/cputable.h>
 #include <crypto/internal/hash.h>
@@ -45,9 +46,6 @@ int __init p8_init(void)
 	int ret = 0;
 	struct crypto_alg **alg_it;
 
-	if (!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO))
-		return -ENODEV;
-
 	for (alg_it = algs; *alg_it; alg_it++) {
 		ret = crypto_register_alg(*alg_it);
 		printk(KERN_INFO "crypto_register_alg '%s' = %d\n",
@@ -80,7 +78,7 @@ void __exit p8_exit(void)
 	crypto_unregister_shash(&p8_ghash_alg);
 }
 
-module_init(p8_init);
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, p8_init);
 module_exit(p8_exit);
 
 MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>");
diff --git a/drivers/i2c/busses/i2c-opal.c b/drivers/i2c/busses/i2c-opal.c
index 75dd6d041241..11e2a1fc10e9 100644
--- a/drivers/i2c/busses/i2c-opal.c
+++ b/drivers/i2c/busses/i2c-opal.c
@@ -71,7 +71,7 @@ static int i2c_opal_send_request(u32 bus_id, struct opal_i2c_request *req)
 	if (rc)
 		goto exit;
 
-	rc = be64_to_cpu(msg.params[1]);
+	rc = opal_get_async_rc(msg);
 	if (rc != OPAL_SUCCESS) {
 		rc = i2c_opal_translate_error(rc);
 		goto exit;
diff --git a/drivers/leds/leds-powernv.c b/drivers/leds/leds-powernv.c
index dfb8bd390125..b2a98c7b521b 100644
--- a/drivers/leds/leds-powernv.c
+++ b/drivers/leds/leds-powernv.c
@@ -118,7 +118,7 @@ static int powernv_led_set(struct powernv_led_data *powernv_led,
 		goto out_token;
 	}
 
-	rc = be64_to_cpu(msg.params[1]);
+	rc = opal_get_async_rc(msg);
 	if (rc != OPAL_SUCCESS)
 		dev_err(dev, "%s : OAPL async call returned failed [rc=%d]\n",
 			__func__, rc);
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index d531f804455d..d6f72c826c1c 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -38,6 +38,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/slab.h>
+#include <linux/memblock.h>
 
 #include <asm/byteorder.h>
 #include <asm/io.h>
@@ -99,6 +100,7 @@ static DEFINE_MUTEX(smu_mutex);
 static struct smu_device	*smu;
 static DEFINE_MUTEX(smu_part_access);
 static int smu_irq_inited;
+static unsigned long smu_cmdbuf_abs;
 
 static void smu_i2c_retry(unsigned long data);
 
@@ -479,8 +481,13 @@ int __init smu_init (void)
 
 	printk(KERN_INFO "SMU: Driver %s %s\n", VERSION, AUTHOR);
 
+	/*
+	 * SMU based G5s need some memory below 2GB. Thankfully this is
+	 * called at a time when memblock is still available.
+	 */
+	smu_cmdbuf_abs = memblock_alloc_base(4096, 4096, 0x80000000UL);
 	if (smu_cmdbuf_abs == 0) {
-		printk(KERN_ERR "SMU: Command buffer not allocated !\n");
+		printk(KERN_ERR "SMU: Command buffer allocation failed !\n");
 		ret = -EINVAL;
 		goto fail_np;
 	}
diff --git a/drivers/misc/cxl/Kconfig b/drivers/misc/cxl/Kconfig
index 8756d06e2bb8..b75cf830d08a 100644
--- a/drivers/misc/cxl/Kconfig
+++ b/drivers/misc/cxl/Kconfig
@@ -7,11 +7,7 @@ config CXL_BASE
 	default n
 	select PPC_COPRO_BASE
 
-config CXL_KERNEL_API
-	bool
-	default n
-
-config CXL_EEH
+config CXL_AFU_DRIVER_OPS
 	bool
 	default n
 
@@ -19,8 +15,7 @@ config CXL
 	tristate "Support for IBM Coherent Accelerators (CXL)"
 	depends on PPC_POWERNV && PCI_MSI && EEH
 	select CXL_BASE
-	select CXL_KERNEL_API
-	select CXL_EEH
+	select CXL_AFU_DRIVER_OPS
 	default m
 	help
 	  Select this option to enable driver support for IBM Coherent
@@ -33,3 +28,11 @@ config CXL
 	  CAPI adapters are found in POWER8 based systems.
 
 	  If unsure, say N.
+
+config CXL_BIMODAL
+	bool "Support for bi-modal CAPI cards"
+	depends on HOTPLUG_PCI_POWERNV = y && CXL || HOTPLUG_PCI_POWERNV = m && CXL = m
+	default y
+	help
+	  Select this option to enable support for bi-modal CAPI cards, such as
+	  the Mellanox CX-4.
diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile
index 8a55c1aa11aa..56e9a4732ef0 100644
--- a/drivers/misc/cxl/Makefile
+++ b/drivers/misc/cxl/Makefile
@@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC_WERROR)	+= -Werror
 
 cxl-y				+= main.o file.o irq.o fault.o native.o
 cxl-y				+= context.o sysfs.o debugfs.o pci.o trace.o
-cxl-y				+= vphb.o api.o
+cxl-y				+= vphb.o phb.o api.o
 cxl-$(CONFIG_PPC_PSERIES)	+= flash.o guest.o of.o hcalls.o
 obj-$(CONFIG_CXL)		+= cxl.o
 obj-$(CONFIG_CXL_BASE)		+= base.o
diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c
index 6d228ccd884d..f3d34b941f85 100644
--- a/drivers/misc/cxl/api.c
+++ b/drivers/misc/cxl/api.c
@@ -13,6 +13,8 @@
 #include <linux/file.h>
 #include <misc/cxl.h>
 #include <linux/fs.h>
+#include <asm/pnv-pci.h>
+#include <linux/msi.h>
 
 #include "cxl.h"
 
@@ -24,6 +26,8 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev)
 	int rc;
 
 	afu = cxl_pci_to_afu(dev);
+	if (IS_ERR(afu))
+		return ERR_CAST(afu);
 
 	ctx = cxl_context_alloc();
 	if (IS_ERR(ctx)) {
@@ -94,6 +98,42 @@ static irq_hw_number_t cxl_find_afu_irq(struct cxl_context *ctx, int num)
 	return 0;
 }
 
+int _cxl_next_msi_hwirq(struct pci_dev *pdev, struct cxl_context **ctx, int *afu_irq)
+{
+	/* Iterator step: a NULL ctx or irq 0 (re)starts at the device context. */
+	if (!*ctx || !*afu_irq) {
+		*ctx = cxl_get_context(pdev);
+		*afu_irq = 1;
+	} else if (++(*afu_irq) > cxl_get_max_irqs_per_process(pdev)) {
+		/* This context is used up: move to the next chained one. */
+		*ctx = list_next_entry(*ctx, extra_irq_contexts);
+		*afu_irq = 1;
+	}
+
+	return cxl_find_afu_irq(*ctx, *afu_irq);
+}
+/* Exported via cxl_base */
+
+int cxl_set_priv(struct cxl_context *ctx, void *priv)
+{
+	/* Store the caller's pointer in the context; NULL ctx is an error. */
+	if (ctx == NULL)
+		return -EINVAL;
+
+	ctx->priv = priv;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_set_priv);
+
+void *cxl_get_priv(struct cxl_context *ctx)
+{
+	/*
+	 * A NULL context is reported as ERR_PTR(-EINVAL) rather than NULL.
+	 */
+	return ctx ? ctx->priv : ERR_PTR(-EINVAL);
+}
+EXPORT_SYMBOL_GPL(cxl_get_priv);
+
 int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
 {
 	int res;
@@ -102,7 +142,10 @@ int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
 	if (num == 0)
 		num = ctx->afu->pp_irqs;
 	res = afu_allocate_irqs(ctx, num);
-	if (!res && !cpu_has_feature(CPU_FTR_HVMODE)) {
+	if (res)
+		return res;
+
+	if (!cpu_has_feature(CPU_FTR_HVMODE)) {
 		/* In a guest, the PSL interrupt is not multiplexed. It was
 		 * allocated above, and we need to set its handler
 		 */
@@ -110,6 +153,13 @@ int cxl_allocate_afu_irqs(struct cxl_context *ctx, int num)
 		if (hwirq)
 			cxl_map_irq(ctx->afu->adapter, hwirq, cxl_ops->psl_interrupt, ctx, "psl");
 	}
+
+	if (ctx->status == STARTED) {
+		if (cxl_ops->update_ivtes)
+			cxl_ops->update_ivtes(ctx);
+		else WARN(1, "BUG: cxl_allocate_afu_irqs must be called prior to starting the context on this platform\n");
+	}
+
 	return res;
 }
 EXPORT_SYMBOL_GPL(cxl_allocate_afu_irqs);
@@ -323,6 +373,23 @@ struct cxl_context *cxl_fops_get_context(struct file *file)
 }
 EXPORT_SYMBOL_GPL(cxl_fops_get_context);
 
+void cxl_set_driver_ops(struct cxl_context *ctx,
+			struct cxl_afu_driver_ops *ops)
+{
+	WARN_ON(!ops->fetch_event || !ops->event_delivered);
+	atomic_set(&ctx->afu_driver_events, 0);
+	ctx->afu_driver_ops = ops;
+}
+EXPORT_SYMBOL_GPL(cxl_set_driver_ops);
+
+void cxl_context_events_pending(struct cxl_context *ctx,
+				unsigned int new_events)
+{
+	atomic_add(new_events, &ctx->afu_driver_events);
+	wake_up_all(&ctx->wq);
+}
+EXPORT_SYMBOL_GPL(cxl_context_events_pending);
+
 int cxl_start_work(struct cxl_context *ctx,
 		   struct cxl_ioctl_start_work *work)
 {
@@ -390,7 +457,106 @@ EXPORT_SYMBOL_GPL(cxl_perst_reloads_same_image);
 ssize_t cxl_read_adapter_vpd(struct pci_dev *dev, void *buf, size_t count)
 {
 	struct cxl_afu *afu = cxl_pci_to_afu(dev);
+	if (IS_ERR(afu))
+		return -ENODEV;
 
 	return cxl_ops->read_adapter_vpd(afu->adapter, buf, count);
 }
 EXPORT_SYMBOL_GPL(cxl_read_adapter_vpd);
+
+int cxl_set_max_irqs_per_process(struct pci_dev *dev, int irqs)
+{
+	struct cxl_afu *afu = cxl_pci_to_afu(dev);
+	if (IS_ERR(afu))
+		return -ENODEV;
+
+	if (irqs > afu->adapter->user_irqs)
+		return -EINVAL;
+
+	/* Limit user_irqs to prevent the user increasing this via sysfs */
+	afu->adapter->user_irqs = irqs;
+	afu->irqs_max = irqs;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_set_max_irqs_per_process);
+
+int cxl_get_max_irqs_per_process(struct pci_dev *dev)
+{
+	struct cxl_afu *afu = cxl_pci_to_afu(dev);
+	if (IS_ERR(afu))
+		return -ENODEV;
+
+	return afu->irqs_max;
+}
+EXPORT_SYMBOL_GPL(cxl_get_max_irqs_per_process);
+
+/*
+ * This is a special interrupt allocation routine called from the PHB's MSI
+ * setup function. When capi interrupts are allocated in this manner they must
+ * still be associated with a running context, but since the MSI APIs have no
+ * way to specify this we use the default context associated with the device.
+ *
+ * The Mellanox CX4 has a hardware limitation that restricts the maximum AFU
+ * interrupt number, so in order to overcome this their driver informs us of
+ * the restriction by setting the maximum interrupts per context, and we
+ * allocate additional contexts as necessary so that we can keep the AFU
+ * interrupt number within the supported range.
+ */
+int _cxl_cx4_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+	struct cxl_context *ctx, *new_ctx, *default_ctx;
+	int remaining;
+	int rc;
+
+	ctx = default_ctx = cxl_get_context(pdev);
+	if (WARN_ON(!default_ctx))
+		return -ENODEV;
+
+	remaining = nvec;
+	while (remaining > 0) {
+		rc = cxl_allocate_afu_irqs(ctx, min(remaining, ctx->afu->irqs_max));
+		if (rc) {
+			pr_warn("%s: Failed to find enough free MSIs\n", pci_name(pdev));
+			return rc;
+		}
+		remaining -= ctx->afu->irqs_max;
+		/* Extra contexts are started with the default context's WED. */
+		if (ctx != default_ctx && default_ctx->status == STARTED) {
+			WARN_ON(cxl_start_context(ctx,
+				be64_to_cpu(default_ctx->elem->common.wed),
+				NULL));
+		}
+		/* cxl_dev_context_init() fails with an ERR_PTR; NULL tests miss it. */
+		if (remaining > 0) {
+			new_ctx = cxl_dev_context_init(pdev);
+			if (IS_ERR(new_ctx)) {
+				pr_warn("%s: Failed to allocate enough contexts for MSIs\n", pci_name(pdev));
+				return -ENOSPC;
+			}
+			list_add(&new_ctx->extra_irq_contexts, &ctx->extra_irq_contexts);
+			ctx = new_ctx;
+		}
+	}
+
+	return 0;
+}
+/* Exported via cxl_base */
+
+void _cxl_cx4_teardown_msi_irqs(struct pci_dev *pdev)
+{
+	struct cxl_context *ctx, *pos, *tmp;
+
+	ctx = cxl_get_context(pdev);
+	if (WARN_ON(!ctx))
+		return;
+	/* Device ctx: IRQs freed only. Chained ctxs: stop, free, unlink, release. */
+	cxl_free_afu_irqs(ctx);
+	list_for_each_entry_safe(pos, tmp, &ctx->extra_irq_contexts, extra_irq_contexts) {
+		cxl_stop_context(pos);
+		cxl_free_afu_irqs(pos);
+		list_del(&pos->extra_irq_contexts);
+		cxl_release_context(pos);
+	}
+}
+/* Exported via cxl_base */
diff --git a/drivers/misc/cxl/base.c b/drivers/misc/cxl/base.c
index 9b90ec6c07cd..cd54ce6f6230 100644
--- a/drivers/misc/cxl/base.c
+++ b/drivers/misc/cxl/base.c
@@ -54,6 +54,19 @@ static inline void cxl_calls_put(struct cxl_calls *calls) { }
 
 #endif /* CONFIG_CXL_MODULE */
 
+/* AFU refcount management */
+struct cxl_afu *cxl_afu_get(struct cxl_afu *afu)
+{
+	return (get_device(&afu->dev) == NULL) ? NULL : afu;
+}
+EXPORT_SYMBOL_GPL(cxl_afu_get);
+
+void cxl_afu_put(struct cxl_afu *afu)
+{
+	put_device(&afu->dev);
+}
+EXPORT_SYMBOL_GPL(cxl_afu_put);
+
 void cxl_slbia(struct mm_struct *mm)
 {
 	struct cxl_calls *calls;
@@ -93,9 +106,92 @@ int cxl_update_properties(struct device_node *dn,
 }
 EXPORT_SYMBOL_GPL(cxl_update_properties);
 
+/*
+ * API calls into the driver that may be called from the PHB code and must be
+ * built in.
+ */
+bool cxl_pci_associate_default_context(struct pci_dev *dev, struct cxl_afu *afu)
+{
+	bool ret;
+	struct cxl_cal