summaryrefslogtreecommitdiffstats
path: root/drivers/misc/ocxl
diff options
context:
space:
mode:
authorFrederic Barrat <fbarrat@linux.vnet.ibm.com>2018-01-23 12:31:41 +0100
committerMichael Ellerman <mpe@ellerman.id.au>2018-01-24 11:42:58 +1100
commit5ef3166e8a32d78dfa985a323aa45ed485ff663a (patch)
treee1321e75dc2f802294f94d71aff7509057d01077 /drivers/misc/ocxl
parent2cb3d64b26984703a6bb80e66adcc3727ad37f9f (diff)
ocxl: Driver code for 'generic' opencapi devices
Add an ocxl driver to handle generic opencapi devices. Of course, it's not meant to be the only opencapi driver, any device is free to implement its own. But if a host application only needs basic services like attaching to an opencapi adapter, have translation faults handled or allocate AFU interrupts, it should suffice. The AFU config space must follow the opencapi specification and use the expected vendor/device ID to be seen by the generic driver. The driver exposes the device AFUs as a char device in /dev/ocxl/ Note that the driver currently doesn't handle memory attached to the opencapi device. Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> Signed-off-by: Alastair D'Silva <alastair@d-silva.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'drivers/misc/ocxl')
-rw-r--r--drivers/misc/ocxl/config.c712
-rw-r--r--drivers/misc/ocxl/context.c230
-rw-r--r--drivers/misc/ocxl/file.c398
-rw-r--r--drivers/misc/ocxl/link.c603
-rw-r--r--drivers/misc/ocxl/main.c33
-rw-r--r--drivers/misc/ocxl/ocxl_internal.h193
-rw-r--r--drivers/misc/ocxl/pasid.c107
-rw-r--r--drivers/misc/ocxl/pci.c585
-rw-r--r--drivers/misc/ocxl/sysfs.c142
9 files changed, 3003 insertions, 0 deletions
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
new file mode 100644
index 000000000000..ea8cca50ea06
--- /dev/null
+++ b/drivers/misc/ocxl/config.c
@@ -0,0 +1,712 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/pci.h>
+#include <asm/pnv-ocxl.h>
+#include <misc/ocxl-config.h>
+#include "ocxl_internal.h"
+
+#define EXTRACT_BIT(val, bit) (!!(val & BIT(bit)))
+#define EXTRACT_BITS(val, s, e) ((val & GENMASK(e, s)) >> s)
+
+#define OCXL_DVSEC_AFU_IDX_MASK GENMASK(5, 0)
+#define OCXL_DVSEC_ACTAG_MASK GENMASK(11, 0)
+#define OCXL_DVSEC_PASID_MASK GENMASK(19, 0)
+#define OCXL_DVSEC_PASID_LOG_MASK GENMASK(4, 0)
+
+#define OCXL_DVSEC_TEMPL_VERSION 0x0
+#define OCXL_DVSEC_TEMPL_NAME 0x4
+#define OCXL_DVSEC_TEMPL_AFU_VERSION 0x1C
+#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL 0x20
+#define OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ 0x28
+#define OCXL_DVSEC_TEMPL_MMIO_PP 0x30
+#define OCXL_DVSEC_TEMPL_MMIO_PP_SZ 0x38
+#define OCXL_DVSEC_TEMPL_MEM_SZ 0x3C
+#define OCXL_DVSEC_TEMPL_WWID 0x40
+
+#define OCXL_MAX_AFU_PER_FUNCTION 64
+#define OCXL_TEMPL_LEN 0x58
+#define OCXL_TEMPL_NAME_LEN 24
+#define OCXL_CFG_TIMEOUT 3
+
+static int find_dvsec(struct pci_dev *dev, int dvsec_id)
+{
+ int vsec = 0;
+ u16 vendor, id;
+
+ while ((vsec = pci_find_next_ext_capability(dev, vsec,
+ OCXL_EXT_CAP_ID_DVSEC))) {
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+ &vendor);
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+ if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
+ return vsec;
+ }
+ return 0;
+}
+
+static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
+{
+ int vsec = 0;
+ u16 vendor, id;
+ u8 idx;
+
+ while ((vsec = pci_find_next_ext_capability(dev, vsec,
+ OCXL_EXT_CAP_ID_DVSEC))) {
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
+ &vendor);
+ pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
+
+ if (vendor == PCI_VENDOR_ID_IBM &&
+ id == OCXL_DVSEC_AFU_CTRL_ID) {
+ pci_read_config_byte(dev,
+ vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
+ &idx);
+ if (idx == afu_idx)
+ return vsec;
+ }
+ }
+ return 0;
+}
+
+static int read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ u16 val;
+ int pos;
+
+ pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_PASID);
+ if (!pos) {
+ /*
+ * PASID capability is not mandatory, but there
+ * shouldn't be any AFU
+ */
+ dev_dbg(&dev->dev, "Function doesn't require any PASID\n");
+ fn->max_pasid_log = -1;
+ goto out;
+ }
+ pci_read_config_word(dev, pos + PCI_PASID_CAP, &val);
+ fn->max_pasid_log = EXTRACT_BITS(val, 8, 12);
+
+out:
+ dev_dbg(&dev->dev, "PASID capability:\n");
+ dev_dbg(&dev->dev, " Max PASID log = %d\n", fn->max_pasid_log);
+ return 0;
+}
+
+static int read_dvsec_tl(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ int pos;
+
+ pos = find_dvsec(dev, OCXL_DVSEC_TL_ID);
+ if (!pos && PCI_FUNC(dev->devfn) == 0) {
+ dev_err(&dev->dev, "Can't find TL DVSEC\n");
+ return -ENODEV;
+ }
+ if (pos && PCI_FUNC(dev->devfn) != 0) {
+ dev_err(&dev->dev, "TL DVSEC is only allowed on function 0\n");
+ return -ENODEV;
+ }
+ fn->dvsec_tl_pos = pos;
+ return 0;
+}
+
+static int read_dvsec_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ int pos, afu_present;
+ u32 val;
+
+ pos = find_dvsec(dev, OCXL_DVSEC_FUNC_ID);
+ if (!pos) {
+ dev_err(&dev->dev, "Can't find function DVSEC\n");
+ return -ENODEV;
+ }
+ fn->dvsec_function_pos = pos;
+
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
+ afu_present = EXTRACT_BIT(val, 31);
+ if (!afu_present) {
+ fn->max_afu_index = -1;
+ dev_dbg(&dev->dev, "Function doesn't define any AFU\n");
+ goto out;
+ }
+ fn->max_afu_index = EXTRACT_BITS(val, 24, 29);
+
+out:
+ dev_dbg(&dev->dev, "Function DVSEC:\n");
+ dev_dbg(&dev->dev, " Max AFU index = %d\n", fn->max_afu_index);
+ return 0;
+}
+
+static int read_dvsec_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ int pos;
+
+ if (fn->max_afu_index < 0) {
+ fn->dvsec_afu_info_pos = -1;
+ return 0;
+ }
+
+ pos = find_dvsec(dev, OCXL_DVSEC_AFU_INFO_ID);
+ if (!pos) {
+ dev_err(&dev->dev, "Can't find AFU information DVSEC\n");
+ return -ENODEV;
+ }
+ fn->dvsec_afu_info_pos = pos;
+ return 0;
+}
+
+static int read_dvsec_vendor(struct pci_dev *dev)
+{
+ int pos;
+ u32 cfg, tlx, dlx;
+
+ /*
+ * vendor specific DVSEC is optional
+ *
+ * It's currently only used on function 0 to specify the
+ * version of some logic blocks. Some older images may not
+ * even have it so we ignore any errors
+ */
+ if (PCI_FUNC(dev->devfn) != 0)
+ return 0;
+
+ pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
+ if (!pos)
+ return 0;
+
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg);
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx);
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx);
+
+ dev_dbg(&dev->dev, "Vendor specific DVSEC:\n");
+ dev_dbg(&dev->dev, " CFG version = 0x%x\n", cfg);
+ dev_dbg(&dev->dev, " TLX version = 0x%x\n", tlx);
+ dev_dbg(&dev->dev, " DLX version = 0x%x\n", dlx);
+ return 0;
+}
+
+static int validate_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ if (fn->max_pasid_log == -1 && fn->max_afu_index >= 0) {
+ dev_err(&dev->dev,
+ "AFUs are defined but no PASIDs are requested\n");
+ return -EINVAL;
+ }
+
+ if (fn->max_afu_index > OCXL_MAX_AFU_PER_FUNCTION) {
+ dev_err(&dev->dev,
+ "Max AFU index out of architectural limit (%d vs %d)\n",
+ fn->max_afu_index, OCXL_MAX_AFU_PER_FUNCTION);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int ocxl_config_read_function(struct pci_dev *dev, struct ocxl_fn_config *fn)
+{
+ int rc;
+
+ rc = read_pasid(dev, fn);
+ if (rc) {
+ dev_err(&dev->dev, "Invalid PASID configuration: %d\n", rc);
+ return -ENODEV;
+ }
+
+ rc = read_dvsec_tl(dev, fn);
+ if (rc) {
+ dev_err(&dev->dev,
+ "Invalid Transaction Layer DVSEC configuration: %d\n",
+ rc);
+ return -ENODEV;
+ }
+
+ rc = read_dvsec_function(dev, fn);
+ if (rc) {
+ dev_err(&dev->dev,
+ "Invalid Function DVSEC configuration: %d\n", rc);
+ return -ENODEV;
+ }
+
+ rc = read_dvsec_afu_info(dev, fn);
+ if (rc) {
+ dev_err(&dev->dev, "Invalid AFU configuration: %d\n", rc);
+ return -ENODEV;
+ }
+
+ rc = read_dvsec_vendor(dev);
+ if (rc) {
+ dev_err(&dev->dev,
+ "Invalid vendor specific DVSEC configuration: %d\n",
+ rc);
+ return -ENODEV;
+ }
+
+ rc = validate_function(dev, fn);
+ return rc;
+}
+
+static int read_afu_info(struct pci_dev *dev, struct ocxl_fn_config *fn,
+ int offset, u32 *data)
+{
+ u32 val;
+ unsigned long timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
+ int pos = fn->dvsec_afu_info_pos;
+
+ /* Protect 'data valid' bit */
+ if (EXTRACT_BIT(offset, 31)) {
+ dev_err(&dev->dev, "Invalid offset in AFU info DVSEC\n");
+ return -EINVAL;
+ }
+
+ pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, offset);
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
+ while (!EXTRACT_BIT(val, 31)) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_err(&dev->dev,
+ "Timeout while reading AFU info DVSEC (offset=%d)\n",
+ offset);
+ return -EBUSY;
+ }
+ cpu_relax();
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_OFF, &val);
+ }
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_INFO_DATA, data);
+ return 0;
+}
+
+int ocxl_config_check_afu_index(struct pci_dev *dev,
+ struct ocxl_fn_config *fn, int afu_idx)
+{
+ u32 val;
+ int rc, templ_major, templ_minor, len;
+
+ pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
+ if (rc)
+ return rc;
+
+ /* AFU index map can have holes */
+ if (!val)
+ return 0;
+
+ templ_major = EXTRACT_BITS(val, 8, 15);
+ templ_minor = EXTRACT_BITS(val, 0, 7);
+ dev_dbg(&dev->dev, "AFU descriptor template version %d.%d\n",
+ templ_major, templ_minor);
+
+ len = EXTRACT_BITS(val, 16, 31);
+ if (len != OCXL_TEMPL_LEN) {
+ dev_warn(&dev->dev,
+ "Unexpected template length in AFU information (%#x)\n",
+ len);
+ }
+ return 1;
+}
+
+static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn,
+ struct ocxl_afu_config *afu)
+{
+ int i, rc;
+ u32 val, *ptr;
+
+ BUILD_BUG_ON(OCXL_AFU_NAME_SZ < OCXL_TEMPL_NAME_LEN);
+ for (i = 0; i < OCXL_TEMPL_NAME_LEN; i += 4) {
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_NAME + i, &val);
+ if (rc)
+ return rc;
+ ptr = (u32 *) &afu->name[i];
+ *ptr = val;
+ }
+ afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */
+ return 0;
+}
+
+static int read_afu_mmio(struct pci_dev *dev, struct ocxl_fn_config *fn,
+ struct ocxl_afu_config *afu)
+{
+ int rc;
+ u32 val;
+
+ /*
+ * Global MMIO
+ */
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL, &val);
+ if (rc)
+ return rc;
+ afu->global_mmio_bar = EXTRACT_BITS(val, 0, 2);
+ afu->global_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL + 4, &val);
+ if (rc)
+ return rc;
+ afu->global_mmio_offset += (u64) val << 32;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_GLOBAL_SZ, &val);
+ if (rc)
+ return rc;
+ afu->global_mmio_size = val;
+
+ /*
+ * Per-process MMIO
+ */
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP, &val);
+ if (rc)
+ return rc;
+ afu->pp_mmio_bar = EXTRACT_BITS(val, 0, 2);
+ afu->pp_mmio_offset = EXTRACT_BITS(val, 16, 31) << 16;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP + 4, &val);
+ if (rc)
+ return rc;
+ afu->pp_mmio_offset += (u64) val << 32;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MMIO_PP_SZ, &val);
+ if (rc)
+ return rc;
+ afu->pp_mmio_stride = val;
+
+ return 0;
+}
+
+static int read_afu_control(struct pci_dev *dev, struct ocxl_afu_config *afu)
+{
+ int pos;
+ u8 val8;
+ u16 val16;
+
+ pos = find_dvsec_afu_ctrl(dev, afu->idx);
+ if (!pos) {
+ dev_err(&dev->dev, "Can't find AFU control DVSEC for AFU %d\n",
+ afu->idx);
+ return -ENODEV;
+ }
+ afu->dvsec_afu_control_pos = pos;
+
+ pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_SUP, &val8);
+ afu->pasid_supported_log = EXTRACT_BITS(val8, 0, 4);
+
+ pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, &val16);
+ afu->actag_supported = EXTRACT_BITS(val16, 0, 11);
+ return 0;
+}
+
+static bool char_allowed(int c)
+{
+ /*
+ * Permitted Characters : Alphanumeric, hyphen, underscore, comma
+ */
+ if ((c >= 0x30 && c <= 0x39) /* digits */ ||
+ (c >= 0x41 && c <= 0x5A) /* upper case */ ||
+ (c >= 0x61 && c <= 0x7A) /* lower case */ ||
+ c == 0 /* NULL */ ||
+ c == 0x2D /* - */ ||
+ c == 0x5F /* _ */ ||
+ c == 0x2C /* , */)
+ return true;
+ return false;
+}
+
+static int validate_afu(struct pci_dev *dev, struct ocxl_afu_config *afu)
+{
+ int i;
+
+ if (!afu->name[0]) {
+ dev_err(&dev->dev, "Empty AFU name\n");
+ return -EINVAL;
+ }
+ for (i = 0; i < OCXL_TEMPL_NAME_LEN; i++) {
+ if (!char_allowed(afu->name[i])) {
+ dev_err(&dev->dev,
+ "Invalid character in AFU name\n");
+ return -EINVAL;
+ }
+ }
+
+ if (afu->global_mmio_bar != 0 &&
+ afu->global_mmio_bar != 2 &&
+ afu->global_mmio_bar != 4) {
+ dev_err(&dev->dev, "Invalid global MMIO bar number\n");
+ return -EINVAL;
+ }
+ if (afu->pp_mmio_bar != 0 &&
+ afu->pp_mmio_bar != 2 &&
+ afu->pp_mmio_bar != 4) {
+ dev_err(&dev->dev, "Invalid per-process MMIO bar number\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int ocxl_config_read_afu(struct pci_dev *dev, struct ocxl_fn_config *fn,
+ struct ocxl_afu_config *afu, u8 afu_idx)
+{
+ int rc;
+ u32 val32;
+
+ /*
+ * First, we need to write the AFU idx for the AFU we want to
+ * access.
+ */
+ WARN_ON((afu_idx & OCXL_DVSEC_AFU_IDX_MASK) != afu_idx);
+ afu->idx = afu_idx;
+ pci_write_config_byte(dev,
+ fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+ afu->idx);
+
+ rc = read_afu_name(dev, fn, afu);
+ if (rc)
+ return rc;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_AFU_VERSION, &val32);
+ if (rc)
+ return rc;
+ afu->version_major = EXTRACT_BITS(val32, 24, 31);
+ afu->version_minor = EXTRACT_BITS(val32, 16, 23);
+ afu->afuc_type = EXTRACT_BITS(val32, 14, 15);
+ afu->afum_type = EXTRACT_BITS(val32, 12, 13);
+ afu->profile = EXTRACT_BITS(val32, 0, 7);
+
+ rc = read_afu_mmio(dev, fn, afu);
+ if (rc)
+ return rc;
+
+ rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_MEM_SZ, &val32);
+ if (rc)
+ return rc;
+ afu->log_mem_size = EXTRACT_BITS(val32, 0, 7);
+
+ rc = read_afu_control(dev, afu);
+ if (rc)
+ return rc;
+
+ dev_dbg(&dev->dev, "AFU configuration:\n");
+ dev_dbg(&dev->dev, " name = %s\n", afu->name);
+ dev_dbg(&dev->dev, " version = %d.%d\n", afu->version_major,
+ afu->version_minor);
+ dev_dbg(&dev->dev, " global mmio bar = %hhu\n", afu->global_mmio_bar);
+ dev_dbg(&dev->dev, " global mmio offset = %#llx\n",
+ afu->global_mmio_offset);
+ dev_dbg(&dev->dev, " global mmio size = %#x\n", afu->global_mmio_size);
+ dev_dbg(&dev->dev, " pp mmio bar = %hhu\n", afu->pp_mmio_bar);
+ dev_dbg(&dev->dev, " pp mmio offset = %#llx\n", afu->pp_mmio_offset);
+ dev_dbg(&dev->dev, " pp mmio stride = %#x\n", afu->pp_mmio_stride);
+ dev_dbg(&dev->dev, " mem size (log) = %hhu\n", afu->log_mem_size);
+ dev_dbg(&dev->dev, " pasid supported (log) = %u\n",
+ afu->pasid_supported_log);
+ dev_dbg(&dev->dev, " actag supported = %u\n",
+ afu->actag_supported);
+
+ rc = validate_afu(dev, afu);
+ return rc;
+}
+
+int ocxl_config_get_actag_info(struct pci_dev *dev, u16 *base, u16 *enabled,
+ u16 *supported)
+{
+ int rc;
+
+ /*
+ * This is really a simple wrapper for the kernel API, to
+ * avoid an external driver using ocxl as a library to call
+ * platform-dependent code
+ */
+ rc = pnv_ocxl_get_actag(dev, base, enabled, supported);
+ if (rc) {
+ dev_err(&dev->dev, "Can't get actag for device: %d\n", rc);
+ return rc;
+ }
+ return 0;
+}
+
+void ocxl_config_set_afu_actag(struct pci_dev *dev, int pos, int actag_base,
+ int actag_count)
+{
+ u16 val;
+
+ val = actag_count & OCXL_DVSEC_ACTAG_MASK;
+ pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_EN, val);
+
+ val = actag_base & OCXL_DVSEC_ACTAG_MASK;
+ pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_BASE, val);
+}
+
+int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count)
+{
+ return pnv_ocxl_get_pasid_count(dev, count);
+}
+
+void ocxl_config_set_afu_pasid(struct pci_dev *dev, int pos, int pasid_base,
+ u32 pasid_count_log)
+{
+ u8 val8;
+ u32 val32;
+
+ val8 = pasid_count_log & OCXL_DVSEC_PASID_LOG_MASK;
+ pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_EN, val8);
+
+ pci_read_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
+ &val32);
+ val32 &= ~OCXL_DVSEC_PASID_MASK;
+ val32 |= pasid_base & OCXL_DVSEC_PASID_MASK;
+ pci_write_config_dword(dev, pos + OCXL_DVSEC_AFU_CTRL_PASID_BASE,
+ val32);
+}
+
+void ocxl_config_set_afu_state(struct pci_dev *dev, int pos, int enable)
+{
+ u8 val;
+
+ pci_read_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, &val);
+ if (enable)
+ val |= 1;
+ else
+ val &= 0xFE;
+ pci_write_config_byte(dev, pos + OCXL_DVSEC_AFU_CTRL_ENABLE, val);
+}
+
+int ocxl_config_set_TL(struct pci_dev *dev, int tl_dvsec)
+{
+ u32 val;
+ __be32 *be32ptr;
+ u8 timers;
+ int i, rc;
+ long recv_cap;
+ char *recv_rate;
+
+ /*
+ * Skip on function != 0, as the TL can only be defined on 0
+ */
+ if (PCI_FUNC(dev->devfn) != 0)
+ return 0;
+
+ recv_rate = kzalloc(PNV_OCXL_TL_RATE_BUF_SIZE, GFP_KERNEL);
+ if (!recv_rate)
+ return -ENOMEM;
+ /*
+ * The spec defines 64 templates for messages in the
+ * Transaction Layer (TL).
+ *
+ * The host and device each support a subset, so we need to
+ * configure the transmitters on each side to send only
+ * templates the receiver understands, at a rate the receiver
+ * can process. Per the spec, template 0 must be supported by
+ * everybody. That's the template which has been used by the
+ * host and device so far.
+ *
+ * The sending rate limit must be set before the template is
+ * enabled.
+ */
+
+ /*
+ * Device -> host
+ */
+ rc = pnv_ocxl_get_tl_cap(dev, &recv_cap, recv_rate,
+ PNV_OCXL_TL_RATE_BUF_SIZE);
+ if (rc)
+ goto out;
+
+ for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+ be32ptr = (__be32 *) &recv_rate[i];
+ pci_write_config_dword(dev,
+ tl_dvsec + OCXL_DVSEC_TL_SEND_RATE + i,
+ be32_to_cpu(*be32ptr));
+ }
+ val = recv_cap >> 32;
+ pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP, val);
+ val = recv_cap & GENMASK(31, 0);
+ pci_write_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_SEND_CAP + 4, val);
+
+ /*
+ * Host -> device
+ */
+ for (i = 0; i < PNV_OCXL_TL_RATE_BUF_SIZE; i += 4) {
+ pci_read_config_dword(dev,
+ tl_dvsec + OCXL_DVSEC_TL_RECV_RATE + i,
+ &val);
+ be32ptr = (__be32 *) &recv_rate[i];
+ *be32ptr = cpu_to_be32(val);
+ }
+ pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP, &val);
+ recv_cap = (long) val << 32;
+ pci_read_config_dword(dev, tl_dvsec + OCXL_DVSEC_TL_RECV_CAP + 4, &val);
+ recv_cap |= val;
+
+ rc = pnv_ocxl_set_tl_conf(dev, recv_cap, __pa(recv_rate),
+ PNV_OCXL_TL_RATE_BUF_SIZE);
+ if (rc)
+ goto out;
+
+ /*
+ * Opencapi commands needing to be retried are classified per
+ * the TL in 2 groups: short and long commands.
+ *
+ * The short back off timer it not used for now. It will be
+ * for opencapi 4.0.
+ *
+ * The long back off timer is typically used when an AFU hits
+ * a page fault but the NPU is already processing one. So the
+ * AFU needs to wait before it can resubmit. Having a value
+ * too low doesn't break anything, but can generate extra
+ * traffic on the link.
+ * We set it to 1.6 us for now. It's shorter than, but in the
+ * same order of magnitude as the time spent to process a page
+ * fault.
+ */
+ timers = 0x2 << 4; /* long timer = 1.6 us */
+ pci_write_config_byte(dev, tl_dvsec + OCXL_DVSEC_TL_BACKOFF_TIMERS,
+ timers);
+
+ rc = 0;
+out:
+ kfree(recv_rate);
+ return rc;
+}
+
+int ocxl_config_terminate_pasid(struct pci_dev *dev, int afu_control, int pasid)
+{
+ u32 val;
+ unsigned long timeout;
+
+ pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+ &val);
+ if (EXTRACT_BIT(val, 20)) {
+ dev_err(&dev->dev,
+ "Can't terminate PASID %#x, previous termination didn't complete\n",
+ pasid);
+ return -EBUSY;
+ }
+
+ val &= ~OCXL_DVSEC_PASID_MASK;
+ val |= pasid & OCXL_DVSEC_PASID_MASK;
+ val |= BIT(20);
+ pci_write_config_dword(dev,
+ afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+ val);
+
+ timeout = jiffies + (HZ * OCXL_CFG_TIMEOUT);
+ pci_read_config_dword(dev, afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+ &val);
+ while (EXTRACT_BIT(val, 20)) {
+ if (time_after_eq(jiffies, timeout)) {
+ dev_err(&dev->dev,
+ "Timeout while waiting for AFU to terminate PASID %#x\n",
+ pasid);
+ return -EBUSY;
+ }
+ cpu_relax();
+ pci_read_config_dword(dev,
+ afu_control + OCXL_DVSEC_AFU_CTRL_TERM_PASID,
+ &val);
+ }
+ return 0;
+}
+
+void ocxl_config_set_actag(struct pci_dev *dev, int func_dvsec, u32 tag_first,
+ u32 tag_count)
+{
+ u32 val;
+
+ val = (tag_first & OCXL_DVSEC_ACTAG_MASK) << 16;
+ val |= tag_count & OCXL_DVSEC_ACTAG_MASK;
+ pci_write_config_dword(dev, func_dvsec + OCXL_DVSEC_FUNC_OFF_ACTAG,
+ val);
+}
diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
new file mode 100644
index 000000000000..b34b836f924c
--- /dev/null
+++ b/drivers/misc/ocxl/context.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/sched/mm.h>
+#include "ocxl_internal.h"
+
+struct ocxl_context *ocxl_context_alloc(void)
+{
+ return kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
+}
+
+int ocxl_context_init(struct ocxl_context *ctx, struct ocxl_afu *afu,
+ struct address_space *mapping)
+{
+ int pasid;
+
+ ctx->afu = afu;
+ mutex_lock(&afu->contexts_lock);
+ pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base,
+ afu->pasid_base + afu->pasid_max, GFP_KERNEL);
+ if (pasid < 0) {
+ mutex_unlock(&afu->contexts_lock);
+ return pasid;
+ }
+ afu->pasid_count++;
+ mutex_unlock(&afu->contexts_lock);
+
+ ctx->pasid = pasid;
+ ctx->status = OPENED;
+ mutex_init(&ctx->status_mutex);
+ ctx->mapping = mapping;
+ mutex_init(&ctx->mapping_lock);
+ init_waitqueue_head(&ctx->events_wq);
+ mutex_init(&ctx->xsl_error_lock);
+ /*
+ * Keep a reference on the AFU to make sure it's valid for the
+ * duration of the life of the context
+ */
+ ocxl_afu_get(afu);
+ return 0;
+}
+
+/*
+ * Callback for when a translation fault triggers an error
+ * data: a pointer to the context which triggered the fault
+ * addr: the address that triggered the error
+ * dsisr: the value of the PPC64 dsisr register
+ */
+static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
+{
+ struct ocxl_context *ctx = (struct ocxl_context *) data;
+
+ mutex_lock(&ctx->xsl_error_lock);
+ ctx->xsl_error.addr = addr;
+ ctx->xsl_error.dsisr = dsisr;
+ ctx->xsl_error.count++;
+ mutex_unlock(&ctx->xsl_error_lock);
+
+ wake_up_all(&ctx->events_wq);
+}
+
+int ocxl_context_attach(struct ocxl_context *ctx, u64 amr)
+{
+ int rc;
+
+ mutex_lock(&ctx->status_mutex);
+ if (ctx->status != OPENED) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
+ current->mm->context.id, 0, amr, current->mm,
+ xsl_fault_error, ctx);
+ if (rc)
+ goto out;
+
+ ctx->status = ATTACHED;
+out:
+ mutex_unlock(&ctx->status_mutex);
+ return rc;
+}
+
+static int map_pp_mmio(struct vm_area_struct *vma, unsigned long address,
+ u64 offset, struct ocxl_context *ctx)
+{
+ u64 pp_mmio_addr;
+ int pasid_off;
+
+ if (offset >= ctx->afu->config.pp_mmio_stride)
+ return VM_FAULT_SIGBUS;
+
+ mutex_lock(&ctx->status_mutex);
+ if (ctx->status != ATTACHED) {
+ mutex_unlock(&ctx->status_mutex);
+ pr_debug("%s: Context not attached, failing mmio mmap\n",
+ __func__);
+ return VM_FAULT_SIGBUS;
+ }
+
+ pasid_off = ctx->pasid - ctx->afu->pasid_base;
+ pp_mmio_addr = ctx->afu->pp_mmio_start +
+ pasid_off * ctx->afu->config.pp_mmio_stride +
+ offset;
+
+ vm_insert_pfn(vma, address, pp_mmio_addr >> PAGE_SHIFT);
+ mutex_unlock(&ctx->status_mutex);
+ return VM_FAULT_NOPAGE;
+}
+
+static int ocxl_mmap_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct ocxl_context *ctx = vma->vm_file->private_data;
+ u64 offset;
+ int rc;
+
+ offset = vmf->pgoff << PAGE_SHIFT;
+ pr_debug("%s: pasid %d address 0x%lx offset 0x%llx\n", __func__,
+ ctx->pasid, vmf->address, offset);
+
+ rc = map_pp_mmio(vma, vmf->address, offset, ctx);
+ return rc;
+}
+
+static const struct vm_operations_struct ocxl_vmops = {
+ .fault = ocxl_mmap_fault,
+};
+
+static int check_mmap_mmio(struct ocxl_context *ctx,
+ struct vm_area_struct *vma)
+{
+ if ((vma_pages(vma) + vma->vm_pgoff) >
+ (ctx->afu->config.pp_mmio_stride >> PAGE_SHIFT))
+ return -EINVAL;
+ return 0;
+}
+
+int ocxl_context_mmap(struct ocxl_context *ctx, struct vm_area_struct *vma)
+{
+ int rc;
+
+ rc = check_mmap_mmio(ctx, vma);
+ if (rc)
+ return rc;
+
+ vma->vm_flags |= VM_IO | VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_ops = &ocxl_vmops;
+ return 0;
+}
+
+int ocxl_context_detach(struct ocxl_context *ctx)
+{
+ struct pci_dev *dev;
+ int afu_control_pos;
+ enum ocxl_context_status status;
+ int rc;
+
+ mutex_lock(&ctx->status_mutex);
+ status = ctx->status;
+ ctx->status = CLOSED;
+ mutex_unlock(&ctx->status_mutex);
+ if (status != ATTACHED)
+ return 0;
+
+ dev = to_pci_dev(ctx->afu->fn->dev.parent);
+ afu_control_pos = ctx->afu->config.dvsec_afu_control_pos;
+
+ mutex_lock(&ctx->afu->afu_control_lock);
+ rc = ocxl_config_terminate_pasid(dev, afu_control_pos, ctx->pasid);
+ mutex_unlock(&ctx->afu->afu_control_lock);
+ if (rc) {
+ /*
+ * If we timeout waiting for the AFU to terminate the
+ * pasid, then it's dangerous to clean up the Process
+ * Element entry in the SPA, as it may be referenced
+ * in the future by the AFU. In which case, we would
+ * checkstop because of an invalid PE access (FIR
+ * register 2, bit 42). So leave the PE
+ * defined. Caller shouldn't free the context so that
+ * PASID remains allocated.
+ *
+ * A link reset will be required to cleanup the AFU
+ * and the SPA.
+ */
+ if (rc == -EBUSY)
+ return rc;
+ }
+ rc = ocxl_link_remove_pe(ctx->afu->fn->link, ctx->pasid);
+ if (rc) {
+ dev_warn(&ctx->afu->dev,
+ "Couldn't remove PE entry cleanly: %d\n", rc);
+ }
+ return 0;
+}
+
+void ocxl_context_detach_all(struct ocxl_afu *afu)
+{
+ struct ocxl_context *ctx;
+ int tmp;
+
+ mutex_lock(&afu->contexts_lock);
+ idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
+ ocxl_context_detach(ctx);
+ /*
+ * We are force detaching - remove any active mmio
+ * mappings so userspace cannot interfere with the
+ * card if it comes back. Easiest way to exercise
+ * this is to unbind and rebind the driver via sysfs
+ * while it is in use.
+ */
+ mutex_lock(&ctx->mapping_lock);
+ if (ctx->mapping)
+ unmap_mapping_range(ctx->mapping, 0, 0, 1);
+ mutex_unlock(&ctx->mapping_lock);
+ }
+ mutex_unlock(&afu->contexts_lock);
+}
+
+void ocxl_context_free(struct ocxl_context *ctx)
+{
+ mutex_lock(&ctx->afu->contexts_lock);
+ ctx->afu->pasid_count--;
+ idr_remove(&ctx->afu->contexts_idr, ctx->pasid);
+ mutex_unlock(&ctx->afu->contexts_lock);
+
+ /* reference to the AFU taken in ocxl_context_init */
+ ocxl_afu_put(ctx->afu);
+ kfree(ctx);
+}
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
new file mode 100644
index 000000000000..6f0befda6a8a
--- /dev/null
+++ b/drivers/misc/ocxl/file.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright 2017 IBM Corp.
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+#include <uapi/misc/ocxl.h>
+#include "ocxl_internal.h"
+
+
+#define OCXL_NUM_MINORS 256 /* Total to reserve */
+
+static dev_t ocxl_dev;
+static struct class *ocxl_class;
+static struct mutex minors_idr_lock;
+static struct idr minors_idr;
+
+static struct ocxl_afu *find_and_get_afu(dev_t devno)
+{
+ struct ocxl_afu *afu;
+ int afu_minor;
+
+ afu_minor = MINOR(devno);
+ /*
+ * We don't declare an RCU critical section here, as our AFU
+ * is protected by a reference counter on the device. By the time the
+ * minor number of a device is removed from the idr, the ref count of
+ * the device is already at 0, so no user API will access that AFU and
+ * this function can't return it.
+ */
+ afu = idr_find(&minors_idr, afu_minor);
+ if (afu)
+ ocxl_afu_get(afu);
+ return afu;
+}
+
+static int allocate_afu_minor(struct ocxl_afu *afu)
+{
+ int minor;
+
+ mutex_lock(&minors_idr_lock);
+ minor = idr_alloc(&minors_idr, afu, 0, OCXL_NUM_MINORS, GFP_KERNEL);
+ mutex_unlock(&minors_idr_lock);
+ return minor;
+}
+
+static void free_afu_minor(struct ocxl_afu *afu)
+{
+ mutex_lock(&minors_idr_lock);
+ idr_remove(&minors_idr, MINOR(afu->dev.devt));
+ mutex_unlock(&minors_idr_lock);
+}
+
+static int afu_open(struct inode *inode, struct file *file)
+{
+ struct ocxl_afu *afu;
+ struct ocxl_context *ctx;
+ int rc;
+
+ pr_debug("%s for device %x\n", __func__, inode->i_rdev);
+
+ afu = find_and_get_afu(inode->i_rdev);
+ if (!afu)
+ return -ENODEV;
+
+ ctx = ocxl_context_alloc();
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto put_afu;
+ }
+
+ rc = ocxl_context_init(ctx, afu, inode->i_mapping);
+ if (rc)
+ goto put_afu;
+ file->private_data = ctx;
+ ocxl_afu_put(afu);
+ return 0;
+
+put_afu:
+ ocxl_afu_put(afu);
+ return rc;
+}
+
+static long afu_ioctl_attach(struct ocxl_context *ctx,
+ struct ocxl_ioctl_attach __user *uarg)
+{
+ struct ocxl_ioctl_attach arg;
+ u64 amr = 0;
+ int rc;
+
+ pr_debug("%s for context %d\n", __func__, ctx->pasid);
+
+ if (copy_from_user(&arg, uarg, sizeof(arg)))
+ return -EFAULT;
+
+ /* Make sure reserved fields are not set for forward compatibility */
+ if (arg.reserved1 || arg.reserved2 || arg.reserved3)
+ return -EINVAL;
+
+ amr = arg.amr & mfspr(SPRN_UAMOR);
+ rc = ocxl_context_attach(ctx, amr);
+ return rc;
+}
+
+#define CMD_STR(x) (x == OCXL_IOCTL_ATTACH ? "ATTACH" : \
+ "UNKNOWN")
+
+static long afu_ioctl(struct file *file, unsigned int cmd,
+ unsigned long args)
+{
+ struct ocxl_context *ctx = file->private_data;
+ long rc;
+
+ pr_debug("%s for context %d, command %s\n", __func__, ctx->pasid,
+ CMD_STR(cmd));
+
+ if (ctx->status == CLOSED)
+ return -EIO;
+
+ switch (cmd) {
+ case OCXL_IOCTL_ATTACH:
+ rc = afu_ioctl_attach(ctx,
+ (struct ocxl_ioctl_attach __user *) args);
+ break;
+
+ default: