summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSergey Temerkhanov <s.temerkhanov@gmail.com>2017-03-24 22:28:37 +0000
committerBorislav Petkov <bp@suse.de>2017-03-27 11:43:56 +0200
commit41003396f932d7f027725c7acebb6a7caa41dc3e (patch)
tree85fb52f0424c586df0344c3a58704e233d13dccc
parent819f60fb7db169d851186d04e571e9bca27321e8 (diff)
EDAC, thunderx: Add Cavium ThunderX EDAC driver
Add support for Cavium ThunderX EDAC capable on-chip peripherals, namely the DRAM controller (LMC), cache coherent processor interconnect (CCPI) and level 2 cache blocks (L2C-TAD, L2C-MCI, L2C-CBC) Signed-off-by: Sergey Temerkhanov <s.temerkhanov@gmail.com> Cc: David.Daney@cavium.com Cc: Jan.Glauber@cavium.com Cc: linux-edac <linux-edac@vger.kernel.org> Link: http://lkml.kernel.org/r/20170324222837.60583-1-s.temerkhanov@gmail.com Signed-off-by: Borislav Petkov <bp@suse.de>
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/edac/Kconfig11
-rw-r--r--drivers/edac/Makefile1
-rw-r--r--drivers/edac/thunderx_edac.c2183
4 files changed, 2196 insertions, 0 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 572fe4252ac4..af89e5d0912b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4695,6 +4695,7 @@ L: linux-edac@vger.kernel.org
L: linux-mips@linux-mips.org
S: Supported
F: drivers/edac/octeon_edac*
+F: drivers/edac/thunderx_edac*
EDAC-E752X
M: Mark Gross <mark.gross@intel.com>
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 4773f2867234..7c68e6f955c7 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -392,6 +392,17 @@ config EDAC_OCTEON_PCI
Support for error detection and correction on the
Cavium Octeon family of SOCs.
+config EDAC_THUNDERX
+ tristate "Cavium ThunderX EDAC"
+ depends on EDAC_MM_EDAC
+ depends on ARM64
+ depends on PCI
+ help
+ Support for error detection and correction on the
+ Cavium ThunderX memory controllers (LMC), Cache
+ Coherent Processor Interconnect (CCPI) and L2 cache
+ blocks (TAD, CBC, MCI).
+
config EDAC_ALTERA
bool "Altera SOCFPGA ECC"
depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 587107e90996..52d735f29073 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_EDAC_OCTEON_PC) += octeon_edac-pc.o
obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o
obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o
obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o
+obj-$(CONFIG_EDAC_THUNDERX) += thunderx_edac.o
obj-$(CONFIG_EDAC_ALTERA) += altera_edac.o
obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o
diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c
new file mode 100644
index 000000000000..b5fe6894df5e
--- /dev/null
+++ b/drivers/edac/thunderx_edac.c
@@ -0,0 +1,2183 @@
+/*
+ * Cavium ThunderX memory controller kernel module
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/string.h>
+#include <linux/stop_machine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/atomic.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+
+#include <asm/page.h>
+
+#include "edac_module.h"
+
+#define phys_to_pfn(phys) (PFN_DOWN(phys))
+
+#define THUNDERX_NODE GENMASK(45, 44)
+
+enum {
+ ERR_CORRECTED = 1,
+ ERR_UNCORRECTED = 2,
+ ERR_UNKNOWN = 3,
+};
+
+#define MAX_SYNDROME_REGS 4
+
+struct error_syndrome {
+ u64 reg[MAX_SYNDROME_REGS];
+};
+
+struct error_descr {
+ int type;
+ u64 mask;
+ char *descr;
+};
+
+static void decode_register(char *str, size_t size,
+ const struct error_descr *descr,
+ const uint64_t reg)
+{
+ int ret = 0;
+
+ while (descr->type && descr->mask && descr->descr) {
+ if (reg & descr->mask) {
+ ret = snprintf(str, size, "\n\t%s, %s",
+ descr->type == ERR_CORRECTED ?
+ "Corrected" : "Uncorrected",
+ descr->descr);
+ str += ret;
+ size -= ret;
+ }
+ descr++;
+ }
+}
+
+static unsigned long get_bits(unsigned long data, int pos, int width)
+{
+ return (data >> pos) & ((1 << width) - 1);
+}
+
+#define L2C_CTL 0x87E080800000
+#define L2C_CTL_DISIDXALIAS BIT(0)
+
+#define PCI_DEVICE_ID_THUNDER_LMC 0xa022
+
+#define LMC_FADR 0x20
+#define LMC_FADR_FDIMM(x) ((x >> 37) & 0x1)
+#define LMC_FADR_FBUNK(x) ((x >> 36) & 0x1)
+#define LMC_FADR_FBANK(x) ((x >> 32) & 0xf)
+#define LMC_FADR_FROW(x) ((x >> 14) & 0xffff)
+#define LMC_FADR_FCOL(x) ((x >> 0) & 0x1fff)
+
+#define LMC_NXM_FADR 0x28
+#define LMC_ECC_SYND 0x38
+
+#define LMC_ECC_PARITY_TEST 0x108
+
+#define LMC_INT_W1S 0x150
+
+#define LMC_INT_ENA_W1C 0x158
+#define LMC_INT_ENA_W1S 0x160
+
+#define LMC_CONFIG 0x188
+
+#define LMC_CONFIG_BG2 BIT(62)
+#define LMC_CONFIG_RANK_ENA BIT(42)
+#define LMC_CONFIG_PBANK_LSB(x) (((x) >> 5) & 0xF)
+#define LMC_CONFIG_ROW_LSB(x) (((x) >> 2) & 0x7)
+
+#define LMC_CONTROL 0x190
+#define LMC_CONTROL_XOR_BANK BIT(16)
+
+#define LMC_INT 0x1F0
+
+#define LMC_INT_DDR_ERR BIT(11)
+#define LMC_INT_DED_ERR (0xFUL << 5)
+#define LMC_INT_SEC_ERR (0xFUL << 1)
+#define LMC_INT_NXM_WR_MASK BIT(0)
+
+#define LMC_DDR_PLL_CTL 0x258
+#define LMC_DDR_PLL_CTL_DDR4 BIT(29)
+
+#define LMC_FADR_SCRAMBLED 0x330
+
+#define LMC_INT_UE (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \
+ LMC_INT_NXM_WR_MASK)
+
+#define LMC_INT_CE (LMC_INT_SEC_ERR)
+
+static const struct error_descr lmc_errors[] = {
+ {
+ .type = ERR_CORRECTED,
+ .mask = LMC_INT_SEC_ERR,
+ .descr = "Single-bit ECC error",
+ },
+ {
+ .type = ERR_UNCORRECTED,
+ .mask = LMC_INT_DDR_ERR,
+ .descr = "DDR chip error",
+ },
+ {
+ .type = ERR_UNCORRECTED,
+ .mask = LMC_INT_DED_ERR,
+ .descr = "Double-bit ECC error",
+ },
+ {
+ .type = ERR_UNCORRECTED,
+ .mask = LMC_INT_NXM_WR_MASK,
+ .descr = "Non-existent memory write",
+ },
+ {0, 0, NULL},
+};
+
+#define LMC_INT_EN_DDR_ERROR_ALERT_ENA BIT(5)
+#define LMC_INT_EN_DLCRAM_DED_ERR BIT(4)
+#define LMC_INT_EN_DLCRAM_SEC_ERR BIT(3)
+#define LMC_INT_INTR_DED_ENA BIT(2)
+#define LMC_INT_INTR_SEC_ENA BIT(1)
+#define LMC_INT_INTR_NXM_WR_ENA BIT(0)
+
+#define LMC_INT_ENA_ALL GENMASK(5, 0)
+
+#define LMC_DDR_PLL_CTL 0x258
+#define LMC_DDR_PLL_CTL_DDR4 BIT(29)
+
+#define LMC_CONTROL 0x190
+#define LMC_CONTROL_RDIMM BIT(0)
+
+#define LMC_SCRAM_FADR 0x330
+
+#define LMC_CHAR_MASK0 0x228
+#define LMC_CHAR_MASK2 0x238
+
+#define RING_ENTRIES 8
+
+struct debugfs_entry {
+ const char *name;
+ umode_t mode;
+ const struct file_operations fops;
+};
+
+struct lmc_err_ctx {
+ u64 reg_int;
+ u64 reg_fadr;
+ u64 reg_nxm_fadr;
+ u64 reg_scram_fadr;
+ u64 reg_ecc_synd;
+};
+
+struct thunderx_lmc {
+ void __iomem *regs;
+ struct pci_dev *pdev;
+ struct msix_entry msix_ent;
+
+ atomic_t ecc_int;
+
+ u64 mask0;
+ u64 mask2;
+ u64 parity_test;
+ u64 node;
+
+ int xbits;
+ int bank_width;
+ int pbank_lsb;
+ int dimm_lsb;
+ int rank_lsb;
+ int bank_lsb;
+ int row_lsb;
+ int col_hi_lsb;
+
+ int xor_bank;
+ int l2c_alias;
+
+ struct page *mem;
+
+ struct lmc_err_ctx err_ctx[RING_ENTRIES];
+ unsigned long ring_head;
+ unsigned long ring_tail;
+};
+
+#define ring_pos(pos, size) ((pos) & (size - 1))
+
+#define DEBUGFS_STRUCT(_name, _mode, _write, _read) \
+static struct debugfs_entry debugfs_##_name = { \
+ .name = __stringify(_name), \
+ .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \
+ .fops = { \
+ .open = simple_open, \
+ .write = _write, \
+ .read = _read, \
+ .llseek = generic_file_llseek, \
+ }, \
+}
+
+#define DEBUGFS_FIELD_ATTR(_type, _field) \
+static ssize_t thunderx_##_type##_##_field##_read(struct file *file, \
+ char __user *data, \
+ size_t count, loff_t *ppos) \
+{ \
+ struct thunderx_##_type *pdata = file->private_data; \
+ char buf[20]; \
+ \
+ snprintf(buf, count, "0x%016llx", pdata->_field); \
+ return simple_read_from_buffer(data, count, ppos, \
+ buf, sizeof(buf)); \
+} \
+ \
+static ssize_t thunderx_##_type##_##_field##_write(struct file *file, \
+ const char __user *data, \
+ size_t count, loff_t *ppos) \
+{ \
+ struct thunderx_##_type *pdata = file->private_data; \
+ int res; \
+ \
+ res = kstrtoull_from_user(data, count, 0, &pdata->_field); \
+ \
+ return res ? res : count; \
+} \
+ \
+DEBUGFS_STRUCT(_field, 0600, \
+ thunderx_##_type##_##_field##_write, \
+ thunderx_##_type##_##_field##_read) \
+
+#define DEBUGFS_REG_ATTR(_type, _name, _reg) \
+static ssize_t thunderx_##_type##_##_name##_read(struct file *file, \
+ char __user *data, \
+ size_t count, loff_t *ppos) \
+{ \
+ struct thunderx_##_type *pdata = file->private_data; \
+ char buf[20]; \
+ \
+ sprintf(buf, "0x%016llx", readq(pdata->regs + _reg)); \
+ return simple_read_from_buffer(data, count, ppos, \
+ buf, sizeof(buf)); \
+} \
+ \
+static ssize_t thunderx_##_type##_##_name##_write(struct file *file, \
+ const char __user *data, \
+ size_t count, loff_t *ppos) \
+{ \
+ struct thunderx_##_type *pdata = file->private_data; \
+ u64 val; \
+ int res; \
+ \
+ res = kstrtoull_from_user(data, count, 0, &val); \
+ \
+ if (!res) { \
+ writeq(val, pdata->regs + _reg); \
+ res = count; \
+ } \
+ \
+ return res; \
+} \
+ \
+DEBUGFS_STRUCT(_name, 0600, \
+ thunderx_##_type##_##_name##_write, \
+ thunderx_##_type##_##_name##_read)
+
+#define LMC_DEBUGFS_ENT(_field) DEBUGFS_FIELD_ATTR(lmc, _field)
+
+/*
+ * To get an ECC error injected, the following steps are needed:
+ * - Setup the ECC injection by writing the appropriate parameters:
+ * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask0
+ * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask2
+ * echo 0x802 > /sys/kernel/debug/<device number>/ecc_parity_test
+ * - Do the actual injection:
+ * echo 1 > /sys/kernel/debug/<device number>/inject_ecc
+ */
+static ssize_t thunderx_lmc_inject_int_write(struct file *file,
+ const char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct thunderx_lmc *lmc = file->private_data;
+ u64 val;
+ int res;
+
+ res = kstrtoull_from_user(data, count, 0, &val);
+
+ if (!res) {
+ /* Trigger the interrupt */
+ writeq(val, lmc->regs + LMC_INT_W1S);
+ res = count;
+ }
+
+ return res;
+}
+
+static ssize_t thunderx_lmc_int_read(struct file *file,
+ char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct thunderx_lmc *lmc = file->private_data;
+ char buf[20];
+ u64 lmc_int = readq(lmc->regs + LMC_INT);
+
+ snprintf(buf, sizeof(buf), "0x%016llx", lmc_int);
+ return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf));
+}
+
+#define TEST_PATTERN 0xa5
+
+static int inject_ecc_fn(void *arg)
+{
+ struct thunderx_lmc *lmc = arg;
+ uintptr_t addr, phys;
+ unsigned int cline_size = cache_line_size();
+ const unsigned int lines = PAGE_SIZE / cline_size;
+ unsigned int i, cl_idx;
+
+ addr = (uintptr_t)page_address(lmc->mem);
+ phys = (uintptr_t)page_to_phys(lmc->mem);
+
+ cl_idx = (phys & 0x7f) >> 4;
+ lmc->parity_test &= ~(7ULL << 8);
+ lmc->parity_test |= (cl_idx << 8);
+
+ writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0);
+ writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2);
+ writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST);
+
+ readq(lmc->regs + LMC_CHAR_MASK0);
+ readq(lmc->regs + LMC_CHAR_MASK2);
+ readq(lmc->regs + LMC_ECC_PARITY_TEST);
+
+ for (i = 0; i < lines; i++) {
+ memset((void *)addr, TEST_PATTERN, cline_size);
+ barrier();
+
+ /*
+ * Flush L1 cachelines to the PoC (L2).
+ * This will cause cacheline eviction to the L2.
+ */
+ asm volatile("dc civac, %0\n"
+ "dsb sy\n"
+ : : "r"(addr + i * cline_size));
+ }
+
+ for (i = 0; i < lines; i++) {
+ /*
+ * Flush L2 cachelines to the DRAM.
+ * This will cause cacheline eviction to the DRAM
+ * and ECC corruption according to the masks set.
+ */
+ __asm__ volatile("sys #0,c11,C1,#2, %0\n"
+ : : "r"(phys + i * cline_size));
+ }
+
+ for (i = 0; i < lines; i++) {
+ /*
+ * Invalidate L2 cachelines.
+ * The subsequent load will cause cacheline fetch
+ * from the DRAM and an error interrupt
+ */
+ __asm__ volatile("sys #0,c11,C1,#1, %0"
+ : : "r"(phys + i * cline_size));
+ }
+
+ for (i = 0; i < lines; i++) {
+ /*
+ * Invalidate L1 cachelines.
+ * The subsequent load will cause cacheline fetch
+ * from the L2 and/or DRAM
+ */
+ asm volatile("dc ivac, %0\n"
+ "dsb sy\n"
+ : : "r"(addr + i * cline_size));
+ }
+
+ return 0;
+}
+
+static ssize_t thunderx_lmc_inject_ecc_write(struct file *file,
+ const char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct thunderx_lmc *lmc = file->private_data;
+
+ unsigned int cline_size = cache_line_size();
+
+ u8 tmp[cline_size];
+ void __iomem *addr;
+ unsigned int offs, timeout = 100000;
+
+ atomic_set(&lmc->ecc_int, 0);
+
+ lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
+
+ if (!lmc->mem)
+ return -ENOMEM;
+
+ addr = page_address(lmc->mem);
+
+ while (!atomic_read(&lmc->ecc_int) && timeout--) {
+ stop_machine(inject_ecc_fn, lmc, NULL);
+
+ for (offs = 0; offs < PAGE_SIZE; offs += sizeof(tmp)) {
+ /*
+ * Do a load from the previously rigged location
+ * This should generate an error interrupt.
+ */
+ memcpy(tmp, addr + offs, cline_size);
+ asm volatile("dsb ld\n");
+ }
+ }
+
+ __free_pages(lmc->mem, 0);
+
+ return count;
+}
+
+LMC_DEBUGFS_ENT(mask0);
+LMC_DEBUGFS_ENT(mask2);
+LMC_DEBUGFS_ENT(parity_test);
+
+DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
+DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
+DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);
+
+struct debugfs_entry *lmc_dfs_ents[] = {
+ &debugfs_mask0,
+ &debugfs_mask2,
+ &debugfs_parity_test,
+ &debugfs_inject_ecc,
+ &debugfs_inject_int,
+ &debugfs_int_w1c,
+};
+
+static int thunderx_create_debugfs_nodes(struct dentry *parent,
+ struct debugfs_entry *attrs[],
+ void *data,
+ size_t num)
+{
+ int i;
+ struct dentry *ent;
+
+ if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
+ return 0;
+
+ if (!parent)
+ return -ENOENT;
+
+ for (i = 0; i < num; i++) {
+ ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode,
+ parent, data, &attrs[i]->fops);
+
+ if (!ent)
+ break;
+ }
+
+ return i;
+}
+
+static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc)
+{
+ phys_addr_t addr = 0;
+ int bank, xbits;
+
+ addr |= lmc->node << 40;
+ addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb;
+ addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb;
+ addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb;
+ addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb;
+
+ bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb;
+
+ if (lmc->xor_bank)
+ bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width);
+
+ addr |= bank << lmc->bank_lsb;
+
+ xbits = PCI_FUNC(lmc->pdev->devfn);
+
+ if (lmc->l2c_alias)
+ xbits ^= get_bits(addr, 20, lmc->xbits) ^
+ get_bits(addr, 12, lmc->xbits);
+
+ addr |= xbits << 7;
+
+ return addr;
+}
+
+static unsigned int thunderx_get_num_lmcs(unsigned int node)
+{
+ unsigned int number = 0;
+ struct pci_dev *pdev = NULL;
+
+ do {
+ pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
+ PCI_DEVICE_ID_THUNDER_LMC,
+ pdev);
+ if (pdev) {
+#ifdef CONFIG_NUMA
+ if (pdev->dev.numa_node == node)
+ number++;
+#else
+ number++;
+#endif
+ }
+ } while (pdev);
+
+ return number;
+}
+
+#define LMC_MESSAGE_SIZE 120
+#define LMC_OTHER_SIZE (50 * ARRAY_SIZE(lmc_errors))
+
+static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id)
+{
+ struct mem_ctl_info *mci = dev_id;
+ struct thunderx_lmc *lmc = mci->pvt_info;
+
+ unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx));
+ struct lmc_err_ctx *ctx = &lmc->err_ctx[head];
+
+ writeq(0, lmc->regs + LMC_CHAR_MASK0);
+ writeq(0, lmc->regs + LMC_CHAR_MASK2);
+ writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST);
+
+ ctx->reg_int = readq(lmc->regs + LMC_INT);
+ ctx->reg_fadr = readq(lmc->regs + LMC_FADR);
+ ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR);
+ ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR);
+ ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND);
+
+ lmc->ring_head++;
+
+ atomic_set(&lmc->ecc_int, 1);
+
+ /* Clear the interrupt */
+ writeq(ctx->reg_int, lmc->regs + LMC_INT);
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id)
+{
+ struct mem_ctl_info *mci = dev_id;
+ struct thunderx_lmc *lmc = mci->pvt_info;
+ phys_addr_t phys_addr;
+
+ unsigned long tail;
+ struct lmc_err_ctx *ctx;
+
+ irqreturn_t ret = IRQ_NONE;
+
+ char *msg;
+ char *other;
+
+ msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL);
+ other = kmalloc(LMC_OTHER_SIZE, GFP_KERNEL);
+
+ if (!msg || !other)
+ goto err_free;
+
+ while (CIRC_CNT(lmc->ring_head, lmc->ring_tail,
+ ARRAY_SIZE(lmc->err_ctx))) {
+ tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx));
+
+ ctx = &lmc->err_ctx[tail];
+
+ dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n",
+ ctx->reg_int);
+ dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n",
+ ctx->reg_fadr);
+ dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n",
+ ctx->reg_nxm_fadr);
+ dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n",
+ ctx->reg_scram_fadr);
+ dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n",
+ ctx->reg_ecc_synd);
+
+ snprintf(msg, LMC_MESSAGE_SIZE,
+ "DIMM %lld rank %lld bank %lld row %lld col %lld",
+ LMC_FADR_FDIMM(ctx->reg_scram_fadr),
+ LMC_FADR_FBUNK(ctx->reg_scram_fadr),
+ LMC_FADR_FBANK(ctx->reg_scram_fadr),
+ LMC_FADR_FROW(ctx->reg_scram_fadr),
+ LMC_FADR_FCOL(ctx->reg_scram_fadr));
+
+ decode_register(other, LMC_OTHER_SIZE, lmc_errors,
+ ctx->reg_int);
+
+ phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc);
+
+ if (ctx->reg_int & LMC_INT_UE)
+ edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+ phys_to_pfn(phys_addr),
+ offset_in_page(phys_addr),
+ 0, -1, -1, -1, msg, other);
+ else if (ctx->reg_int & LMC_INT_CE)
+ edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+ phys_to_pfn(phys_addr),
+ offset_in_page(phys_addr),
+ 0, -1, -1, -1, msg, other);
+
+ lmc->ring_tail++;
+ }
+
+ ret = IRQ_HANDLED;
+
+err_free:
+ kfree(msg);
+ kfree(other);
+
+ return ret;
+}
+
+#ifdef CONFIG_PM
+static int thunderx_lmc_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ pci_save_state(pdev);
+ pci_disable_device(pdev);
+
+ pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+ return 0;
+}
+
+static int thunderx_lmc_resume(struct pci_dev *pdev)
+{
+ pci_set_power_state(pdev, PCI_D0);
+ pci_enable_wake(pdev, PCI_D0, 0);
+ pci_restore_state(pdev);
+
+ return 0;
+}
+#endif
+
+static const struct pci_device_id thunderx_lmc_pci_tbl[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
+ { 0, },
+};
+
+static inline int pci_dev_to_mc_idx(struct pci_dev *pdev)
+{
+ int node = dev_to_node(&pdev->dev);
+ int ret = PCI_FUNC(pdev->devfn);
+
+ ret += max(node, 0) << 8;
+
+ return ret;
+}
+
+static int thunderx_lmc_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct thunderx_lmc *lmc;
+ struct edac_mc_layer layer;
+ struct mem_ctl_info *mci;
+ u64 lmc_control, lmc_ddr_pll_ctl, lmc_config;
+ int ret;
+ u64 lmc_int;
+ void *l2c_ioaddr;
+
+ layer.type = EDAC_MC_LAYER_SLOT;
+ layer.size = 2;
+ layer.is_virt_csrow = false;
+
+ ret = pcim_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
+ return ret;
+ }
+
+ ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc");
+ if (ret) {
+ dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
+ return ret;
+ }
+
+ mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer,
+ sizeof(struct thunderx_lmc));
+ if (!mci)
+ return -ENOMEM;
+
+ mci->pdev = &pdev->dev;
+ lmc = mci->pvt_info;
+
+ pci_set_drvdata(pdev, mci);
+
+ lmc->regs = pcim_iomap_table(pdev)[0];
+
+ lmc_control = readq(lmc->regs + LMC_CONTROL);
+ lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL);
+ lmc_config = readq(lmc->regs + LMC_CONFIG);
+
+ if (lmc_control & LMC_CONTROL_RDIMM) {
+ mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
+ lmc_ddr_pll_ctl) ?
+ MEM_RDDR4 : MEM_RDDR3;
+ } else {
+ mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
+ lmc_ddr_pll_ctl) ?
+ MEM_DDR4 : MEM_DDR3;
+ }
+
+ mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+ mci->edac_cap = EDAC_FLAG_SECDED;
+
+ mci->mod_name = "thunderx-lmc";
+ mci->mod_ver = "1";
+ mci->ctl_name = "thunderx-lmc";
+ mci->dev_name = dev_name(&pdev->dev);
+ mci->scrub_mode = SCRUB_NONE;
+
+ lmc->pdev = pdev;
+ lmc->msix_ent.entry = 0;
+
+ lmc->ring_head = 0;
+ lmc->ring_tail = 0;
+
+ ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1);
+ if (ret) {
+ dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
+ goto err_free;
+ }
+
+ ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector,
+ thunderx_lmc_err_isr,
+ thunderx_lmc_threaded_isr, 0,
+ "[EDAC] ThunderX LMC", mci);
+ if (ret) {
+ dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
+ goto err_free;
+ }
+
+ lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0));
+
+ lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1;
+ lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) &&
+ FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3;
+
+ lmc->pbank_lsb = (lmc_config >> 5) & 0xf;
+ lmc->dimm_lsb = 28 + lmc->pbank_lsb + lmc->xbits;
+ lmc->rank_lsb = lmc->dimm_lsb;
+ lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0;
+ lmc->bank_lsb = 7 + lmc->xbits;
+ lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits;
+
+ lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width;
+
+ lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK;
+
+ l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node),
+ PAGE_SIZE);
+
+ if (!l2c_ioaddr) {
+ dev_err(&pdev->dev, "Cannot map L2C_CTL\n");
+ goto err_free;
+ }
+
+ lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS);
+
+ iounmap(l2c_ioaddr);
+
+ ret = edac_mc_add_mc(mci);
+ if (ret) {
+ dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret);
+ goto err_free;
+ }
+
+ lmc_int = readq(lmc->regs + LMC_INT);
+ writeq(lmc_int, lmc->regs + LMC_INT);
+
+ writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S);
+
+ if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
+ ret = thunderx_create_debugfs_nodes(mci->debugfs,
+ lmc_dfs_ents,
+ lmc,
+ ARRAY_SIZE(lmc_dfs_ents));
+
+ if (ret != ARRAY_SIZE(lmc_dfs_ents)) {
+ dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
+ ret, ret >= 0 ? " created" : "");
+ }
+ }
+
+ return 0;
+
+err_free:
+ pci_set_drvdata(pdev, NULL);
+ edac_mc_free(mci);
+
+ return ret;
+}
+
+static void thunderx_lmc_remove(struct pci_dev *pdev)
+{
+ struct mem_ctl_info *mci = pci_get_drvdata(pdev);
+ struct thunderx_lmc *lmc = mci->pvt_info;
+
+ writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C);
+
+ edac_mc_del_mc(&pdev->dev);
+ edac_mc_free(mci);
+}
+
+MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl);
+
+static struct pci_driver thunderx_lmc_driver = {
+ .name = "thunderx_lmc_edac",
+ .probe = thunderx_lmc_probe,
+ .remove = thunderx_lmc_remove,
+#ifdef CONFIG_PM
+ .suspend = thunderx_lmc_suspend,
+ .resume = thunderx_lmc_resume,
+#endif
+ .id_table = thunderx_lmc_pci_tbl,
+};
+
+/*---------------------- OCX driver ---------------------------------*/
+
+#define PCI_DEVICE_ID_THUNDER_OCX 0xa013
+
+#define OCX_LINK_INTS 3
+#define OCX_INTS (OCX_LINK_INTS + 1)
+#define OCX_RX_LANES 24
+#define OCX_RX_LANE_STATS 15
+
+#define OCX_COM_INT 0x100
+#define OCX_COM_INT_W1S 0x108
+#define OCX_COM_INT_ENA_W1S 0x110
+#define OCX_COM_INT_ENA_W1C 0x118
+
+#define OCX_COM_IO_BADID BIT(54)
+#define OCX_COM_MEM_BADID BIT(53)
+#define OCX_COM_COPR_BADID BIT(52)
+#define OCX_COM_WIN_REQ_BADID BIT(51)
+#define OCX_COM_WIN_REQ_TOUT BIT(50)
+#define OCX_COM_RX_LANE GENMASK(23, 0)
+
+#define OCX_COM_INT_UE (0)
+
+#define OCX_COM_INT_CE (OCX_COM_IO_BADID | \
+ OCX_COM_MEM_BADID | \
+ OCX_COM_COPR_BADID | \
+ OCX_COM_WIN_REQ_BADID | \
+ OCX_COM_WIN_REQ_TOUT)
+
+static const struct error_descr ocx_com_errors[] = {
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_COM_IO_BADID,
+ .descr = "Invalid IO transaction node ID",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_COM_MEM_BADID,
+ .descr = "Invalid memory transaction node ID",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_COM_COPR_BADID,
+ .descr = "Invalid coprocessor transaction node ID",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_COM_WIN_REQ_BADID,
+ .descr = "Invalid SLI transaction node ID",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_COM_WIN_REQ_TOUT,
+ .descr = "Window/core request timeout",
+ },
+ {0, 0, NULL},
+};
+
+#define OCX_COM_LINKX_INT(x) (0x120 + (x) * 8)
+#define OCX_COM_LINKX_INT_W1S(x) (0x140 + (x) * 8)
+#define OCX_COM_LINKX_INT_ENA_W1S(x) (0x160 + (x) * 8)
+#define OCX_COM_LINKX_INT_ENA_W1C(x) (0x180 + (x) * 8)
+
+#define OCX_COM_LINK_BAD_WORD BIT(13)
+#define OCX_COM_LINK_ALIGN_FAIL BIT(12)
+#define OCX_COM_LINK_ALIGN_DONE BIT(11)
+#define OCX_COM_LINK_UP BIT(10)
+#define OCX_COM_LINK_STOP BIT(9)
+#define OCX_COM_LINK_BLK_ERR BIT(8)
+#define OCX_COM_LINK_REINIT BIT(7)
+#define OCX_COM_LINK_LNK_DATA BIT(6)
+#define OCX_COM_LINK_RXFIFO_DBE BIT(5)
+#define OCX_COM_LINK_RXFIFO_SBE BIT(4)
+#define OCX_COM_LINK_TXFIFO_DBE BIT(3)
+#define OCX_COM_LINK_TXFIFO_SBE BIT(2)
+#define OCX_COM_LINK_REPLAY_DBE BIT(1)
+#define OCX_COM_LINK_REPLAY_SBE BIT(0)
+
+static const struct error_descr ocx_com_link_errors[] = {
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_COM_LINK_REPLAY_SBE,
+ .descr = "Replay buffer single-bit error",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_COM_LINK_TXFIFO_SBE,
+ .descr = "TX FIFO single-bit error",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_COM_LINK_RXFIFO_SBE,
+ .descr = "RX FIFO single-bit error",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_COM_LINK_BLK_ERR,
+ .descr = "Block code error",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_COM_LINK_ALIGN_FAIL,
+ .descr = "Link alignment failure",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_COM_LINK_BAD_WORD,
+ .descr = "Bad code word",
+ },
+ {
+ .type = ERR_UNCORRECTED,
+ .mask = OCX_COM_LINK_REPLAY_DBE,
+ .descr = "Replay buffer double-bit error",
+ },
+ {
+ .type = ERR_UNCORRECTED,
+ .mask = OCX_COM_LINK_TXFIFO_DBE,
+ .descr = "TX FIFO double-bit error",
+ },
+ {
+ .type = ERR_UNCORRECTED,
+ .mask = OCX_COM_LINK_RXFIFO_DBE,
+ .descr = "RX FIFO double-bit error",
+ },
+ {
+ .type = ERR_UNCORRECTED,
+ .mask = OCX_COM_LINK_STOP,
+ .descr = "Link stopped",
+ },
+ {0, 0, NULL},
+};
+
+#define OCX_COM_LINK_INT_UE (OCX_COM_LINK_REPLAY_DBE | \
+ OCX_COM_LINK_TXFIFO_DBE | \
+ OCX_COM_LINK_RXFIFO_DBE | \
+ OCX_COM_LINK_STOP)
+
+#define OCX_COM_LINK_INT_CE (OCX_COM_LINK_REPLAY_SBE | \
+ OCX_COM_LINK_TXFIFO_SBE | \
+ OCX_COM_LINK_RXFIFO_SBE | \
+ OCX_COM_LINK_BLK_ERR | \
+ OCX_COM_LINK_ALIGN_FAIL | \
+ OCX_COM_LINK_BAD_WORD)
+
+#define OCX_LNE_INT(x) (0x8018 + (x) * 0x100)
+#define OCX_LNE_INT_EN(x) (0x8020 + (x) * 0x100)
+#define OCX_LNE_BAD_CNT(x) (0x8028 + (x) * 0x100)
+#define OCX_LNE_CFG(x) (0x8000 + (x) * 0x100)
+#define OCX_LNE_STAT(x, y) (0x8040 + (x) * 0x100 + (y) * 8)
+
+#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS BIT(8)
+#define OCX_LNE_CFG_RX_STAT_WRAP_DIS BIT(2)
+#define OCX_LNE_CFG_RX_STAT_RDCLR BIT(1)
+#define OCX_LNE_CFG_RX_STAT_ENA BIT(0)
+
+
+#define OCX_LANE_BAD_64B67B BIT(8)
+#define OCX_LANE_DSKEW_FIFO_OVFL BIT(5)
+#define OCX_LANE_SCRM_SYNC_LOSS BIT(4)
+#define OCX_LANE_UKWN_CNTL_WORD BIT(3)
+#define OCX_LANE_CRC32_ERR BIT(2)
+#define OCX_LANE_BDRY_SYNC_LOSS BIT(1)
+#define OCX_LANE_SERDES_LOCK_LOSS BIT(0)
+
+#define OCX_COM_LANE_INT_UE (0)
+#define OCX_COM_LANE_INT_CE (OCX_LANE_SERDES_LOCK_LOSS | \
+ OCX_LANE_BDRY_SYNC_LOSS | \
+ OCX_LANE_CRC32_ERR | \
+ OCX_LANE_UKWN_CNTL_WORD | \
+ OCX_LANE_SCRM_SYNC_LOSS | \
+ OCX_LANE_DSKEW_FIFO_OVFL | \
+ OCX_LANE_BAD_64B67B)
+
+static const struct error_descr ocx_lane_errors[] = {
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_LANE_SERDES_LOCK_LOSS,
+ .descr = "RX SerDes lock lost",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_LANE_BDRY_SYNC_LOSS,
+ .descr = "RX word boundary lost",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_LANE_CRC32_ERR,
+ .descr = "CRC32 error",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_LANE_UKWN_CNTL_WORD,
+ .descr = "Unknown control word",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_LANE_SCRM_SYNC_LOSS,
+ .descr = "Scrambler synchronization lost",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_LANE_DSKEW_FIFO_OVFL,
+ .descr = "RX deskew FIFO overflow",
+ },
+ {
+ .type = ERR_CORRECTED,
+ .mask = OCX_LANE_BAD_64B67B,
+ .descr = "Bad 64B/67B codeword",
+ },
+ {0, 0, NULL},
+};
+
+#define OCX_LNE_INT_ENA_ALL (GENMASK(9, 8) | GENMASK(6, 0))
+#define OCX_COM_INT_ENA_ALL (GENMASK(54, 50) | GENMASK(23, 0))
+#define OCX_COM_LINKX_INT_ENA_ALL (GENMASK(13, 12) | \
+ GENMASK(9, 7) | GENMASK(5, 0))
+
+#define OCX_TLKX_ECC_CTL(x) (0x10018 + (x) * 0x2000)
+#define OCX_RLKX_ECC_CTL(x) (0x18018 + (x) * 0x2000)
+
+struct ocx_com_err_ctx {
+ u64 reg_com_int;
+ u64 reg_lane_int[OCX_RX_LANES];
+ u64 reg_lane_stat11[OCX_RX_LANES];
+};
+
+struct ocx_link_err_ctx {
+ u64 reg_com_link_int;
+ int link;
+};
+
+struct thunderx_ocx {
+ void __iomem *regs;
+ int com_link;
+ struct pci_dev *pdev;
+ struct edac_device_ctl_info *edac_dev;
+
+ struct dentry *debugfs;
+ struct msix_entry msix_ent[OCX_INTS];
+
+ struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES];
+ struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES];
+
+ unsigned long com_ring_head;
+ unsigned long com_ring_tail;
+
+ unsigned long link_ring_head;
+ unsigned long link_ring_tail;
+};
+
+#define OCX_MESSAGE_SIZE SZ_1K
+#define OCX_OTHER_SIZE (50 * ARRAY_SIZE(ocx_com_link_errors))
+
+/* This handler is threaded */
+static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id)
+{
+ struct msix_entry *msix = irq_id;
+ struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
+ msix_ent[msix->entry]);
+
+ int lane;
+ unsigned long head = ring_pos(ocx->com_ring_head,
+ ARRAY_SIZE(ocx->com_err_ctx));
+ struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head];
+
+ ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT);
+
+ for (lane = 0; lane < OCX_RX_LANES; lane++) {
+ ctx->reg_lane_int[lane] =
+ readq(ocx->regs + OCX_LNE_INT(lane));
+ ctx->reg_lane_stat11[lane] =
+ readq(ocx->regs + OCX_LNE_STAT(lane, 11));
+
+ writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane));
+ }
+
+ writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT);
+
+ ocx->com_ring_head++;
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
+{
+ struct msix_entry *msix = irq_id;
+ struct thunderx_ocx *ocx = container_of(m