diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-08 09:07:07 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-08 09:07:07 -0800 |
commit | 1b37b8c48d2c2d8553f116ec2a75d21056f1fb35 (patch) | |
tree | adf2855d311440fb4c48b2a96a13bdaae28d2b63 | |
parent | c6400e5cef5eafc259e649ceedc4c7eecc9069d8 (diff) | |
parent | 580b5cf50ca8f4781961382d54959683341b3126 (diff) |
Merge tag 'edac_for_5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
Pull EDAC updates from Borislav Petkov:
- A new EDAC AST 2500 SoC driver (Stefan M Schaeckeler)
- New i10nm EDAC driver for Intel 10nm CPUs (Qiuxu Zhuo and Tony Luck)
- Altera SDRAM functionality carveout for separate enablement of RAS
and SDRAM capabilities on some Altera chips. (Thor Thayer)
- The usual round of cleanups and fixes
And last but not least: recruit James Morse as a reviewer for the ARM
side.
* tag 'edac_for_5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
EDAC/altera: Add separate SDRAM EDAC config
EDAC, altera: Add missing of_node_put()
EDAC, skx_common: Add code to recognise new compound error code
EDAC, i10nm: Fix randconfig builds
EDAC, i10nm: Add a driver for Intel 10nm server processors
EDAC, skx_edac: Delete duplicated code
EDAC, skx_common: Separate common code out from skx_edac
EDAC: Do not check return value of debugfs_create() functions
EDAC: Add James Morse as a reviewer
dt-bindings, EDAC: Add Aspeed AST2500
EDAC, aspeed: Add an Aspeed AST2500 EDAC driver
-rw-r--r-- | Documentation/devicetree/bindings/edac/aspeed-sdram-edac.txt | 25 | ||||
-rw-r--r-- | MAINTAINERS | 7 | ||||
-rw-r--r-- | arch/arm/boot/dts/aspeed-g5.dtsi | 7 | ||||
-rw-r--r-- | drivers/edac/Kconfig | 35 | ||||
-rw-r--r-- | drivers/edac/Makefile | 8 | ||||
-rw-r--r-- | drivers/edac/altera_edac.c | 72 | ||||
-rw-r--r-- | drivers/edac/aspeed_edac.c | 421 | ||||
-rw-r--r-- | drivers/edac/debugfs.c | 48 | ||||
-rw-r--r-- | drivers/edac/edac_module.h | 8 | ||||
-rw-r--r-- | drivers/edac/i10nm_base.c | 275 | ||||
-rw-r--r-- | drivers/edac/skx_base.c | 650 | ||||
-rw-r--r-- | drivers/edac/skx_common.c | 691 | ||||
-rw-r--r-- | drivers/edac/skx_common.h | 152 | ||||
-rw-r--r-- | drivers/edac/skx_edac.c | 1358 |
14 files changed, 2323 insertions, 1434 deletions
diff --git a/Documentation/devicetree/bindings/edac/aspeed-sdram-edac.txt b/Documentation/devicetree/bindings/edac/aspeed-sdram-edac.txt new file mode 100644 index 000000000000..6a0f3d90d682 --- /dev/null +++ b/Documentation/devicetree/bindings/edac/aspeed-sdram-edac.txt @@ -0,0 +1,25 @@ +Aspeed AST2500 SoC EDAC node + +The Aspeed AST2500 SoC supports DDR3 and DDR4 memory with and without ECC (error +correction check). + +The memory controller supports SECDED (single bit error correction, double bit +error detection) and single bit error auto scrubbing by reserving 8 bits for +every 64 bit word (effectively reducing available memory to 8/9). + +Note, the bootloader must configure ECC mode in the memory controller. + + +Required properties: +- compatible: should be "aspeed,ast2500-sdram-edac" +- reg: sdram controller register set should be <0x1e6e0000 0x174> +- interrupts: should be AVIC interrupt #0 + + +Example: + + edac: sdram@1e6e0000 { + compatible = "aspeed,ast2500-sdram-edac"; + reg = <0x1e6e0000 0x174>; + interrupts = <0>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index 275e9bb5b722..f758445317c5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5503,6 +5503,12 @@ L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/amd64_edac* +EDAC-AST2500 +M: Stefan Schaeckeler <sschaeck@cisco.com> +S: Supported +F: drivers/edac/aspeed_edac.c +F: Documentation/devicetree/bindings/edac/aspeed-sdram-edac.txt + EDAC-CALXEDA M: Robert Richter <rric@kernel.org> L: linux-edac@vger.kernel.org @@ -5527,6 +5533,7 @@ F: drivers/edac/thunderx_edac* EDAC-CORE M: Borislav Petkov <bp@alien8.de> M: Mauro Carvalho Chehab <mchehab@kernel.org> +R: James Morse <james.morse@arm.com> L: linux-edac@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git for-next T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac.git linux_next diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi index 3e4ed081505c..85ed9dbec196 100644 --- a/arch/arm/boot/dts/aspeed-g5.dtsi +++ b/arch/arm/boot/dts/aspeed-g5.dtsi @@ -47,6 +47,13 @@ reg = <0x80000000 0>; }; + edac: sdram@1e6e0000 { + compatible = "aspeed,ast2500-sdram-edac"; + reg = <0x1e6e0000 0x174>; + interrupts = <0>; + status = "disabled"; + }; + ahb { compatible = "simple-bus"; #address-cells = <1>; diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index e286b5b99003..47eb4d13ed5f 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -241,6 +241,18 @@ config EDAC_SKX system has non-volatile DIMMs you should also manually select CONFIG_ACPI_NFIT. +config EDAC_I10NM + tristate "Intel 10nm server Integrated MC" + depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG && ACPI + depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_I10NM can't be y + select DMI + select ACPI_ADXL + help + Support for error detection and correction the Intel + 10nm server Integrated Memory Controllers. If your + system has non-volatile DIMMs you should also manually + select CONFIG_ACPI_NFIT. + config EDAC_PND2 tristate "Intel Pondicherry2" depends on PCI && X86_64 && X86_MCE_INTEL @@ -379,9 +391,17 @@ config EDAC_ALTERA depends on EDAC=y && (ARCH_SOCFPGA || ARCH_STRATIX10) help Support for error detection and correction on the - Altera SOCs. This must be selected for SDRAM ECC. - Note that the preloader must initialize the SDRAM - before loading the kernel. + Altera SOCs. This is the global enable for the + various Altera peripherals. + +config EDAC_ALTERA_SDRAM + bool "Altera SDRAM ECC" + depends on EDAC_ALTERA=y + help + Support for error detection and correction on the + Altera SDRAM Memory for Altera SoCs. Note that the + preloader must initialize the SDRAM before loading + the kernel. config EDAC_ALTERA_L2C bool "Altera L2 Cache ECC" @@ -475,4 +495,13 @@ config EDAC_QCOM For debugging issues having to do with stability and overall system health, you should probably say 'Y' here. +config EDAC_ASPEED + tristate "Aspeed AST 2500 SoC" + depends on MACH_ASPEED_G5 + help + Support for error detection and correction on the Aspeed AST 2500 SoC. + + First, ECC must be configured in the bootloader. Then, this driver + will expose error counters via the EDAC kernel framework. + endif # EDAC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 716096d08ea0..89ad4a84a0f6 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -30,7 +30,6 @@ obj-$(CONFIG_EDAC_I5400) += i5400_edac.o obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o -obj-$(CONFIG_EDAC_SKX) += skx_edac.o obj-$(CONFIG_EDAC_PND2) += pnd2_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o @@ -58,6 +57,12 @@ obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac_mod.o layerscape_edac_mod-y := fsl_ddr_edac.o layerscape_edac.o obj-$(CONFIG_EDAC_LAYERSCAPE) += layerscape_edac_mod.o +skx_edac-y := skx_common.o skx_base.o +obj-$(CONFIG_EDAC_SKX) += skx_edac.o + +i10nm_edac-y := skx_common.o i10nm_base.o +obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o + obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o obj-$(CONFIG_EDAC_CELL) += cell_edac.o obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o @@ -78,3 +83,4 @@ obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o obj-$(CONFIG_EDAC_TI) += ti_edac.o obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o +obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index c89d82aa2776..1bcf9aea0cdf 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -29,6 +29,7 @@ #define EDAC_MOD_STR "altera_edac" #define EDAC_DEVICE "Altera" +#ifdef CONFIG_EDAC_ALTERA_SDRAM static const struct altr_sdram_prv_data c5_data = { .ecc_ctrl_offset = CV_CTLCFG_OFST, .ecc_ctl_en_mask = CV_CTLCFG_ECC_AUTO_EN, @@ -468,6 +469,39 @@ static int altr_sdram_remove(struct platform_device *pdev) return 0; } +/* + * If you want to suspend, need to disable EDAC by removing it + * from the device tree or defconfig. + */ +#ifdef CONFIG_PM +static int altr_sdram_prepare(struct device *dev) +{ + pr_err("Suspend not allowed when EDAC is enabled.\n"); + + return -EPERM; +} + +static const struct dev_pm_ops altr_sdram_pm_ops = { + .prepare = altr_sdram_prepare, +}; +#endif + +static struct platform_driver altr_sdram_edac_driver = { + .probe = altr_sdram_probe, + .remove = altr_sdram_remove, + .driver = { + .name = "altr_sdram_edac", +#ifdef CONFIG_PM + .pm = &altr_sdram_pm_ops, +#endif + .of_match_table = altr_sdram_ctrl_of_match, + }, +}; + +module_platform_driver(altr_sdram_edac_driver); + +#endif /* CONFIG_EDAC_ALTERA_SDRAM */ + /**************** Stratix 10 EDAC Memory Controller Functions ************/ /** @@ -530,37 +564,6 @@ static const struct regmap_config s10_sdram_regmap_cfg = { /************** </Stratix10 EDAC Memory Controller Functions> ***********/ -/* - * If you want to suspend, need to disable EDAC by removing it - * from the device tree or defconfig. - */ -#ifdef CONFIG_PM -static int altr_sdram_prepare(struct device *dev) -{ - pr_err("Suspend not allowed when EDAC is enabled.\n"); - - return -EPERM; -} - -static const struct dev_pm_ops altr_sdram_pm_ops = { - .prepare = altr_sdram_prepare, -}; -#endif - -static struct platform_driver altr_sdram_edac_driver = { - .probe = altr_sdram_probe, - .remove = altr_sdram_remove, - .driver = { - .name = "altr_sdram_edac", -#ifdef CONFIG_PM - .pm = &altr_sdram_pm_ops, -#endif - .of_match_table = altr_sdram_ctrl_of_match, - }, -}; - -module_platform_driver(altr_sdram_edac_driver); - /************************* EDAC Parent Probe *************************/ static const struct of_device_id altr_edac_device_of_match[]; @@ -1046,14 +1049,17 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, return -ENODEV; } - if (of_address_to_resource(sysmgr_np, 0, &res)) + if (of_address_to_resource(sysmgr_np, 0, &res)) { + of_node_put(sysmgr_np); return -ENOMEM; + } /* Need physical address for SMCC call */ base = res.start; ecc_mgr_map = regmap_init(NULL, NULL, (void *)base, &s10_sdram_regmap_cfg); + of_node_put(sysmgr_np); } of_node_put(np_eccmgr); if (IS_ERR(ecc_mgr_map)) { @@ -2140,11 +2146,13 @@ static int altr_edac_a10_probe(struct platform_device *pdev) altr_edac_a10_device_add(edac, child); +#ifdef CONFIG_EDAC_ALTERA_SDRAM else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) || (of_device_is_compatible(child, "altr,sdram-edac-s10"))) of_platform_populate(pdev->dev.of_node, altr_sdram_ctrl_of_match, NULL, &pdev->dev); +#endif } return 0; diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c new file mode 100644 index 000000000000..11833c0a5d07 --- /dev/null +++ b/drivers/edac/aspeed_edac.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2018, 2019 Cisco Systems + */ + +#include <linux/edac.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/stop_machine.h> +#include <linux/io.h> +#include <linux/of_address.h> +#include <linux/regmap.h> +#include "edac_module.h" + + +#define DRV_NAME "aspeed-edac" + + +#define ASPEED_MCR_PROT 0x00 /* protection key register */ +#define ASPEED_MCR_CONF 0x04 /* configuration register */ +#define ASPEED_MCR_INTR_CTRL 0x50 /* interrupt control/status register */ +#define ASPEED_MCR_ADDR_UNREC 0x58 /* address of first un-recoverable error */ +#define ASPEED_MCR_ADDR_REC 0x5c /* address of last recoverable error */ +#define ASPEED_MCR_LAST ASPEED_MCR_ADDR_REC + + +#define ASPEED_MCR_PROT_PASSWD 0xfc600309 +#define ASPEED_MCR_CONF_DRAM_TYPE BIT(4) +#define ASPEED_MCR_CONF_ECC BIT(7) +#define ASPEED_MCR_INTR_CTRL_CLEAR BIT(31) +#define ASPEED_MCR_INTR_CTRL_CNT_REC GENMASK(23, 16) +#define ASPEED_MCR_INTR_CTRL_CNT_UNREC GENMASK(15, 12) +#define ASPEED_MCR_INTR_CTRL_ENABLE (BIT(0) | BIT(1)) + + +static struct regmap *aspeed_regmap; + + +static int regmap_reg_write(void *context, unsigned int reg, unsigned int val) +{ + void __iomem *regs = (void __iomem *)context; + + /* enable write to MCR register set */ + writel(ASPEED_MCR_PROT_PASSWD, regs + ASPEED_MCR_PROT); + + writel(val, regs + reg); + + /* disable write to MCR register set */ + writel(~ASPEED_MCR_PROT_PASSWD, regs + ASPEED_MCR_PROT); + + return 0; +} + + +static int regmap_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + void __iomem *regs = (void __iomem *)context; + + *val = readl(regs + reg); + + return 0; +} + +static bool regmap_is_volatile(struct device *dev, unsigned int reg) +{ + switch (reg) { + case ASPEED_MCR_PROT: + case ASPEED_MCR_INTR_CTRL: + case ASPEED_MCR_ADDR_UNREC: + case ASPEED_MCR_ADDR_REC: + return true; + default: + return false; + } +} + + +static const struct regmap_config aspeed_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = ASPEED_MCR_LAST, + .reg_write = regmap_reg_write, + .reg_read = regmap_reg_read, + .volatile_reg = regmap_is_volatile, + .fast_io = true, +}; + + +static void count_rec(struct mem_ctl_info *mci, u8 rec_cnt, u32 rec_addr) +{ + struct csrow_info *csrow = mci->csrows[0]; + u32 page, offset, syndrome; + + if (!rec_cnt) + return; + + /* report first few errors (if there are) */ + /* note: no addresses are recorded */ + if (rec_cnt > 1) { + /* page, offset and syndrome are not available */ + page = 0; + offset = 0; + syndrome = 0; + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, rec_cnt-1, + page, offset, syndrome, 0, 0, -1, + "address(es) not available", ""); + } + + /* report last error */ + /* note: rec_addr is the last recoverable error addr */ + page = rec_addr >> PAGE_SHIFT; + offset = rec_addr & ~PAGE_MASK; + /* syndrome is not available */ + syndrome = 0; + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + csrow->first_page + page, offset, syndrome, + 0, 0, -1, "", ""); +} + + +static void count_un_rec(struct mem_ctl_info *mci, u8 un_rec_cnt, + u32 un_rec_addr) +{ + struct csrow_info *csrow = mci->csrows[0]; + u32 page, offset, syndrome; + + if (!un_rec_cnt) + return; + + /* report 1. error */ + /* note: un_rec_addr is the first unrecoverable error addr */ + page = un_rec_addr >> PAGE_SHIFT; + offset = un_rec_addr & ~PAGE_MASK; + /* syndrome is not available */ + syndrome = 0; + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + csrow->first_page + page, offset, syndrome, + 0, 0, -1, "", ""); + + /* report further errors (if there are) */ + /* note: no addresses are recorded */ + if (un_rec_cnt > 1) { + /* page, offset and syndrome are not available */ + page = 0; + offset = 0; + syndrome = 0; + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, un_rec_cnt-1, + page, offset, syndrome, 0, 0, -1, + "address(es) not available", ""); + } +} + + +static irqreturn_t mcr_isr(int irq, void *arg) +{ + struct mem_ctl_info *mci = arg; + u32 rec_addr, un_rec_addr; + u32 reg50, reg5c, reg58; + u8 rec_cnt, un_rec_cnt; + + regmap_read(aspeed_regmap, ASPEED_MCR_INTR_CTRL, ®50); + dev_dbg(mci->pdev, "received edac interrupt w/ mcr register 50: 0x%x\n", + reg50); + + /* collect data about recoverable and unrecoverable errors */ + rec_cnt = (reg50 & ASPEED_MCR_INTR_CTRL_CNT_REC) >> 16; + un_rec_cnt = (reg50 & ASPEED_MCR_INTR_CTRL_CNT_UNREC) >> 12; + + dev_dbg(mci->pdev, "%d recoverable interrupts and %d unrecoverable interrupts\n", + rec_cnt, un_rec_cnt); + + regmap_read(aspeed_regmap, ASPEED_MCR_ADDR_UNREC, ®58); + un_rec_addr = reg58; + + regmap_read(aspeed_regmap, ASPEED_MCR_ADDR_REC, ®5c); + rec_addr = reg5c; + + /* clear interrupt flags and error counters: */ + regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL, + ASPEED_MCR_INTR_CTRL_CLEAR, + ASPEED_MCR_INTR_CTRL_CLEAR); + + regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL, + ASPEED_MCR_INTR_CTRL_CLEAR, 0); + + /* process recoverable and unrecoverable errors */ + count_rec(mci, rec_cnt, rec_addr); + count_un_rec(mci, un_rec_cnt, un_rec_addr); + + if (!rec_cnt && !un_rec_cnt) + dev_dbg(mci->pdev, "received edac interrupt, but did not find any ECC counters\n"); + + regmap_read(aspeed_regmap, ASPEED_MCR_INTR_CTRL, ®50); + dev_dbg(mci->pdev, "edac interrupt handled. mcr reg 50 is now: 0x%x\n", + reg50); + + return IRQ_HANDLED; +} + + +static int config_irq(void *ctx, struct platform_device *pdev) +{ + int irq; + int rc; + + /* register interrupt handler */ + irq = platform_get_irq(pdev, 0); + dev_dbg(&pdev->dev, "got irq %d\n", irq); + if (!irq) + return -ENODEV; + + rc = devm_request_irq(&pdev->dev, irq, mcr_isr, IRQF_TRIGGER_HIGH, + DRV_NAME, ctx); + if (rc) { + dev_err(&pdev->dev, "unable to request irq %d\n", irq); + return rc; + } + + /* enable interrupts */ + regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL, + ASPEED_MCR_INTR_CTRL_ENABLE, + ASPEED_MCR_INTR_CTRL_ENABLE); + + return 0; +} + + +static int init_csrows(struct mem_ctl_info *mci) +{ + struct csrow_info *csrow = mci->csrows[0]; + u32 nr_pages, dram_type; + struct dimm_info *dimm; + struct device_node *np; + struct resource r; + u32 reg04; + int rc; + + /* retrieve info about physical memory from device tree */ + np = of_find_node_by_path("/memory"); + if (!np) { + dev_err(mci->pdev, "dt: missing /memory node\n"); + return -ENODEV; + }; + + rc = of_address_to_resource(np, 0, &r); + + of_node_put(np); + + if (rc) { + dev_err(mci->pdev, "dt: failed requesting resource for /memory node\n"); + return rc; + }; + + dev_dbg(mci->pdev, "dt: /memory node resources: first page r.start=0x%x, resource_size=0x%x, PAGE_SHIFT macro=0x%x\n", + r.start, resource_size(&r), PAGE_SHIFT); + + csrow->first_page = r.start >> PAGE_SHIFT; + nr_pages = resource_size(&r) >> PAGE_SHIFT; + csrow->last_page = csrow->first_page + nr_pages - 1; + + regmap_read(aspeed_regmap, ASPEED_MCR_CONF, ®04); + dram_type = (reg04 & ASPEED_MCR_CONF_DRAM_TYPE) ? MEM_DDR4 : MEM_DDR3; + + dimm = csrow->channels[0]->dimm; + dimm->mtype = dram_type; + dimm->edac_mode = EDAC_SECDED; + dimm->nr_pages = nr_pages / csrow->nr_channels; + + dev_dbg(mci->pdev, "initialized dimm with first_page=0x%lx and nr_pages=0x%x\n", + csrow->first_page, nr_pages); + + return 0; +} + + +static int aspeed_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct edac_mc_layer layers[2]; + struct mem_ctl_info *mci; + struct device_node *np; + struct resource *res; + void __iomem *regs; + u32 reg04; + int rc; + + /* setup regmap */ + np = dev->of_node; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENOENT; + + regs = devm_ioremap_resource(dev, res); + if (IS_ERR(regs)) + return PTR_ERR(regs); + + aspeed_regmap = devm_regmap_init(dev, NULL, (__force void *)regs, + &aspeed_regmap_config); + if (IS_ERR(aspeed_regmap)) + return PTR_ERR(aspeed_regmap); + + /* bail out if ECC mode is not configured */ + regmap_read(aspeed_regmap, ASPEED_MCR_CONF, ®04); + if (!(reg04 & ASPEED_MCR_CONF_ECC)) { + dev_err(&pdev->dev, "ECC mode is not configured in u-boot\n"); + return -EPERM; + } + + edac_op_state = EDAC_OPSTATE_INT; + + /* allocate & init EDAC MC data structure */ + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 1; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 1; + layers[1].is_virt_csrow = false; + + mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0); + if (!mci) + return -ENOMEM; + + mci->pdev = &pdev->dev; + mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4; + mci->edac_ctl_cap = EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->scrub_cap = SCRUB_FLAG_HW_SRC; + mci->scrub_mode = SCRUB_HW_SRC; + mci->mod_name = DRV_NAME; + mci->ctl_name = "MIC"; + mci->dev_name = dev_name(&pdev->dev); + + rc = init_csrows(mci); + if (rc) { + dev_err(&pdev->dev, "failed to init csrows\n"); + goto probe_exit02; + } + + platform_set_drvdata(pdev, mci); + + /* register with edac core */ + rc = edac_mc_add_mc(mci); + if (rc) { + dev_err(&pdev->dev, "failed to register with EDAC core\n"); + goto probe_exit02; + } + + /* register interrupt handler and enable interrupts */ + rc = config_irq(mci, pdev); + if (rc) { + dev_err(&pdev->dev, "failed setting up irq\n"); + goto probe_exit01; + } + + return 0; + +probe_exit01: + edac_mc_del_mc(&pdev->dev); +probe_exit02: + edac_mc_free(mci); + return rc; +} + + +static int aspeed_remove(struct platform_device *pdev) +{ + struct mem_ctl_info *mci; + + /* disable interrupts */ + regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL, + ASPEED_MCR_INTR_CTRL_ENABLE, 0); + + /* free resources */ + mci = edac_mc_del_mc(&pdev->dev); + if (mci) + edac_mc_free(mci); + + return 0; +} + + +static const struct of_device_id aspeed_of_match[] = { + { .compatible = "aspeed,ast2500-sdram-edac" }, + {}, +}; + + +static struct platform_driver aspeed_driver = { + .driver = { + .name = DRV_NAME, + .of_match_table = aspeed_of_match + }, + .probe = aspeed_probe, + .remove = aspeed_remove +}; + + +static int __init aspeed_init(void) +{ + return platform_driver_register(&aspeed_driver); +} + + +static void __exit aspeed_exit(void) +{ + platform_driver_unregister(&aspeed_driver); +} + + +module_init(aspeed_init); +module_exit(aspeed_exit); + + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Stefan Schaeckeler <sschaeck@cisco.com>"); +MODULE_DESCRIPTION("Aspeed AST2500 EDAC driver"); +MODULE_VERSION("1.0"); diff --git a/drivers/edac/debugfs.c b/drivers/edac/debugfs.c index 92dbb7e2320c..0a9277228c50 100644 --- a/drivers/edac/debugfs.c +++ b/drivers/edac/debugfs.c @@ -41,14 +41,9 @@ static const struct file_operations debug_fake_inject_fops = { .llseek = generic_file_llseek, }; -int __init edac_debugfs_init(void) +void __init edac_debugfs_init(void) { edac_debugfs = debugfs_create_dir("edac", NULL); - if (IS_ERR(edac_debugfs)) { - edac_debugfs = NULL; - return -ENOMEM; - } - return 0; } void edac_debugfs_exit(void) @@ -56,50 +51,31 @@ void edac_debugfs_exit(void) debugfs_remove_recursive(edac_debugfs); } -int edac_create_debugfs_nodes(struct mem_ctl_info *mci) +void edac_create_debugfs_nodes(struct mem_ctl_info *mci) { - struct dentry *d, *parent; + struct dentry *parent; char name[80]; int i; - if (!edac_debugfs) - return -ENODEV; - - d = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs); - if (!d) - return -ENOMEM; - parent = d; + parent = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs); for (i = 0; i < mci->n_layers; i++) { sprintf(name, "fake_inject_%s", edac_layer_name[mci->layers[i].type]); - d = debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent, - &mci->fake_inject_layer[i]); - if (!d) - goto nomem; + debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_layer[i]); } - d = debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent, - &mci->fake_inject_ue); - if (!d) - goto nomem; + debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_ue); - d = debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent, - &mci->fake_inject_count); - if (!d) - goto nomem; + debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent, + &mci->fake_inject_count); - d = debugfs_create_file("fake_inject", S_IWUSR, parent, - &mci->dev, - &debug_fake_inject_fops); - if (!d) - goto nomem; + debugfs_create_file("fake_inject", S_IWUSR, parent, &mci->dev, + &debug_fake_inject_fops); mci->debugfs = parent; - return 0; -nomem: - edac_debugfs_remove_recursive(mci->debugfs); - return -ENOMEM; } /* Create a toplevel dir under EDAC's debugfs hierarchy */ diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index dec88dcea036..dd7d0b509aa3 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -69,9 +69,9 @@ extern void *edac_align_ptr(void **p, unsigned size, int n_elems); #define edac_debugfs_remove_recursive debugfs_remove_recursive #define edac_debugfs_remove debugfs_remove #ifdef CONFIG_EDAC_DEBUG -int edac_debugfs_init(void); +void edac_debugfs_init(void); void edac_debugfs_exit(void); -int edac_create_debugfs_nodes(struct mem_ctl_info *mci); +void edac_create_debugfs_nodes(struct mem_ctl_info *mci); struct dentry *edac_debugfs_create_dir(const char *dirname); struct dentry * edac_debugfs_create_dir_at(const char *dirname, struct dentry *parent); @@ -83,9 +83,9 @@ edac_debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 struct dentry * edac_debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value); #else -static inline int edac_debugfs_init(void) { return -ENODEV; } +static inline void edac_debugfs_init(void) { } static inline void edac_debugfs_exit(void) { } -static inline int edac_create_debugfs_nodes(struct mem_ctl_info *mci) { return 0; } +static inline void edac_create_debugfs_nodes(struct mem_ctl_info *mci) { } static inline struct dentry *edac_debugfs_create_dir(const char *dirname) { return NULL; } static inline struct dentry * edac_debugfs_create_dir_at(const char *dirname, struct dentry *parent) { return NULL; } diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c new file mode 100644 index 000000000000..c334fb7c63df --- /dev/null +++ b/drivers/edac/i10nm_base.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for Intel(R) 10nm server memory controller. + * Copyright (c) 2019, Intel Corporation. + * + */ + +#include <linux/kernel.h> +#include <asm/cpu_device_id.h> +#include <asm/intel-family.h> +#include <asm/mce.h> +#include "edac_module.h" +#include "skx_common.h" + +#define I10NM_REVISION "v0.0.3" +#define EDAC_MOD_STR "i10nm_edac" + +/* Debug macros */ +#define i10nm_printk(level, fmt, arg...) \ + edac_printk(level, "i10nm", fmt, ##arg) + +#define I10NM_GET_SCK_BAR(d, reg) \ + pci_read_config_dword((d)->uracu, 0xd0, &(reg)) +#define I10NM_GET_IMC_BAR(d, i, reg) \ + pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg)) +#define I10NM_GET_DIMMMTR(m, i, j) \ + (*(u32 *)((m)->mbase + 0x2080c + (i) * 0x4000 + (j) * 4)) +#define I10NM_GET_MCDDRTCFG(m, i, j) \ + (*(u32 *)((m)->mbase + 0x20970 + (i) * 0x4000 + (j) * 4)) + +#define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23) +#define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12) +#define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \ + GET_BITFIELD(reg, 0, 10) + 1) << 12) + +static struct list_head *i10nm_edac_list; + +static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus, + unsigned int dev, unsigned int fun) +{ + struct pci_dev *pdev; + + pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun)); + if (!pdev) { + edac_dbg(2, "No device %02x:%02x.%x\n", + bus, dev, fun); + return NULL; + } + + if (unlikely(pci_enable_device(pdev) < 0)) { + edac_dbg(2, "Failed to enable device %02x:%02x.%x\n", + bus, dev, fun); + return NULL; + } + + pci_dev_get(pdev); + + return pdev; +} + +static int i10nm_get_all_munits(void) +{ + struct pci_dev *mdev; + void __iomem *mbase; + unsigned long size; + struct skx_dev *d; + int i, j = 0; + u32 reg, off; + u64 base; + + list_for_each_entry(d, i10nm_edac_list, list) { + d->util_all = pci_get_dev_wrapper(d->seg, d->bus[1], 29, 1); + if (!d->util_all) + return -ENODEV; + + d->uracu = pci_get_dev_wrapper(d->seg, d->bus[0], 0, 1); + if (!d->uracu) + return -ENODEV; + + if (I10NM_GET_SCK_BAR(d, reg)) { + i10nm_printk(KERN_ERR, "Failed to socket bar\n"); + return -ENODEV; + } + + base = I10NM_GET_SCK_MMIO_BASE(reg); + edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n", + j++, base, reg); + + for (i = 0; i < I10NM_NUM_IMC; i++) { + mdev = pci_get_dev_wrapper(d->seg, d->bus[0], + 12 + i, 0); + if (i == 0 && !mdev) { + i10nm_printk(KERN_ERR, "No IMC found\n"); + return -ENODEV; + } + if (!mdev) + continue; + + d->imc[i].mdev = mdev; + + if (I10NM_GET_IMC_BAR(d, i, reg)) { + i10nm_printk(KERN_ERR, "Failed to get mc bar\n"); + return -ENODEV; + } + + off = I10NM_GET_IMC_MMIO_OFFSET(reg); + size = I10NM_GET_IMC_MMIO_SIZE(reg); + edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n", + i, base + off, size, reg); + + mbase = ioremap(base + off, size); + if (!mbase) { + i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", + base + off); + return -ENODEV; + } + + d->imc[i].mbase = mbase; + } + } + + return 0; +} + +static const struct x86_cpu_id i10nm_cpuids[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_TREMONT_X, 0, 0 }, + { } +}; +MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids); + +static bool i10nm_check_ecc(struct skx_imc *imc, int chan) +{ + u32 mcmtr; + + mcmtr = *(u32 *)(imc->mbase + 0x20ef8 + chan * 0x4000); + edac_dbg(1, "ch%d mcmtr reg %x\n", chan, mcmtr); + + return !!GET_BITFIELD(mcmtr, 2, 2); +} + +static int i10nm_get_dimm_config(struct mem_ctl_info *mci) +{ + struct skx_pvt *pvt = mci->pvt_info; + struct skx_imc *imc = pvt->imc; + struct dimm_info *dimm; + u32 mtr, mcddrtcfg; + int i, j, ndimms; + + for (i = 0; i < I10NM_NUM_CHANNELS; i++) { + if (!imc->mbase) + continue; + + ndimms = 0; + for (j = 0; j < I10NM_NUM_DIMMS; j++) { + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, i, j, 0); + mtr = I10NM_GET_DIMMMTR(imc, i, j); + mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j); + edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n", + mtr, mcddrtcfg, imc->mc, i, j); + + if (IS_DIMM_PRESENT(mtr)) + ndimms += skx_get_dimm_info(mtr, 0, dimm, + imc, i, j); + else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) + ndimms += skx_get_nvdimm_info(dimm, imc, i, j, + EDAC_MOD_STR); + } + if (ndimms && !i10nm_check_ecc(imc, 0)) { + i10nm_printk(KERN_ERR, "ECC is disabled on imc %d\n", + imc->mc); + return -ENODEV; + } + } + + return 0; +} + +static struct notifier_block i10nm_mce_dec = { + .notifier_call = skx_mce_check_error, + .priority = MCE_PRIO_EDAC, +}; + +static int __init i10nm_init(void) +{ + u8 mc = 0, src_id = 0, node_id = 0; + const struct x86_cpu_id *id; + const char *owner; + struct skx_dev *d; + int rc, i, off[3] = {0xd0, 0xc8, 0xcc}; + u64 |