diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-25 18:07:36 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-11-25 18:07:36 -0800 |
commit | 9c91e6a5befb89d1494dd156dd4563f9e948a74f (patch) | |
tree | 8e3b4bf856d682c72b1b6c20f594a0763f044ddd /drivers | |
parent | 752272f16dd18f2cac58a583a8673c8e2fb93abb (diff) | |
parent | 5781823fd0d39082bfe2bbc20408aaa85a6e06ad (diff) |
Merge tag 'edac_for_5.5' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC updates from Borislav Petkov:
"A lot of changes this time around, details below.
From the next cycle onwards, we'll switch the EDAC tree to topic
branches (instead of a single edac-for-next branch), which should
hopefully make handling the changes more flexible. We'll see.
Summary:
- Rework error logging functions to accept an error-count
parameter (Hanna Hawa)
- Part one of substantial EDAC core + ghes_edac driver cleanup
(Robert Richter)
- Print additional useful logging information in skx_* (Tony Luck)
- Improve amd64_edac hw detection + cleanups (Yazen Ghannam)
- Misc cleanups, fixes and code improvements"
* tag 'edac_for_5.5' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: (35 commits)
EDAC/altera: Use the Altera System Manager driver
EDAC/altera: Cleanup the ECC Manager
EDAC/altera: Use fast register IO for S10 IRQs
EDAC/ghes: Do not warn when incrementing refcount on 0
EDAC/Documentation: Describe CPER module definition and DIMM ranks
EDAC: Unify the mc_event tracepoint call
EDAC/ghes: Remove intermediate buffer pvt->detail_location
EDAC/ghes: Fix grain calculation
EDAC/ghes: Use standard kernel macros for page calculations
EDAC: Remove misleading comment in struct edac_raw_error_desc
EDAC/mc: Reduce indentation level in edac_mc_handle_error()
EDAC/mc: Remove needless zero string termination
EDAC/mc: Do not BUG_ON() in edac_mc_alloc()
EDAC: Introduce an mci_for_each_dimm() iterator
EDAC: Remove EDAC_DIMM_OFF() macro
EDAC: Replace EDAC_DIMM_PTR() macro with edac_get_dimm() function
EDAC/amd64: Get rid of the ECC disabled long message
EDAC/ghes: Fix locking and memory barrier issues
EDAC/amd64: Check for memory before fully initializing an instance
EDAC/amd64: Use cached data when checking for ECC
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/edac/altera_edac.c | 152 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 217 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 2 | ||||
-rw-r--r-- | drivers/edac/aspeed_edac.c | 7 | ||||
-rw-r--r-- | drivers/edac/edac_device.c | 50 | ||||
-rw-r--r-- | drivers/edac/edac_device.h | 54 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 138 | ||||
-rw-r--r-- | drivers/edac/edac_mc_sysfs.c | 49 | ||||
-rw-r--r-- | drivers/edac/ghes_edac.c | 128 | ||||
-rw-r--r-- | drivers/edac/i10nm_base.c | 3 | ||||
-rw-r--r-- | drivers/edac/i3200_edac.c | 3 | ||||
-rw-r--r-- | drivers/edac/i5000_edac.c | 5 | ||||
-rw-r--r-- | drivers/edac/i5100_edac.c | 16 | ||||
-rw-r--r-- | drivers/edac/i5400_edac.c | 18 | ||||
-rw-r--r-- | drivers/edac/i7300_edac.c | 7 | ||||
-rw-r--r-- | drivers/edac/i7core_edac.c | 3 | ||||
-rw-r--r-- | drivers/edac/ie31200_edac.c | 7 | ||||
-rw-r--r-- | drivers/edac/pnd2_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 23 | ||||
-rw-r--r-- | drivers/edac/skx_base.c | 54 | ||||
-rw-r--r-- | drivers/edac/skx_common.c | 65 | ||||
-rw-r--r-- | drivers/edac/skx_common.h | 4 | ||||
-rw-r--r-- | drivers/edac/ti_edac.c | 2 |
23 files changed, 468 insertions, 543 deletions
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index fbda4b876afd..e91cf1147a4e 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -14,6 +14,7 @@ #include <linux/interrupt.h> #include <linux/irqchip/chained_irq.h> #include <linux/kernel.h> +#include <linux/mfd/altera-sysmgr.h> #include <linux/mfd/syscon.h> #include <linux/notifier.h> #include <linux/of_address.h> @@ -275,7 +276,6 @@ release: return ret; } -static int socfpga_is_a10(void); static int altr_sdram_probe(struct platform_device *pdev) { const struct of_device_id *id; @@ -399,7 +399,7 @@ static int altr_sdram_probe(struct platform_device *pdev) goto err; /* Only the Arria10 has separate IRQs */ - if (socfpga_is_a10()) { + if (of_machine_is_compatible("altr,socfpga-arria10")) { /* Arria10 specific initialization */ res = a10_init(mc_vbase); if (res < 0) @@ -502,68 +502,6 @@ module_platform_driver(altr_sdram_edac_driver); #endif /* CONFIG_EDAC_ALTERA_SDRAM */ -/**************** Stratix 10 EDAC Memory Controller Functions ************/ - -/** - * s10_protected_reg_write - * Write to a protected SMC register. - * @context: Not used. - * @reg: Address of register - * @value: Value to write - * Return: INTEL_SIP_SMC_STATUS_OK (0) on success - * INTEL_SIP_SMC_REG_ERROR on error - * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported - */ -static int s10_protected_reg_write(void *context, unsigned int reg, - unsigned int val) -{ - struct arm_smccc_res result; - unsigned long offset = (unsigned long)context; - - arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, offset + reg, val, 0, 0, - 0, 0, 0, &result); - - return (int)result.a0; -} - -/** - * s10_protected_reg_read - * Read the status of a protected SMC register - * @context: Not used. - * @reg: Address of register - * @value: Value read. 
- * Return: INTEL_SIP_SMC_STATUS_OK (0) on success - * INTEL_SIP_SMC_REG_ERROR on error - * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported - */ -static int s10_protected_reg_read(void *context, unsigned int reg, - unsigned int *val) -{ - struct arm_smccc_res result; - unsigned long offset = (unsigned long)context; - - arm_smccc_smc(INTEL_SIP_SMC_REG_READ, offset + reg, 0, 0, 0, - 0, 0, 0, &result); - - *val = (unsigned int)result.a1; - - return (int)result.a0; -} - -static const struct regmap_config s10_sdram_regmap_cfg = { - .name = "s10_ddr", - .reg_bits = 32, - .reg_stride = 4, - .val_bits = 32, - .max_register = 0xffd12228, - .reg_read = s10_protected_reg_read, - .reg_write = s10_protected_reg_write, - .use_single_read = true, - .use_single_write = true, -}; - -/************** </Stratix10 EDAC Memory Controller Functions> ***********/ - /************************* EDAC Parent Probe *************************/ static const struct of_device_id altr_edac_device_of_match[]; @@ -1008,16 +946,6 @@ static int __maybe_unused altr_init_memory_port(void __iomem *ioaddr, int port) return ret; } -static int socfpga_is_a10(void) -{ - return of_machine_is_compatible("altr,socfpga-arria10"); -} - -static int socfpga_is_s10(void) -{ - return of_machine_is_compatible("altr,socfpga-stratix10"); -} - static __init int __maybe_unused altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, u32 ecc_ctrl_en_mask, bool dual_port) @@ -1033,34 +961,10 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, /* Get the ECC Manager - parent of the device EDACs */ np_eccmgr = of_get_parent(np); - if (socfpga_is_a10()) { - ecc_mgr_map = syscon_regmap_lookup_by_phandle(np_eccmgr, - "altr,sysmgr-syscon"); - } else { - struct device_node *sysmgr_np; - struct resource res; - uintptr_t base; - - sysmgr_np = of_parse_phandle(np_eccmgr, - "altr,sysmgr-syscon", 0); - if (!sysmgr_np) { - edac_printk(KERN_ERR, EDAC_DEVICE, - "Unable to find altr,sysmgr-syscon\n"); - return 
-ENODEV; - } - - if (of_address_to_resource(sysmgr_np, 0, &res)) { - of_node_put(sysmgr_np); - return -ENOMEM; - } + ecc_mgr_map = + altr_sysmgr_regmap_lookup_by_phandle(np_eccmgr, + "altr,sysmgr-syscon"); - /* Need physical address for SMCC call */ - base = res.start; - - ecc_mgr_map = regmap_init(NULL, NULL, (void *)base, - &s10_sdram_regmap_cfg); - of_node_put(sysmgr_np); - } of_node_put(np_eccmgr); if (IS_ERR(ecc_mgr_map)) { edac_printk(KERN_ERR, EDAC_DEVICE, @@ -1125,9 +1029,6 @@ static int __init __maybe_unused altr_init_a10_ecc_device_type(char *compat) int irq; struct device_node *child, *np; - if (!socfpga_is_a10() && !socfpga_is_s10()) - return -ENODEV; - np = of_find_compatible_node(NULL, NULL, "altr,socfpga-a10-ecc-manager"); if (!np) { @@ -2178,33 +2079,9 @@ static int altr_edac_a10_probe(struct platform_device *pdev) platform_set_drvdata(pdev, edac); INIT_LIST_HEAD(&edac->a10_ecc_devices); - if (socfpga_is_a10()) { - edac->ecc_mgr_map = - syscon_regmap_lookup_by_phandle(pdev->dev.of_node, - "altr,sysmgr-syscon"); - } else { - struct device_node *sysmgr_np; - struct resource res; - uintptr_t base; - - sysmgr_np = of_parse_phandle(pdev->dev.of_node, - "altr,sysmgr-syscon", 0); - if (!sysmgr_np) { - edac_printk(KERN_ERR, EDAC_DEVICE, - "Unable to find altr,sysmgr-syscon\n"); - return -ENODEV; - } - - if (of_address_to_resource(sysmgr_np, 0, &res)) - return -ENOMEM; - - /* Need physical address for SMCC call */ - base = res.start; - - edac->ecc_mgr_map = devm_regmap_init(&pdev->dev, NULL, - (void *)base, - &s10_sdram_regmap_cfg); - } + edac->ecc_mgr_map = + altr_sysmgr_regmap_lookup_by_phandle(pdev->dev.of_node, + "altr,sysmgr-syscon"); if (IS_ERR(edac->ecc_mgr_map)) { edac_printk(KERN_ERR, EDAC_DEVICE, @@ -2270,18 +2147,7 @@ static int altr_edac_a10_probe(struct platform_device *pdev) if (!of_device_is_available(child)) continue; - if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc") || - of_device_is_compatible(child, 
"altr,socfpga-a10-ocram-ecc") || - of_device_is_compatible(child, "altr,socfpga-eth-mac-ecc") || - of_device_is_compatible(child, "altr,socfpga-nand-ecc") || - of_device_is_compatible(child, "altr,socfpga-dma-ecc") || - of_device_is_compatible(child, "altr,socfpga-usb-ecc") || - of_device_is_compatible(child, "altr,socfpga-qspi-ecc") || -#ifdef CONFIG_EDAC_ALTERA_SDRAM - of_device_is_compatible(child, "altr,sdram-edac-s10") || -#endif - of_device_is_compatible(child, "altr,socfpga-sdmmc-ecc")) - + if (of_match_node(altr_edac_a10_device_of_match, child)) altr_edac_a10_device_add(edac, child); #ifdef CONFIG_EDAC_ALTERA_SDRAM diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c1d4536ae466..428ce98f6776 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -16,12 +16,11 @@ module_param(ecc_enable_override, int, 0644); static struct msr __percpu *msrs; +static struct amd64_family_type *fam_type; + /* Per-node stuff */ static struct ecc_settings **ecc_stngs; -/* Number of Unified Memory Controllers */ -static u8 num_umcs; - /* * Valid scrub rates for the K8 hardware memory scrubber. We map the scrubbing * bandwidth to a valid bit pattern. The 'set' operation finds the 'matching- @@ -454,7 +453,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, for (i = 0; i < pvt->csels[dct].m_cnt; i++) #define for_each_umc(i) \ - for (i = 0; i < num_umcs; i++) + for (i = 0; i < fam_type->max_mcs; i++) /* * @input_addr is an InputAddr associated with the node given by mci. 
Return the @@ -2224,6 +2223,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "K8", .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, + .max_mcs = 2, .ops = { .early_channel_count = k8_early_channel_count, .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, @@ -2234,6 +2234,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F10h", .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP, .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2244,6 +2245,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F15h", .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2254,6 +2256,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F15h_M30h", .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2264,6 +2267,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F15h_M60h", .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2274,6 +2278,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F16h", .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2, + .max_mcs = 2, .ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2284,6 +2289,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F16h_M30h", .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1, .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, + .max_mcs = 2, 
.ops = { .early_channel_count = f1x_early_channel_count, .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, @@ -2294,6 +2300,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F17h", .f0_id = PCI_DEVICE_ID_AMD_17H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_17H_DF_F6, + .max_mcs = 2, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -2303,6 +2310,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F17h_M10h", .f0_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_17H_M10H_DF_F6, + .max_mcs = 2, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -2312,6 +2320,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F17h_M30h", .f0_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_17H_M30H_DF_F6, + .max_mcs = 8, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -2321,6 +2330,7 @@ static struct amd64_family_type family_types[] = { .ctl_name = "F17h_M70h", .f0_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F0, .f6_id = PCI_DEVICE_ID_AMD_17H_M70H_DF_F6, + .max_mcs = 2, .ops = { .early_channel_count = f17_early_channel_count, .dbam_to_cs = f17_addr_mask_to_cs_size, @@ -2838,8 +2848,6 @@ skip: edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); determine_ecc_sym_sz(pvt); - - dump_misc_regs(pvt); } /* @@ -2936,6 +2944,7 @@ static int init_csrows_df(struct mem_ctl_info *mci) dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; dimm->dtype = dev_type; + dimm->grain = 64; } } @@ -3012,6 +3021,7 @@ static int init_csrows(struct mem_ctl_info *mci) dimm = csrow->channels[j]->dimm; dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; + dimm->grain = 64; } } @@ -3178,43 +3188,27 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, amd64_warn("Error restoring NB MCGCTL settings!\n"); } -/* - * EDAC requires that the BIOS have ECC 
enabled before - * taking over the processing of ECC errors. A command line - * option allows to force-enable hardware ECC later in - * enable_ecc_error_reporting(). - */ -static const char *ecc_msg = - "ECC disabled in the BIOS or no ECC capability, module will not load.\n" - " Either enable ECC checking or force module loading by setting " - "'ecc_enable_override'.\n" - " (Note that use of the override may cause unknown side effects.)\n"; - -static bool ecc_enabled(struct pci_dev *F3, u16 nid) +static bool ecc_enabled(struct amd64_pvt *pvt) { + u16 nid = pvt->mc_node_id; bool nb_mce_en = false; u8 ecc_en = 0, i; u32 value; if (boot_cpu_data.x86 >= 0x17) { u8 umc_en_mask = 0, ecc_en_mask = 0; + struct amd64_umc *umc; for_each_umc(i) { - u32 base = get_umc_base(i); + umc = &pvt->umc[i]; /* Only check enabled UMCs. */ - if (amd_smn_read(nid, base + UMCCH_SDP_CTRL, &value)) - continue; - - if (!(value & UMC_SDP_INIT)) + if (!(umc->sdp_ctrl & UMC_SDP_INIT)) continue; umc_en_mask |= BIT(i); - if (amd_smn_read(nid, base + UMCCH_UMC_CAP_HI, &value)) - continue; - - if (value & UMC_ECC_ENABLED) + if (umc->umc_cap_hi & UMC_ECC_ENABLED) ecc_en_mask |= BIT(i); } @@ -3227,7 +3221,7 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid) /* Assume UMC MCA banks are enabled. */ nb_mce_en = true; } else { - amd64_read_pci_cfg(F3, NBCFG, &value); + amd64_read_pci_cfg(pvt->F3, NBCFG, &value); ecc_en = !!(value & NBCFG_ECC_ENABLE); @@ -3240,11 +3234,10 @@ static bool ecc_enabled(struct pci_dev *F3, u16 nid) amd64_info("Node %d: DRAM ECC %s.\n", nid, (ecc_en ? 
"enabled" : "disabled")); - if (!ecc_en || !nb_mce_en) { - amd64_info("%s", ecc_msg); + if (!ecc_en || !nb_mce_en) return false; - } - return true; + else + return true; } static inline void @@ -3278,8 +3271,7 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) } } -static void setup_mci_misc_attrs(struct mem_ctl_info *mci, - struct amd64_family_type *fam) +static void setup_mci_misc_attrs(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; @@ -3298,7 +3290,7 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, mci->edac_cap = determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = fam->ctl_name; + mci->ctl_name = fam_type->ctl_name; mci->dev_name = pci_name(pvt->F3); mci->ctl_page_to_phys = NULL; @@ -3312,8 +3304,6 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci, */ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) { - struct amd64_family_type *fam_type = NULL; - pvt->ext_model = boot_cpu_data.x86_model >> 4; pvt->stepping = boot_cpu_data.x86_stepping; pvt->model = boot_cpu_data.x86_model; @@ -3401,51 +3391,15 @@ static const struct attribute_group *amd64_edac_attr_groups[] = { NULL }; -/* Set the number of Unified Memory Controllers in the system. 
*/ -static void compute_num_umcs(void) -{ - u8 model = boot_cpu_data.x86_model; - - if (boot_cpu_data.x86 < 0x17) - return; - - if (model >= 0x30 && model <= 0x3f) - num_umcs = 8; - else - num_umcs = 2; - - edac_dbg(1, "Number of UMCs: %x", num_umcs); -} - -static int init_one_instance(unsigned int nid) +static int hw_info_get(struct amd64_pvt *pvt) { - struct pci_dev *F3 = node_to_amd_nb(nid)->misc; - struct amd64_family_type *fam_type = NULL; - struct mem_ctl_info *mci = NULL; - struct edac_mc_layer layers[2]; - struct amd64_pvt *pvt = NULL; u16 pci_id1, pci_id2; - int err = 0, ret; - - ret = -ENOMEM; - pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); - if (!pvt) - goto err_ret; - - pvt->mc_node_id = nid; - pvt->F3 = F3; - - ret = -EINVAL; - fam_type = per_family_init(pvt); - if (!fam_type) - goto err_free; + int ret = -EINVAL; if (pvt->fam >= 0x17) { - pvt->umc = kcalloc(num_umcs, sizeof(struct amd64_umc), GFP_KERNEL); - if (!pvt->umc) { - ret = -ENOMEM; - goto err_free; - } + pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); + if (!pvt->umc) + return -ENOMEM; pci_id1 = fam_type->f0_id; pci_id2 = fam_type->f6_id; @@ -3454,21 +3408,37 @@ static int init_one_instance(unsigned int nid) pci_id2 = fam_type->f2_id; } - err = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2); - if (err) - goto err_post_init; + ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2); + if (ret) + return ret; read_mc_regs(pvt); + return 0; +} + +static void hw_info_put(struct amd64_pvt *pvt) +{ + if (pvt->F0 || pvt->F1) + free_mc_sibling_devs(pvt); + + kfree(pvt->umc); +} + +static int init_one_instance(struct amd64_pvt *pvt) +{ + struct mem_ctl_info *mci = NULL; + struct edac_mc_layer layers[2]; + int ret = -EINVAL; + /* * We need to determine how many memory channels there are. Then use * that information for calculating the size of the dynamic instance * tables in the 'mci' structure. 
*/ - ret = -EINVAL; pvt->channel_count = pvt->ops->early_channel_count(pvt); if (pvt->channel_count < 0) - goto err_siblings; + return ret; ret = -ENOMEM; layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; @@ -3480,24 +3450,18 @@ static int init_one_instance(unsigned int nid) * Always allocate two channels since we can have setups with DIMMs on * only one channel. Also, this simplifies handling later for the price * of a couple of KBs tops. - * - * On Fam17h+, the number of controllers may be greater than two. So set - * the size equal to the maximum number of UMCs. */ - if (pvt->fam >= 0x17) - layers[1].size = num_umcs; - else - layers[1].size = 2; + layers[1].size = fam_type->max_mcs; layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(nid, ARRAY_SIZE(layers), layers, 0); + mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0); if (!mci) - goto err_siblings; + return ret; mci->pvt_info = pvt; mci->pdev = &pvt->F3->dev; - setup_mci_misc_attrs(mci, fam_type); + setup_mci_misc_attrs(mci); if (init_csrows(mci)) mci->edac_cap = EDAC_FLAG_NONE; @@ -3505,31 +3469,30 @@ static int init_one_instance(unsigned int nid) ret = -ENODEV; if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) { edac_dbg(1, "failed edac_mc_add_mc()\n"); - goto err_add_mc; + edac_mc_free(mci); + return ret; } return 0; +} -err_add_mc: - edac_mc_free(mci); - -err_siblings: - free_mc_sibling_devs(pvt); - -err_post_init: - if (pvt->fam >= 0x17) - kfree(pvt->umc); +static bool instance_has_memory(struct amd64_pvt *pvt) +{ + bool cs_enabled = false; + int cs = 0, dct = 0; -err_free: - kfree(pvt); + for (dct = 0; dct < fam_type->max_mcs; dct++) { + for_each_chip_select(cs, dct, pvt) + cs_enabled |= csrow_enabled(cs, dct, pvt); + } -err_ret: - return ret; + return cs_enabled; } static int probe_one_instance(unsigned int nid) { struct pci_dev *F3 = node_to_amd_nb(nid)->misc; + struct amd64_pvt *pvt = NULL; struct ecc_settings *s; int ret; @@ -3540,8 +3503,29 @@ static int 
probe_one_instance(unsigned int nid) ecc_stngs[nid] = s; - if (!ecc_enabled(F3, nid)) { - ret = 0; + pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL); + if (!pvt) + goto err_settings; + + pvt->mc_node_id = nid; + pvt->F3 = F3; + + fam_type = per_family_init(pvt); + if (!fam_type) + goto err_enable; + + ret = hw_info_get(pvt); + if (ret < 0) + goto err_enable; + + ret = 0; + if (!instance_has_memory(pvt)) { + amd64_info("Node %d: No DIMMs detected.\n", nid); + goto err_enable; + } + + if (!ecc_enabled(pvt)) { + ret = -ENODEV; if (!ecc_enable_override) goto err_enable; @@ -3556,7 +3540,7 @@ static int probe_one_instance(unsigned int nid) goto err_enable; } - ret = init_one_instance(nid); + ret = init_one_instance(pvt); if (ret < 0) { amd64_err("Error probing instance: %d\n", nid); @@ -3566,9 +3550,15 @@ static int probe_one_instance(unsigned int nid) goto err_enable; } + dump_misc_regs(pvt); + return ret; err_enable: + hw_info_put(pvt); + kfree(pvt); + +err_settings: kfree(s); ecc_stngs[nid] = NULL; @@ -3595,14 +3585,13 @@ static void remove_one_instance(unsigned int nid) restore_ecc_error_reporting(s, nid, F3); - free_mc_sibling_devs(pvt); - kfree(ecc_stngs[nid]); ecc_stngs[nid] = NULL; /* Free the EDAC CORE resources */ mci->pvt_info = NULL; + hw_info_put(pvt); kfree(pvt); edac_mc_free(mci); } @@ -3668,8 +3657,6 @@ static int __init amd64_edac_init(void) if (!msrs) goto err_free; - compute_num_umcs(); - for (i = 0; i < amd_nb_num(); i++) { err = probe_one_instance(i); if (err) { diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8c3cda81e619..9be31688110b 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -479,6 +479,8 @@ struct low_ops { struct amd64_family_type { const char *ctl_name; u16 f0_id, f1_id, f2_id, f6_id; + /* Maximum number of memory controllers per die/node. 
*/ + u8 max_mcs; struct low_ops ops; }; diff --git a/drivers/edac/aspeed_edac.c b/drivers/edac/aspeed_edac.c index 5634437bb39d..09a9e3de9595 100644 --- a/drivers/edac/aspeed_edac.c +++ b/drivers/edac/aspeed_edac.c @@ -281,16 +281,11 @@ static int aspeed_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct edac_mc_layer layers[2]; struct mem_ctl_info *mci; - struct resource *res; void __iomem *regs; u32 reg04; int rc; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENOENT; - - regs = devm_ioremap_resource(dev, res); + regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(regs)) return PTR_ERR(regs); diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 65cf2b9355c4..8c4d947fb848 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -555,12 +555,16 @@ static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info return edac_dev->panic_on_ue; } -void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, - int inst_nr, int block_nr, const char *msg) +void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev, + unsigned int count, int inst_nr, int block_nr, + const char *msg) { struct edac_device_instance *instance; struct edac_device_block *block = NULL; + if (!count) + return; + if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) { edac_device_printk(edac_dev, KERN_ERR, "INTERNAL ERROR: 'instance' out of range " @@ -582,27 +586,31 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev, if (instance->nr_blocks > 0) { block = instance->blocks + block_nr; - block->counters.ce_count++; + block->counters.ce_count += count; } /* Propagate the count up the 'totals' tree */ - instance->counters.ce_count++; - edac_dev->counters.ce_count++; + instance->counters.ce_count += count; + edac_dev->counters.ce_count += count; if (edac_device_get_log_ce(edac_dev)) edac_device_printk(edac_dev, KERN_WARNING, - "CE: %s instance: %s block: 
%s '%s'\n", - edac_dev->ctl_name, instance->name, - block ? block->name : "N/A", msg); + "CE: %s instance: %s block: %s count: %d '%s'\n", + edac_dev->ctl_name, instance->name, + block ? block->name : "N/A", count, msg); } -EXPORT_SYMBOL_GPL(edac_device_handle_ce); +EXPORT_SYMBOL_GPL(edac_device_handle_ce_count); -void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, - int inst_nr, int block_nr, const char *msg) +void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev, + unsigned int count, int inst_nr, int block_nr, + const char *msg) { struct edac_device_instance *instance; struct edac_device_block *block = NULL; + if (!count) + return; + if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) { edac_device_printk(edac_dev, KERN_ERR, "INTERNAL ERROR: 'instance' out of range " @@ -624,22 +632,22 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, if (instance->nr_blocks > 0) { block = instance->blocks + block_nr; - block->counters.ue_count++; + block->counters.ue_count += count; } /* Propagate the count up the 'totals' tree */ - instance->counters.ue_count++; - edac_dev->counters.ue_count++; + instance->counters.ue_count += count; + edac_dev->counters.ue_count += count; if (edac_device_get_log_ue(edac_dev)) edac_device_printk(edac_dev, KERN_EMERG, - "UE: %s instance: %s block: %s '%s'\n", - edac_dev->ctl_name, instance->name, - block ? block->name : "N/A", msg); + "UE: %s instance: %s block: %s count: %d '%s'\n", + edac_dev->ctl_name, instance->name, + block ? block->name : "N/A", count, msg); if (edac_device_get_panic_on_ue(edac_dev)) - panic("EDAC %s: UE instance: %s block %s '%s'\n", - edac_dev->ctl_name, instance->name, - block ? block->name : "N/A", msg); + panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n", + edac_dev->ctl_name, instance->name, + block ? 
block->name : "N/A", count, msg); } -EXPORT_SYMBOL_GPL(edac_device_handle_ue); +EXPORT_SYMBOL_GPL(edac_device_handle_ue_count); diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h index 1aaba74ae411..c4c0e0bdce14 100644 --- a/drivers/edac/edac_device.h +++ b/drivers/edac/edac_device.h @@ -286,27 +286,60 @@ extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev); extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev); /** - * edac_device_handle_ue(): - * perform a common output and handling of an 'edac_dev' UE event + * Log correctable errors. * * @edac_dev: pointer to struct &edac_device_ctl_info - * @inst_nr: number of the instance where the UE error happened - * @block_nr: number of the block where the UE error happened + * @inst_nr: number of the instance where the CE error happened + * @count: Number of errors to log. + * @block_nr: number of the block where the CE error happened + * @msg: message to be printed + */ +void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev, + unsigned int count, int inst_nr, int block_nr, + const char *msg); + +/** + * Log uncorrectable errors. + * + * @edac_dev: pointer to struct &edac_device_ctl_info + * @inst_nr: number of the instance where the CE error happened + * @count: Number of errors to log. + * @block_nr: number of the block where the CE error happened * @msg: message to be printed */ -extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev, - int inst_nr, int block_nr, const char *msg); +void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev, + unsigned int count, int inst_nr, int block_nr, + const char *msg); + /** - * edac_device_handle_ce(): - * perform a common output and handling of an 'edac_dev' CE event + * edac_device_handle_ce(): Log a single correctable error * * @edac_dev: pointer to struct &edac_device_ctl_info * @inst_nr: number of the insta |