From ad6a32e96939a0eb0eb382e7d78dbf33457aed1a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 9 Mar 2010 12:46:00 +0100 Subject: amd64_edac: Sanitize syndrome extraction Remove the two syndrome extraction macros and add a single function which does the same thing but with proper typechecking. While at it, make sure to cache ECC syndrome size and dump it in debug output. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 83 ++++++++++++++++++++++++++++------------------- drivers/edac/amd64_edac.h | 5 +++ 2 files changed, 55 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ac9f7985096d..e8d84f89dbcf 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -796,6 +796,11 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr) static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); +static u16 extract_syndrome(struct err_regs *err) +{ + return ((err->nbsh >> 15) & 0xff) | ((err->nbsl >> 16) & 0xff00); +} + static void amd64_cpu_display_info(struct amd64_pvt *pvt) { if (boot_cpu_data.x86 == 0x11) @@ -888,6 +893,9 @@ static void amd64_dump_misc_regs(struct amd64_pvt *pvt) return; } + amd64_printk(KERN_INFO, "using %s syndromes.\n", + ((pvt->syn_type == 8) ? 
"x8" : "x4")); + /* Only if NOT ganged does dclr1 have valid info */ if (!dct_ganging_enabled(pvt)) amd64_dump_dramcfg_low(pvt->dclr1, 1); @@ -1101,20 +1109,17 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram) } static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, - struct err_regs *info, - u64 sys_addr) + struct err_regs *err_info, u64 sys_addr) { struct mem_ctl_info *src_mci; - unsigned short syndrome; int channel, csrow; u32 page, offset; + u16 syndrome; - /* Extract the syndrome parts and form a 16-bit syndrome */ - syndrome = HIGH_SYNDROME(info->nbsl) << 8; - syndrome |= LOW_SYNDROME(info->nbsh); + syndrome = extract_syndrome(err_info); /* CHIPKILL enabled */ - if (info->nbcfg & K8_NBCFG_CHIPKILL) { + if (err_info->nbcfg & K8_NBCFG_CHIPKILL) { channel = get_channel_from_ecc_syndrome(mci, syndrome); if (channel < 0) { /* @@ -1123,8 +1128,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, * as suspect. */ amd64_mc_printk(mci, KERN_WARNING, - "unknown syndrome 0x%x - possible error " - "reporting race\n", syndrome); + "unknown syndrome 0x%04x - possible " + "error reporting race\n", syndrome); edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); return; } @@ -1654,13 +1659,13 @@ static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, * (MCX_ADDR). 
*/ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, - struct err_regs *info, + struct err_regs *err_info, u64 sys_addr) { struct amd64_pvt *pvt = mci->pvt_info; u32 page, offset; - unsigned short syndrome; int nid, csrow, chan = 0; + u16 syndrome; csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); @@ -1671,8 +1676,7 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, error_address_to_page_and_offset(sys_addr, &page, &offset); - syndrome = HIGH_SYNDROME(info->nbsl) << 8; - syndrome |= LOW_SYNDROME(info->nbsh); + syndrome = extract_syndrome(err_info); /* * We need the syndromes for channel detection only when we're @@ -1878,7 +1882,7 @@ static u16 x8_vectors[] = { }; static int decode_syndrome(u16 syndrome, u16 *vectors, int num_vecs, - int v_dim) + int v_dim) { unsigned int i, err_sym; @@ -1955,23 +1959,23 @@ static int map_err_sym_to_channel(int err_sym, int sym_size) static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) { struct amd64_pvt *pvt = mci->pvt_info; - u32 value = 0; - int err_sym = 0; - - if (boot_cpu_data.x86 == 0x10) { - - amd64_read_pci_cfg(pvt->misc_f3_ctl, 0x180, &value); - - /* F3x180[EccSymbolSize]=1 => x8 symbols */ - if (boot_cpu_data.x86_model > 7 && - value & BIT(25)) { - err_sym = decode_syndrome(syndrome, x8_vectors, - ARRAY_SIZE(x8_vectors), 8); - return map_err_sym_to_channel(err_sym, 8); - } + int err_sym = -1; + + if (pvt->syn_type == 8) + err_sym = decode_syndrome(syndrome, x8_vectors, + ARRAY_SIZE(x8_vectors), + pvt->syn_type); + else if (pvt->syn_type == 4) + err_sym = decode_syndrome(syndrome, x4_vectors, + ARRAY_SIZE(x4_vectors), + pvt->syn_type); + else { + amd64_printk(KERN_WARNING, "%s: Illegal syndrome type: %u\n", + __func__, pvt->syn_type); + return err_sym; } - err_sym = decode_syndrome(syndrome, x4_vectors, ARRAY_SIZE(x4_vectors), 4); - return map_err_sym_to_channel(err_sym, 4); + + return map_err_sym_to_channel(err_sym, pvt->syn_type); } /* @@ -2284,6 
+2288,7 @@ static void amd64_free_mc_sibling_devices(struct amd64_pvt *pvt) static void amd64_read_mc_registers(struct amd64_pvt *pvt) { u64 msr_val; + u32 tmp; int dram; /* @@ -2349,10 +2354,22 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt) amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0); - if (!dct_ganging_enabled(pvt) && boot_cpu_data.x86 >= 0x10) { - amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1); - amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_1, &pvt->dchr1); + if (boot_cpu_data.x86 >= 0x10) { + if (!dct_ganging_enabled(pvt)) { + amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1); + amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_1, &pvt->dchr1); + } + amd64_read_pci_cfg(pvt->misc_f3_ctl, EXT_NB_MCA_CFG, &tmp); } + + if (boot_cpu_data.x86 == 0x10 && + boot_cpu_data.x86_model > 7 && + /* F3x180[EccSymbolSize]=1 => x8 symbols */ + tmp & BIT(25)) + pvt->syn_type = 8; + else + pvt->syn_type = 4; + amd64_dump_misc_regs(pvt); } diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 0d4bf5638243..707745b36733 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -382,6 +382,8 @@ enum { #define K8_NBCAP_SECDED BIT(3) #define K8_NBCAP_DCT_DUAL BIT(0) +#define EXT_NB_MCA_CFG 0x180 + /* MSRs */ #define K8_MSR_MCGCTL_NBE BIT(4) @@ -471,6 +473,9 @@ struct amd64_pvt { u32 dram_ctl_select_high; /* DRAM Controller Select High Reg */ u32 online_spare; /* On-Line spare Reg */ + /* x4 or x8 syndromes in use */ + u8 syn_type; + /* temp storage for when input is received from sysfs */ struct err_regs ctl_error_info; -- cgit v1.2.3 From 935ab88e341ccb1507b2b0b1f1e9adcbbd693265 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 15 Mar 2010 19:17:57 +0100 Subject: edac: Remove EDAC_DEBUG_VERBOSE This option differs from EDAC_DEBUG only by printing the file and line of where the debug statement is placed, which contains 
unneeded information. So remove it. Signed-off-by: Borislav Petkov Acked-by: Doug Thompson --- drivers/edac/Kconfig | 8 -------- drivers/edac/edac_core.h | 15 --------------- 2 files changed, 23 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 0d2f9dbb47e4..70bb350de996 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -39,14 +39,6 @@ config EDAC_DEBUG there're four debug levels (x=0,1,2,3 from low to high). Usually you should select 'N'. -config EDAC_DEBUG_VERBOSE - bool "More verbose debugging" - depends on EDAC_DEBUG - help - This option makes debugging information more verbose. - Source file name and line number where debugging message - printed will be added to debugging message. - config EDAC_DECODE_MCE tristate "Decode MCEs in human-readable form (only on AMD for now)" depends on CPU_SUP_AMD && X86_MCE diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index efca9343d26a..ade4f1d70539 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -49,21 +49,15 @@ #define edac_printk(level, prefix, fmt, arg...) \ printk(level "EDAC " prefix ": " fmt, ##arg) -#define edac_printk_verbose(level, prefix, fmt, arg...) \ - printk(level "EDAC " prefix ": " "in %s, line at %d: " fmt, \ - __FILE__, __LINE__, ##arg) - #define edac_mc_printk(mci, level, fmt, arg...) \ printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg) #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \ printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg) -/* edac_device printk */ #define edac_device_printk(ctl, level, fmt, arg...) \ printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg) -/* edac_pci printk */ #define edac_pci_printk(ctl, level, fmt, arg...) \ printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg) @@ -76,21 +70,12 @@ extern int edac_debug_level; extern const char *edac_mem_types[]; -#ifndef CONFIG_EDAC_DEBUG_VERBOSE #define edac_debug_printk(level, fmt, arg...) 
\ do { \ if (level <= edac_debug_level) \ edac_printk(KERN_DEBUG, EDAC_DEBUG, \ "%s: " fmt, __func__, ##arg); \ } while (0) -#else /* CONFIG_EDAC_DEBUG_VERBOSE */ -#define edac_debug_printk(level, fmt, arg...) \ - do { \ - if (level <= edac_debug_level) \ - edac_printk_verbose(KERN_DEBUG, EDAC_DEBUG, fmt, \ - ##arg); \ - } while (0) -#endif #define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ ) #define debugf1( ... ) edac_debug_printk(1, __VA_ARGS__ ) -- cgit v1.2.3 From 695426506ebba6acc87843cca075595a775e8866 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 15 Mar 2010 19:39:18 +0100 Subject: amd64_edac: Remove unneeded defines All F2x110-related bit defines are used at only one place so replace them with simple BIT() macros. Signed-off-by: Borislav Petkov Acked-by: Doug Thompson --- drivers/edac/amd64_edac.h | 43 ++++++++----------------------------------- 1 file changed, 8 insertions(+), 35 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 707745b36733..613b9381e71a 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -244,44 +244,17 @@ #define F10_DCTL_SEL_LOW 0x110 - -#define dct_sel_baseaddr(pvt) \ - ((pvt->dram_ctl_select_low) & 0xFFFFF800) - -#define dct_sel_interleave_addr(pvt) \ - (((pvt->dram_ctl_select_low) >> 6) & 0x3) - -enum { - F10_DCTL_SEL_LOW_DctSelHiRngEn = BIT(0), - F10_DCTL_SEL_LOW_DctSelIntLvEn = BIT(2), - F10_DCTL_SEL_LOW_DctGangEn = BIT(4), - F10_DCTL_SEL_LOW_DctDatIntLv = BIT(5), - F10_DCTL_SEL_LOW_DramEnable = BIT(8), - F10_DCTL_SEL_LOW_MemCleared = BIT(10), -}; - -#define dct_high_range_enabled(pvt) \ - (pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctSelHiRngEn) - -#define dct_interleave_enabled(pvt) \ - (pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctSelIntLvEn) - -#define dct_ganging_enabled(pvt) \ - (pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctGangEn) - -#define dct_data_intlv_enabled(pvt) \ - (pvt->dram_ctl_select_low & 
F10_DCTL_SEL_LOW_DctDatIntLv) - -#define dct_dram_enabled(pvt) \ - (pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DramEnable) - -#define dct_memory_cleared(pvt) \ - (pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_MemCleared) - +#define dct_sel_baseaddr(pvt) ((pvt->dram_ctl_select_low) & 0xFFFFF800) +#define dct_sel_interleave_addr(pvt) (((pvt->dram_ctl_select_low) >> 6) & 0x3) +#define dct_high_range_enabled(pvt) (pvt->dram_ctl_select_low & BIT(0)) +#define dct_interleave_enabled(pvt) (pvt->dram_ctl_select_low & BIT(2)) +#define dct_ganging_enabled(pvt) (pvt->dram_ctl_select_low & BIT(4)) +#define dct_data_intlv_enabled(pvt) (pvt->dram_ctl_select_low & BIT(5)) +#define dct_dram_enabled(pvt) (pvt->dram_ctl_select_low & BIT(8)) +#define dct_memory_cleared(pvt) (pvt->dram_ctl_select_low & BIT(10)) #define F10_DCTL_SEL_HIGH 0x114 - /* * Function 3 - Misc Control */ -- cgit v1.2.3 From f4347553b30ec66530bfe63c84530afea3803396 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 15 May 2010 13:51:57 +0200 Subject: amd64_edac: Remove polling mechanism Switch to reusing the mcheck core's machine check polling mechanism instead of duplicating functionality by using the EDAC polling routine. Correct formatting while at it. Signed-off-by: Borislav Petkov Acked-by: Doug Thompson --- drivers/edac/amd64_edac.c | 118 -------------------------------------------- drivers/edac/edac_mce_amd.c | 16 +++--- 2 files changed, 8 insertions(+), 126 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index e8d84f89dbcf..a44e90abb755 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1978,107 +1978,6 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) return map_err_sym_to_channel(err_sym, pvt->syn_type); } -/* - * Check for valid error in the NB Status High register. 
If so, proceed to read - * NB Status Low, NB Address Low and NB Address High registers and store data - * into error structure. - * - * Returns: - * - 1: if hardware regs contains valid error info - * - 0: if no valid error is indicated - */ -static int amd64_get_error_info_regs(struct mem_ctl_info *mci, - struct err_regs *regs) -{ - struct amd64_pvt *pvt; - struct pci_dev *misc_f3_ctl; - - pvt = mci->pvt_info; - misc_f3_ctl = pvt->misc_f3_ctl; - - if (amd64_read_pci_cfg(misc_f3_ctl, K8_NBSH, &regs->nbsh)) - return 0; - - if (!(regs->nbsh & K8_NBSH_VALID_BIT)) - return 0; - - /* valid error, read remaining error information registers */ - if (amd64_read_pci_cfg(misc_f3_ctl, K8_NBSL, &regs->nbsl) || - amd64_read_pci_cfg(misc_f3_ctl, K8_NBEAL, &regs->nbeal) || - amd64_read_pci_cfg(misc_f3_ctl, K8_NBEAH, &regs->nbeah) || - amd64_read_pci_cfg(misc_f3_ctl, K8_NBCFG, &regs->nbcfg)) - return 0; - - return 1; -} - -/* - * This function is called to retrieve the error data from hardware and store it - * in the info structure. - * - * Returns: - * - 1: if a valid error is found - * - 0: if no error is found - */ -static int amd64_get_error_info(struct mem_ctl_info *mci, - struct err_regs *info) -{ - struct amd64_pvt *pvt; - struct err_regs regs; - - pvt = mci->pvt_info; - - if (!amd64_get_error_info_regs(mci, info)) - return 0; - - /* - * Here's the problem with the K8's EDAC reporting: There are four - * registers which report pieces of error information. They are shared - * between CEs and UEs. Furthermore, contrary to what is stated in the - * BKDG, the overflow bit is never used! Every error always updates the - * reporting registers. - * - * Can you see the race condition? All four error reporting registers - * must be read before a new error updates them! There is no way to read - * all four registers atomically. The best than can be done is to detect - * that a race has occured and then report the error without any kind of - * precision. 
- * - * What is still positive is that errors are still reported and thus - * problems can still be detected - just not localized because the - * syndrome and address are spread out across registers. - * - * Grrrrr!!!!! Here's hoping that AMD fixes this in some future K8 rev. - * UEs and CEs should have separate register sets with proper overflow - * bits that are used! At very least the problem can be fixed by - * honoring the ErrValid bit in 'nbsh' and not updating registers - just - * set the overflow bit - unless the current error is CE and the new - * error is UE which would be the only situation for overwriting the - * current values. - */ - - regs = *info; - - /* Use info from the second read - most current */ - if (unlikely(!amd64_get_error_info_regs(mci, info))) - return 0; - - /* clear the error bits in hardware */ - pci_write_bits32(pvt->misc_f3_ctl, K8_NBSH, 0, K8_NBSH_VALID_BIT); - - /* Check for the possible race condition */ - if ((regs.nbsh != info->nbsh) || - (regs.nbsl != info->nbsl) || - (regs.nbeah != info->nbeah) || - (regs.nbeal != info->nbeal)) { - amd64_mc_printk(mci, KERN_WARNING, - "hardware STATUS read access race condition " - "detected!\n"); - return 0; - } - return 1; -} - /* * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR * ADDRESS and process. @@ -2202,20 +2101,6 @@ void amd64_decode_bus_error(int node_id, struct err_regs *regs) } -/* - * The main polling 'check' function, called FROM the edac core to perform the - * error checking and if an error is encountered, error processing. 
- */ -static void amd64_check(struct mem_ctl_info *mci) -{ - struct err_regs regs; - - if (amd64_get_error_info(mci, &regs)) { - struct amd64_pvt *pvt = mci->pvt_info; - amd_decode_nb_mce(pvt->mc_node_id, &regs, 1); - } -} - /* * Input: * 1) struct amd64_pvt which contains pvt->dram_f2_ctl pointer @@ -2756,9 +2641,6 @@ static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci) mci->dev_name = pci_name(pvt->dram_f2_ctl); mci->ctl_page_to_phys = NULL; - /* IMPORTANT: Set the polling 'check' function in this module */ - mci->edac_check = amd64_check; - /* memory scrubber interface */ mci->set_sdram_scrub_rate = amd64_set_scrub_rate; mci->get_sdram_scrub_rate = amd64_get_scrub_rate; diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 97e64bcdbc06..bae9351e9473 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c @@ -133,7 +133,7 @@ static void amd_decode_dc_mce(u64 mc0_status) u32 ec = mc0_status & 0xffff; u32 xec = (mc0_status >> 16) & 0xf; - pr_emerg(" Data Cache Error"); + pr_emerg("Data Cache Error"); if (xec == 1 && TLB_ERROR(ec)) pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); @@ -176,7 +176,7 @@ static void amd_decode_ic_mce(u64 mc1_status) u32 ec = mc1_status & 0xffff; u32 xec = (mc1_status >> 16) & 0xf; - pr_emerg(" Instruction Cache Error"); + pr_emerg("Instruction Cache Error"); if (xec == 1 && TLB_ERROR(ec)) pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); @@ -233,7 +233,7 @@ static void amd_decode_bu_mce(u64 mc2_status) u32 ec = mc2_status & 0xffff; u32 xec = (mc2_status >> 16) & 0xf; - pr_emerg(" Bus Unit Error"); + pr_emerg("Bus Unit Error"); if (xec == 0x1) pr_cont(" in the write data buffers.\n"); @@ -275,7 +275,7 @@ static void amd_decode_ls_mce(u64 mc3_status) u32 ec = mc3_status & 0xffff; u32 xec = (mc3_status >> 16) & 0xf; - pr_emerg(" Load Store Error"); + pr_emerg("Load Store Error"); if (xec == 0x0) { u8 rrrr = (ec >> 4) & 0xf; @@ -304,7 +304,7 @@ void amd_decode_nb_mce(int node_id, struct 
err_regs *regs, int handle_errors) if (TLB_ERROR(ec) && !report_gart_errors) return; - pr_emerg(" Northbridge Error, node %d", node_id); + pr_emerg("Northbridge Error, node %d", node_id); /* * F10h, revD can disable ErrCpu[3:0] so check that first and also the @@ -342,13 +342,13 @@ static void amd_decode_fr_mce(u64 mc5_status) static inline void amd_decode_err_code(unsigned int ec) { if (TLB_ERROR(ec)) { - pr_emerg(" Transaction: %s, Cache Level %s\n", + pr_emerg("Transaction: %s, Cache Level %s\n", TT_MSG(ec), LL_MSG(ec)); } else if (MEM_ERROR(ec)) { - pr_emerg(" Transaction: %s, Type: %s, Cache Level: %s", + pr_emerg("Transaction: %s, Type: %s, Cache Level: %s", RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); } else if (BUS_ERROR(ec)) { - pr_emerg(" Transaction type: %s(%s), %s, Cache Level: %s, " + pr_emerg("Transaction type: %s(%s), %s, Cache Level: %s, " "Participating Processor: %s\n", RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), PP_MSG(ec)); -- cgit v1.2.3 From 9975a5f22a4fcc8d08035c65439900a983f891ad Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 8 Mar 2010 18:29:35 +0100 Subject: amd64_edac: Fix DCT base address selector The correct check is to verify whether in high range we're below 4GB and not to extract the DctSelBaseAddr again. See "2.8.5 Routing DRAM Requests" in the F10h BKDG. 
Cc: # .32.x .33.x .34.x Signed-off-by: Borislav Petkov Acked-by: Doug Thompson --- drivers/edac/amd64_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index a44e90abb755..4129aa0930cd 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1435,7 +1435,7 @@ static inline u64 f10_get_base_addr_offset(u64 sys_addr, int hi_range_sel, u64 chan_off; if (hi_range_sel) { - if (!(dct_sel_base_addr & 0xFFFFF800) && + if (!(dct_sel_base_addr & 0xFFFF0000) && hole_valid && (sys_addr >= 0x100000000ULL)) chan_off = hole_off << 16; else -- cgit v1.2.3 From bc57117856cf1e581135810b37d3b75f9d1749f5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 21 May 2010 21:25:03 +0200 Subject: amd64_edac: Correct scrub rate setting Exit early when setting scrub rate on unknown/unsupported families. Cc: # 32.x 33.x 34.x Signed-off-by: Borislav Petkov Acked-by: Doug Thompson --- drivers/edac/amd64_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 4129aa0930cd..cdf457925f03 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -178,7 +178,7 @@ static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 *bandwidth) default: amd64_printk(KERN_ERR, "Unsupported family!\n"); - break; + return -EINVAL; } return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, *bandwidth, min_scrubrate); -- cgit v1.2.3 From eba042a81edd6baaff44831b2d719b14a6d21e58 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 25 May 2010 18:21:07 +0200 Subject: edac, mc: Improve scrub rate handling Fortify the interface to not accept negative values, remove memctrl_int_store() as a result. Also, sanitize bandwidth setting by making the argument a simple u32 instead of strange u32 pointer being passed around for no obvious reason. 
Then, fix error handling and teach it to return proper error values. Finally, make code more readable, simplify debug messages. Cc: Mauro Carvalho Chehab Cc: Arthur Jones Signed-off-by: Borislav Petkov Acked-by: Doug Thompson --- drivers/edac/amd64_edac.c | 6 ++-- drivers/edac/e752x_edac.c | 4 +-- drivers/edac/edac_core.h | 2 +- drivers/edac/edac_mc_sysfs.c | 86 +++++++++++++++++++------------------------- drivers/edac/i5100_edac.c | 7 ++-- 5 files changed, 46 insertions(+), 59 deletions(-) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index cdf457925f03..0106d343a681 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -160,7 +160,7 @@ static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, return 0; } -static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 *bandwidth) +static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bandwidth) { struct amd64_pvt *pvt = mci->pvt_info; u32 min_scrubrate = 0x0; @@ -180,8 +180,8 @@ static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 *bandwidth) amd64_printk(KERN_ERR, "Unsupported family!\n"); return -EINVAL; } - return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, *bandwidth, - min_scrubrate); + return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, bandwidth, + min_scrubrate); } static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw) diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index ae3f80c54198..073f5a06d238 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -958,7 +958,7 @@ static void e752x_check(struct mem_ctl_info *mci) } /* Program byte/sec bandwidth scrub rate to hardware */ -static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *new_bw) +static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw) { const struct scrubrate *scrubrates; struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; @@ -975,7 +975,7 @@ static int 
set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *new_bw) * desired rate and program the cooresponding register value. */ for (i = 0; scrubrates[i].bandwidth != SDRATE_EOT; i++) - if (scrubrates[i].bandwidth >= *new_bw) + if (scrubrates[i].bandwidth >= new_bw) break; if (scrubrates[i].bandwidth == SDRATE_EOT) diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index ade4f1d70539..ce7146677e9b 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -378,7 +378,7 @@ struct mem_ctl_info { internal representation and configures whatever else needs to be configured. */ - int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 * bw); + int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw); /* Get the current sdram memory scrub rate from the internal representation and converts it to the closest matching diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index c200c2fd43ea..8aad94d10c0c 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -124,19 +124,6 @@ static const char *edac_caps[] = { [EDAC_S16ECD16ED] = "S16ECD16ED" }; - - -static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) -{ - int *value = (int *)ptr; - - if (isdigit(*buffer)) - *value = simple_strtoul(buffer, NULL, 0); - - return count; -} - - /* EDAC sysfs CSROW data structures and methods */ @@ -450,53 +437,54 @@ static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci, /* memory scrubbing */ static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci, - const char *data, size_t count) + const char *data, size_t count) { - u32 bandwidth = -1; + unsigned long bandwidth = 0; + int err; - if (mci->set_sdram_scrub_rate) { + if (!mci->set_sdram_scrub_rate) { + edac_printk(KERN_WARNING, EDAC_MC, + "Memory scrub rate setting not implemented!\n"); + return -EINVAL; + } - memctrl_int_store(&bandwidth, data, count); + if (strict_strtoul(data, 10, &bandwidth) < 0) + return -EINVAL; - if 
(!(*mci->set_sdram_scrub_rate) (mci, &bandwidth)) { - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate set successfully, applied: %d\n", - bandwidth); - } else { - /* FIXME: error codes maybe? */ - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate set FAILED, could not apply: %d\n", - bandwidth); - } - } else { - /* FIXME: produce "not implemented" ERROR for user-side. */ - edac_printk(KERN_WARNING, EDAC_MC, - "Memory scrubbing 'set'control is not implemented!\n"); + err = mci->set_sdram_scrub_rate(mci, (u32)bandwidth); + if (err) { + edac_printk(KERN_DEBUG, EDAC_MC, + "Failed setting scrub rate to %lu\n", bandwidth); + return -EINVAL; + } + else { + edac_printk(KERN_DEBUG, EDAC_MC, + "Scrub rate set to: %lu\n", bandwidth); + return count; } - return count; } static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data) { - u32 bandwidth = -1; - - if (mci->get_sdram_scrub_rate) { - if (!(*mci->get_sdram_scrub_rate) (mci, &bandwidth)) { - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate successfully, fetched: %d\n", - bandwidth); - } else { - /* FIXME: error codes maybe? */ - edac_printk(KERN_DEBUG, EDAC_MC, - "Scrub rate fetch FAILED, got: %d\n", - bandwidth); - } - } else { - /* FIXME: produce "not implemented" ERROR for user-side. 
*/ + u32 bandwidth = 0; + int err; + + if (!mci->get_sdram_scrub_rate) { edac_printk(KERN_WARNING, EDAC_MC, - "Memory scrubbing 'get' control is not implemented\n"); + "Memory scrub rate reading not implemented\n"); + return -EINVAL; + } + + err = mci->get_sdram_scrub_rate(mci, &bandwidth); + if (err) { + edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); + return err; + } + else { + edac_printk(KERN_DEBUG, EDAC_MC, + "Read scrub rate: %d\n", bandwidth); + return sprintf(data, "%d\n", bandwidth); } - return sprintf(data, "%d\n", bandwidth); } /* default attribute files for the MCI object */ diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index ee9753cf362c..f459a6c0886b 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -589,14 +589,13 @@ static void i5100_refresh_scrubbing(struct work_struct *work) /* * The bandwidth is based on experimentation, feel free to refine it. */ -static int i5100_set_scrub_rate(struct mem_ctl_info *mci, - u32 *bandwidth) +static int i5100_set_scrub_rate(struct mem_ctl_info *mci, u32 bandwidth) { struct i5100_priv *priv = mci->pvt_info; u32 dw; pci_read_config_dword(priv->mc, I5100_MC, &dw); - if (*bandwidth) { + if (bandwidth) { priv->scrub_enable = 1; dw |= I5100_MC_SCRBEN_MASK; schedule_delayed_work(&(priv->i5100_scrubbing), @@ -610,7 +609,7 @@ static int i5100_set_scrub_rate(struct mem_ctl_info *mci, pci_read_config_dword(priv->mc, I5100_MC, &dw); - *bandwidth = 5900000 * i5100_mc_scrben(dw); + bandwidth = 5900000 * i5100_mc_scrben(dw); return 0; } -- cgit v1.2.3 From 962b70a1eb22c467b95756a290c694e73da17f41 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 3 Aug 2010 16:51:28 +0200 Subject: amd64_edac: Fix operator precendence error The bitwise AND is of higher precedence, make that explicit. 
Cc: # 34.x Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 0106d343a681..c29d24fe1856 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1683,7 +1683,7 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, * ganged. Otherwise @chan should already contain the channel at * this point. */ - if (dct_ganging_enabled(pvt) && pvt->nbcfg & K8_NBCFG_CHIPKILL) + if (dct_ganging_enabled(pvt) && (pvt->nbcfg & K8_NBCFG_CHIPKILL)) chan = get_channel_from_ecc_syndrome(mci, syndrome); if (chan >= 0) -- cgit v1.2.3 From c4799c7570475352c8c5de82ae938f7a02f206fa Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 3 Aug 2010 17:25:18 +0200 Subject: amd64_edac: Minor formatting fix EDAC MC3: CE page 0xc32281, offset 0x8a0, grain 0, syndrome 0x1, row 2, channel 1, label "": amd64_edac EDAC MC3: CE - no information available: amd64_edacError Overflow Add the missing space before "Error Overflow" on the second line. Signed-off-by: Borislav Petkov --- drivers/edac/amd64_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index c29d24fe1856..670239ab7511 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2080,7 +2080,7 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, * catastrophic. */ if (info->nbsh & K8_NBSH_OVERFLOW) - edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR "Error Overflow"); + edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR " Error Overflow"); } void amd64_decode_bus_error(int node_id, struct err_regs *regs) -- cgit v1.2.3