diff options
Diffstat (limited to 'drivers/edac/amd64_edac.c')
-rw-r--r-- | drivers/edac/amd64_edac.c | 1251 |
1 files changed, 561 insertions, 690 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index a38831c82649..5fdd6daa40ea 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -19,26 +19,48 @@ static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES]; static struct amd64_pvt *pvt_lookup[EDAC_MAX_NUMNODES]; /* - * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only - * for DDR2 DRAM mapping. + * Address to DRAM bank mapping: see F2x80 for K8 and F2x[1,0]80 for Fam10 and + * later. */ -u32 revf_quad_ddr2_shift[] = { - 0, /* 0000b NULL DIMM (128mb) */ - 28, /* 0001b 256mb */ - 29, /* 0010b 512mb */ - 29, /* 0011b 512mb */ - 29, /* 0100b 512mb */ - 30, /* 0101b 1gb */ - 30, /* 0110b 1gb */ - 31, /* 0111b 2gb */ - 31, /* 1000b 2gb */ - 32, /* 1001b 4gb */ - 32, /* 1010b 4gb */ - 33, /* 1011b 8gb */ - 0, /* 1100b future */ - 0, /* 1101b future */ - 0, /* 1110b future */ - 0 /* 1111b future */ +static int ddr2_dbam_revCG[] = { + [0] = 32, + [1] = 64, + [2] = 128, + [3] = 256, + [4] = 512, + [5] = 1024, + [6] = 2048, +}; + +static int ddr2_dbam_revD[] = { + [0] = 32, + [1] = 64, + [2 ... 3] = 128, + [4] = 256, + [5] = 512, + [6] = 256, + [7] = 512, + [8 ... 9] = 1024, + [10] = 2048, +}; + +static int ddr2_dbam[] = { [0] = 128, + [1] = 256, + [2 ... 4] = 512, + [5 ... 6] = 1024, + [7 ... 8] = 2048, + [9 ... 10] = 4096, + [11] = 8192, +}; + +static int ddr3_dbam[] = { [0] = -1, + [1] = 256, + [2] = 512, + [3 ... 4] = -1, + [5 ... 6] = 1024, + [7 ... 8] = 2048, + [9 ... 10] = 4096, + [11] = 8192, }; /* @@ -164,11 +186,9 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw) { struct amd64_pvt *pvt = mci->pvt_info; u32 scrubval = 0; - int status = -1, i, ret = 0; + int status = -1, i; - ret = pci_read_config_dword(pvt->misc_f3_ctl, K8_SCRCTRL, &scrubval); - if (ret) - debugf0("Reading K8_SCRCTRL failed\n"); + amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_SCRCTRL, &scrubval); scrubval = scrubval & 0x001F; @@ -189,7 +209,7 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw) /* Map from a CSROW entry to the mask entry that operates on it */ static inline u32 amd64_map_to_dcs_mask(struct amd64_pvt *pvt, int csrow) { - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F) + if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) return csrow; else return csrow >> 1; @@ -437,7 +457,7 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, u64 base; /* only revE and later have the DRAM Hole Address Register */ - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_E) { + if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_E) { debugf1(" revision %d for node %d does not support DHAR\n", pvt->ext_model, pvt->mc_node_id); return 1; @@ -743,21 +763,6 @@ static void find_csrow_limits(struct mem_ctl_info *mci, int csrow, *input_addr_max = base | mask | pvt->dcs_mask_notused; } -/* - * Extract error address from MCA NB Address Low (section 3.6.4.5) and MCA NB - * Address High (section 3.6.4.6) register values and return the result. Address - * is located in the info structure (nbeah and nbeal), the encoding is device - * specific. - */ -static u64 extract_error_address(struct mem_ctl_info *mci, - struct err_regs *info) -{ - struct amd64_pvt *pvt = mci->pvt_info; - - return pvt->ops->get_error_address(mci, info); -} - - /* Map the Error address to a PAGE and PAGE OFFSET. */ static inline void error_address_to_page_and_offset(u64 error_address, u32 *page, u32 *offset) @@ -787,7 +792,7 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr) return csrow; } -static int get_channel_from_ecc_syndrome(unsigned short syndrome); +static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); static void amd64_cpu_display_info(struct amd64_pvt *pvt) { @@ -797,7 +802,7 @@ static void amd64_cpu_display_info(struct amd64_pvt *pvt) edac_printk(KERN_DEBUG, EDAC_MC, "F10h CPU detected\n"); else if (boot_cpu_data.x86 == 0xf) edac_printk(KERN_DEBUG, EDAC_MC, "%s detected\n", - (pvt->ext_model >= OPTERON_CPU_REV_F) ? + (pvt->ext_model >= K8_REV_F) ? "Rev F or later" : "Rev E or earlier"); else /* we'll hardly ever ever get here */ @@ -813,7 +818,7 @@ static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt) int bit; enum dev_type edac_cap = EDAC_FLAG_NONE; - bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= OPTERON_CPU_REV_F) + bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F) ? 19 : 17; @@ -824,111 +829,86 @@ static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt) } -static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt, - int ganged); +static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt); + +static void amd64_dump_dramcfg_low(u32 dclr, int chan) +{ + debugf1("F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr); + + debugf1(" DIMM type: %sbuffered; all DIMMs support ECC: %s\n", + (dclr & BIT(16)) ? "un" : "", + (dclr & BIT(19)) ? "yes" : "no"); + + debugf1(" PAR/ERR parity: %s\n", + (dclr & BIT(8)) ? "enabled" : "disabled"); + + debugf1(" DCT 128bit mode width: %s\n", + (dclr & BIT(11)) ? "128b" : "64b"); + + debugf1(" x4 logical DIMMs present: L0: %s L1: %s L2: %s L3: %s\n", + (dclr & BIT(12)) ? "yes" : "no", + (dclr & BIT(13)) ? "yes" : "no", + (dclr & BIT(14)) ? "yes" : "no", + (dclr & BIT(15)) ? "yes" : "no"); +} /* Display and decode various NB registers for debug purposes. */ static void amd64_dump_misc_regs(struct amd64_pvt *pvt) { int ganged; - debugf1(" nbcap:0x%8.08x DctDualCap=%s DualNode=%s 8-Node=%s\n", - pvt->nbcap, - (pvt->nbcap & K8_NBCAP_DCT_DUAL) ? "True" : "False", - (pvt->nbcap & K8_NBCAP_DUAL_NODE) ? "True" : "False", - (pvt->nbcap & K8_NBCAP_8_NODE) ? "True" : "False"); - debugf1(" ECC Capable=%s ChipKill Capable=%s\n", - (pvt->nbcap & K8_NBCAP_SECDED) ? "True" : "False", - (pvt->nbcap & K8_NBCAP_CHIPKILL) ? "True" : "False"); - debugf1(" DramCfg0-low=0x%08x DIMM-ECC=%s Parity=%s Width=%s\n", - pvt->dclr0, - (pvt->dclr0 & BIT(19)) ? "Enabled" : "Disabled", - (pvt->dclr0 & BIT(8)) ? "Enabled" : "Disabled", - (pvt->dclr0 & BIT(11)) ? "128b" : "64b"); - debugf1(" DIMM x4 Present: L0=%s L1=%s L2=%s L3=%s DIMM Type=%s\n", - (pvt->dclr0 & BIT(12)) ? "Y" : "N", - (pvt->dclr0 & BIT(13)) ? "Y" : "N", - (pvt->dclr0 & BIT(14)) ? "Y" : "N", - (pvt->dclr0 & BIT(15)) ? "Y" : "N", - (pvt->dclr0 & BIT(16)) ? "UN-Buffered" : "Buffered"); - - - debugf1(" online-spare: 0x%8.08x\n", pvt->online_spare); + debugf1("F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap); - if (boot_cpu_data.x86 == 0xf) { - debugf1(" dhar: 0x%8.08x Base=0x%08x Offset=0x%08x\n", - pvt->dhar, dhar_base(pvt->dhar), - k8_dhar_offset(pvt->dhar)); - debugf1(" DramHoleValid=%s\n", - (pvt->dhar & DHAR_VALID) ? "True" : "False"); + debugf1(" NB two channel DRAM capable: %s\n", + (pvt->nbcap & K8_NBCAP_DCT_DUAL) ? "yes" : "no"); - debugf1(" dbam-dkt: 0x%8.08x\n", pvt->dbam0); + debugf1(" ECC capable: %s, ChipKill ECC capable: %s\n", + (pvt->nbcap & K8_NBCAP_SECDED) ? "yes" : "no", + (pvt->nbcap & K8_NBCAP_CHIPKILL) ? "yes" : "no"); - /* everything below this point is Fam10h and above */ - return; + amd64_dump_dramcfg_low(pvt->dclr0, 0); - } else { - debugf1(" dhar: 0x%8.08x Base=0x%08x Offset=0x%08x\n", - pvt->dhar, dhar_base(pvt->dhar), - f10_dhar_offset(pvt->dhar)); - debugf1(" DramMemHoistValid=%s DramHoleValid=%s\n", - (pvt->dhar & F10_DRAM_MEM_HOIST_VALID) ? - "True" : "False", - (pvt->dhar & DHAR_VALID) ? - "True" : "False"); - } + debugf1("F3xB0 (Online Spare): 0x%08x\n", pvt->online_spare); - /* Only if NOT ganged does dcl1 have valid info */ - if (!dct_ganging_enabled(pvt)) { - debugf1(" DramCfg1-low=0x%08x DIMM-ECC=%s Parity=%s " - "Width=%s\n", pvt->dclr1, - (pvt->dclr1 & BIT(19)) ? "Enabled" : "Disabled", - (pvt->dclr1 & BIT(8)) ? "Enabled" : "Disabled", - (pvt->dclr1 & BIT(11)) ? "128b" : "64b"); - debugf1(" DIMM x4 Present: L0=%s L1=%s L2=%s L3=%s " - "DIMM Type=%s\n", - (pvt->dclr1 & BIT(12)) ? "Y" : "N", - (pvt->dclr1 & BIT(13)) ? "Y" : "N", - (pvt->dclr1 & BIT(14)) ? "Y" : "N", - (pvt->dclr1 & BIT(15)) ? "Y" : "N", - (pvt->dclr1 & BIT(16)) ? "UN-Buffered" : "Buffered"); + debugf1("F1xF0 (DRAM Hole Address): 0x%08x, base: 0x%08x, " + "offset: 0x%08x\n", + pvt->dhar, + dhar_base(pvt->dhar), + (boot_cpu_data.x86 == 0xf) ? k8_dhar_offset(pvt->dhar) + : f10_dhar_offset(pvt->dhar)); + + debugf1(" DramHoleValid: %s\n", + (pvt->dhar & DHAR_VALID) ? "yes" : "no"); + + /* everything below this point is Fam10h and above */ + if (boot_cpu_data.x86 == 0xf) { + amd64_debug_display_dimm_sizes(0, pvt); + return; } + /* Only if NOT ganged does dclr1 have valid info */ + if (!dct_ganging_enabled(pvt)) + amd64_dump_dramcfg_low(pvt->dclr1, 1); + /* * Determine if ganged and then dump memory sizes for first controller, * and if NOT ganged dump info for 2nd controller. */ ganged = dct_ganging_enabled(pvt); - f10_debug_display_dimm_sizes(0, pvt, ganged); + amd64_debug_display_dimm_sizes(0, pvt); if (!ganged) - f10_debug_display_dimm_sizes(1, pvt, ganged); + amd64_debug_display_dimm_sizes(1, pvt); } /* Read in both of DBAM registers */ static void amd64_read_dbam_reg(struct amd64_pvt *pvt) { - int err = 0; - unsigned int reg; - - reg = DBAM0; - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, &pvt->dbam0); - if (err) - goto err_reg; + amd64_read_pci_cfg(pvt->dram_f2_ctl, DBAM0, &pvt->dbam0); - if (boot_cpu_data.x86 >= 0x10) { - reg = DBAM1; - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, &pvt->dbam1); - - if (err) - goto err_reg; - } - - return; - -err_reg: - debugf0("Error reading F2x%03x.\n", reg); + if (boot_cpu_data.x86 >= 0x10) + amd64_read_pci_cfg(pvt->dram_f2_ctl, DBAM1, &pvt->dbam1); } /* @@ -963,7 +943,7 @@ err_reg: static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt) { - if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F) { + if (boot_cpu_data.x86 == 0xf && pvt->ext_model < K8_REV_F) { pvt->dcsb_base = REV_E_DCSB_BASE_BITS; pvt->dcsm_mask = REV_E_DCSM_MASK_BITS; pvt->dcs_mask_notused = REV_E_DCS_NOTUSED_BITS; @@ -991,28 +971,21 @@ static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt) */ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt) { - int cs, reg, err = 0; + int cs, reg; amd64_set_dct_base_and_mask(pvt); for (cs = 0; cs < pvt->cs_count; cs++) { reg = K8_DCSB0 + (cs * 4); - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, - &pvt->dcsb0[cs]); - if (unlikely(err)) - debugf0("Reading K8_DCSB0[%d] failed\n", cs); - else + if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, &pvt->dcsb0[cs])) debugf0(" DCSB0[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsb0[cs], reg); /* If DCT are NOT ganged, then read in DCT1's base */ if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) { reg = F10_DCSB1 + (cs * 4); - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, - &pvt->dcsb1[cs]); - if (unlikely(err)) - debugf0("Reading F10_DCSB1[%d] failed\n", cs); - else + if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, + &pvt->dcsb1[cs])) debugf0(" DCSB1[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsb1[cs], reg); } else { @@ -1022,26 +995,20 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt) for (cs = 0; cs < pvt->num_dcsm; cs++) { reg = K8_DCSM0 + (cs * 4); - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, - &pvt->dcsm0[cs]); - if (unlikely(err)) - debugf0("Reading K8_DCSM0 failed\n"); - else + if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, &pvt->dcsm0[cs])) debugf0(" DCSM0[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsm0[cs], reg); /* If DCT are NOT ganged, then read in DCT1's mask */ if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) { reg = F10_DCSM1 + (cs * 4); - err = pci_read_config_dword(pvt->dram_f2_ctl, reg, - &pvt->dcsm1[cs]); - if (unlikely(err)) - debugf0("Reading F10_DCSM1[%d] failed\n", cs); - else + if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, + &pvt->dcsm1[cs])) debugf0(" DCSM1[%d]=0x%08x reg: F2x%x\n", cs, pvt->dcsm1[cs], reg); - } else + } else { pvt->dcsm1[cs] = 0; + } } } @@ -1049,18 +1016,16 @@ static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt) { enum mem_type type; - if (boot_cpu_data.x86 >= 0x10 || pvt->ext_model >= OPTERON_CPU_REV_F) { - /* Rev F and later */ - type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2; + if (boot_cpu_data.x86 >= 0x10 || pvt->ext_model >= K8_REV_F) { + if (pvt->dchr0 & DDR3_MODE) + type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; + else + type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2; } else { - /* Rev E and earlier */ type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR; } - debugf1(" Memory type is: %s\n", - (type == MEM_DDR2) ? "MEM_DDR2" : - (type == MEM_RDDR2) ? "MEM_RDDR2" : - (type == MEM_DDR) ? "MEM_DDR" : "MEM_RDDR"); + debugf1(" Memory type is: %s\n", edac_mem_types[type]); return type; } @@ -1078,11 +1043,11 @@ static int k8_early_channel_count(struct amd64_pvt *pvt) { int flag, err = 0; - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); + err = amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); if (err) return err; - if ((boot_cpu_data.x86_model >> 4) >= OPTERON_CPU_REV_F) { + if ((boot_cpu_data.x86_model >> 4) >= K8_REV_F) { /* RevF (NPT) and later */ flag = pvt->dclr0 & F10_WIDTH_128; } else { @@ -1114,22 +1079,15 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram) { u32 low; u32 off = dram << 3; /* 8 bytes between DRAM entries */ - int err; - err = pci_read_config_dword(pvt->addr_f1_ctl, - K8_DRAM_BASE_LOW + off, &low); - if (err) - debugf0("Reading K8_DRAM_BASE_LOW failed\n"); + amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DRAM_BASE_LOW + off, &low); /* Extract parts into separate data entries */ pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8; pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7; pvt->dram_rw_en[dram] = (low & 0x3); - err = pci_read_config_dword(pvt->addr_f1_ctl, - K8_DRAM_LIMIT_LOW + off, &low); - if (err) - debugf0("Reading K8_DRAM_LIMIT_LOW failed\n"); + amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DRAM_LIMIT_LOW + off, &low); /* * Extract parts into separate data entries. Limit is the HIGHEST memory @@ -1142,7 +1100,7 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram) static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, struct err_regs *info, - u64 SystemAddress) + u64 sys_addr) { struct mem_ctl_info *src_mci; unsigned short syndrome; @@ -1155,7 +1113,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, /* CHIPKILL enabled */ if (info->nbcfg & K8_NBCFG_CHIPKILL) { - channel = get_channel_from_ecc_syndrome(syndrome); + channel = get_channel_from_ecc_syndrome(mci, syndrome); if (channel < 0) { /* * Syndrome didn't map, so we don't know which of the @@ -1177,64 +1135,46 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, * was obtained from email communication with someone at AMD. * (Wish the email was placed in this comment - norsk) */ - channel = ((SystemAddress & BIT(3)) != 0); + channel = ((sys_addr & BIT(3)) != 0); } /* * Find out which node the error address belongs to. This may be * different from the node that detected the error. */ - src_mci = find_mc_by_sys_addr(mci, SystemAddress); + src_mci = find_mc_by_sys_addr(mci, sys_addr); if (!src_mci) { amd64_mc_printk(mci, KERN_ERR, "failed to map error address 0x%lx to a node\n", - (unsigned long)SystemAddress); + (unsigned long)sys_addr); edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); return; } - /* Now map the SystemAddress to a CSROW */ - csrow = sys_addr_to_csrow(src_mci, SystemAddress); + /* Now map the sys_addr to a CSROW */ + csrow = sys_addr_to_csrow(src_mci, sys_addr); if (csrow < 0) { edac_mc_handle_ce_no_info(src_mci, EDAC_MOD_STR); } else { - error_address_to_page_and_offset(SystemAddress, &page, &offset); + error_address_to_page_and_offset(sys_addr, &page, &offset); edac_mc_handle_ce(src_mci, page, offset, syndrome, csrow, channel, EDAC_MOD_STR); } } -/* - * determrine the number of PAGES in for this DIMM's size based on its DRAM - * Address Mapping. - * - * First step is to calc the number of bits to shift a value of 1 left to - * indicate show many pages. Start with the DBAM value as the starting bits, - * then proceed to adjust those shift bits, based on CPU rev and the table. - * See BKDG on the DBAM - */ -static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map) +static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, int cs_mode) { - int nr_pages; + int *dbam_map; - if (pvt->ext_model >= OPTERON_CPU_REV_F) { - nr_pages = 1 << (revf_quad_ddr2_shift[dram_map] - PAGE_SHIFT); - } else { - /* - * RevE and less section; this line is tricky. It collapses the - * table used by RevD and later to one that matches revisions CG - * and earlier. - */ - dram_map -= (pvt->ext_model >= OPTERON_CPU_REV_D) ? - (dram_map > 8 ? 4 : (dram_map > 5 ? - 3 : (dram_map > 2 ? 1 : 0))) : 0; - - /* 25 shift is 32MiB minimum DIMM size in RevE and prior */ - nr_pages = 1 << (dram_map + 25 - PAGE_SHIFT); - } + if (pvt->ext_model >= K8_REV_F) + dbam_map = ddr2_dbam; + else if (pvt->ext_model >= K8_REV_D) + dbam_map = ddr2_dbam_revD; + else + dbam_map = ddr2_dbam_revCG; - return nr_pages; + return dbam_map[cs_mode]; } /* @@ -1248,34 +1188,24 @@ static int k8_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map) static int f10_early_channel_count(struct amd64_pvt *pvt) { int dbams[] = { DBAM0, DBAM1 }; - int err = 0, channels = 0; - int i, j; + int i, j, channels = 0; u32 dbam; - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); - if (err) - goto err_reg; - - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1); - if (err) - goto err_reg; - /* If we are in 128 bit mode, then we are using 2 channels */ if (pvt->dclr0 & F10_WIDTH_128) { - debugf0("Data WIDTH is 128 bits - 2 channels\n"); channels = 2; return channels; } /* - * Need to check if in UN-ganged mode: In such, there are 2 channels, - * but they are NOT in 128 bit mode and thus the above 'dcl0' status bit - * will be OFF. + * Need to check if in unganged mode: In such, there are 2 channels, + * but they are not in 128 bit mode and thus the above 'dclr0' status + * bit will be OFF. * * Need to check DCT0[0] and DCT1[0] to see if only one of them has * their CSEnable bit on. If so, then SINGLE DIMM case. */ - debugf0("Data WIDTH is NOT 128 bits - need more decoding\n"); + debugf0("Data width is not 128 bits - need more decoding\n"); /* * Check DRAM Bank Address Mapping values for each DIMM to see if there @@ -1283,8 +1213,7 @@ static int f10_early_channel_count(struct amd64_pvt *pvt) * both controllers since DIMMs can be placed in either one. */ for (i = 0; i < ARRAY_SIZE(dbams); i++) { - err = pci_read_config_dword(pvt->dram_f2_ctl, dbams[i], &dbam); - if (err) + if (amd64_read_pci_cfg(pvt->dram_f2_ctl, dbams[i], &dbam)) goto err_reg; for (j = 0; j < 4; j++) { @@ -1295,6 +1224,9 @@ static int f10_early_channel_count(struct amd64_pvt *pvt) } } + if (channels > 2) + channels = 2; + debugf0("MCT channel count: %d\n", channels); return channels; @@ -1304,9 +1236,16 @@ err_reg: } -static int f10_dbam_map_to_pages(struct amd64_pvt *pvt, int dram_map) +static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, int cs_mode) { - return 1 << (revf_quad_ddr2_shift[dram_map] - PAGE_SHIFT); + int *dbam_map; + + if (pvt->dchr0 & DDR3_MODE || pvt->dchr1 & DDR3_MODE) + dbam_map = ddr3_dbam; + else + dbam_map = ddr2_dbam; + + return dbam_map[cs_mode]; } /* Enable extended configuration access via 0xCF8 feature */ @@ -1314,7 +1253,7 @@ static void amd64_setup(struct amd64_pvt *pvt) { u32 reg; - pci_read_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, ®); + amd64_read_pci_cfg(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, ®); pvt->flags.cf8_extcfg = !!(reg & F10_NB_CFG_LOW_ENABLE_EXT_CFG); reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG; @@ -1326,7 +1265,7 @@ static void amd64_teardown(struct amd64_pvt *pvt) { u32 reg; - pci_read_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, ®); + amd64_read_pci_cfg(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, ®); reg &= ~F10_NB_CFG_LOW_ENABLE_EXT_CFG; if (pvt->flags.cf8_extcfg) @@ -1355,10 +1294,10 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) high_offset = F10_DRAM_BASE_HIGH + (dram << 3); /* read the 'raw' DRAM BASE Address register */ - pci_read_config_dword(pvt->addr_f1_ctl, low_offset, &low_base); + amd64_read_pci_cfg(pvt->addr_f1_ctl, low_offset, &low_base); /* Read from the ECS data register */ - pci_read_config_dword(pvt->addr_f1_ctl, high_offset, &high_base); + amd64_read_pci_cfg(pvt->addr_f1_ctl, high_offset, &high_base); /* Extract parts into separate data entries */ pvt->dram_rw_en[dram] = (low_base & 0x3); @@ -1375,13 +1314,10 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3); /* read the 'raw' LIMIT registers */ - pci_read_config_dword(pvt->addr_f1_ctl, low_offset, &low_limit); + amd64_read_pci_cfg(pvt->addr_f1_ctl, low_offset, &low_limit); /* Read from the ECS data register for the HIGH portion */ - pci_read_config_dword(pvt->addr_f1_ctl, high_offset, &high_limit); - - debugf0(" HW Regs: BASE=0x%08x-%08x LIMIT= 0x%08x-%08x\n", - high_base, low_base, high_limit, low_limit); + amd64_read_pci_cfg(pvt->addr_f1_ctl, high_offset, &high_limit); pvt->dram_DstNode[dram] = (low_limit & 0x7); pvt->dram_IntlvSel[dram] = (low_limit >> 8) & 0x7; @@ -1397,32 +1333,35 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) static void f10_read_dram_ctl_register(struct amd64_pvt *pvt) { - int err = 0; - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_LOW, - &pvt->dram_ctl_select_low); - if (err) { - debugf0("Reading F10_DCTL_SEL_LOW failed\n"); - } else { - debugf0("DRAM_DCTL_SEL_LOW=0x%x DctSelBaseAddr=0x%x\n", - pvt->dram_ctl_select_low, dct_sel_baseaddr(pvt)); - - debugf0(" DRAM DCTs are=%s DRAM Is=%s DRAM-Ctl-" - "sel-hi-range=%s\n", - (dct_ganging_enabled(pvt) ? "GANGED" : "NOT GANGED"), - (dct_dram_enabled(pvt) ? "Enabled" : "Disabled"), - (dct_high_range_enabled(pvt) ? "Enabled" : "Disabled")); - - debugf0(" DctDatIntLv=%s MemCleared=%s DctSelIntLvAddr=0x%x\n", - (dct_data_intlv_enabled(pvt) ? "Enabled" : "Disabled"), - (dct_memory_cleared(pvt) ? "True " : "False "), + if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCTL_SEL_LOW, + &pvt->dram_ctl_select_low)) { + debugf0("F2x110 (DCTL Sel. Low): 0x%08x, " + "High range addresses at: 0x%x\n", + pvt->dram_ctl_select_low, + dct_sel_baseaddr(pvt)); + + debugf0(" DCT mode: %s, All DCTs on: %s\n", + (dct_ganging_enabled(pvt) ? "ganged" : "unganged"), + (dct_dram_enabled(pvt) ? "yes" : "no")); + + if (!dct_ganging_enabled(pvt)) + debugf0(" Address range split per DCT: %s\n", + (dct_high_range_enabled(pvt) ? "yes" : "no")); + + debugf0(" DCT data interleave for ECC: %s, " + "DRAM cleared since last warm reset: %s\n", + (dct_data_intlv_enabled(pvt) ? "enabled" : "disabled"), + (dct_memory_cleared(pvt) ? "yes" : "no")); + + debugf0(" DCT channel interleave: %s, " + "DCT interleave bits selector: 0x%x\n", + (dct_interleave_enabled(pvt) ? "enabled" : "disabled"), dct_sel_interleave_addr(pvt)); } - err = pci_read_config_dword(pvt->dram_f2_ctl, F10_DCTL_SEL_HIGH, - &pvt->dram_ctl_select_high); - if (err) - debugf0("Reading F10_DCTL_SEL_HIGH failed\n"); + amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCTL_SEL_HIGH, + &pvt->dram_ctl_select_high); } /* @@ -1706,10 +1645,11 @@ static int f10_translate_sysaddr_to_cs(struct amd64_pvt *pvt, u64 sys_addr, } /* - * This the F10h reference code from AMD to map a @sys_addr to NodeID, - * CSROW, Channel. + * For reference see "2.8.5 Routing DRAM Requests" in F10 BKDG. This code maps + * a @sys_addr to NodeID, DCT (channel) and chip select (CSROW). * - * The @sys_addr is usually an error address received from the hardware. + * The @sys_addr is usually an error address received from the hardware + * (MCX_ADDR). */ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, struct err_regs *info, @@ -1722,133 +1662,76 @@ static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); - if (csrow >= 0) { - error_address_to_page_and_offset(sys_addr, &page, &offset); - - syndrome = HIGH_SYNDROME(info->nbsl) << 8; - syndrome |= LOW_SYNDROME(info->nbsh); - - /* - * Is CHIPKILL on? If so, then we can attempt to use the - * syndrome to isolate which channel the error was on. - */ - if (pvt->nbcfg & K8_NBCFG_CHIPKILL) - chan = get_channel_from_ecc_syndrome(syndrome); - - if (chan >= 0) { - edac_mc_handle_ce(mci, page, offset, syndrome, - csrow, chan, EDAC_MOD_STR); - } else { - /* - * Channel unknown, report all channels on this - * CSROW as failed. - */ - for (chan = 0; chan < mci->csrows[csrow].nr_channels; - chan++) { - edac_mc_handle_ce(mci, page, offset, - syndrome, - csrow, chan, - EDAC_MOD_STR); - } - } - - } else { + if (csrow < 0) { edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); + return; } -} -/* - * Input (@index) is the DBAM DIMM value (1 of 4) used as an index into a shift - * table (revf_quad_ddr2_shift) which starts at 128MB DIMM size. Index of 0 - * indicates an empty DIMM slot, as reported by Hardware on empty slots. - * - * Normalize to 128MB by subracting 27 bit shift. - */ -static int map_dbam_to_csrow_size(int index) -{ - int mega_bytes = 0; + error_address_to_page_and_offset(sys_addr, &page, &offset); - if (index > 0 && index <= DBAM_MAX_VALUE) - mega_bytes = ((128 << (revf_quad_ddr2_shift[index]-27))); + syndrome = HIGH_SYNDROME(info->nbsl) << 8; + syndrome |= LOW_SYNDROME(info->nbsh); - return mega_bytes; + /* + * We need the syndromes for channel detection only when we're + * ganged. Otherwise @chan should already contain the channel at + * this point. + */ + if (dct_ganging_enabled(pvt) && pvt->nbcfg & K8_NBCFG_CHIPKILL) + chan = get_channel_from_ecc_syndrome(mci, syndrome); + + if (chan >= 0) + edac_mc_handle_ce(mci, page, offset, syndrome, csrow, chan, + EDAC_MOD_STR); + else + /* + * Channel unknown, report all channels on this CSROW as failed. + */ + for (chan = 0; chan < mci->csrows[csrow].nr_channels; chan++) + edac_mc_handle_ce(mci, page, offset, syndrome, + csrow, chan, EDAC_MOD_STR); } /* - * debug routine to display the memory sizes of a DIMM (ganged or not) and it + * debug routine to display the memory sizes of all logical DIMMs and its * CSROWs as well */ -static void f10_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt, - int ganged) +static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt) { int dimm, size0, size1; u32 dbam; u32 *dcsb; - debugf1(" dbam%d: 0x%8.08x CSROW is %s\n", ctrl, - ctrl ? pvt->dbam1 : pvt->dbam0, - ganged ? "GANGED - dbam1 not used" : "NON-GANGED"); + if (boot_cpu_data.x86 == 0xf) { + /* K8 families < revF not supported yet */ + if (pvt->ext_model < K8_REV_F) + return; + else + WARN_ON(ctrl != 0); + } + + debugf1("F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n", + ctrl, ctrl ? pvt->dbam1 : pvt->dbam0); dbam = ctrl ? pvt->dbam1 : pvt->dbam0; dcsb = ctrl ? pvt->dcsb1 : pvt->dcsb0; + edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl); + /* Dump memory sizes for DIMM and its CSROWs */ for (dimm = 0; dimm < 4; dimm++) { size0 = 0; if (dcsb[dimm*2] & K8_DCSB_CS_ENABLE) - size0 = map_dbam_to_csrow_size(DBAM_DIMM(dimm, dbam)); + size0 = pvt->ops->dbam_to_cs(pvt, DBAM_DIMM(dimm, dbam)); size1 = 0; if (dcsb[dimm*2 + 1] & K8_DCSB_CS_ENABLE) - size1 = map_dbam_to_csrow_size(DBAM_DIMM(dimm, dbam)); - - debugf1(" CTRL-%d DIMM-%d=%5dMB CSROW-%d=%5dMB " - "CSROW-%d=%5dMB\n", - ctrl, - dimm, - size0 + size1, - dimm * 2, - size0, - dimm * 2 + 1, - size1); - } -} - -/* - * Very early hardware probe on pci_probe thread to determine if this module - * supports the hardware. - * - * Return: - * 0 for OK - * 1 for error - */ -static int f10_probe_valid_hardware(struct amd64_pvt *pvt) -{ - int ret = 0; - - /* - * If we are on a DDR3 machine, we don't know yet if - * we support that properly at this time - */ - if ((pvt->dchr0 & F10_DCHR_Ddr3Mode) || - (pvt->dchr1 & F10_DCHR_Ddr3Mode)) { - - amd64_printk(KERN_WARNING, - "%s() This machine is running with DDR3 memory. " - "This is not currently supported. " - "DCHR0=0x%x DCHR1=0x%x\n", - __func__, pvt->dchr0, pvt->dchr1); - - amd64_printk(KERN_WARNING, - " Contact '%s' module MAINTAINER to help add" - " support.\n", - EDAC_MOD_STR); - - ret = 1; + size1 = pvt->ops->dbam_to_cs(pvt, DBAM_DIMM(dimm, dbam)); + edac_printk(KERN_DEBUG, EDAC_MC, " %d: %5dMB %d: %5dMB\n", + dimm * 2, size0, dimm * 2 + 1, size1); } - return ret; } /* @@ -1868,11 +1751,11 @@ static struct amd64_family_type amd64_family_types[] = { .addr_f1_ctl = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, .misc_f3_ctl = PCI_DEVICE_ID_AMD_K8_NB_MISC, .ops = { - .early_channel_count = k8_early_channel_count, - .get_error_address = k8_get_error_address, - .read_dram_base_limit = k8_read_dram_base_limit, - .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, - .dbam_map_to_pages = k8_dbam_map_to_pages, + .early_channel_count = k8_early_channel_count, + .get_error_address = k8_get_error_address, + .read_dram_base_limit = k8_read_dram_base_limit, + .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, + .dbam_to_cs = k8_dbam_to_chip_select, } }, [F10_CPUS] = { @@ -1880,13 +1763,12 @@ static struct amd64_family_type amd64_family_types[] = { .addr_f1_ctl = PCI_DEVICE_ID_AMD_10H_NB_MAP, .misc_f3_ctl = PCI_DEVICE_ID_AMD_10H_NB_MISC, .ops = { - .probe_valid_hardware = f10_probe_valid_hardware, - .early_channel_count = f10_early_channel_count, - .get_error_address = f10_get_error_address, - .read_dram_base_limit = f10_read_dram_base_limit, - .read_dram_ctl_register = f10_read_dram_ctl_register, - .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow, - .dbam_map_to_pages = f10_dbam_map_to_pages, + .early_channel_count = f10_early_channel_count, + .get_error_address = f10_get_error_address, + .read_dram_base_limit = f10_read_dram_base_limit, + .read_dram_ctl_register = f10_read_dram_ctl_register, + .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow, + .dbam_to_cs = f10_dbam_to_chip_select, } }, [F11_CPUS] = { @@ -1894,13 +1776,12 @@ static struct amd64_family_type amd64_family_types[] = { .addr_f1_ctl = PCI_DEVICE_ID_AMD_11H_NB_MAP, .misc_f3_ctl = PCI_DEVICE_ID_AMD_11H_NB_MISC, .ops = { - .probe_valid_hardware = f10_probe_valid_hardware, - .early_channel_count = f10_early_channel_count, - .get_error_address = f10_get_error_address, - .read_dram_base_limit = f10_read_dram_base_limit, - .read_dram_ctl_register = f10_read_dram_ctl_register, - .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow, - .dbam_map_to_pages = f10_dbam_map_to_pages, + .early_channel_count = f10_early_channel_count, + .get_error_address = f10_get_error_address, + .read_dram_base_limit = f10_read_dram_base_limit, + .read_dram_ctl_register = f10_read_dram_ctl_register, + .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow, + .dbam_to_cs = f10_dbam_to_chip_select, } }, }; @@ -1923,142 +1804,170 @@ static struct pci_dev *pci_get_related_function(unsigned int vendor, } /* - * syndrome mapping table for ECC ChipKill devices - * - * The comment in each row is the token (nibble) number that is in error. - * The least significant nibble of the syndrome is the mask for the bits - * that are in error (need to be toggled) for the particular nibble. - * - * Each row contains 16 entries. - * The first entry (0th) is the channel number for that row of syndromes. - * The remaining 15 entries are the syndromes for the respective Error - * bit mask index. - * - * 1st index entry is 0x0001 mask, indicating that the rightmost bit is the - * bit in error. - * The 2nd index entry is 0x0010 that the second bit is damaged. - * The 3rd index entry is 0x0011 indicating that the rightmost 2 bits - * are damaged. - * Thus so on until index 15, 0x1111, whose entry has the syndrome - * indicating that all 4 bits are damaged. - * - * A search is performed on this table looking for a given syndrome. + * These are tables of eigenvectors (one per line) which can be used for the + * construction of the syndrome tables. The modified syndrome search algorithm + * uses those to find the symbol in error and thus the DIMM. * - * See the AMD documentation for ECC syndromes. This ECC table is valid - * across all the versions of the AMD64 processors. - * - * A fast lookup is to use the LAST four bits of the 16-bit syndrome as a - * COLUMN index, then search all ROWS of that column, looking for a match - * with the input syndrome. The ROW value will be the token number. - * - * The 0'th entry on that row, can be returned as the CHANNEL (0 or 1) of this - * error. + * Algorithm courtesy of Ross LaFetra from AMD. */ -#define NUMBER_ECC_ROWS 36 -static const unsigned short ecc_chipkill_syndromes[NUMBER_ECC_ROWS][16] = { - /* Channel 0 syndromes */ - {/*0*/ 0, 0xe821, 0x7c32, 0x9413, 0xbb44, 0x5365, 0xc776, 0x2f57, - 0xdd88, 0x35a9, 0xa1ba, 0x499b, 0x66cc, 0x8eed, 0x1afe, 0xf2df }, - {/*1*/ 0, 0x5d31, 0xa612, 0xfb23, 0x9584, 0xc8b5, 0x3396, 0x6ea7, - 0xeac8, 0xb7f9, 0x4cda, 0x11eb, 0x7f4c, 0x227d, 0xd95e, 0x846f }, - {/*2*/ 0, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f }, - {/*3*/ 0, 0x2021, 0x3032, 0x1013, 0x4044, 0x6065, 0x7076, 0x5057, - 0x8088, 0xa0a9, 0xb0ba, 0x909b, 0xc0cc, 0xe0ed, 0xf0fe, 0xd0df }, - {/*4*/ 0, 0x5041, 0xa082, 0xf0c3, 0x9054, 0xc015, 0x30d6, 0x6097, - 0xe0a8, 0xb0e9, 0x402a, 0x106b, 0x70fc, 0x20bd, 0xd07e, 0x803f }, - {/*5*/ 0, 0xbe21, 0xd732, 0x6913, 0x2144, 0x9f65, 0xf676, 0x4857, - 0x3288, 0x8ca9, 0xe5ba, 0x5b9b, 0x13cc, 0xaded, 0xc4fe, 0x7adf }, - {/*6*/ 0, 0x4951, 0x8ea2, 0xc7f3, 0x5394, 0x1ac5, 0xdd36, 0x9467, - 0xa1e8, 0xe8b9, 0x2f4a, 0x661b, 0xf27c, 0xbb2d, 0x7cde, 0x358f }, - {/*7*/ 0, 0x74e1, 0x9872, 0xec93, 0xd6b4, 0xa255, 0x4ec6, 0x3a27, - 0x6bd8, 0x1f39, 0xf3aa, 0x874b, 0xbd6c, 0xc98d, 0x251e, 0x51ff }, - {/*8*/ 0, 0x15c1, 0x2a42, 0x3f83, 0xcef4, 0xdb35, 0xe4b6, 0xf177, - 0x4758, 0x5299, 0x6d1a, 0x78db, 0x89ac, 0x9c6d, 0xa3ee, 0xb62f }, - {/*9*/ 0, 0x3d01, 0x1602, 0x2b03, 0x8504, 0xb805, 0x9306, 0xae07, - |