summaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2011-11-13 20:55:35 +0100
committerJiri Kosina <jkosina@suse.cz>2011-11-13 20:55:53 +0100
commit2290c0d06d82faee87b1ab2d9d4f7bf81ef64379 (patch)
treee075e4d5534193f28e6059904f61e5ca03958d3c /drivers/edac
parent4da669a2e3e5bc70b30a0465f3641528681b5f77 (diff)
parent52e4c2a05256cb83cda12f3c2137ab1533344edb (diff)
Merge branch 'master' into for-next
Sync with Linus tree to have 157550ff ("mtd: add GPMI-NAND driver in the config and Makefile") as I have patch depending on that one.
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig16
-rw-r--r--drivers/edac/Makefile2
-rw-r--r--drivers/edac/amd64_edac.c37
-rw-r--r--drivers/edac/cpc925_edac.c67
-rw-r--r--drivers/edac/edac_core.h350
-rw-r--r--drivers/edac/edac_mce.c61
-rw-r--r--drivers/edac/i7300_edac.c51
-rw-r--r--drivers/edac/i7core_edac.c415
-rw-r--r--drivers/edac/mce_amd.c46
-rw-r--r--drivers/edac/mce_amd.h6
-rw-r--r--drivers/edac/mce_amd_inj.c1
-rw-r--r--drivers/edac/ppc4xx_edac.c2
-rw-r--r--drivers/edac/sb_edac.c1893
13 files changed, 2418 insertions, 529 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index af1a17d42bd7..5948a2194f50 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -41,7 +41,7 @@ config EDAC_DEBUG
config EDAC_DECODE_MCE
tristate "Decode MCEs in human-readable form (only on AMD for now)"
- depends on CPU_SUP_AMD && X86_MCE
+ depends on CPU_SUP_AMD && X86_MCE_AMD
default y
---help---
Enable this option if you want to decode Machine Check Exceptions
@@ -71,9 +71,6 @@ config EDAC_MM_EDAC
occurred so that a particular failing memory module can be
replaced. If unsure, select 'Y'.
-config EDAC_MCE
- bool
-
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64) K8, F10h"
depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
@@ -173,8 +170,7 @@ config EDAC_I5400
config EDAC_I7CORE
tristate "Intel i7 Core (Nehalem) processors"
- depends on EDAC_MM_EDAC && PCI && X86
- select EDAC_MCE
+ depends on EDAC_MM_EDAC && PCI && X86 && X86_MCE_INTEL
help
Support for error detection and correction the Intel
i7 Core (Nehalem) Integrated Memory Controller that exists on
@@ -216,6 +212,14 @@ config EDAC_I7300
Support for error detection and correction the Intel
Clarksboro MCH (Intel 7300 chipset).
+config EDAC_SBRIDGE
+ tristate "Intel Sandy-Bridge Integrated MC"
+ depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
+ depends on EXPERIMENTAL
+ help
+ Support for error detection and correction the Intel
+ Sandy Bridge Integrated Memory Controller.
+
config EDAC_MPC85XX
tristate "Freescale MPC83xx / MPC85xx"
depends on EDAC_MM_EDAC && FSL_SOC && (PPC_83xx || PPC_85xx)
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 3e239133e29e..196a63dd37c5 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -8,7 +8,6 @@
obj-$(CONFIG_EDAC) := edac_stub.o
obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o
-obj-$(CONFIG_EDAC_MCE) += edac_mce.o
edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o
edac_core-y += edac_module.o edac_device_sysfs.o
@@ -29,6 +28,7 @@ obj-$(CONFIG_EDAC_I5100) += i5100_edac.o
obj-$(CONFIG_EDAC_I5400) += i5400_edac.o
obj-$(CONFIG_EDAC_I7300) += i7300_edac.o
obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o
+obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o
obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 9a8bebcf6b17..c9eee6d33e9a 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -114,10 +114,22 @@ static int f10_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
}
+/*
+ * Select DCT to which PCI cfg accesses are routed
+ */
+static void f15h_select_dct(struct amd64_pvt *pvt, u8 dct)
+{
+ u32 reg = 0;
+
+ amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
+ reg &= 0xfffffffe;
+ reg |= dct;
+ amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
+}
+
static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
const char *func)
{
- u32 reg = 0;
u8 dct = 0;
if (addr >= 0x140 && addr <= 0x1a0) {
@@ -125,10 +137,7 @@ static int f15_read_dct_pci_cfg(struct amd64_pvt *pvt, int addr, u32 *val,
addr -= 0x100;
}
- amd64_read_pci_cfg(pvt->F1, DCT_CFG_SEL, &reg);
- reg &= 0xfffffffe;
- reg |= dct;
- amd64_write_pci_cfg(pvt->F1, DCT_CFG_SEL, reg);
+ f15h_select_dct(pvt, dct);
return __amd64_read_pci_cfg_dword(pvt->F2, addr, val, func);
}
@@ -198,6 +207,10 @@ static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
if (boot_cpu_data.x86 == 0xf)
min_scrubrate = 0x0;
+ /* F15h Erratum #505 */
+ if (boot_cpu_data.x86 == 0x15)
+ f15h_select_dct(pvt, 0);
+
return __amd64_set_scrub_rate(pvt->F3, bw, min_scrubrate);
}
@@ -207,6 +220,10 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
u32 scrubval = 0;
int i, retval = -EINVAL;
+ /* F15h Erratum #505 */
+ if (boot_cpu_data.x86 == 0x15)
+ f15h_select_dct(pvt, 0);
+
amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
scrubval = scrubval & 0x001F;
@@ -751,10 +768,10 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16);
* Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
* are ECC capable.
*/
-static enum edac_type amd64_determine_edac_cap(struct amd64_pvt *pvt)
+static unsigned long amd64_determine_edac_cap(struct amd64_pvt *pvt)
{
u8 bit;
- enum dev_type edac_cap = EDAC_FLAG_NONE;
+ unsigned long edac_cap = EDAC_FLAG_NONE;
bit = (boot_cpu_data.x86 > 0xf || pvt->ext_model >= K8_REV_F)
? 19
@@ -1953,11 +1970,9 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
amd64_handle_ue(mci, m);
}
-void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg)
+void amd64_decode_bus_error(int node_id, struct mce *m)
{
- struct mem_ctl_info *mci = mcis[node_id];
-
- __amd64_decode_bus_error(mci, m);
+ __amd64_decode_bus_error(mcis[node_id], m);
}
/*
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index a687a0d16962..a774c0ddaf5b 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -90,6 +90,7 @@ enum apimask_bits {
ECC_MASK_ENABLE = (APIMASK_ECC_UE_H | APIMASK_ECC_CE_H |
APIMASK_ECC_UE_L | APIMASK_ECC_CE_L),
};
+#define APIMASK_ADI(n) CPC925_BIT(((n)+1))
/************************************************************
* Processor Interface Exception Register (APIEXCP)
@@ -581,16 +582,73 @@ static void cpc925_mc_check(struct mem_ctl_info *mci)
}
/******************** CPU err device********************************/
+static u32 cpc925_cpu_mask_disabled(void)
+{
+ struct device_node *cpus;
+ struct device_node *cpunode = NULL;
+ static u32 mask = 0;
+
+ /* use cached value if available */
+ if (mask != 0)
+ return mask;
+
+ mask = APIMASK_ADI0 | APIMASK_ADI1;
+
+ cpus = of_find_node_by_path("/cpus");
+ if (cpus == NULL) {
+ cpc925_printk(KERN_DEBUG, "No /cpus node !\n");
+ return 0;
+ }
+
+ while ((cpunode = of_get_next_child(cpus, cpunode)) != NULL) {
+ const u32 *reg = of_get_property(cpunode, "reg", NULL);
+
+ if (strcmp(cpunode->type, "cpu")) {
+ cpc925_printk(KERN_ERR, "Not a cpu node in /cpus: %s\n", cpunode->name);
+ continue;
+ }
+
+ if (reg == NULL || *reg > 2) {
+ cpc925_printk(KERN_ERR, "Bad reg value at %s\n", cpunode->full_name);
+ continue;
+ }
+
+ mask &= ~APIMASK_ADI(*reg);
+ }
+
+ if (mask != (APIMASK_ADI0 | APIMASK_ADI1)) {
+ /* We assume that each CPU sits on it's own PI and that
+ * for present CPUs the reg property equals to the PI
+ * interface id */
+ cpc925_printk(KERN_WARNING,
+ "Assuming PI id is equal to CPU MPIC id!\n");
+ }
+
+ of_node_put(cpunode);
+ of_node_put(cpus);
+
+ return mask;
+}
+
/* Enable CPU Errors detection */
static void cpc925_cpu_init(struct cpc925_dev_info *dev_info)
{
u32 apimask;
+ u32 cpumask;
apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET);
- if ((apimask & CPU_MASK_ENABLE) == 0) {
- apimask |= CPU_MASK_ENABLE;
- __raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET);
+
+ cpumask = cpc925_cpu_mask_disabled();
+ if (apimask & cpumask) {
+ cpc925_printk(KERN_WARNING, "CPU(s) not present, "
+ "but enabled in APIMASK, disabling\n");
+ apimask &= ~cpumask;
}
+
+ if ((apimask & CPU_MASK_ENABLE) == 0)
+ apimask |= CPU_MASK_ENABLE;
+
+ __raw_writel(apimask, dev_info->vbase + REG_APIMASK_OFFSET);
}
/* Disable CPU Errors detection */
@@ -622,6 +680,9 @@ static void cpc925_cpu_check(struct edac_device_ctl_info *edac_dev)
if ((apiexcp & CPU_EXCP_DETECTED) == 0)
return;
+ if ((apiexcp & ~cpc925_cpu_mask_disabled()) == 0)
+ return;
+
apimask = __raw_readl(dev_info->vbase + REG_APIMASK_OFFSET);
cpc925_printk(KERN_INFO, "Processor Interface Fault\n"
"Processor Interface register dump:\n");
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 55b8278bb172..fe90cd4a7ebc 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -34,11 +34,10 @@
#include <linux/platform_device.h>
#include <linux/sysdev.h>
#include <linux/workqueue.h>
+#include <linux/edac.h>
-#define EDAC_MC_LABEL_LEN 31
#define EDAC_DEVICE_NAME_LEN 31
#define EDAC_ATTRIB_VALUE_LEN 15
-#define MC_PROC_NAME_MAX_LEN 7
#if PAGE_SHIFT < 20
#define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT))
@@ -101,353 +100,6 @@ extern int edac_debug_level;
#define edac_dev_name(dev) (dev)->dev_name
-/* memory devices */
-enum dev_type {
- DEV_UNKNOWN = 0,
- DEV_X1,
- DEV_X2,
- DEV_X4,
- DEV_X8,
- DEV_X16,
- DEV_X32, /* Do these parts exist? */
- DEV_X64 /* Do these parts exist? */
-};
-
-#define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN)
-#define DEV_FLAG_X1 BIT(DEV_X1)
-#define DEV_FLAG_X2 BIT(DEV_X2)
-#define DEV_FLAG_X4 BIT(DEV_X4)
-#define DEV_FLAG_X8 BIT(DEV_X8)
-#define DEV_FLAG_X16 BIT(DEV_X16)
-#define DEV_FLAG_X32 BIT(DEV_X32)
-#define DEV_FLAG_X64 BIT(DEV_X64)
-
-/* memory types */
-enum mem_type {
- MEM_EMPTY = 0, /* Empty csrow */
- MEM_RESERVED, /* Reserved csrow type */
- MEM_UNKNOWN, /* Unknown csrow type */
- MEM_FPM, /* Fast page mode */
- MEM_EDO, /* Extended data out */
- MEM_BEDO, /* Burst Extended data out */
- MEM_SDR, /* Single data rate SDRAM */
- MEM_RDR, /* Registered single data rate SDRAM */
- MEM_DDR, /* Double data rate SDRAM */
- MEM_RDDR, /* Registered Double data rate SDRAM */
- MEM_RMBS, /* Rambus DRAM */
- MEM_DDR2, /* DDR2 RAM */
- MEM_FB_DDR2, /* fully buffered DDR2 */
- MEM_RDDR2, /* Registered DDR2 RAM */
- MEM_XDR, /* Rambus XDR */
- MEM_DDR3, /* DDR3 RAM */
- MEM_RDDR3, /* Registered DDR3 RAM */
-};
-
-#define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
-#define MEM_FLAG_RESERVED BIT(MEM_RESERVED)
-#define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN)
-#define MEM_FLAG_FPM BIT(MEM_FPM)
-#define MEM_FLAG_EDO BIT(MEM_EDO)
-#define MEM_FLAG_BEDO BIT(MEM_BEDO)
-#define MEM_FLAG_SDR BIT(MEM_SDR)
-#define MEM_FLAG_RDR BIT(MEM_RDR)
-#define MEM_FLAG_DDR BIT(MEM_DDR)
-#define MEM_FLAG_RDDR BIT(MEM_RDDR)
-#define MEM_FLAG_RMBS BIT(MEM_RMBS)
-#define MEM_FLAG_DDR2 BIT(MEM_DDR2)
-#define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
-#define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
-#define MEM_FLAG_XDR BIT(MEM_XDR)
-#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
-#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
-
-/* chipset Error Detection and Correction capabilities and mode */
-enum edac_type {
- EDAC_UNKNOWN = 0, /* Unknown if ECC is available */
- EDAC_NONE, /* Doesn't support ECC */
- EDAC_RESERVED, /* Reserved ECC type */
- EDAC_PARITY, /* Detects parity errors */
- EDAC_EC, /* Error Checking - no correction */
- EDAC_SECDED, /* Single bit error correction, Double detection */
- EDAC_S2ECD2ED, /* Chipkill x2 devices - do these exist? */
- EDAC_S4ECD4ED, /* Chipkill x4 devices */
- EDAC_S8ECD8ED, /* Chipkill x8 devices */
- EDAC_S16ECD16ED, /* Chipkill x16 devices */
-};
-
-#define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN)
-#define EDAC_FLAG_NONE BIT(EDAC_NONE)
-#define EDAC_FLAG_PARITY BIT(EDAC_PARITY)
-#define EDAC_FLAG_EC BIT(EDAC_EC)
-#define EDAC_FLAG_SECDED BIT(EDAC_SECDED)
-#define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED)
-#define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED)
-#define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED)
-#define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED)
-
-/* scrubbing capabilities */
-enum scrub_type {
- SCRUB_UNKNOWN = 0, /* Unknown if scrubber is available */
- SCRUB_NONE, /* No scrubber */
- SCRUB_SW_PROG, /* SW progressive (sequential) scrubbing */
- SCRUB_SW_SRC, /* Software scrub only errors */
- SCRUB_SW_PROG_SRC, /* Progressive software scrub from an error */
- SCRUB_SW_TUNABLE, /* Software scrub frequency is tunable */
- SCRUB_HW_PROG, /* HW progressive (sequential) scrubbing */
- SCRUB_HW_SRC, /* Hardware scrub only errors */
- SCRUB_HW_PROG_SRC, /* Progressive hardware scrub from an error */
- SCRUB_HW_TUNABLE /* Hardware scrub frequency is tunable */
-};
-
-#define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG)
-#define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC)
-#define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC)
-#define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE)
-#define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG)
-#define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC)
-#define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC)
-#define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE)
-
-/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
-
-/* EDAC internal operation states */
-#define OP_ALLOC 0x100
-#define OP_RUNNING_POLL 0x201
-#define OP_RUNNING_INTERRUPT 0x202
-#define OP_RUNNING_POLL_INTR 0x203
-#define OP_OFFLINE 0x300
-
-/*
- * There are several things to be aware of that aren't at all obvious:
- *
- *
- * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
- *
- * These are some of the many terms that are thrown about that don't always
- * mean what people think they mean (Inconceivable!). In the interest of
- * creating a common ground for discussion, terms and their definitions
- * will be established.
- *
- * Memory devices: The individual chip on a memory stick. These devices
- * commonly output 4 and 8 bits each. Grouping several
- * of these in parallel provides 64 bits which is common
- * for a memory stick.
- *
- * Memory Stick: A printed circuit board that aggregates multiple
- * memory devices in parallel. This is the atomic
- * memory component that is purchaseable by Joe consumer
- * and loaded into a memory socket.
- *
- * Socket: A physical connector on the motherboard that accepts
- * a single memory stick.
- *
- * Channel: Set of memory devices on a memory stick that must be
- * grouped in parallel with one or more additional
- * channels from other memory sticks. This parallel
- * grouping of the output from multiple channels are
- * necessary for the smallest granularity of memory access.
- * Some memory controllers are capable of single channel -
- * which means that memory sticks can be loaded
- * individually. Other memory controllers are only
- * capable of dual channel - which means that memory
- * sticks must be loaded as pairs (see "socket set").
- *
- * Chip-select row: All of the memory devices that are selected together.
- * for a single, minimum grain of memory access.
- * This selects all of the parallel memory devices across
- * all of the parallel channels. Common chip-select rows
- * for single channel are 64 bits, for dual channel 128
- * bits.
- *
- * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory.
- * Motherboards commonly drive two chip-select pins to
- * a memory stick. A single-ranked stick, will occupy
- * only one of those rows. The other will be unused.
- *
- * Double-Ranked stick: A double-ranked stick has two chip-select rows which
- * access different sets of memory devices. The two
- * rows cannot be accessed concurrently.
- *
- * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
- * A double-sided stick has two chip-select rows which
- * access different sets of memory devices. The two
- * rows cannot be accessed concurrently. "Double-sided"
- * is irrespective of the memory devices being mounted
- * on both sides of the memory stick.
- *
- * Socket set: All of the memory sticks that are required for
- * a single memory access or all of the memory sticks
- * spanned by a chip-select row. A single socket set
- * has two chip-select rows and if double-sided sticks
- * are used these will occupy those chip-select rows.
- *
- * Bank: This term is avoided because it is unclear when
- * needing to distinguish between chip-select rows and
- * socket sets.
- *
- * Controller pages:
- *
- * Physical pages:
- *
- * Virtual pages:
- *
- *
- * STRUCTURE ORGANIZATION AND CHOICES
- *
- *
- *
- * PS - I enjoyed writing all that about as much as you enjoyed reading it.
- */
-
-struct channel_info {
- int chan_idx; /* channel index */
- u32 ce_count; /* Correctable Errors for this CHANNEL */
- char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
- struct csrow_info *csrow; /* the parent */
-};
-
-struct csrow_info {
- unsigned long first_page; /* first page number in dimm */
- unsigned long last_page; /* last page number in dimm */
- unsigned long page_mask; /* used for interleaving -
- * 0UL for non intlv
- */
- u32 nr_pages; /* number of pages in csrow */
- u32 grain; /* granularity of reported error in bytes */
- int csrow_idx; /* the chip-select row */
- enum dev_type dtype; /* memory device type */
- u32 ue_count; /* Uncorrectable Errors for this csrow */
- u32 ce_count; /* Correctable Errors for this csrow */
- enum mem_type mtype; /* memory csrow type */
- enum edac_type edac_mode; /* EDAC mode for this csrow */
- struct mem_ctl_info *mci; /* the parent */
-
- struct kobject kobj; /* sysfs kobject for this csrow */
-
- /* channel information for this csrow */
- u32 nr_channels;
- struct channel_info *channels;
-};
-
-struct mcidev_sysfs_group {
- const char *name; /* group name */
- const struct mcidev_sysfs_attribute *mcidev_attr; /* group attributes */
-};
-
-struct mcidev_sysfs_group_kobj {
- struct list_head list; /* list for all instances within a mc */
-
- struct kobject kobj; /* kobj for the group */
-
- const struct mcidev_sysfs_group *grp; /* group description table */
- struct mem_ctl_info *mci; /* the parent */
-};
-
-/* mcidev_sysfs_attribute structure
- * used for driver sysfs attributes and in mem_ctl_info
- * sysfs top level entries
- */
-struct mcidev_sysfs_attribute {
- /* It should use either attr or grp */
- struct attribute attr;
- const struct mcidev_sysfs_group *grp; /* Points to a group of attributes */
-
- /* Ops for show/store values at the attribute - not used on group */
- ssize_t (*show)(struct mem_ctl_info *,char *);
- ssize_t (*store)(struct mem_ctl_info *, const char *,size_t);
-};
-
-/* MEMORY controller information structure
- */
-struct mem_ctl_info {
- struct list_head link; /* for global list of mem_ctl_info structs */
-
- struct module *owner; /* Module owner of this control struct */
-
- unsigned long mtype_cap; /* memory types supported by mc */
- unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */
- unsigned long edac_cap; /* configuration capabilities - this is
- * closely related to edac_ctl_cap. The
- * difference is that the controller may be
- * capable of s4ecd4ed which would be listed
- * in edac_ctl_cap, but if channels aren't
- * capable of s4ecd4ed then the edac_cap would
- * not have that capability.
- */
- unsigned long scrub_cap; /* chipset scrub capabilities */
- enum scrub_type scrub_mode; /* current scrub mode */
-
- /* Translates sdram memory scrub rate given in bytes/sec to the
- internal representation and configures whatever else needs
- to be configured.
- */
- int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw);
-
- /* Get the current sdram memory scrub rate from the internal
- representation and converts it to the closest matching
- bandwidth in bytes/sec.
- */
- int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci);
-
-
- /* pointer to edac checking routine */
- void (*edac_check) (struct mem_ctl_info * mci);
-
- /*
- * Remaps memory pages: controller pages to physical pages.
- * For most MC's, this will be NULL.
- */
- /* FIXME - why not send the phys page to begin with? */
- unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
- unsigned long page);
- int mc_idx;
- int nr_csrows;
- struct csrow_info *csrows;
- /*
- * FIXME - what about controllers on other busses? - IDs must be
- * unique. dev pointer should be sufficiently unique, but
- * BUS:SLOT.FUNC numbers may not be unique.
- */
- struct device *dev;
- const char *mod_name;
- const char *mod_ver;
- const char *ctl_name;
- const char *dev_name;
- char proc_name[MC_PROC_NAME_MAX_LEN + 1];
- void *pvt_info;
- u32 ue_noinfo_count; /* Uncorrectable Errors w/o info */
- u32 ce_noinfo_count; /* Correctable Errors w/o info */
- u32 ue_count; /* Total Uncorrectable Errors for this MC */
- u32 ce_count; /* Total Correctable Errors for this MC */
- unsigned long start_time; /* mci load start time (in jiffies) */
-
- struct completion complete;
-
- /* edac sysfs device control */
- struct kobject edac_mci_kobj;
-
- /* list for all grp instances within a mc */
- struct list_head grp_kobj_list;
-
- /* Additional top controller level attributes, but specified
- * by the low level driver.
- *
- * Set by the low level driver to provide attributes at the
- * controller level, same level as 'ue_count' and 'ce_count' above.
- * An array of structures, NULL terminated
- *
- * If attributes are desired, then set to array of attributes
- * If no attributes are desired, leave NULL
- */
- const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;
-
- /* work struct for this MC */
- struct delayed_work work;
-
- /* the internal state of this controller instance */
- int op_state;
-};
-
/*
* The following are the structures to provide for a generic
* or abstract 'edac_device'. This set of structures and the
diff --git a/drivers/edac/edac_mce.c b/drivers/edac/edac_mce.c
deleted file mode 100644
index 9ccdc5b140e7..000000000000
--- a/drivers/edac/edac_mce.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/* Provides edac interface to mcelog events
- *
- * This file may be distributed under the terms of the
- * GNU General Public License version 2.
- *
- * Copyright (c) 2009 by:
- * Mauro Carvalho Chehab <mchehab@redhat.com>
- *
- * Red Hat Inc. http://www.redhat.com
- */
-
-#include <linux/module.h>
-#include <linux/edac_mce.h>
-#include <asm/mce.h>
-
-int edac_mce_enabled;
-EXPORT_SYMBOL_GPL(edac_mce_enabled);
-
-
-/*
- * Extension interface
- */
-
-static LIST_HEAD(edac_mce_list);
-static DEFINE_MUTEX(edac_mce_lock);
-
-int edac_mce_register(struct edac_mce *edac_mce)
-{
- mutex_lock(&edac_mce_lock);
- list_add_tail(&edac_mce->list, &edac_mce_list);
- mutex_unlock(&edac_mce_lock);
- return 0;
-}
-EXPORT_SYMBOL(edac_mce_register);
-
-void edac_mce_unregister(struct edac_mce *edac_mce)
-{
- mutex_lock(&edac_mce_lock);
- list_del(&edac_mce->list);
- mutex_unlock(&edac_mce_lock);
-}
-EXPORT_SYMBOL(edac_mce_unregister);
-
-int edac_mce_parse(struct mce *mce)
-{
- struct edac_mce *edac_mce;
-
- list_for_each_entry(edac_mce, &edac_mce_list, list) {
- if (edac_mce->check_error(edac_mce->priv, mce))
- return 1;
- }
-
- /* Nobody queued the error */
- return 0;
-}
-EXPORT_SYMBOL_GPL(edac_mce_parse);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
-MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
-MODULE_DESCRIPTION("EDAC Driver for mcelog captured errors");
diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c
index a76fe8366b68..6104dba380b6 100644
--- a/drivers/edac/i7300_edac.c
+++ b/drivers/edac/i7300_edac.c
@@ -372,7 +372,7 @@ static const char *get_err_from_table(const char *table[], int size, int pos)
static void i7300_process_error_global(struct mem_ctl_info *mci)
{
struct i7300_pvt *pvt;
- u32 errnum, value;
+ u32 errnum, error_reg;
unsigned long errors;
const char *specific;
bool is_fatal;
@@ -381,9 +381,9 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* read in the 1st FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_HI, &value);
- if (unlikely(value)) {
- errors = value;
+ FERR_GLOBAL_HI, &error_reg);
+ if (unlikely(error_reg)) {
+ errors = error_reg;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_global_hi_name));
specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum);
@@ -391,15 +391,15 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* Clear the error bit */
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_HI, value);
+ FERR_GLOBAL_HI, error_reg);
goto error_global;
}
pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_LO, &value);
- if (unlikely(value)) {
- errors = value;
+ FERR_GLOBAL_LO, &error_reg);
+ if (unlikely(error_reg)) {
+ errors = error_reg;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_global_lo_name));
specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum);
@@ -407,7 +407,7 @@ static void i7300_process_error_global(struct mem_ctl_info *mci)
/* Clear the error bit */
pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_LO, value);
+ FERR_GLOBAL_LO, error_reg);
goto error_global;
}
@@ -427,7 +427,7 @@ error_global:
static void i7300_process_fbd_error(struct mem_ctl_info *mci)
{
struct i7300_pvt *pvt;
- u32 errnum, value;
+ u32 errnum, value, error_reg;
u16 val16;
unsigned branch, channel, bank, rank, cas, ras;
u32 syndrome;
@@ -440,14 +440,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
/* read in the 1st FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
- FERR_FAT_FBD, &value);
- if (unlikely(value & FERR_FAT_FBD_ERR_MASK)) {
- errors = value & FERR_FAT_FBD_ERR_MASK ;
+ FERR_FAT_FBD, &error_reg);
+ if (unlikely(error_reg & FERR_FAT_FBD_ERR_MASK)) {
+ errors = error_reg & FERR_FAT_FBD_ERR_MASK ;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_fat_fbd_name));
specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum);
+ branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
- branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
NRECMEMA, &val16);
bank = NRECMEMA_BANK(val16);
@@ -455,11 +455,14 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
NRECMEMB, &value);
-
is_wr = NRECMEMB_IS_WR(value);
cas = NRECMEMB_CAS(value);
ras = NRECMEMB_RAS(value);
+ /* Clean the error register */
+ pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
+ FERR_FAT_FBD, error_reg);
+
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
"FATAL (Branch=%d DRAM-Bank=%d %s "
"RAS=%d CAS=%d Err=0x%lx (%s))",
@@ -476,21 +479,17 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
/* read in the 1st NON-FATAL error register */
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
- FERR_NF_FBD, &value);
- if (unlikely(value & FERR_NF_FBD_ERR_MASK)) {
- errors = value & FERR_NF_FBD_ERR_MASK;
+ FERR_NF_FBD, &error_reg);
+ if (unlikely(error_reg & FERR_NF_FBD_ERR_MASK)) {
+ errors = error_reg & FERR_NF_FBD_ERR_MASK;
errnum = find_first_bit(&errors,
ARRAY_SIZE(ferr_nf_fbd_name));
specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum);
-
- /* Clear the error bit */
- pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
- FERR_GLOBAL_LO, value);
+ branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
REDMEMA, &syndrome);
- branch = (GET_FBD_FAT_IDX(value) == 2) ? 1 : 0;
pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
RECMEMA, &val16);
bank = RECMEMA_BANK(val16);
@@ -498,18 +497,20 @@ static void i7300_process_fbd_error(struct mem_ctl_info *mci)
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
RECMEMB, &value);
-
is_wr = RECMEMB_IS_WR(value);
cas = RECMEMB_CAS(value);
ras = RECMEMB_RAS(value);
pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
REDMEMB, &value);
-
channel = (branch << 1);
if (IS_SECOND_CH(value))
channel++;
+ /* Clear the error bit */
+ pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
+ FERR_NF_FBD, error_reg);
+
/* Form out message */
snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
"Corrected error (Branch=%d, Channel %d), "
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index f6cf448d69b4..70ad8923f1d7 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -31,11 +31,13 @@
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
+#include <linux/dmi.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
-#include <linux/edac_mce.h>
#include <linux/smp.h>
+#include <asm/mce.h>
#include <asm/processor.h>
+#include <asm/div64.h>
#include "edac_core.h"
@@ -78,6 +80,8 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
/* OFFSETS for Device 0 Function 0 */
#define MC_CFG_CONTROL 0x90
+ #define MC_CFG_UNLOCK 0x02
+ #define MC_CFG_LOCK 0x00
/* OFFSETS for Device 3 Function 0 */
@@ -98,6 +102,15 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
#define DIMM0_COR_ERR(r) ((r) & 0x7fff)
/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */
+#define MC_SSRCONTROL 0x48
+ #define SSR_MODE_DISABLE 0x00
+ #define SSR_MODE_ENABLE 0x01
+ #define SSR_MODE_MASK 0x03
+
+#define MC_SCRUB_CONTROL 0x4c
+ #define STARTSCRUB (1 << 24)
+ #define SCRUBINTERVAL_MASK 0xffffff
+
#define MC_COR_ECC_CNT_0 0x80
#define MC_COR_ECC_CNT_1 0x84
#define MC_COR_ECC_CNT_2 0x88
@@ -253,10 +266,7 @@ struct i7core_pvt {
unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
- unsigned int is_registered;
-
- /* mcelog glue */
- struct edac_mce edac_mce;
+ bool is_registered, enable_scrub;
/* Fifo double buffers */
struct mce mce_entry[MCE_LOG_LEN];
@@ -268,6 +278,9 @@ struct i7core_pvt {
/* Count indicator to show errors not got */
unsigned mce_overrun;
+ /* DCLK Frequency used for computing scrub rate */
+ int dclk_freq;
+
/* Struct to control EDAC polling */
struct edac_pci_ctl_info *i7core_pci;
};
@@ -281,8 +294,7 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
/* Memory controller */
{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
-
- /* Exists only for RDIMM */
+ /* Exists only for RDIMM */
{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
@@ -303,6 +315,16 @@ static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
+
+ /* Generic Non-core registers */
+ /*
+ * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
+ * On Xeon 55xx, however, it has a different id (8086:2c40). So,
+ * the probing code needs to test for the other address in case of
+ * failure of this one
+ */
+ { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },
+
};
static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
@@ -319,6 +341,12 @@ static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
{ PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
{ PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
{ PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
+
+ /*
+ * This is the PCI device has an alternate address on some
+ * processors like Core i7 860
+ */
+ { PCI_DESCR( 0, 0, PCI