From 80f5ab097b87c86581cb9736a8e55c5a3047d4bb Mon Sep 17 00:00:00 2001
From: Shaun Ruffell <sruffell@digium.com>
Date: Sun, 19 Aug 2012 01:11:24 -0300
Subject: edac: edac_mc no longer deals with kobjects directly

There are no more embedded kobjects in struct mem_ctl_info. Remove a header and
a comment that does not reflect the code anymore.

Signed-off-by: Shaun Ruffell <sruffell@digium.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 7 -------
 include/linux/edac.h   | 1 -
 2 files changed, 8 deletions(-)

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 281f566a5513..a641f623fffd 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -441,13 +441,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
 
 	mci->op_state = OP_ALLOC;
 
-	/* at this point, the root kobj is valid, and in order to
-	 * 'free' the object, then the function:
-	 *      edac_mc_unregister_sysfs_main_kobj() must be called
-	 * which will perform kobj unregistration and the actual free
-	 * will occur during the kobject callback operation
-	 */
-
 	return mci;
 
 error:
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 1b8c02b36f76..4784213c819d 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -14,7 +14,6 @@
 
 #include <linux/atomic.h>
 #include <linux/device.h>
-#include <linux/kobject.h>
 #include <linux/completion.h>
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
-- 
cgit v1.2.3


From da14d93d95ee3923b6e690220bdb7ce191e76d95 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Thu, 25 Oct 2012 09:07:21 -0200
Subject: sb_edac: add a missing /n on a debug message

[   17.024963] EDAC DEBUG: get_memory_layout: TOHM: 132.160 GB (0x0000002043ffffff)<7>[   17.024971] EDAC DEBUG: get_memory_layout: SAD#0 DRAM up to 33.792 GB (0x0000000840000000) Interleave: 8:6 reg=0x000083c3

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/sb_edac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 5715b7c2c517..9ad24242ea18 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -639,7 +639,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
 	tmp_mb = (1 + pvt->tohm) >> 20;
 
 	mb = div_u64_rem(tmp_mb, 1000, &kb);
-	edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm);
+	edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm);
 
 	/*
 	 * Step 2) Get SAD range and SAD Interleave list
-- 
cgit v1.2.3


From c31d34fe92e9d0b146907d7294269ee03e1b403f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= <niso@kth.se>
Date: Sun, 29 Jan 2012 23:04:32 +0100
Subject: i7core_edac: fix erroneous size of static array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove size from lookup arrays and mark them as const.

Reviewed-by: Jesper Juhl <jj@chaosbits.net>
Signed-off-by: Niklas Söderlund <niso@kth.se>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/i7core_edac.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 10c8c00d6469..ad5f934c95d3 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -420,21 +420,21 @@ static inline int numdimms(u32 dimms)
 
 static inline int numrank(u32 rank)
 {
-	static int ranks[4] = { 1, 2, 4, -EINVAL };
+	static const int ranks[] = { 1, 2, 4, -EINVAL };
 
 	return ranks[rank & 0x3];
 }
 
 static inline int numbank(u32 bank)
 {
-	static int banks[4] = { 4, 8, 16, -EINVAL };
+	static const int banks[] = { 4, 8, 16, -EINVAL };
 
 	return banks[bank & 0x3];
 }
 
 static inline int numrow(u32 row)
 {
-	static int rows[8] = {
+	static const int rows[] = {
 		1 << 12, 1 << 13, 1 << 14, 1 << 15,
 		1 << 16, -EINVAL, -EINVAL, -EINVAL,
 	};
@@ -444,7 +444,7 @@ static inline int numrow(u32 row)
 
 static inline int numcol(u32 col)
 {
-	static int cols[8] = {
+	static const int cols[] = {
 		1 << 10, 1 << 11, 1 << 12, -EINVAL,
 	};
 	return cols[col & 0x3];
-- 
cgit v1.2.3


From 1c069100c1f5577ecde06b3a366b73f520854c4e Mon Sep 17 00:00:00 2001
From: Lans Zhang <lans.zhang2008@gmail.com>
Date: Thu, 20 Dec 2012 10:16:07 +0800
Subject: i7core_edac: fix kernel crash on unloading i7core_edac.

It is easy to trigger this crash on 3.7.0:

root@intel_westmere_ep-3:~# modprobe -r i7core_edac
EDAC PCI: Removed device 0 for i7core_edac EDAC PCI controller: DEV 0000:fe:03.0
EDAC MC: Removed device 1 for i7core_edac.c i7 core #1: DEV 0000:fe:03.0
EDAC PCI: Removed device 1 for i7core_edac EDAC PCI controller: DEV 0000:ff:03.0
EDAC MC: Removed device 0 for i7core_edac.c i7 core #0: DEV 0000:ff:03.0
BUG: unable to handle kernel NULL pointer dereference at 0000000000000110
IP: [<ffffffff82069ee9>] __blocking_notifier_call_chain+0x29/0x80
PGD 1eaae7067 PUD 1e96e4067 PMD 0
Oops: 0000 [#1] PREEMPT SMP
Modules linked in: minix acpi_cpufreq freq_table mperf ioatdma processor edac_core(-) iTCO_wdt coretemp evdev hwmon lpc_ich dca mfd_core crc32c_intel ioapic [last unloaded: i7core_edac]
CPU 3
Pid: 1268, comm: modprobe Not tainted 3.7.0-WR5.0.1.0_standard+ #30 Intel Corporation S5520HC/S5520HC
RIP: 0010:[<ffffffff82069ee9>]  [<ffffffff82069ee9>] __blocking_notifier_call_chain+0x29/0x80
RSP: 0018:ffff8801eb12de28  EFLAGS: 00010246
RAX: 0000000000000000 RBX: 00000000000000f0 RCX: 00000000ffffffff
RDX: ffff88012b452800 RSI: 0000000000000002 RDI: 00000000000000f0
RBP: ffff8801eb12de68 R08: 0000000000000000 R09: ffffea0004ad1118
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: ffff8801eb12dee8 R14: ffff88012b452800 R15: 000000000060e518
FS:  00007f9ea95a9700(0000) GS:ffff8801efc20000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 0000000000000110 CR3: 00000001262f1000 CR4: 00000000000007e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process modprobe (pid: 1268, threadinfo ffff8801eb12c000, task ffff8801e8421690)
Stack:
  ffff88012c802a00 ffff88012b445ec0 ffff88012c802300 ffff88012b452800
  0000000000000000 ffff8801eb12dee8 000000000060e080 000000000060e518
  ffff8801eb12de78 ffffffff82069f56 ffff8801eb12dea8 ffffffff824ead7c
Call Trace:
  [<ffffffff82069f56>] blocking_notifier_call_chain+0x16/0x20
  [<ffffffff824ead7c>] device_del+0x3c/0x1d0
  [<ffffffffa00095a8>] edac_mc_sysfs_exit+0x1c/0x2f [edac_core]
  [<ffffffffa000961c>] edac_exit+0x4f/0x56 [edac_core]
  [<ffffffff820a3d2a>] sys_delete_module+0x17a/0x240
  [<ffffffff8212da7c>] ? vm_munmap+0x5c/0x80
  [<ffffffff82877682>] system_call_fastpath+0x16/0x1b
Code: 90 90 55 48 89 e5 48 83 ec 40 48 89 5d d8 4c 89 65 e0 4c 89 6d e8 4c 89 75 f0 4c 89 7d f8 66 66 66 66 90 31 c0 49 89 d6 48 89 fb <48> 8b 57 20 49 89 f5 41 89 cf 4c 8d 67 20 48 85 d2 74 2c 4c 89
RIP  [<ffffffff82069ee9>] __blocking_notifier_call_chain+0x29/0x80
  RSP <ffff8801eb12de28>
CR2: 0000000000000110
---[ end trace b69acf12ccad1c0d ]---

Usually, edac_subsys is grabbed one time by pci at initialization.
But edac_subsys may be released several times if multiple pci MCs exist.
The fix just makes the operations balanced.

Signed-off-by: Lans Zhang <jia.zhang@windriver.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_pci_sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c
index dc6e905ee1a5..7684426fafa2 100644
--- a/drivers/edac/edac_pci_sysfs.c
+++ b/drivers/edac/edac_pci_sysfs.c
@@ -429,8 +429,8 @@ static void edac_pci_main_kobj_teardown(void)
 	if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) {
 		edac_dbg(0, "called kobject_put on main kobj\n");
 		kobject_put(edac_pci_top_main_kobj);
+		edac_put_sysfs_subsys();
 	}
-	edac_put_sysfs_subsys();
 }
 
 /*
-- 
cgit v1.2.3


From 5f466cb0257998549058c16e9d44bb74804a273d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Thu, 10 Jan 2013 13:31:43 -0300
Subject: i3200_edac: Add more debug to the driver

Currently, it is not possible to know, when debug is enabled,
if the driver is using 2 DIMMS per channel mode or not. It is
not possible to know the values of the drbs registers, used
to identify the memory rank sizes.

Add debug for both, as it helps to track issues on the driver.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/i3200_edac.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index 4e8337602e78..20641925d313 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -106,16 +106,26 @@ static int nr_channels;
 
 static int how_many_channels(struct pci_dev *pdev)
 {
+	int n_channels;
+
 	unsigned char capid0_8b; /* 8th byte of CAPID0 */
 
 	pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b);
+
 	if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */
 		edac_dbg(0, "In single channel mode\n");
-		return 1;
+		n_channels = 1;
 	} else {
 		edac_dbg(0, "In dual channel mode\n");
-		return 2;
+		n_channels = 2;
 	}
+
+	if (capid0_8b & 0x10) /* check if both channels are filled */
+		edac_dbg(0, "2 DIMMS per channel disabled\n");
+	else
+		edac_dbg(0, "2 DIMMS per channel enabled\n");
+
+	return n_channels;
 }
 
 static unsigned long eccerrlog_syndrome(u64 log)
@@ -290,6 +300,8 @@ static void i3200_get_drbs(void __iomem *window,
 	for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) {
 		drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK;
 		drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK;
+
+		edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]);
 	}
 }
 
-- 
cgit v1.2.3


From 61734e1835867e6e38438c8365149748641e3525 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Thu, 10 Jan 2013 13:31:47 -0300
Subject: i3200_edac: Fix the logic that detects filled memories

After running a series of tests on an HP DL320, filled with different
memory sizes, it was noticed that, when filled with just one DIMM
on such hardware, the driver wrongly detects twice the memory, and
thinks that both channels 0 and 1 are filled.

It seems to be partially caused by the BIOS and partially by the driver.

The i3200_edac current logic would be working fine if the BIOS were
disabling the unused second channel when just one DIMM is connected,
in order to do power-saving, as recommended on this chipset's datasheet.

However, the BIOS on this particular machine doesn't do it:

[   16.741421] EDAC DEBUG: how_many_channels: In dual channel mode
[   16.741424] EDAC DEBUG: how_many_channels: 2 DIMMS per channel enabled

So, the driver were assuming that 2 channels are enabled (well, they are,
but the second is unused).

Combined with that, I found two issues at the logic that creates the
EDAC data, that were failing when the two channels are not equally
filled (AFAICT, that happens only when just 1 DIMM is plugged).

The first one is that a 0 at DRB means that nothing is filled. The
driver's logic, however, do some calculation with that.

The second one is that the logic that fills the DIMM data currently
assumes that both channels are equally filled.

I tested the system already with the current configuration and my
patch and it is now working fine. So, for a 2R single DIMM 2Gb memory
at dimm slot 01 (channel 0), it is now displaying:

[   16.741406] EDAC DEBUG: i3200_get_drbs: drb[0][0] = 16, drb[1][0] = 0
[   16.741410] EDAC DEBUG: i3200_get_drbs: drb[0][1] = 32, drb[1][1] = 0
[   16.741413] EDAC DEBUG: i3200_get_drbs: drb[0][2] = 32, drb[1][2] = 0
[   16.741416] EDAC DEBUG: i3200_get_drbs: drb[0][3] = 32, drb[1][3] = 0
...
[   16.741896] EDAC DEBUG: i3200_probe1: csrow 0, channel 0, size = 1024 Mb
[   16.741899] EDAC DEBUG: i3200_probe1: csrow 1, channel 0, size = 1024 Mb

and the corresponding sysfs nodes are now properly filled.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/i3200_edac.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c
index 20641925d313..aa44c1718f50 100644
--- a/drivers/edac/i3200_edac.c
+++ b/drivers/edac/i3200_edac.c
@@ -323,6 +323,9 @@ static unsigned long drb_to_nr_pages(
 	int n;
 
 	n = drbs[channel][rank];
+	if (!n)
+		return 0;
+
 	if (rank > 0)
 		n -= drbs[channel][rank - 1];
 	if (stacked && (channel == 1) &&
@@ -389,19 +392,19 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx)
 	 * cumulative; the last one will contain the total memory
 	 * contained in all ranks.
 	 */
-	for (i = 0; i < mci->nr_csrows; i++) {
+	for (i = 0; i < I3200_DIMMS; i++) {
 		unsigned long nr_pages;
-		struct csrow_info *csrow = mci->csrows[i];
 
-		nr_pages = drb_to_nr_pages(drbs, stacked,
-			i / I3200_RANKS_PER_CHANNEL,
-			i % I3200_RANKS_PER_CHANNEL);
+		for (j = 0; j < nr_channels; j++) {
+			struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+							       mci->n_layers, i, j, 0);
 
-		if (nr_pages == 0)
-			continue;
+			nr_pages = drb_to_nr_pages(drbs, stacked, j, i);
+			if (nr_pages == 0)
+				continue;
 
-		for (j = 0; j < nr_channels; j++) {
-			struct dimm_info *dimm = csrow->channels[j]->dimm;
+			edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j,
+				 stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages));
 
 			dimm->nr_pages = nr_pages;
 			dimm->grain = nr_pages << PAGE_SHIFT;
-- 
cgit v1.2.3


From e7100478fa894c45ae33321b5721b5a8f956b814 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Tue, 19 Feb 2013 08:04:55 -0300
Subject: edac: only create sdram_scrub_rate where supported

Currently, sdram_scrub_rate sysfs node is created even if the device
doesn't support get/set the scub rate. Change the logic to only
create this device node when the operation is supported.

Reported-by: Felipe Balbi <balbi@ti.com>
Acked-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc_sysfs.c | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 0ca1ca71157f..963a91edb259 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -7,7 +7,7 @@
  *
  * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com
  *
- * (c) 2012 - Mauro Carvalho Chehab <mchehab@redhat.com>
+ * (c) 2012-2013 - Mauro Carvalho Chehab <mchehab@redhat.com>
  *	The entire API were re-written, and ported to use struct device
  *
  */
@@ -677,9 +677,6 @@ static ssize_t mci_sdram_scrub_rate_store(struct device *dev,
 	unsigned long bandwidth = 0;
 	int new_bw = 0;
 
-	if (!mci->set_sdram_scrub_rate)
-		return -ENODEV;
-
 	if (strict_strtoul(data, 10, &bandwidth) < 0)
 		return -EINVAL;
 
@@ -703,9 +700,6 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev,
 	struct mem_ctl_info *mci = to_mci(dev);
 	int bandwidth = 0;
 
-	if (!mci->get_sdram_scrub_rate)
-		return -ENODEV;
-
 	bandwidth = mci->get_sdram_scrub_rate(mci);
 	if (bandwidth < 0) {
 		edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n");
@@ -866,8 +860,7 @@ DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL);
 DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL);
 
 /* memory scrubber attribute file */
-DEVICE_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show,
-	mci_sdram_scrub_rate_store);
+DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL);
 
 static struct attribute *mci_attrs[] = {
 	&dev_attr_reset_counters.attr,
@@ -878,7 +871,6 @@ static struct attribute *mci_attrs[] = {
 	&dev_attr_ce_noinfo_count.attr,
 	&dev_attr_ue_count.attr,
 	&dev_attr_ce_count.attr,
-	&dev_attr_sdram_scrub_rate.attr,
 	&dev_attr_max_location.attr,
 	NULL
 };
@@ -1012,6 +1004,22 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 		return err;
 	}
 
+	if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) {
+		if (mci->get_sdram_scrub_rate) {
+			dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO;
+			dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show;
+		}
+		if (mci->set_sdram_scrub_rate) {
+			dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR;
+			dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store;
+		}
+		err = device_create_file(&mci->dev,
+					 &dev_attr_sdram_scrub_rate);
+		if (err) {
+			edac_dbg(1, "failure: create sdram_scrub_rate\n");
+			goto fail2;
+		}
+	}
 	/*
 	 * Create the dimm/rank devices
 	 */
@@ -1056,6 +1064,7 @@ fail:
 			continue;
 		device_unregister(&dimm->dev);
 	}
+fail2:
 	device_unregister(&mci->dev);
 	bus_unregister(&mci->bus);
 	kfree(mci->bus.name);
-- 
cgit v1.2.3


From 52608ba20546139dc76cca8a46c1d901455d5450 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= <niklas.soderlund@ericsson.com>
Date: Wed, 8 Aug 2012 12:30:56 -0300
Subject: i5100_edac: probe for device 19 function 0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Probe and store the device handle for the device 19 function 0 during
driver initialization. The device is used during fault injection.

Signed-off-by: Niklas Söderlund <niklas.soderlund@ericsson.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/i5100_edac.c | 28 +++++++++++++++++++++++++++-
 include/linux/pci_ids.h   |  1 +
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index d6955b2cc99f..33c5c8e663f2 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -338,6 +338,7 @@ struct i5100_priv {
 	unsigned ranksperchan;	/* number of ranks per channel */
 
 	struct pci_dev *mc;	/* device 16 func 1 */
+	struct pci_dev *einj;	/* device 19 func 0 */
 	struct pci_dev *ch0mm;	/* device 21 func 0 */
 	struct pci_dev *ch1mm;	/* device 22 func 0 */
 
@@ -869,7 +870,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	struct mem_ctl_info *mci;
 	struct edac_mc_layer layers[2];
 	struct i5100_priv *priv;
-	struct pci_dev *ch0mm, *ch1mm;
+	struct pci_dev *ch0mm, *ch1mm, *einj;
 	int ret = 0;
 	u32 dw;
 	int ranksperch;
@@ -941,6 +942,22 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto bail_disable_ch1;
 	}
 
+
+	/* device 19, func 0, Error injection */
+	einj = pci_get_device_func(PCI_VENDOR_ID_INTEL,
+				    PCI_DEVICE_ID_INTEL_5100_19, 0);
+	if (!einj) {
+		ret = -ENODEV;
+		goto bail_einj;
+	}
+
+	rc = pci_enable_device(einj);
+	if (rc < 0) {
+		ret = rc;
+		goto bail_disable_einj;
+	}
+
+
 	mci->pdev = &pdev->dev;
 
 	priv = mci->pvt_info;
@@ -948,6 +965,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	priv->mc = pdev;
 	priv->ch0mm = ch0mm;
 	priv->ch1mm = ch1mm;
+	priv->einj = einj;
 
 	INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing);
 
@@ -999,6 +1017,12 @@ bail_scrub:
 	cancel_delayed_work_sync(&(priv->i5100_scrubbing));
 	edac_mc_free(mci);
 
+bail_disable_einj:
+	pci_disable_device(einj);
+
+bail_einj:
+	pci_dev_put(einj);
+
 bail_disable_ch1:
 	pci_disable_device(ch1mm);
 
@@ -1036,8 +1060,10 @@ static void i5100_remove_one(struct pci_dev *pdev)
 	pci_disable_device(pdev);
 	pci_disable_device(priv->ch0mm);
 	pci_disable_device(priv->ch1mm);
+	pci_disable_device(priv->einj);
 	pci_dev_put(priv->ch0mm);
 	pci_dev_put(priv->ch1mm);
+	pci_dev_put(priv->einj);
 
 	edac_mc_free(mci);
 }
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 0eb65796bcb9..d0d1e801e350 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2776,6 +2776,7 @@
 #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX	0x3ce0
 #define PCI_DEVICE_ID_INTEL_IOAT_SNB	0x402f
 #define PCI_DEVICE_ID_INTEL_5100_16	0x65f0
+#define PCI_DEVICE_ID_INTEL_5100_19	0x65f3
 #define PCI_DEVICE_ID_INTEL_5100_21	0x65f5
 #define PCI_DEVICE_ID_INTEL_5100_22	0x65f6
 #define PCI_DEVICE_ID_INTEL_5400_ERR	0x4030
-- 
cgit v1.2.3


From 53ceafd6a27f3e15dc83e8865f9f20029f6dfc66 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= <niklas.soderlund@ericsson.com>
Date: Wed, 8 Aug 2012 12:30:57 -0300
Subject: i5100_edac: add fault injection code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add fault injection based on information datasheet for i5100, see 1. In
addition to the i5100 datasheet some missing information on injection
functions where found through experimentation and the i7300 datasheet,
see 2.

[1] Intel 5100 Memory Controller Hub Chipset
    Doc.Nr: 318378
    http://www.intel.com/content/dam/doc/datasheet/5100-
    memory-controller-hub-chipset-datasheet.pdf

[2] Intel 7300 Chipset MemoryController Hub (MCH)
    Doc.Nr: 318082
	http://www.intel.com/assets/pdf/datasheet/318082.pdf

Signed-off-by: Niklas Söderlund <niklas.soderlund@ericsson.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/i5100_edac.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 33c5c8e663f2..0a0345bd7432 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -68,6 +68,14 @@
 			I5100_FERR_NF_MEM_M1ERR_MASK)
 #define	I5100_NERR_NF_MEM	0xa4	/* MC Next Non-Fatal Errors */
 #define I5100_EMASK_MEM		0xa8	/* MC Error Mask Register */
+#define I5100_MEM0EINJMSK0	0x200	/* Injection Mask0 Register Channel 0 */
+#define I5100_MEM1EINJMSK0	0x208	/* Injection Mask0 Register Channel 1 */
+#define		I5100_MEMXEINJMSK0_EINJEN	(1 << 27)
+#define I5100_MEM0EINJMSK1	0x204	/* Injection Mask1 Register Channel 0 */
+#define I5100_MEM1EINJMSK1	0x206	/* Injection Mask1 Register Channel 1 */
+
+/* Device 19, Function 0 */
+#define I5100_DINJ0 0x9a
 
 /* device 21 and 22, func 0 */
 #define I5100_MTR_0	0x154	/* Memory Technology Registers 0-3 */
@@ -344,6 +352,14 @@ struct i5100_priv {
 
 	struct delayed_work i5100_scrubbing;
 	int scrub_enable;
+
+	/* Error injection */
+	u8 inject_channel;
+	u8 inject_hlinesel;
+	u8 inject_deviceptr1;
+	u8 inject_deviceptr2;
+	u16 inject_eccmask1;
+	u16 inject_eccmask2;
 };
 
 /* map a rank/chan to a slot number on the mainboard */
@@ -864,6 +880,70 @@ static void i5100_init_csrows(struct mem_ctl_info *mci)
 	}
 }
 
+/****************************************************************************
+ *                       Error injection routines
+ ****************************************************************************/
+
+static void i5100_do_inject(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+	u32 mask0;
+	u16 mask1;
+
+	/* MEM[1:0]EINJMSK0
+	 * 31    - ADDRMATCHEN
+	 * 29:28 - HLINESEL
+	 *         00 Reserved
+	 *         01 Lower half of cache line
+	 *         10 Upper half of cache line
+	 *         11 Both upper and lower parts of cache line
+	 * 27    - EINJEN
+	 * 25:19 - XORMASK1 for deviceptr1
+	 * 9:5   - SEC2RAM for deviceptr2
+	 * 4:0   - FIR2RAM for deviceptr1
+	 */
+	mask0 = ((priv->inject_hlinesel & 0x3) << 28) |
+		I5100_MEMXEINJMSK0_EINJEN |
+		((priv->inject_eccmask1 & 0xffff) << 10) |
+		((priv->inject_deviceptr2 & 0x1f) << 5) |
+		(priv->inject_deviceptr1 & 0x1f);
+
+	/* MEM[1:0]EINJMSK1
+	 * 15:0  - XORMASK2 for deviceptr2
+	 */
+	mask1 = priv->inject_eccmask2;
+
+	if (priv->inject_channel == 0) {
+		pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0);
+		pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1);
+	} else {
+		pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0);
+		pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1);
+	}
+
+	/* Error Injection Response Function
+	 * Intel 5100 Memory Controller Hub Chipset (318378) datasheet
+	 * hints about this register but carry no data about them. All
+	 * data regarding device 19 is based on experimentation and the
+	 * Intel 7300 Chipset Memory Controller Hub (318082) datasheet
+	 * which appears to be accurate for the i5100 in this area.
+	 *
+	 * The injection code don't work without setting this register.
+	 * The register needs to be flipped off then on else the hardware
+	 * will only preform the first injection.
+	 *
+	 * Stop condition bits 7:4
+	 * 1010 - Stop after one injection
+	 * 1011 - Never stop injecting faults
+	 *
+	 * Start condition bits 3:0
+	 * 1010 - Never start
+	 * 1011 - Start immediately
+	 */
+	pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa);
+	pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab);
+}
+
 static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int rc;
@@ -993,6 +1073,13 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	mci->set_sdram_scrub_rate = i5100_set_scrub_rate;
 	mci->get_sdram_scrub_rate = i5100_get_scrub_rate;
 
+	priv->inject_channel = 0;
+	priv->inject_hlinesel = 0;
+	priv->inject_deviceptr1 = 0;
+	priv->inject_deviceptr2 = 0;
+	priv->inject_eccmask1 = 0;
+	priv->inject_eccmask2 = 0;
+
 	i5100_init_csrows(mci);
 
 	/* this strange construction seems to be in every driver, dunno why */
-- 
cgit v1.2.3


From 9cbc6d38f25ae8fb3efd0b1c14f4f18c1d9f0369 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= <niklas.soderlund@ericsson.com>
Date: Wed, 8 Aug 2012 12:30:58 -0300
Subject: i5100_edac: connect fault injection to debugfs node
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create a debugfs direcotry i5100_edac/mcX for each memory controller and
add nodes to control how fault injection is preformed.

After configuring an injection using inject_channel, inject_deviceptr1,
inject_deviceptr2, inject_eccmask1, inject_eccmask2 and inject_hlinesel
trigger the injection by writing anything to inject_enable.

Example of a CE injection:

echo 0 > /sys/kernel/debug/i5100_edac/mc0/inject_channel
echo 1 > /sys/kernel/debug/i5100_edac/mc0/inject_hlinesel
echo 61440 > /sys/kernel/debug/i5100_edac/mc0/inject_eccmask1
echo 1 > /sys/kernel/debug/i5100_edac/mc0/inject_enable

Example of UE injection:

echo 0 > /sys/kernel/debug/i5100_edac/mc0/inject_channel
echo 2 > /sys/kernel/debug/i5100_edac/mc0/inject_hlinesel
echo 65535 > /sys/kernel/debug/i5100_edac/mc0/inject_eccmask1
echo 65535 > /sys/kernel/debug/i5100_edac/mc0/inject_eccmask2
echo 17 > /sys/kernel/debug/i5100_edac/mc0/inject_deviceptr1
echo 0 > /sys/kernel/debug/i5100_edac/mc0/inject_deviceptr2
echo 1 > /sys/kernel/debug/i5100_edac/mc0/inject_enable

Sometimes it is needed to enable the injection more then once (echo to
the inject_enable node) for the injection to happen, I am not sure why.

Signed-off-by: Niklas Söderlund <niklas.soderlund@ericsson.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/i5100_edac.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 0a0345bd7432..ad4cc898dc60 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -27,6 +27,7 @@
 #include <linux/edac.h>
 #include <linux/delay.h>
 #include <linux/mmzone.h>
+#include <linux/debugfs.h>
 
 #include "edac_core.h"
 
@@ -360,8 +361,12 @@ struct i5100_priv {
 	u8 inject_deviceptr2;
 	u16 inject_eccmask1;
 	u16 inject_eccmask2;
+
+	struct dentry *debugfs;
 };
 
+static struct dentry *i5100_debugfs;
+
 /* map a rank/chan to a slot number on the mainboard */
 static int i5100_rank_to_slot(const struct mem_ctl_info *mci,
 			      int chan, int rank)
@@ -944,6 +949,61 @@ static void i5100_do_inject(struct mem_ctl_info *mci)
 	pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab);
 }
 
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+static ssize_t inject_enable_write(struct file *file, const char __user *data,
+		size_t count, loff_t *ppos)
+{
+	struct device *dev = file->private_data;
+	struct mem_ctl_info *mci = to_mci(dev);
+
+	i5100_do_inject(mci);
+
+	return count;
+}
+
+static int inject_enable_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static const struct file_operations i5100_inject_enable_fops = {
+	.open = inject_enable_open,
+	.write = inject_enable_write,
+	.llseek = generic_file_llseek,
+};
+
+static int i5100_setup_debugfs(struct mem_ctl_info *mci)
+{
+	struct i5100_priv *priv = mci->pvt_info;
+
+	if (!i5100_debugfs)
+		return -ENODEV;
+
+	priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs);
+
+	if (!priv->debugfs)
+		return -ENOMEM;
+
+	debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_channel);
+	debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_hlinesel);
+	debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_deviceptr1);
+	debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_deviceptr2);
+	debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_eccmask1);
+	debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs,
+			&priv->inject_eccmask2);
+	debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs,
+			&mci->dev, &i5100_inject_enable_fops);
+
+	return 0;
+
+}
+
 static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int rc;
@@ -1097,6 +1157,8 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto bail_scrub;
 	}
 
+	i5100_setup_debugfs(mci);
+
 	return ret;
 
 bail_scrub:
@@ -1141,6 +1203,9 @@ static void i5100_remove_one(struct pci_dev *pdev)
 
 	priv = mci->pvt_info;
 
+	if (priv->debugfs)
+		debugfs_remove_recursive(priv->debugfs);
+
 	priv->scrub_enable = 0;
 	cancel_delayed_work_sync(&(priv->i5100_scrubbing));
 
@@ -1173,13 +1238,17 @@ static int __init i5100_init(void)
 {
 	int pci_rc;
 
-	pci_rc = pci_register_driver(&i5100_driver);
+	i5100_debugfs = debugfs_create_dir("i5100_edac", NULL);
 
+	pci_rc = pci_register_driver(&i5100_driver);
 	return (pci_rc < 0) ? pci_rc : 0;
 }
 
 static void __exit i5100_exit(void)
 {
+	if (i5100_debugfs)
+		debugfs_remove(i5100_debugfs);
+
 	pci_unregister_driver(&i5100_driver);
 }
 
-- 
cgit v1.2.3


From 59b9796d1e0e5edb6eb3d5ae550eac0d53d27adb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Thu, 21 Feb 2013 11:01:23 -0300
Subject: i5100_edac: Remove two checkpatch warnings

The last changeset introduced a few checkpatch warnings:

WARNING: debugfs_remove_recursive(NULL) is safe this check is probably not required
261: FILE: drivers/edac/i5100_edac.c:1207:
+       if (priv->debugfs)
+               debugfs_remove_recursive(priv->debugfs);

WARNING: debugfs_remove(NULL) is safe this check is probably not required
290: FILE: drivers/edac/i5100_edac.c:1250:
+       if (i5100_debugfs)
+               debugfs_remove(i5100_debugfs);

Get rid of them.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/i5100_edac.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index ad4cc898dc60..6ed11b1881b7 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -1203,8 +1203,7 @@ static void i5100_remove_one(struct pci_dev *pdev)
 
 	priv = mci->pvt_info;
 
-	if (priv->debugfs)
-		debugfs_remove_recursive(priv->debugfs);
+	debugfs_remove_recursive(priv->debugfs);
 
 	priv->scrub_enable = 0;
 	cancel_delayed_work_sync(&(priv->i5100_scrubbing));
@@ -1246,8 +1245,7 @@ static int __init i5100_init(void)
 
 static void __exit i5100_exit(void)
 {
-	if (i5100_debugfs)
-		debugfs_remove(i5100_debugfs);
+	debugfs_remove(i5100_debugfs);
 
 	pci_unregister_driver(&i5100_driver);
 }
-- 
cgit v1.2.3


From 3d958823e26979a73d73c0343041d64813702a5b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 15 Feb 2013 07:51:25 -0300
Subject: edac: better report error conditions in debug mode

It is hard to find what's wrong without a proper error
report. Improve it, in debug mode.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc_sysfs.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 963a91edb259..4f4b6137d74e 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -429,8 +429,12 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci)
 		if (!nr_pages_per_csrow(csrow))
 			continue;
 		err = edac_create_csrow_object(mci, mci->csrows[i], i);
-		if (err < 0)
+		if (err < 0) {
+			edac_dbg(1,
+				 "failure: create csrow objects for csrow %d\n",
+				 i);
 			goto error;
+		}
 	}
 	return 0;
 
@@ -999,6 +1003,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
 	edac_dbg(0, "creating device %s\n", dev_name(&mci->dev));
 	err = device_add(&mci->dev);
 	if (err < 0) {
+		edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
 		bus_unregister(&mci->bus);
 		kfree(mci->bus.name);
 		return err;
-- 
cgit v1.2.3


From 4ab19b06acffd2278cd37995927c85a9acfd00db Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 15 Feb 2013 07:57:50 -0300
Subject: edac: initialize the core earlier

In order for it to work with it builtin, the EDAC core should
be initialized earlier, otherwise the ghes_edac driver initializes
before edac_mc_sysfs_init() being called:

...
[    4.998373] EDAC MC0: Giving out device to 'ghes_edac.c' 'ghes_edac': DEV ghes
...
[    4.998373] EDAC MC1: Giving out device to 'ghes_edac.c' 'ghes_edac': DEV ghes
[    6.519495] EDAC MC: Ver: 3.0.0
[    6.523749] EDAC DEBUG: edac_mc_sysfs_init: device mc created

The net result is that no EDAC sysfs nodes will appear.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_module.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c
index 12c951a2c33d..a66941fea5a4 100644
--- a/drivers/edac/edac_module.c
+++ b/drivers/edac/edac_module.c
@@ -146,7 +146,7 @@ static void __exit edac_exit(void)
 /*
  * Inform the kernel of our entry and exit points
  */
-module_init(edac_init);
+subsys_initcall(edac_init);
 module_exit(edac_exit);
 
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From c66b5a79a9348ccd6d1cd81416027d0e12da965d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 15 Feb 2013 07:21:08 -0300
Subject: edac: add a new memory layer type

There are some cases where the memory controller layout is
completely hidden. This is the case of firmware-driven error
code, like the one provided by GHES. Add a new layer to be
used on such memory error report mechanisms.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 1 +
 include/linux/edac.h   | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index fb219bc5cb2c..78d8c7d6e76a 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -900,6 +900,7 @@ const char *edac_layer_name[] = {
 	[EDAC_MC_LAYER_CHANNEL] = "channel",
 	[EDAC_MC_LAYER_SLOT] = "slot",
 	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
+	[EDAC_MC_LAYER_ALL_MEM] = "memory",
 };
 EXPORT_SYMBOL_GPL(edac_layer_name);
 
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 4784213c819d..1b7744c219b8 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -375,6 +375,9 @@ enum scrub_type {
  * @EDAC_MC_LAYER_CHANNEL:	memory layer is named "channel"
  * @EDAC_MC_LAYER_SLOT:		memory layer is named "slot"
  * @EDAC_MC_LAYER_CHIP_SELECT:	memory layer is named "chip select"
+ * @EDAC_MC_LAYER_ALL_MEM:	memory layout is unknown. All memory is mapped
+ *				as a single memory area. This is used when
+ *				retrieving errors from a firmware driven driver.
  *
  * This enum is used by the drivers to tell edac_mc_sysfs what name should
  * be used when describing a memory stick location.
@@ -384,6 +387,7 @@ enum edac_mc_layer_type {
 	EDAC_MC_LAYER_CHANNEL,
 	EDAC_MC_LAYER_SLOT,
 	EDAC_MC_LAYER_CHIP_SELECT,
+	EDAC_MC_LAYER_ALL_MEM,
 };
 
 /**
-- 
cgit v1.2.3


From c2c93dbc97622e26dc19edc71e50ebaa996d7804 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Tue, 19 Feb 2013 06:50:05 -0300
Subject: edac: remove proc_name from mci structure

proc_name isn't used anywhere. Remove it.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/edac.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/linux/edac.h b/include/linux/edac.h
index 1b7744c219b8..ff18efc754f3 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -48,7 +48,6 @@ static inline void opstate_init(void)
 }
 
 #define EDAC_MC_LABEL_LEN	31
-#define MC_PROC_NAME_MAX_LEN	7
 
 /**
  * enum dev_type - describe the type of memory DRAM chips used at the stick
@@ -633,7 +632,6 @@ struct mem_ctl_info {
 	const char *mod_ver;
 	const char *ctl_name;
 	const char *dev_name;
-	char proc_name[MC_PROC_NAME_MAX_LEN + 1];
 	void *pvt_info;
 	unsigned long start_time;	/* mci load start time (in jiffies) */
 
-- 
cgit v1.2.3


From 80cc7d87d5eb34375f916d282450a0906a8ead60 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Wed, 31 Oct 2012 10:42:29 -0300
Subject: edac: lock module owner to avoid error report conflicts

APEI GHES and i7core_edac/sb_edac currently can be loaded at
the same time, but those are Highlander modules:
	"There can be only one".

There are two reasons for that:

1) Each driver assumes that it is the only one registering at
   the EDAC core, as it is driver's responsibility to number
   the memory controllers, and all of them start from 0;

2) If BIOS is handling the memory errors, the OS can't also be
   doing it, as one will mangle with the other.

So, we need to add an module owner's lock at the EDAC core,
in order to avoid having two different modules handling memory
errors at the same time. The best way for doing this lock seems
to use the driver's name, as this is unique, and won't require
changes on every driver.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 78d8c7d6e76a..34eb9703ed33 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -42,6 +42,12 @@
 static DEFINE_MUTEX(mem_ctls_mutex);
 static LIST_HEAD(mc_devices);
 
+/*
+ * Used to lock EDAC MC to just one module, avoiding two drivers e. g.
+ *	apei/ghes and i7core_edac to be used at the same time.
+ */
+static void const *edac_mc_owner;
+
 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
 			         unsigned len)
 {
@@ -659,9 +665,9 @@ fail1:
 	return 1;
 }
 
-static void del_mc_from_global_list(struct mem_ctl_info *mci)
+static int del_mc_from_global_list(struct mem_ctl_info *mci)
 {
-	atomic_dec(&edac_handlers);
+	int handlers = atomic_dec_return(&edac_handlers);
 	list_del_rcu(&mci->link);
 
 	/* these are for safe removal of devices from global list while
@@ -669,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci)
 	 */
 	synchronize_rcu();
 	INIT_LIST_HEAD(&mci->link);
+
+	return handlers;
 }
 
 /**
@@ -712,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find);
 /* FIXME - should a warning be printed if no error detection? correction? */
 int edac_mc_add_mc(struct mem_ctl_info *mci)
 {
+	int ret = -EINVAL;
 	edac_dbg(0, "\n");
 
 #ifdef CONFIG_EDAC_DEBUG
@@ -742,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 #endif
 	mutex_lock(&mem_ctls_mutex);
 
+	if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
+		ret = -EPERM;
+		goto fail0;
+	}
+
 	if (add_mc_to_global_list(mci))
 		goto fail0;
 
@@ -768,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
 	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
 		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 
+	edac_mc_owner = mci->mod_name;
+
 	mutex_unlock(&mem_ctls_mutex);
 	return 0;
 
@@ -776,7 +792,7 @@ fail1:
 
 fail0:
 	mutex_unlock(&mem_ctls_mutex);
-	return 1;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
 
@@ -802,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 		return NULL;
 	}
 
-	del_mc_from_global_list(mci);
+	if (!del_mc_from_global_list(mci))
+		edac_mc_owner = NULL;
 	mutex_unlock(&mem_ctls_mutex);
 
 	/* flush workq processes */
-- 
cgit v1.2.3


From c7ef7645544131b0750478d1cf94cdfa945c809d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Thu, 21 Feb 2013 13:36:45 -0300
Subject: edac: reduce stack pressure by using a pre-allocated buffer

The number of variables at the stack is too big.
Reduces the stack usage by using a pre-allocated error
buffer.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_mc.c | 81 ++++++++++++++++++++++++++++++--------------------
 include/linux/edac.h   | 56 ++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 33 deletions(-)

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 34eb9703ed33..4f18dd755939 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -1065,7 +1065,6 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
 }
 
-#define OTHER_LABEL " or "
 
 /**
  * edac_mc_handle_error - reports a memory event to userspace
@@ -1097,19 +1096,28 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  const char *msg,
 			  const char *other_detail)
 {
-	/* FIXME: too much for stack: move it to some pre-alocated area */
-	char detail[80], location[80];
-	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
+	char detail[80];
 	char *p;
 	int row = -1, chan = -1;
 	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
-	int i;
-	long grain;
-	bool enable_per_layer_report = false;
+	int i, n_labels = 0;
 	u8 grain_bits;
+	struct edac_raw_error_desc *e = &mci->error_desc;
 
 	edac_dbg(3, "MC%d\n", mci->mc_idx);
 
+	/* Fills the error report buffer */
+	memset(e, 0, sizeof (*e));
+	e->error_count = error_count;
+	e->top_layer = top_layer;
+	e->mid_layer = mid_layer;
+	e->low_layer = low_layer;
+	e->page_frame_number = page_frame_number;
+	e->offset_in_page = offset_in_page;
+	e->syndrome = syndrome;
+	e->msg = msg;
+	e->other_detail = other_detail;
+
 	/*
 	 * Check if the event report is consistent and if the memory
 	 * location is known. If it is known, enable_per_layer_report will be
@@ -1132,7 +1140,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			pos[i] = -1;
 		}
 		if (pos[i] >= 0)
-			enable_per_layer_report = true;
+			e->enable_per_layer_report = true;
 	}
 
 	/*
@@ -1146,8 +1154,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	 * where each memory belongs to a separate channel within the same
 	 * branch.
 	 */
-	grain = 0;
-	p = label;
+	p = e->label;
 	*p = '\0';
 
 	for (i = 0; i < mci->tot_dimms; i++) {
@@ -1161,8 +1168,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			continue;
 
 		/* get the max grain, over the error match range */
-		if (dimm->grain > grain)
-			grain = dimm->grain;
+		if (dimm->grain > e->grain)
+			e->grain = dimm->grain;
 
 		/*
 		 * If the error is memory-controller wide, there's no need to
@@ -1170,8 +1177,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 		 * channel/memory controller/...  may be affected.
 		 * Also, don't show errors for empty DIMM slots.
 		 */
-		if (enable_per_layer_report && dimm->nr_pages) {
-			if (p != label) {
+		if (e->enable_per_layer_report && dimm->nr_pages) {
+			if (n_labels >= EDAC_MAX_LABELS) {
+				e->enable_per_layer_report = false;
+				break;
+			}
+			n_labels++;
+			if (p != e->label) {
 				strcpy(p, OTHER_LABEL);
 				p += strlen(OTHER_LABEL);
 			}
@@ -1198,12 +1210,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 		}
 	}
 
-	if (!enable_per_layer_report) {
-		strcpy(label, "any memory");
+	if (!e->enable_per_layer_report) {
+		strcpy(e->label, "any memory");
 	} else {
 		edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan);
-		if (p == label)
-			strcpy(label, "unknown memory");
+		if (p == e->label)
+			strcpy(e->label, "unknown memory");
 		if (type == HW_EVENT_ERR_CORRECTED) {
 			if (row >= 0) {
 				mci->csrows[row]->ce_count += error_count;
@@ -1216,7 +1228,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	}
 
 	/* Fill the RAM location data */
-	p = location;
+	p = e->location;
 
 	for (i = 0; i < mci->n_layers; i++) {
 		if (pos[i] < 0)
@@ -1226,32 +1238,35 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			     edac_layer_name[mci->layers[i].type],
 			     pos[i]);
 	}
-	if (p > location)
+	if (p > e->location)
 		*(p - 1) = '\0';
 
 	/* Report the error via the trace interface */
-	grain_bits = fls_long(grain) + 1;
-	trace_mc_event(type, msg, label, error_count,
-		       mci->mc_idx, top_layer, mid_layer, low_layer,
-		       PAGES_TO_MiB(page_frame_number) | offset_in_page,
-		       grain_bits, syndrome, other_detail);
+	grain_bits = fls_long(e->grain) + 1;
+	trace_mc_event(type, e->msg, e->label, e->error_count,
+		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
+		       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
+		       grain_bits, e->syndrome, other_detail);
 
 	/* Memory type dependent details about the error */
 	if (type == HW_EVENT_ERR_CORRECTED) {
 		snprintf(detail, sizeof(detail),
 			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
-			page_frame_number, offset_in_page,
-			grain, syndrome);
-		edac_ce_error(mci, error_count, pos, msg, location, label,
-			      detail, other_detail, enable_per_layer_report,
-			      page_frame_number, offset_in_page, grain);
+			e->page_frame_number, e->offset_in_page,
+			e->grain, e->syndrome);
+		edac_ce_error(mci, e->error_count, pos, e->msg, e->location,
+			      e->label, detail, other_detail,
+			      e->enable_per_layer_report,
+			      e->page_frame_number, e->offset_in_page,
+			      e->grain);
 	} else {
 		snprintf(detail, sizeof(detail),
 			"page:0x%lx offset:0x%lx grain:%ld",
-			page_frame_number, offset_in_page, grain);
+			page_frame_number, offset_in_page, e->grain);
 
-		edac_ue_error(mci, error_count, pos, msg, location, label,
-			      detail, other_detail, enable_per_layer_report);
+		edac_ue_error(mci, e->error_count, pos, e->msg, e->location,
+			      e->label, detail, other_detail,
+			      e->enable_per_layer_report);
 	}
 }
 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
diff --git a/include/linux/edac.h b/include/linux/edac.h
index ff18efc754f3..096b7fcdf484 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -47,8 +47,18 @@ static inline void opstate_init(void)
 	return;
 }
 
+/* Max length of a DIMM label*/
 #define EDAC_MC_LABEL_LEN	31
 
+/* Maximum size of the location string */
+#define LOCATION_SIZE 80
+
+/* Defines the maximum number of labels that can be reported */
+#define EDAC_MAX_LABELS		8
+
+/* String used to join two or more labels */
+#define OTHER_LABEL " or "
+
 /**
  * enum dev_type - describe the type of memory DRAM chips used at the stick
  * @DEV_UNKNOWN:	Can't be determined, or MC doesn't support detect it
@@ -553,6 +563,46 @@ struct errcount_attribute_data {
 	int layer0, layer1, layer2;
 };
 
+/**
+ * edac_raw_error_desc - Raw error report structure
+ * @grain:			minimum granularity for an error report, in bytes
+ * @error_count:		number of errors of the same type
+ * @top_layer:			top layer of the error (layer[0])
+ * @mid_layer:			middle layer of the error (layer[1])
+ * @low_layer:			low layer of the error (layer[2])
+ * @page_frame_number:		page where the error happened
+ * @offset_in_page:		page offset
+ * @syndrome:			syndrome of the error (or 0 if unknown or if
+ * 				the syndrome is not applicable)
+ * @msg:			error message
+ * @location:			location of the error
+ * @label:			label of the affected DIMM(s)
+ * @other_detail:		other driver-specific detail about the error
+ * @enable_per_layer_report:	if false, the error affects all layers
+ *				(typically, a memory controller error)
+ */
+struct edac_raw_error_desc {
+	/*
+	 * NOTE: everything before grain won't be cleaned by
+	 * edac_raw_error_desc_clean()
+	 */
+	char location[LOCATION_SIZE];
+	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS];
+	long grain;
+
+	/* the vars below and grain will be cleaned on every new error report */
+	u16 error_count;
+	int top_layer;
+	int mid_layer;
+	int low_layer;
+	unsigned long page_frame_number;
+	unsigned long offset_in_page;
+	unsigned long syndrome;
+	const char *msg;
+	const char *other_detail;
+	bool enable_per_layer_report;
+};
+
 /* MEMORY controller information structure
  */
 struct mem_ctl_info {
@@ -660,6 +710,12 @@ struct mem_ctl_info {
 	/* work struct for this MC */
 	struct delayed_work work;
 
+	/*
+	 * Used to report an error - by being at the global struct
+	 * makes the memory allocated by the EDAC core
+	 */
+	struct edac_raw_error_desc error_desc;
+
 	/* the internal state of this controller instance */
 	int op_state;
 
-- 
cgit v1.2.3


From e7e248304c8ccf02b89e04c3b3b66006b993b5a7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Wed, 31 Oct 2012 13:46:11 -0300
Subject: edac: add support for raw error reports

That allows APEI GHES driver to report errors directly, using
the EDAC error report API.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/edac/edac_core.h |  5 ++++
 drivers/edac/edac_mc.c   | 64 +++++++++++++++++++++++++++++++-----------------
 2 files changed, 47 insertions(+), 22 deletions(-)

diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 23bb99fa44f1..3c2625e7980d 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -453,6 +453,11 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev);
 extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev);
 extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci,
 				      unsigned long page);
+
+void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
+			      struct mem_ctl_info *mci,
+			      struct edac_raw_error_desc *e);
+
 void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  struct mem_ctl_info *mci,
 			  const u16 error_count,
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 4f18dd755939..cdb81aa73ab7 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -1065,6 +1065,46 @@ static void edac_ue_error(struct mem_ctl_info *mci,
 	edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count);
 }
 
+/**
+ * edac_raw_mc_handle_error - reports a memory event to userspace without doing
+ *			      anything to discover the error location
+ *
+ * @type:		severity of the error (CE/UE/Fatal)
+ * @mci:		a struct mem_ctl_info pointer
+ * @e:			error description
+ *
+ * This raw function is used internally by edac_mc_handle_error(). It should
+ * only be called directly when the hardware error come directly from BIOS,
+ * like in the case of APEI GHES driver.
+ */
+void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type,
+			      struct mem_ctl_info *mci,
+			      struct edac_raw_error_desc *e)
+{
+	char detail[80];
+	int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer };
+
+	/* Memory type dependent details about the error */
+	if (type == HW_EVENT_ERR_CORRECTED) {
+		snprintf(detail, sizeof(detail),
+			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
+			e->page_frame_number, e->offset_in_page,
+			e->grain, e->syndrome);
+		edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label,
+			      detail, e->other_detail, e->enable_per_layer_report,
+			      e->page_frame_number, e->offset_in_page, e->grain);
+	} else {
+		snprintf(detail, sizeof(detail),
+			"page:0x%lx offset:0x%lx grain:%ld",
+			e->page_frame_number, e->offset_in_page, e->grain);
+
+		edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label,
+			      detail, e->other_detail, e->enable_per_layer_report);
+	}
+
+
+}
+EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error);
 
 /**
  * edac_mc_handle_error - reports a memory event to userspace
@@ -1096,7 +1136,6 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 			  const char *msg,
 			  const char *other_detail)
 {
-	char detail[80];
 	char *p;
 	int row = -1, chan = -1;
 	int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
@@ -1246,27 +1285,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type,
 	trace_mc_event(type, e->msg, e->label, e->error_count,
 		       mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer,
 		       PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page,
-		       grain_bits, e->syndrome, other_detail);
+		       grain_bits, e->syndrome, e->other_detail);
 
-	/* Memory type dependent details about the error */
-	if (type == HW_EVENT_ERR_CORRECTED) {
-		snprintf(detail, sizeof(detail),
-			"page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx",
-			e->page_frame_number, e->offset_in_page,
-			e->grain, e->syndrome);
-		edac_ce_error(mci, e->error_count, pos, e->msg, e->location,
-			      e->label, detail, other_detail,
-			      e->enable_per_layer_report,
-			      e->page_frame_number, e->offset_in_page,
-			      e->grain);
-	} else {
-		snprintf(detail, sizeof(detail),
-			"page:0x%lx offset:0x%lx grain:%ld",
-			page_frame_number, offset_in_page, e->grain);
-
-		edac_ue_error(mci, e->error_count, pos, e->msg, e->location,
-			      e->label, detail, other_detail,
-			      e->enable_per_layer_report);
-	}
+	edac_raw_mc_handle_error(type, mci, e);
 }
 EXPORT_SYMBOL_GPL(edac_mc_handle_error);
-- 
cgit v1.2.3


From 8dd93d450bff251575c56b8f058393124e1f00fb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Tue, 19 Feb 2013 21:26:22 -0300
Subject: edac: add support for error type "Info"

The CPER spec defines a forth type of error: informational
logs. Add support for it at the edac API and at the
trace event interface.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/edac.h    | 16 ++++++++++++++++
 include/ras/ras_event.h |  4 +---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/include/linux/edac.h b/include/linux/edac.h
index 096b7fcdf484..4fd4999ccb5b 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -109,8 +109,24 @@ enum hw_event_mc_err_type {
 	HW_EVENT_ERR_CORRECTED,
 	HW_EVENT_ERR_UNCORRECTED,
 	HW_EVENT_ERR_FATAL,
+	HW_EVENT_ERR_INFO,
 };
 
+static inline char *mc_event_error_type(const unsigned int err_type)
+{
+	switch (err_type) {
+	case HW_EVENT_ERR_CORRECTED:
+		return "Corrected";
+	case HW_EVENT_ERR_UNCORRECTED:
+		return "Uncorrected";
+	case HW_EVENT_ERR_FATAL:
+		return "Fatal";
+	default:
+	case HW_EVENT_ERR_INFO:
+		return "Info";
+	}
+}
+
 /**
  * enum mem_type - memory types. For a more detailed reference, please see
  *			http://en.wikipedia.org/wiki/DRAM
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 260470e72483..21cdb0b7b0fb 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -78,9 +78,7 @@ TRACE_EVENT(mc_event,
 
 	TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)",
 		  __entry->error_count,
-		  (__entry->error_type == HW_EVENT_ERR_CORRECTED) ? "Corrected" :
-			((__entry->error_type == HW_EVENT_ERR_FATAL) ?
-			"Fatal" : "Uncorrected"),
+		  mc_event_error_type(__entry->error_type),
 		  __entry->error_count > 1 ? "s" : "",
 		  ((char *)__get_str(msg))[0] ? " " : "",
 		  __get_str(msg),
-- 
cgit v1.2.3


From 40e064153814ce534a28714b25cb98259f107d96 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 15 Feb 2013 05:41:22 -0300
Subject: ghes: move structures/enum to a header file

As a ghes_edac driver will need to access ghes structures, in order
to properly handle the errors, move those structures to a separate
header file. No functional changes.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/acpi/apei/ghes.c | 47 ++---------------------------------------------
 include/acpi/ghes.h      | 45 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 45 deletions(-)
 create mode 100644 include/acpi/ghes.h

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 7ae2750bb457..6d0e146e0fee 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -48,8 +48,8 @@
 #include <linux/genalloc.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
-#include <acpi/apei.h>
-#include <acpi/hed.h>
+
+#include <acpi/ghes.h>
 #include <asm/mce.h>
 #include <asm/tlbflush.h>
 #include <asm/nmi.h>
@@ -84,42 +84,6 @@
 	((struct acpi_hest_generic_status *)				\
 	 ((struct ghes_estatus_node *)(estatus_node) + 1))
 
-/*
- * One struct ghes is created for each generic hardware error source.
- * It provides the context for APEI hardware error timer/IRQ/SCI/NMI
- * handler.
- *
- * estatus: memory buffer for error status block, allocated during
- * HEST parsing.
- */
-#define GHES_TO_CLEAR		0x0001
-#define GHES_EXITING		0x0002
-
-struct ghes {
-	struct acpi_hest_generic *generic;
-	struct acpi_hest_generic_status *estatus;
-	u64 buffer_paddr;
-	unsigned long flags;
-	union {
-		struct list_head list;
-		struct timer_list timer;
-		unsigned int irq;
-	};
-};
-
-struct ghes_estatus_node {
-	struct llist_node llnode;
-	struct acpi_hest_generic *generic;
-};
-
-struct ghes_estatus_cache {
-	u32 estatus_len;
-	atomic_t count;
-	struct acpi_hest_generic *generic;
-	unsigned long long time_in;
-	struct rcu_head rcu;
-};
-
 bool ghes_disable;
 module_param_named(disable, ghes_disable, bool, 0);
 
@@ -333,13 +297,6 @@ static void ghes_fini(struct ghes *ghes)
 	apei_unmap_generic_address(&ghes->generic->error_status_address);
 }
 
-enum {
-	GHES_SEV_NO = 0x0,
-	GHES_SEV_CORRECTED = 0x1,
-	GHES_SEV_RECOVERABLE = 0x2,
-	GHES_SEV_PANIC = 0x3,
-};
-
 static inline int ghes_severity(int severity)
 {
 	switch (severity) {
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
new file mode 100644
index 000000000000..3eb8dc483449
--- /dev/null
+++ b/include/acpi/ghes.h
@@ -0,0 +1,45 @@
+#include <acpi/apei.h>
+#include <acpi/hed.h>
+
+/*
+ * One struct ghes is created for each generic hardware error source.
+ * It provides the context for APEI hardware error timer/IRQ/SCI/NMI
+ * handler.
+ *
+ * estatus: memory buffer for error status block, allocated during
+ * HEST parsing.
+ */
+#define GHES_TO_CLEAR		0x0001
+#define GHES_EXITING		0x0002
+
+struct ghes {
+	struct acpi_hest_generic *generic;
+	struct acpi_hest_generic_status *estatus;
+	u64 buffer_paddr;
+	unsigned long flags;
+	union {
+		struct list_head list;
+		struct timer_list timer;
+		unsigned int irq;
+	};
+};
+
+struct ghes_estatus_node {
+	struct llist_node llnode;
+	struct acpi_hest_generic *generic;
+};
+
+struct ghes_estatus_cache {
+	u32 estatus_len;
+	atomic_t count;
+	struct acpi_hest_generic *generic;
+	unsigned long long time_in;
+	struct rcu_head rcu;
+};
+
+enum {
+	GHES_SEV_NO = 0x0,
+	GHES_SEV_CORRECTED = 0x1,
+	GHES_SEV_RECOVERABLE = 0x2,
+	GHES_SEV_PANIC = 0x3,
+};
-- 
cgit v1.2.3


From 21480547c8b85be6c08c4d77ed514673b73eda8a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 15 Feb 2013 06:10:39 -0300
Subject: ghes: add the needed hooks for EDAC error report

In order to allow reporting errors via EDAC, add hooks for:

1) register an EDAC driver;
2) unregister an EDAC driver;
3) report errors via EDAC.

As the EDAC driver will need to access the ghes structure, adds it
as one of the parameters for ghes_do_proc.

Acked-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/acpi/apei/ghes.c | 24 +++++++++++++++++++-----
 include/acpi/ghes.h      | 27 +++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 6d0e146e0fee..d668a8ae602b 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -409,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes)
 	ghes->flags &= ~GHES_TO_CLEAR;
 }
 
-static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
+static void ghes_do_proc(struct ghes *ghes,
+			 const struct acpi_hest_generic_status *estatus)
 {
 	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
@@ -421,6 +422,8 @@ static void ghes_do_proc(const struct acpi_hest_generic_status *estatus)
 				 CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err;
 			mem_err = (struct cper_sec_mem_err *)(gdata+1);
+			ghes_edac_report_mem_error(ghes, sev, mem_err);
+
 #ifdef CONFIG_X86_MCE
 			apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
 						  mem_err);
@@ -639,7 +642,7 @@ static int ghes_proc(struct ghes *ghes)
 		if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus))
 			ghes_estatus_cache_add(ghes->generic, ghes->estatus);
 	}
-	ghes_do_proc(ghes->estatus);
+	ghes_do_proc(ghes, ghes->estatus);
 out:
 	ghes_clear_estatus(ghes);
 	return 0;
@@ -732,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
 		estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
 		len = apei_estatus_len(estatus);
 		node_len = GHES_ESTATUS_NODE_LEN(len);
-		ghes_do_proc(estatus);
+		ghes_do_proc(estatus_node->ghes, estatus);
 		if (!ghes_estatus_cached(estatus)) {
 			generic = estatus_node->generic;
 			if (ghes_print_estatus(NULL, generic, estatus))
@@ -821,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs)
 		estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool,
 						      node_len);
 		if (estatus_node) {
+			estatus_node->ghes = ghes;
 			estatus_node->generic = ghes->generic;
 			estatus = GHES_ESTATUS_FROM_NODE(estatus_node);
 			memcpy(estatus, ghes->estatus, len);
@@ -899,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev)
 		ghes = NULL;
 		goto err;
 	}
+
+	rc = ghes_edac_register(ghes, &ghes_dev->dev);
+	if (rc < 0)
+		goto err;
+
 	switch (generic->notify.type) {
 	case ACPI_HEST_NOTIFY_POLLED:
 		ghes->timer.function = ghes_poll_func;
@@ -911,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev)
 		if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) {
 			pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
 			       generic->header.source_id);
-			goto err;
+			goto err_edac_unreg;
 		}
 		if (request_irq(ghes->irq, ghes_irq_func,
 				0, "GHES IRQ", ghes)) {
 			pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
 			       generic->header.source_id);
-			goto err;
+			goto err_edac_unreg;
 		}
 		break;
 	case ACPI_HEST_NOTIFY_SCI:
@@ -943,6 +952,8 @@ static int ghes_probe(struct platform_device *ghes_dev)
 	platform_set_drvdata(ghes_dev, ghes);
 
 	return 0;
+err_edac_unreg:
+	ghes_edac_unregister(ghes);
 err:
 	if (ghes) {
 		ghes_fini(ghes);
@@ -995,6 +1006,9 @@ static int ghes_remove(struct platform_device *ghes_dev)
 	}
 
 	ghes_fini(ghes);
+
+	ghes_edac_unregister(ghes);
+
 	kfree(ghes);
 
 	platform_set_drvdata(ghes_dev, NULL);
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 3eb8dc483449..720446cb243e 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -27,6 +27,7 @@ struct ghes {
 struct ghes_estatus_node {
 	struct llist_node llnode;
 	struct acpi_hest_generic *generic;
+	struct ghes *ghes;
 };
 
 struct ghes_estatus_cache {
@@ -43,3 +44,29 @@ enum {
 	GHES_SEV_RECOVERABLE = 0x2,
 	GHES_SEV_PANIC = 0x3,
 };
+
+/* From drivers/edac/ghes_edac.c */
+
+#ifdef CONFIG_EDAC_GHES
+void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
+				struct cper_sec_mem_err *mem_err);
+
+int ghes_edac_register(struct ghes *ghes, struct device *dev);
+
+void ghes_edac_unregister(struct ghes *ghes);
+
+#else
+static inline void ghes_edac_report_mem_error(struct ghes *ghes, int sev,
+				       struct cper_sec_mem_err *mem_err)
+{
+}
+
+static inline int ghes_edac_register(struct ghes *ghes, struct device *dev)
+{
+	return 0;
+}
+
+static inline void ghes_edac_unregister(struct ghes *ghes)
+{
+}
+#endif
-- 
cgit v1.2.3


From 77c5f5d2f212e1963063e427fc57c44bf6eae9fb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Fri, 15 Feb 2013 06:11:57 -0300
Subject: