From c79c49826270b8b0061b2fca840fc3f013c8a78a Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 26 Feb 2013 12:51:27 -0500 Subject: xen/pat: Disable PAT using pat_enabled value. The git commit 8eaffa67b43e99ae581622c5133e20b0f48bcef1 (xen/pat: Disable PAT support for now) explains in details why we want to disable PAT for right now. However that change was not enough and we should have also disabled the pat_enabled value. Otherwise we end up with: mmap-example:3481 map pfn expected mapping type write-back for [mem 0x00010000-0x00010fff], got uncached-minus ------------[ cut here ]------------ WARNING: at /build/buildd/linux-3.8.0/arch/x86/mm/pat.c:774 untrack_pfn+0xb8/0xd0() mem 0x00010000-0x00010fff], got uncached-minus ------------[ cut here ]------------ WARNING: at /build/buildd/linux-3.8.0/arch/x86/mm/pat.c:774 untrack_pfn+0xb8/0xd0() ... Pid: 3481, comm: mmap-example Tainted: GF 3.8.0-6-generic #13-Ubuntu Call Trace: [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] untrack_pfn+0xb8/0xd0 [] unmap_single_vma+0xac/0x100 [] unmap_vmas+0x49/0x90 [] exit_mmap+0x98/0x170 [] mmput+0x64/0x100 [] dup_mm+0x445/0x660 [] copy_process.part.22+0xa5f/0x1510 [] do_fork+0x91/0x350 [] sys_clone+0x16/0x20 [] stub_clone+0x69/0x90 [] ? system_call_fastpath+0x1a/0x1f ---[ end trace 4918cdd0a4c9fea4 ]--- (a similar message shows up if you end up launching 'mcelog') The call chain is (as analyzed by Liu, Jinsong): do_fork --> copy_process --> dup_mm --> dup_mmap --> copy_page_range --> track_pfn_copy --> reserve_pfn_range --> line 624: flags != want_flags It comes from different memory types of page table (_PAGE_CACHE_WB) and MTRR (_PAGE_CACHE_UC_MINUS). Stefan Bader dug in this deep and found out that: "That makes it clearer as this will do reserve_memtype(...) --> pat_x_mtrr_type --> mtrr_type_lookup --> __mtrr_type_lookup And that can return -1/0xff in case of MTRR not being enabled/initialized. Which is not the case (given there are no messages for it in dmesg). This is not equal to MTRR_TYPE_WRBACK and thus becomes _PAGE_CACHE_UC_MINUS. It looks like the problem starts early in reserve_memtype: if (!pat_enabled) { /* This is identical to page table setting without PAT */ if (new_type) { if (req_type == _PAGE_CACHE_WC) *new_type = _PAGE_CACHE_UC_MINUS; else *new_type = req_type & _PAGE_CACHE_MASK; } return 0; } This would be what we want, that is clearing the PWT and PCD flags from the supported flags - if pat_enabled is disabled." This patch does that - disabling PAT. CC: stable@vger.kernel.org # 3.3 and further Reported-by: Sander Eikelenboom Reported-and-Tested-by: Konrad Rzeszutek Wilk Reported-and-Tested-by: Stefan Bader Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 39928d16be3b..c8e1c7b95c3b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -67,6 +67,7 @@ #include #include #include +#include #ifdef CONFIG_ACPI #include @@ -1417,7 +1418,14 @@ asmlinkage void __init xen_start_kernel(void) */ acpi_numa = -1; #endif - +#ifdef CONFIG_X86_PAT + /* + * For right now disable the PAT. We should remove this once + * git commit 8eaffa67b43e99ae581622c5133e20b0f48bcef1 + * (xen/pat: Disable PAT support for now) is reverted. + */ + pat_enabled = 0; +#endif /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; -- cgit v1.2.3 From 884ac2978a295b7df3c4a686d3bff6932bbbb460 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 28 Feb 2013 09:05:41 -0500 Subject: xen/pci: We don't do multiple MSI's. There is no hypercall to setup multiple MSI per PCI device. As such with these two new commits: - 08261d87f7d1b6253ab3223756625a5c74532293 PCI/MSI: Enable multiple MSIs with pci_enable_msi_block_auto() - 5ca72c4f7c412c2002363218901eba5516c476b1 AHCI: Support multiple MSIs we would call the PHYSDEVOP_map_pirq 'nvec' times with the same contents of the PCI device. Sander discovered that we would get the same PIRQ value 'nvec' times and return said values to the caller. That of course meant that the device was configured only with one MSI and AHCI would fail with: ahci 0000:00:11.0: version 3.0 xen: registering gsi 19 triggering 0 polarity 1 xen: --> pirq=19 -> irq=19 (gsi=19) (XEN) [2013-02-27 19:43:07] IOAPIC[0]: Set PCI routing entry (6-19 -> 0x99 -> IRQ 19 Mode:1 Active:1) ahci 0000:00:11.0: AHCI 0001.0200 32 slots 4 ports 6 Gbps 0xf impl SATA mode ahci 0000:00:11.0: flags: 64bit ncq sntf ilck pm led clo pmp pio slum part ahci: probe of 0000:00:11.0 failed with error -22 That is b/c in ahci_host_activate the second call to devm_request_threaded_irq would return -EINVAL as we passed in (on the second run) an IRQ that was never initialized. CC: stable@vger.kernel.org Reported-and-Tested-by: Sander Eikelenboom Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/pci/xen.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 56ab74989cf1..94e76620460f 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -162,6 +162,9 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct msi_desc *msidesc; int *v; + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); if (!v) return -ENOMEM; @@ -220,6 +223,9 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct msi_desc *msidesc; struct msi_msg msg; + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + list_for_each_entry(msidesc, &dev->msi_list, list) { __read_msi_msg(msidesc, &msg); pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | @@ -263,6 +269,9 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int ret = 0; struct msi_desc *msidesc; + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + list_for_each_entry(msidesc, &dev->msi_list, list) { struct physdev_map_pirq map_irq; domid_t domid; -- cgit v1.2.3