From 2de50e9674fc4ca3c6174b04477f69eb26b4ee31 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Mon, 8 Feb 2016 15:08:20 +1100 Subject: powerpc/powernv: Remove support for p5ioc2 "p5ioc2 is used by approximately 2 machines in the world, and has never ever been a supported configuration." The code for p5ioc2 is essentially unused and complicates what is already a very complicated codebase. Its removal is essentially a "free win" in the effort to simplify the powernv PCI code. In addition, support for p5ioc2 has been dropped from skiboot. There's no reason to keep it around in the kernel. Signed-off-by: Russell Currey Acked-by: Gavin Shan Acked-by: Stewart Smith Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/pci-p5ioc2.c | 271 ---------------------------- arch/powerpc/platforms/powernv/pci.c | 17 +- arch/powerpc/platforms/powernv/pci.h | 152 ++++++++-------- 4 files changed, 74 insertions(+), 368 deletions(-) delete mode 100644 arch/powerpc/platforms/powernv/pci-p5ioc2.c diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index f1516b5ecec9..cd9711e72df6 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -5,7 +5,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o obj-y += opal-kmsg.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o -obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o +obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c deleted file mode 100644 index f2bdfea3b68d..000000000000 --- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Support PCI/PCIe on PowerNV platforms - * - * Currently supports only P5IOC2 - * 
- * Copyright 2011 Benjamin Herrenschmidt, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "powernv.h" -#include "pci.h" - -/* For now, use a fixed amount of TCE memory for each p5ioc2 - * hub, 16M will do - */ -#define P5IOC2_TCE_MEMORY 0x01000000 - -#ifdef CONFIG_PCI_MSI -static int pnv_pci_p5ioc2_msi_setup(struct pnv_phb *phb, struct pci_dev *dev, - unsigned int hwirq, unsigned int virq, - unsigned int is_64, struct msi_msg *msg) -{ - if (WARN_ON(!is_64)) - return -ENXIO; - msg->data = hwirq - phb->msi_base; - msg->address_hi = 0x10000000; - msg->address_lo = 0; - - return 0; -} - -static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) -{ - unsigned int count; - const __be32 *prop = of_get_property(phb->hose->dn, - "ibm,opal-msi-ranges", NULL); - if (!prop) - return; - - /* Don't do MSI's on p5ioc2 PCI-X are they are not properly - * verified in HW - */ - if (of_device_is_compatible(phb->hose->dn, "ibm,p5ioc2-pcix")) - return; - phb->msi_base = be32_to_cpup(prop); - count = be32_to_cpup(prop + 1); - if (msi_bitmap_alloc(&phb->msi_bmp, count, phb->hose->dn)) { - pr_err("PCI %d: Failed to allocate MSI bitmap !\n", - phb->hose->global_number); - return; - } - phb->msi_setup = pnv_pci_p5ioc2_msi_setup; - phb->msi32_support = 0; - pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n", - count, phb->msi_base); -} -#else -static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { } -#endif /* CONFIG_PCI_MSI */ - -static struct iommu_table_ops pnv_p5ioc2_iommu_ops = { - .set = pnv_tce_build, -#ifdef CONFIG_IOMMU_API - .exchange = 
pnv_tce_xchg, -#endif - .clear = pnv_tce_free, - .get = pnv_tce_get, -}; - -static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, - struct pci_dev *pdev) -{ - struct iommu_table *tbl = phb->p5ioc2.table_group.tables[0]; - - if (!tbl->it_map) { - tbl->it_ops = &pnv_p5ioc2_iommu_ops; - iommu_init_table(tbl, phb->hose->node); - iommu_register_group(&phb->p5ioc2.table_group, - pci_domain_nr(phb->hose->bus), phb->opal_id); - INIT_LIST_HEAD_RCU(&tbl->it_group_list); - pnv_pci_link_table_and_group(phb->hose->node, 0, - tbl, &phb->p5ioc2.table_group); - } - - set_iommu_table_base(&pdev->dev, tbl); - iommu_add_device(&pdev->dev); -} - -static const struct pci_controller_ops pnv_pci_p5ioc2_controller_ops = { - .dma_dev_setup = pnv_pci_dma_dev_setup, -#ifdef CONFIG_PCI_MSI - .setup_msi_irqs = pnv_setup_msi_irqs, - .teardown_msi_irqs = pnv_teardown_msi_irqs, -#endif -}; - -static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id, - void *tce_mem, u64 tce_size) -{ - struct pnv_phb *phb; - const __be64 *prop64; - u64 phb_id; - int64_t rc; - static int primary = 1; - struct iommu_table_group *table_group; - struct iommu_table *tbl; - - pr_info(" Initializing p5ioc2 PHB %s\n", np->full_name); - - prop64 = of_get_property(np, "ibm,opal-phbid", NULL); - if (!prop64) { - pr_err(" Missing \"ibm,opal-phbid\" property !\n"); - return; - } - phb_id = be64_to_cpup(prop64); - pr_devel(" PHB-ID : 0x%016llx\n", phb_id); - pr_devel(" TCE AT : 0x%016lx\n", __pa(tce_mem)); - pr_devel(" TCE SZ : 0x%016llx\n", tce_size); - - rc = opal_pci_set_phb_tce_memory(phb_id, __pa(tce_mem), tce_size); - if (rc != OPAL_SUCCESS) { - pr_err(" Failed to set TCE memory, OPAL error %lld\n", rc); - return; - } - - phb = memblock_virt_alloc(sizeof(struct pnv_phb), 0); - phb->hose = pcibios_alloc_controller(np); - if (!phb->hose) { - pr_err(" Failed to allocate PCI controller\n"); - return; - } - - spin_lock_init(&phb->lock); - phb->hose->first_busno = 0; - phb->hose->last_busno = 0xff; 
- phb->hose->private_data = phb; - phb->hose->controller_ops = pnv_pci_p5ioc2_controller_ops; - phb->hub_id = hub_id; - phb->opal_id = phb_id; - phb->type = PNV_PHB_P5IOC2; - phb->model = PNV_PHB_MODEL_P5IOC2; - - phb->regs = of_iomap(np, 0); - - if (phb->regs == NULL) - pr_err(" Failed to map registers !\n"); - else { - pr_devel(" P_BUID = 0x%08x\n", in_be32(phb->regs + 0x100)); - pr_devel(" P_IOSZ = 0x%08x\n", in_be32(phb->regs + 0x1b0)); - pr_devel(" P_IO_ST = 0x%08x\n", in_be32(phb->regs + 0x1e0)); - pr_devel(" P_MEM1_H = 0x%08x\n", in_be32(phb->regs + 0x1a0)); - pr_devel(" P_MEM1_L = 0x%08x\n", in_be32(phb->regs + 0x190)); - pr_devel(" P_MSZ1_L = 0x%08x\n", in_be32(phb->regs + 0x1c0)); - pr_devel(" P_MEM_ST = 0x%08x\n", in_be32(phb->regs + 0x1d0)); - pr_devel(" P_MEM2_H = 0x%08x\n", in_be32(phb->regs + 0x2c0)); - pr_devel(" P_MEM2_L = 0x%08x\n", in_be32(phb->regs + 0x2b0)); - pr_devel(" P_MSZ2_H = 0x%08x\n", in_be32(phb->regs + 0x2d0)); - pr_devel(" P_MSZ2_L = 0x%08x\n", in_be32(phb->regs + 0x2e0)); - } - - /* Interpret the "ranges" property */ - /* This also maps the I/O region and sets isa_io/mem_base */ - pci_process_bridge_OF_ranges(phb->hose, np, primary); - primary = 0; - - phb->hose->ops = &pnv_pci_ops; - - /* Setup MSI support */ - pnv_pci_init_p5ioc2_msis(phb); - - /* Setup TCEs */ - phb->dma_dev_setup = pnv_pci_p5ioc2_dma_dev_setup; - pnv_pci_setup_iommu_table(&phb->p5ioc2.iommu_table, - tce_mem, tce_size, 0, - IOMMU_PAGE_SHIFT_4K); - /* - * We do not allocate iommu_table as we do not support - * hotplug or SRIOV on P5IOC2 and therefore iommu_free_table() - * should not be called for phb->p5ioc2.table_group.tables[0] ever. 
- */ - tbl = phb->p5ioc2.table_group.tables[0] = &phb->p5ioc2.iommu_table; - table_group = &phb->p5ioc2.table_group; - table_group->tce32_start = tbl->it_offset << tbl->it_page_shift; - table_group->tce32_size = tbl->it_size << tbl->it_page_shift; -} - -void __init pnv_pci_init_p5ioc2_hub(struct device_node *np) -{ - struct device_node *phbn; - const __be64 *prop64; - u64 hub_id; - void *tce_mem; - uint64_t tce_per_phb; - int64_t rc; - int phb_count = 0; - - pr_info("Probing p5ioc2 IO-Hub %s\n", np->full_name); - - prop64 = of_get_property(np, "ibm,opal-hubid", NULL); - if (!prop64) { - pr_err(" Missing \"ibm,opal-hubid\" property !\n"); - return; - } - hub_id = be64_to_cpup(prop64); - pr_info(" HUB-ID : 0x%016llx\n", hub_id); - - /* Count child PHBs and calculate TCE space per PHB */ - for_each_child_of_node(np, phbn) { - if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || - of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) - phb_count++; - } - - if (phb_count <= 0) { - pr_info(" No PHBs for Hub %s\n", np->full_name); - return; - } - - tce_per_phb = __rounddown_pow_of_two(P5IOC2_TCE_MEMORY / phb_count); - pr_info(" Allocating %lld MB of TCE memory per PHB\n", - tce_per_phb >> 20); - - /* Currently allocate 16M of TCE memory for every Hub - * - * XXX TODO: Make it chip local if possible - */ - tce_mem = memblock_virt_alloc(P5IOC2_TCE_MEMORY, P5IOC2_TCE_MEMORY); - pr_debug(" TCE : 0x%016lx..0x%016lx\n", - __pa(tce_mem), __pa(tce_mem) + P5IOC2_TCE_MEMORY - 1); - rc = opal_pci_set_hub_tce_memory(hub_id, __pa(tce_mem), - P5IOC2_TCE_MEMORY); - if (rc != OPAL_SUCCESS) { - pr_err(" Failed to allocate TCE memory, OPAL error %lld\n", rc); - return; - } - - /* Initialize PHBs */ - for_each_child_of_node(np, phbn) { - if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || - of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { - pnv_pci_init_p5ioc2_phb(phbn, hub_id, - tce_mem, tce_per_phb); - tce_mem += tce_per_phb; - } - } -} diff --git 
a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 2f55c86df703..8de0140332b2 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -380,10 +380,7 @@ static void pnv_pci_config_check_eeh(struct pci_dn *pdn) */ pe_no = pdn->pe_number; if (pe_no == IODA_INVALID_PE) { - if (phb->type == PNV_PHB_P5IOC2) - pe_no = 0; - else - pe_no = phb->ioda.reserved_pe; + pe_no = phb->ioda.reserved_pe; } /* @@ -779,7 +776,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk); void __init pnv_pci_init(void) { struct device_node *np; - bool found_ioda = false; pci_add_flags(PCI_CAN_SKIP_ISA_ALIGN); @@ -787,20 +783,11 @@ void __init pnv_pci_init(void) if (!firmware_has_feature(FW_FEATURE_OPAL)) return; - /* Look for IODA IO-Hubs. We don't support mixing IODA - * and p5ioc2 due to the need to change some global - * probing flags - */ + /* Look for IODA IO-Hubs. */ for_each_compatible_node(np, NULL, "ibm,ioda-hub") { pnv_pci_init_ioda_hub(np); - found_ioda = true; } - /* Look for p5ioc2 IO-Hubs */ - if (!found_ioda) - for_each_compatible_node(np, NULL, "ibm,p5ioc2") - pnv_pci_init_p5ioc2_hub(np); - /* Look for ioda2 built-in PHB3's */ for_each_compatible_node(np, NULL, "ibm,ioda2-phb") pnv_pci_init_ioda2_phb(np); diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 7f56313e8d72..32cae3d8e011 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -4,16 +4,14 @@ struct pci_dn; enum pnv_phb_type { - PNV_PHB_P5IOC2 = 0, - PNV_PHB_IODA1 = 1, - PNV_PHB_IODA2 = 2, - PNV_PHB_NPU = 3, + PNV_PHB_IODA1 = 0, + PNV_PHB_IODA2 = 1, + PNV_PHB_NPU = 2, }; /* Precise PHB model for error management */ enum pnv_phb_model { PNV_PHB_MODEL_UNKNOWN, - PNV_PHB_MODEL_P5IOC2, PNV_PHB_MODEL_P7IOC, PNV_PHB_MODEL_PHB3, PNV_PHB_MODEL_NPU, @@ -121,81 +119,74 @@ struct pnv_phb { void (*freeze_pe)(struct pnv_phb *phb, int pe_no); int 
(*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt); - union { - struct { - struct iommu_table iommu_table; - struct iommu_table_group table_group; - } p5ioc2; - - struct { - /* Global bridge info */ - unsigned int total_pe; - unsigned int reserved_pe; - - /* 32-bit MMIO window */ - unsigned int m32_size; - unsigned int m32_segsize; - unsigned int m32_pci_base; - - /* 64-bit MMIO window */ - unsigned int m64_bar_idx; - unsigned long m64_size; - unsigned long m64_segsize; - unsigned long m64_base; - unsigned long m64_bar_alloc; - - /* IO ports */ - unsigned int io_size; - unsigned int io_segsize; - unsigned int io_pci_base; - - /* PE allocation bitmap */ - unsigned long *pe_alloc; - /* PE allocation mutex */ - struct mutex pe_alloc_mutex; - - /* M32 & IO segment maps */ - unsigned int *m32_segmap; - unsigned int *io_segmap; - struct pnv_ioda_pe *pe_array; - - /* IRQ chip */ - int irq_chip_init; - struct irq_chip irq_chip; - - /* Sorted list of used PE's based - * on the sequence of creation - */ - struct list_head pe_list; - struct mutex pe_list_mutex; - - /* Reverse map of PEs, will have to extend if - * we are to support more than 256 PEs, indexed - * bus { bus, devfn } - */ - unsigned char pe_rmap[0x10000]; - - /* 32-bit TCE tables allocation */ - unsigned long tce32_count; - - /* Total "weight" for the sake of DMA resources - * allocation - */ - unsigned int dma_weight; - unsigned int dma_pe_count; - - /* Sorted list of used PE's, sorted at - * boot for resource allocation purposes - */ - struct list_head pe_dma_list; - - /* TCE cache invalidate registers (physical and - * remapped) - */ - phys_addr_t tce_inval_reg_phys; - __be64 __iomem *tce_inval_reg; - } ioda; - }; + struct { + /* Global bridge info */ + unsigned int total_pe; + unsigned int reserved_pe; + + /* 32-bit MMIO window */ + unsigned int m32_size; + unsigned int m32_segsize; + unsigned int m32_pci_base; + + /* 64-bit MMIO window */ + unsigned int m64_bar_idx; + unsigned long m64_size; + unsigned 
long m64_segsize; + unsigned long m64_base; + unsigned long m64_bar_alloc; + + /* IO ports */ + unsigned int io_size; + unsigned int io_segsize; + unsigned int io_pci_base; + + /* PE allocation bitmap */ + unsigned long *pe_alloc; + /* PE allocation mutex */ + struct mutex pe_alloc_mutex; + + /* M32 & IO segment maps */ + unsigned int *m32_segmap; + unsigned int *io_segmap; + struct pnv_ioda_pe *pe_array; + + /* IRQ chip */ + int irq_chip_init; + struct irq_chip irq_chip; + + /* Sorted list of used PE's based + * on the sequence of creation + */ + struct list_head pe_list; + struct mutex pe_list_mutex; + + /* Reverse map of PEs, will have to extend if + * we are to support more than 256 PEs, indexed + * bus { bus, devfn } + */ + unsigned char pe_rmap[0x10000]; + + /* 32-bit TCE tables allocation */ + unsigned long tce32_count; + + /* Total "weight" for the sake of DMA resources + * allocation + */ + unsigned int dma_weight; + unsigned int dma_pe_count; + + /* Sorted list of used PE's, sorted at + * boot for resource allocation purposes + */ + struct list_head pe_dma_list; + + /* TCE cache invalidate registers (physical and + * remapped) + */ + phys_addr_t tce_inval_reg_phys; + __be64 __iomem *tce_inval_reg; + } ioda; /* PHB and hub status structure */ union { @@ -232,7 +223,6 @@ extern void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, u64 dma_offset, unsigned page_shift); -extern void pnv_pci_init_p5ioc2_hub(struct device_node *np); extern void pnv_pci_init_ioda_hub(struct device_node *np); extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_init_npu_phb(struct device_node *np); -- cgit v1.2.3 From 2e34057929cad8a90b775581216886d22b642e0a Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Wed, 27 Jan 2016 11:29:44 +1100 Subject: powerpc/xmon: fix typo in usage message Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman 
--- arch/powerpc/xmon/xmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 07a8508cb7fa..d5c8a156f63c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -233,7 +233,7 @@ Commands:\n\ " S print special registers\n\ t print backtrace\n\ x exit monitor and recover\n\ - X exit monitor and dont recover\n" + X exit monitor and don't recover\n" #if defined(CONFIG_PPC64) && !defined(CONFIG_PPC_BOOK3E) " u dump segment table or SLB\n" #elif defined(CONFIG_PPC_STD_MMU_32) -- cgit v1.2.3 From 31f6a4ada14de04ee6cd7ff03c8b6b5e282a13f0 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Mon, 8 Feb 2016 14:39:19 +1100 Subject: powerpc/eeh: fix incorrect function name in comment The comment block above pcibios_set_pcie_reset_state() incorrectly refers to pcibios_set_pcie_slot_reset(). Fix the comment accordingly. Signed-off-by: Andrew Donnellan Acked-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/eeh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 40e4d4a27663..8c6005cf1583 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -739,7 +739,7 @@ static void *eeh_restore_dev_state(void *data, void *userdata) } /** - * pcibios_set_pcie_slot_reset - Set PCI-E reset state + * pcibios_set_pcie_reset_state - Set PCI-E reset state * @dev: pci device struct * @state: reset state to enter * -- cgit v1.2.3 From 9b4fffa14906fce7aabf1f032ddd7efc7a031bba Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 9 Feb 2016 18:17:48 +1100 Subject: powerpc/powernv: new function to access OPAL msglog Currently, the OPAL msglog/console buffer is exposed as a sysfs file, with the sysfs read handler responsible for retrieving the log from the OPAL buffer. We'd like to be able to use it in xmon as well. 
Refactor the OPAL msglog code to create a new function, opal_msglog_copy(), that copies to an arbitrary buffer. Separate the initialisation code into generic memcons init and sysfs file creation. Signed-off-by: Andrew Donnellan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 3 +++ arch/powerpc/platforms/powernv/opal-msglog.c | 29 ++++++++++++++++++---------- arch/powerpc/platforms/powernv/opal.c | 7 +++++-- 3 files changed, 27 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 07a99e638449..9d86c6651716 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -248,6 +248,7 @@ extern int opal_elog_init(void); extern void opal_platform_dump_init(void); extern void opal_sys_param_init(void); extern void opal_msglog_init(void); +extern void opal_msglog_sysfs_init(void); extern int opal_async_comp_init(void); extern int opal_sensor_init(void); extern int opal_hmi_handler_init(void); @@ -273,6 +274,8 @@ void opal_free_sg_list(struct opal_sg_list *sg); extern int opal_error_code(int rc); +ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_OPAL_H */ diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 44ed78af1a0d..59fa6e1cbc9b 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -31,26 +31,25 @@ struct memcons { __be32 in_cons; }; -static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, - struct bin_attribute *bin_attr, char *to, - loff_t pos, size_t count) +static struct memcons *opal_memcons = NULL; + +ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count) { - struct memcons *mc = bin_attr->private; const char *conbuf; ssize_t ret; size_t first_read = 0; uint32_t out_pos, avail; - if (!mc) + if (!opal_memcons) return -ENODEV; - out_pos = 
be32_to_cpu(ACCESS_ONCE(mc->out_pos)); + out_pos = be32_to_cpu(ACCESS_ONCE(opal_memcons->out_pos)); /* Now we've read out_pos, put a barrier in before reading the new * data it points to in conbuf. */ smp_rmb(); - conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys)); + conbuf = phys_to_virt(be64_to_cpu(opal_memcons->obuf_phys)); /* When the buffer has wrapped, read from the out_pos marker to the end * of the buffer, and then read the remaining data as in the un-wrapped @@ -58,7 +57,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, if (out_pos & MEMCONS_OUT_POS_WRAP) { out_pos &= MEMCONS_OUT_POS_MASK; - avail = be32_to_cpu(mc->obuf_size) - out_pos; + avail = be32_to_cpu(opal_memcons->obuf_size) - out_pos; ret = memory_read_from_buffer(to, count, &pos, conbuf + out_pos, avail); @@ -76,7 +75,7 @@ static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, } /* Sanity check. The firmware should not do this to us. */ - if (out_pos > be32_to_cpu(mc->obuf_size)) { + if (out_pos > be32_to_cpu(opal_memcons->obuf_size)) { pr_err("OPAL: memory console corruption. 
Aborting read.\n"); return -EINVAL; } @@ -91,6 +90,13 @@ out: return ret; } +static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *to, + loff_t pos, size_t count) +{ + return opal_msglog_copy(to, pos, count); +} + static struct bin_attribute opal_msglog_attr = { .attr = {.name = "msglog", .mode = 0444}, .read = opal_msglog_read @@ -117,8 +123,11 @@ void __init opal_msglog_init(void) return; } - opal_msglog_attr.private = mc; + opal_memcons = mc; +} +void __init opal_msglog_sysfs_init(void) +{ if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0) pr_warn("OPAL: sysfs file creation failed\n"); } diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 4e0da5af94a1..0256d0729252 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -724,6 +724,9 @@ static int __init opal_init(void) of_node_put(leds); } + /* Initialise OPAL message log interface */ + opal_msglog_init(); + /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { @@ -739,8 +742,8 @@ static int __init opal_init(void) opal_platform_dump_init(); /* Setup system parameters interface */ opal_sys_param_init(); - /* Setup message log interface. */ - opal_msglog_init(); + /* Setup message log sysfs interface. */ + opal_msglog_sysfs_init(); } /* Initialize platform devices: IPMI backend, PRD & flash interface */ -- cgit v1.2.3 From fde93a0f774f510bfaabccd5ba00f97972be1e12 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Tue, 9 Feb 2016 18:17:49 +1100 Subject: powerpc/xmon: add command to dump OPAL msglog Add the 'do' command to dump the OPAL msglog in xmon. 
Signed-off-by: Andrew Donnellan [mpe: Reduce the amount of ifdefery required] Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index d5c8a156f63c..47e195d66a9a 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -47,6 +47,9 @@ #include #include +#include +#include + #ifdef CONFIG_PPC64 #include #include @@ -119,6 +122,16 @@ static void dump(void); static void prdump(unsigned long, long); static int ppc_inst_dump(unsigned long, long, int); static void dump_log_buf(void); + +#ifdef CONFIG_PPC_POWERNV +static void dump_opal_msglog(void); +#else +static inline void dump_opal_msglog(void) +{ + printf("Machine is not running OPAL firmware.\n"); +} +#endif + static void backtrace(struct pt_regs *); static void excprint(struct pt_regs *); static void prregs(struct pt_regs *); @@ -202,6 +215,10 @@ Commands:\n\ df dump float values\n\ dd dump double values\n\ dl dump the kernel log buffer\n" +#ifdef CONFIG_PPC_POWERNV + "\ + do dump the OPAL message log\n" +#endif #ifdef CONFIG_PPC64 "\ dp[#] dump paca for current cpu, or cpu #\n\ @@ -2253,6 +2270,8 @@ dump(void) last_cmd = "di\n"; } else if (c == 'l') { dump_log_buf(); + } else if (c == 'o') { + dump_opal_msglog(); } else if (c == 'r') { scanhex(&ndump); if (ndump == 0) @@ -2395,6 +2414,45 @@ dump_log_buf(void) catch_memory_errors = 0; } +#ifdef CONFIG_PPC_POWERNV +static void dump_opal_msglog(void) +{ + unsigned char buf[128]; + ssize_t res; + loff_t pos = 0; + + if (!firmware_has_feature(FW_FEATURE_OPAL)) { + printf("Machine is not running OPAL firmware.\n"); + return; + } + + if (setjmp(bus_error_jmp) != 0) { + printf("Error dumping OPAL msglog!\n"); + return; + } + + catch_memory_errors = 1; + sync(); + + xmon_start_pagination(); + while ((res = opal_msglog_copy(buf, pos, sizeof(buf) - 1))) { + if (res < 0) { + printf("Error dumping OPAL 
msglog! Error: %zd\n", res); + break; + } + buf[res] = '\0'; + printf("%s", buf); + pos += res; + } + xmon_end_pagination(); + + sync(); + /* wait a little while to see if we get a machine check */ + __delay(200); + catch_memory_errors = 0; +} +#endif + /* * Memory operations - move, set, print differences */ -- cgit v1.2.3 From 6dfb54049f9a99b24fe5d5cd2d3af19eadc8f31f Mon Sep 17 00:00:00 2001 From: Douglas Miller Date: Mon, 23 Nov 2015 09:01:15 -0600 Subject: powerpc/xmon: Add xmon command to dump process/task similar to ps(1) Add 'P' command with optional task_struct address to dump all/one task's information: task pointer, kernel stack pointer, PID, PPID, state (interpreted), CPU where (last) running, and command. Signed-off-by: Douglas Miller Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 47e195d66a9a..942796fa4767 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -163,6 +163,7 @@ static int cpu_cmd(void); static void csum(void); static void bootcmds(void); static void proccall(void); +static void show_tasks(void); void dump_segments(void); static void symbol_lookup(void); static void xmon_show_stack(unsigned long sp, unsigned long lr, @@ -238,6 +239,7 @@ Commands:\n\ mz zero a block of memory\n\ mi show information about memory allocation\n\ p call a procedure\n\ + P list processes/tasks\n\ r print registers\n\ s single step\n" #ifdef CONFIG_SPU_BASE @@ -967,6 +969,9 @@ cmds(struct pt_regs *excp) case 'p': proccall(); break; + case 'P': + show_tasks(); + break; #ifdef CONFIG_PPC_STD_MMU case 'u': dump_segments(); @@ -2566,6 +2571,61 @@ memzcan(void) printf("%.8x\n", a - mskip); } +static void show_task(struct task_struct *tsk) +{ + char state; + + /* + * Cloned from kdb_task_state_char(), which is not entirely + * appropriate for calling from xmon. 
This could be moved + * to a common, generic, routine used by both. + */ + state = (tsk->state == 0) ? 'R' : + (tsk->state < 0) ? 'U' : + (tsk->state & TASK_UNINTERRUPTIBLE) ? 'D' : + (tsk->state & TASK_STOPPED) ? 'T' : + (tsk->state & TASK_TRACED) ? 'C' : + (tsk->exit_state & EXIT_ZOMBIE) ? 'Z' : + (tsk->exit_state & EXIT_DEAD) ? 'E' : + (tsk->state & TASK_INTERRUPTIBLE) ? 'S' : '?'; + + printf("%p %016lx %6d %6d %c %2d %s\n", tsk, + tsk->thread.ksp, + tsk->pid, tsk->parent->pid, + state, task_thread_info(tsk)->cpu, + tsk->comm); +} + +static void show_tasks(void) +{ + unsigned long tskv; + struct task_struct *tsk = NULL; + + printf(" task_struct ->thread.ksp PID PPID S P CMD\n"); + + if (scanhex(&tskv)) + tsk = (struct task_struct *)tskv; + + if (setjmp(bus_error_jmp) != 0) { + catch_memory_errors = 0; + printf("*** Error dumping task %p\n", tsk); + return; + } + + catch_memory_errors = 1; + sync(); + + if (tsk) + show_task(tsk); + else + for_each_process(tsk) + show_task(tsk); + + sync(); + __delay(200); + catch_memory_errors = 0; +} + static void proccall(void) { unsigned long args[8]; -- cgit v1.2.3 From ccc9662da5494a7c4ff5ed5d167285b5a28d5fb3 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 9 Feb 2016 15:50:24 +1100 Subject: powerpc/powernv: Simplify definitions of EEH debugfs handlers The EEH debugfs handlers have same prototype. This introduces a macro to define them, then to simplify the code. No logical changes. 
Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/eeh-powernv.c | 60 ++++++++++------------------ 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 5f152b95ca0c..3f1cb35d9cdf 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -167,42 +167,26 @@ static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val) return 0; } -static int pnv_eeh_outb_dbgfs_set(void *data, u64 val) -{ - return pnv_eeh_dbgfs_set(data, 0xD10, val); -} - -static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val) -{ - return pnv_eeh_dbgfs_get(data, 0xD10, val); -} - -static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val) -{ - return pnv_eeh_dbgfs_set(data, 0xD90, val); -} - -static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val) -{ - return pnv_eeh_dbgfs_get(data, 0xD90, val); -} - -static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val) -{ - return pnv_eeh_dbgfs_set(data, 0xE10, val); -} - -static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val) -{ - return pnv_eeh_dbgfs_get(data, 0xE10, val); -} +#define PNV_EEH_DBGFS_ENTRY(name, reg) \ +static int pnv_eeh_dbgfs_set_##name(void *data, u64 val) \ +{ \ + return pnv_eeh_dbgfs_set(data, reg, val); \ +} \ + \ +static int pnv_eeh_dbgfs_get_##name(void *data, u64 *val) \ +{ \ + return pnv_eeh_dbgfs_get(data, reg, val); \ +} \ + \ +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_dbgfs_ops_##name, \ + pnv_eeh_dbgfs_get_##name, \ + pnv_eeh_dbgfs_set_##name, \ + "0x%llx\n") + +PNV_EEH_DBGFS_ENTRY(outb, 0xD10); +PNV_EEH_DBGFS_ENTRY(inbA, 0xD90); +PNV_EEH_DBGFS_ENTRY(inbB, 0xE10); -DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get, - pnv_eeh_outb_dbgfs_set, "0x%llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get, - pnv_eeh_inbA_dbgfs_set, "0x%llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, 
pnv_eeh_inbB_dbgfs_get, - pnv_eeh_inbB_dbgfs_set, "0x%llx\n"); #endif /* CONFIG_DEBUG_FS */ /** @@ -268,13 +252,13 @@ static int pnv_eeh_post_init(void) debugfs_create_file("err_injct_outbound", 0600, phb->dbgfs, hose, - &pnv_eeh_outb_dbgfs_ops); + &pnv_eeh_dbgfs_ops_outb); debugfs_create_file("err_injct_inboundA", 0600, phb->dbgfs, hose, - &pnv_eeh_inbA_dbgfs_ops); + &pnv_eeh_dbgfs_ops_inbA); debugfs_create_file("err_injct_inboundB", 0600, phb->dbgfs, hose, - &pnv_eeh_inbB_dbgfs_ops); + &pnv_eeh_dbgfs_ops_inbB); #endif /* CONFIG_DEBUG_FS */ } -- cgit v1.2.3 From b0331854190e70b9d96d39257230def45f832877 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 22 Oct 2015 09:22:14 +0800 Subject: powerpc/powernv: don't enable SRIOV when VF BAR has non 64bit-prefetchable BAR On PHB3, we enable SRIOV devices by mapping IOV BAR with M64 BARs. If a SRIOV device's IOV BAR is not 64bit-prefetchable, it is not assigned from the 64bit prefetchable window, which means M64 BAR can't work on it. The reason is PCI bridges support only 2 memory windows and the kernel code programs bridges in the way that one window is 32bit-nonprefetchable and the other one is 64bit-prefetchable. So if devices' IOV BAR is 64bit and non-prefetchable, it will be mapped into 32bit space and therefore M64 cannot be used for it. This patch makes this explicit and truncates the IOV resource in this case to save MMIO space. 
Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Acked-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 35 +++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 573ae1994097..58b0e230a382 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -872,9 +872,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) if (!res->flags || !res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) - continue; - /* * The actual IOV BAR range is determined by the start address * and the actual size for num_vfs VFs BAR. This check is to @@ -903,9 +900,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) if (!res->flags || !res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) - continue; - size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); res2 = *res; res->start += size * offset; @@ -1263,9 +1257,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) if (!res->flags || !res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) - continue; - for (j = 0; j < vf_groups; j++) { do { win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, @@ -1552,6 +1543,12 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) pdn = pci_get_pdn(pdev); if (phb->type == PNV_PHB_IODA2) { + if (!pdn->vfs_expanded) { + dev_info(&pdev->dev, "don't support this SRIOV device" + " with non 64bit-prefetchable IOV BAR\n"); + return -ENOSPC; + } + /* Calculate available PE for required VFs */ mutex_lock(&phb->ioda.pe_alloc_mutex); pdn->offset = bitmap_find_next_zero_area( @@ -2877,9 +2874,10 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) if (!res->flags || res->parent) continue; if (!pnv_pci_is_mem_pref_64(res->flags)) { - dev_warn(&pdev->dev, " non M64 VF 
BAR%d: %pR\n", + dev_warn(&pdev->dev, "Don't support SR-IOV with" + " non M64 VF BAR%d: %pR. \n", i, res); - continue; + goto truncate_iov; } size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); @@ -2898,11 +2896,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || res->parent) continue; - if (!pnv_pci_is_mem_pref_64(res->flags)) { - dev_warn(&pdev->dev, "Skipping expanding VF BAR%d: %pR\n", - i, res); - continue; - } dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); @@ -2912,6 +2905,16 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) i, res, mul); } pdn->vfs_expanded = mul; + + return; + +truncate_iov: + /* To save MMIO space, IOV BAR is truncated. */ + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { + res = &pdev->resource[i + PCI_IOV_RESOURCES]; + res->flags = 0; + res->end = res->start - 1; + } } #endif /* CONFIG_PCI_IOV */ -- cgit v1.2.3 From 7fbe7a9374f8bb18db653f4693861c8625d01db1 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 22 Oct 2015 09:22:15 +0800 Subject: powerpc/powernv: simplify the calculation of iov resource alignment The alignment of IOV BAR on PowerNV platform is the total size of the IOV BAR. No matter whether the IOV BAR is extended with number of roundup_pow_of_two(total_vfs) or number of max PE number (256), the total size could be calculated by (vfs_expanded * VF_BAR_size). This patch simplifies the pnv_pci_iov_resource_alignment() by removing the first case. 
Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Acked-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 58b0e230a382..15e6ff18dcd5 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -3129,17 +3129,21 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno) { struct pci_dn *pdn = pci_get_pdn(pdev); - resource_size_t align, iov_align; - - iov_align = resource_size(&pdev->resource[resno]); - if (iov_align) - return iov_align; + resource_size_t align; + /* + * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the + * SR-IOV. While from hardware perspective, the range mapped by M64 + * BAR should be size aligned. + * + * This function returns the total IOV BAR size if M64 BAR is in + * Shared PE mode or just VF BAR size if not. + */ align = pci_iov_resource_size(pdev, resno); - if (pdn->vfs_expanded) - return pdn->vfs_expanded * align; + if (!pdn->vfs_expanded) + return align; - return align; + return pdn->vfs_expanded * align; } #endif /* CONFIG_PCI_IOV */ -- cgit v1.2.3 From ee8222fe95e40ade9f50b852095d4626631ebbbe Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 22 Oct 2015 09:22:16 +0800 Subject: powerpc/powernv: use one M64 BAR in Single PE mode for one VF BAR In the current implementation, when a VF BAR is bigger than 64MB, it uses 4 M64 BARs in Single PE mode to cover the number of VFs required to be enabled. As a result, several VFs end up in one VF Group, which leads to interference between VFs in the same group. In this patch, m64_wins is also renamed to m64_map, which holds the index number of the M64 BAR used to map the VF BAR. Based on Gavin's comments.
Also makes sure the VF BAR size is bigger than 32MB when M64 BAR is used in Single PE mode. This patch changes the design by using one M64 BAR in Single PE mode for one VF BAR. This gives absolute isolation for VFs. Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Acked-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 5 +- arch/powerpc/platforms/powernv/pci-ioda.c | 177 ++++++++++++------------------ 2 files changed, 75 insertions(+), 107 deletions(-) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 54843ca5fa2b..11d3543a57f2 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -216,10 +216,9 @@ struct pci_dn { u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ int offset; /* PE# for the first VF PE */ -#define M64_PER_IOV 4 - int m64_per_iov; + bool m64_single_mode; /* Use M64 BAR in Single Mode */ #define IODA_INVALID_M64 (-1) - int m64_wins[PCI_SRIOV_NUM_BARS][M64_PER_IOV]; + int (*m64_map)[PCI_SRIOV_NUM_BARS]; #endif /* CONFIG_PCI_IOV */ #endif struct list_head child_list; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 15e6ff18dcd5..4004c0a842ca 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1190,29 +1190,36 @@ static void pnv_pci_ioda_setup_PEs(void) } #ifdef CONFIG_PCI_IOV -static int pnv_pci_vf_release_m64(struct pci_dev *pdev) +static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs) { struct pci_bus *bus; struct pci_controller *hose; struct pnv_phb *phb; struct pci_dn *pdn; int i, j; + int m64_bars; bus = pdev->bus; hose = pci_bus_to_host(bus); phb = hose->private_data; pdn = pci_get_pdn(pdev); + if (pdn->m64_single_mode) + m64_bars = num_vfs; + else + m64_bars = 1; + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - for (j = 0; j < M64_PER_IOV; j++) 
{ - if (pdn->m64_wins[i][j] == IODA_INVALID_M64) + for (j = 0; j < m64_bars; j++) { + if (pdn->m64_map[j][i] == IODA_INVALID_M64) continue; opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 0); - clear_bit(pdn->m64_wins[i][j], &phb->ioda.m64_bar_alloc); - pdn->m64_wins[i][j] = IODA_INVALID_M64; + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0); + clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc); + pdn->m64_map[j][i] = IODA_INVALID_M64; } + kfree(pdn->m64_map); return 0; } @@ -1229,8 +1236,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) int total_vfs; resource_size_t size, start; int pe_num; - int vf_groups; - int vf_per_group; + int m64_bars; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1238,26 +1244,26 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) pdn = pci_get_pdn(pdev); total_vfs = pci_sriov_get_totalvfs(pdev); - /* Initialize the m64_wins to IODA_INVALID_M64 */ - for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) - for (j = 0; j < M64_PER_IOV; j++) - pdn->m64_wins[i][j] = IODA_INVALID_M64; + if (pdn->m64_single_mode) + m64_bars = num_vfs; + else + m64_bars = 1; + + pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL); + if (!pdn->m64_map) + return -ENOMEM; + /* Initialize the m64_map to IODA_INVALID_M64 */ + for (i = 0; i < m64_bars ; i++) + for (j = 0; j < PCI_SRIOV_NUM_BARS; j++) + pdn->m64_map[i][j] = IODA_INVALID_M64; - if (pdn->m64_per_iov == M64_PER_IOV) { - vf_groups = (num_vfs <= M64_PER_IOV) ? num_vfs: M64_PER_IOV; - vf_per_group = (num_vfs <= M64_PER_IOV)? 
1: - roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - } else { - vf_groups = 1; - vf_per_group = 1; - } for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = &pdev->resource[i + PCI_IOV_RESOURCES]; if (!res->flags || !res->parent) continue; - for (j = 0; j < vf_groups; j++) { + for (j = 0; j < m64_bars; j++) { do { win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, phb->ioda.m64_bar_idx + 1, 0); @@ -1266,12 +1272,11 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) goto m64_failed; } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); - pdn->m64_wins[i][j] = win; + pdn->m64_map[j][i] = win; - if (pdn->m64_per_iov == M64_PER_IOV) { + if (pdn->m64_single_mode) { size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i); - size = size * vf_per_group; start = res->start + size * j; } else { size = resource_size(res); @@ -1279,16 +1284,16 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) } /* Map the M64 here */ - if (pdn->m64_per_iov == M64_PER_IOV) { + if (pdn->m64_single_mode) { pe_num = pdn->offset + j; rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe_num, OPAL_M64_WINDOW_TYPE, - pdn->m64_wins[i][j], 0); + pdn->m64_map[j][i], 0); } rc = opal_pci_set_phb_mem_window(phb->opal_id, OPAL_M64_WINDOW_TYPE, - pdn->m64_wins[i][j], + pdn->m64_map[j][i], start, 0, /* unused */ size); @@ -1300,12 +1305,12 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) goto m64_failed; } - if (pdn->m64_per_iov == M64_PER_IOV) + if (pdn->m64_single_mode) rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 2); + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2); else rc = opal_pci_phb_mmio_enable(phb->opal_id, - OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i][j], 1); + OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1); if (rc != OPAL_SUCCESS) { dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", @@ -1317,7 +1322,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) return 0; 
m64_failed: - pnv_pci_vf_release_m64(pdev); + pnv_pci_vf_release_m64(pdev, num_vfs); return -EBUSY; } @@ -1344,15 +1349,13 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); } -static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) +static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) { struct pci_bus *bus; struct pci_controller *hose; struct pnv_phb *phb; struct pnv_ioda_pe *pe, *pe_n; struct pci_dn *pdn; - u16 vf_index; - int64_t rc; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1362,35 +1365,6 @@ static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs) if (!pdev->is_physfn) return; - if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { - int vf_group; - int vf_per_group; - int vf_index1; - - vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - - for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) - for (vf_index = vf_group * vf_per_group; - vf_index < (vf_group + 1) * vf_per_group && - vf_index < num_vfs; - vf_index++) - for (vf_index1 = vf_group * vf_per_group; - vf_index1 < (vf_group + 1) * vf_per_group && - vf_index1 < num_vfs; - vf_index1++){ - - rc = opal_pci_set_peltv(phb->opal_id, - pdn->offset + vf_index, - pdn->offset + vf_index1, - OPAL_REMOVE_PE_FROM_DOMAIN); - - if (rc) - dev_warn(&pdev->dev, "%s: Failed to unlink same group PE#%d(%lld)\n", - __func__, - pdn->offset + vf_index1, rc); - } - } - list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { if (pe->parent_dev != pdev) continue; @@ -1425,14 +1399,14 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) num_vfs = pdn->num_vfs; /* Release VF PEs */ - pnv_ioda_release_vf_PE(pdev, num_vfs); + pnv_ioda_release_vf_PE(pdev); if (phb->type == PNV_PHB_IODA2) { - if (pdn->m64_per_iov == 1) + if (!pdn->m64_single_mode) pnv_pci_vf_resource_shift(pdev, -pdn->offset); /* Release M64 windows */ - pnv_pci_vf_release_m64(pdev); + 
pnv_pci_vf_release_m64(pdev, num_vfs); /* Release PE numbers */ bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); @@ -1451,7 +1425,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) int pe_num; u16 vf_index; struct pci_dn *pdn; - int64_t rc; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1496,37 +1469,6 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pnv_pci_ioda2_setup_dma_pe(phb, pe); } - - if (pdn->m64_per_iov == M64_PER_IOV && num_vfs > M64_PER_IOV) { - int vf_group; - int vf_per_group; - int vf_index1; - - vf_per_group = roundup_pow_of_two(num_vfs) / pdn->m64_per_iov; - - for (vf_group = 0; vf_group < M64_PER_IOV; vf_group++) { - for (vf_index = vf_group * vf_per_group; - vf_index < (vf_group + 1) * vf_per_group && - vf_index < num_vfs; - vf_index++) { - for (vf_index1 = vf_group * vf_per_group; - vf_index1 < (vf_group + 1) * vf_per_group && - vf_index1 < num_vfs; - vf_index1++) { - - rc = opal_pci_set_peltv(phb->opal_id, - pdn->offset + vf_index, - pdn->offset + vf_index1, - OPAL_ADD_PE_TO_DOMAIN); - - if (rc) - dev_warn(&pdev->dev, "%s: Failed to link same group PE#%d(%lld)\n", - __func__, - pdn->offset + vf_index1, rc); - } - } - } - } } int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) @@ -1549,6 +1491,15 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) return -ENOSPC; } + /* + * When M64 BARs functions in Single PE mode, the number of VFs + * could be enabled must be less than the number of M64 BARs. + */ + if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) { + dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n"); + return -EBUSY; + } + /* Calculate available PE for required VFs */ mutex_lock(&phb->ioda.pe_alloc_mutex); pdn->offset = bitmap_find_next_zero_area( @@ -1576,7 +1527,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) * the IOV BAR according to the PE# allocated to the VFs. * Otherwise, the PE# for the VF will conflict with others. 
*/ - if (pdn->m64_per_iov == 1) { + if (!pdn->m64_single_mode) { ret = pnv_pci_vf_resource_shift(pdev, pdn->offset); if (ret) goto m64_failed; @@ -1609,8 +1560,7 @@ int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs) /* Allocate PCI data */ add_dev_pci_data(pdev); - pnv_pci_sriov_enable(pdev, num_vfs); - return 0; + return pnv_pci_sriov_enable(pdev, num_vfs); } #endif /* CONFIG_PCI_IOV */ @@ -2864,9 +2814,9 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) pdn = pci_get_pdn(pdev); pdn->vfs_expanded = 0; + pdn->m64_single_mode = false; total_vfs = pci_sriov_get_totalvfs(pdev); - pdn->m64_per_iov = 1; mul = phb->ioda.total_pe; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { @@ -2886,8 +2836,8 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) if (size > (1 << 26)) { dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n", i, res); - pdn->m64_per_iov = M64_PER_IOV; mul = roundup_pow_of_two(total_vfs); + pdn->m64_single_mode = true; break; } } @@ -2897,8 +2847,14 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) if (!res->flags || res->parent) continue; - dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + /* + * On PHB3, the minimum size alignment of M64 BAR in single + * mode is 32MB. 
+ */ + if (pdn->m64_single_mode && (size < SZ_32M)) + goto truncate_iov; + dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res); res->end = res->start + size * mul - 1; dev_dbg(&pdev->dev, " %pR\n", res); dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)", @@ -3128,6 +3084,8 @@ static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus, static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno) { + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; struct pci_dn *pdn = pci_get_pdn(pdev); resource_size_t align; @@ -3136,12 +3094,23 @@ static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, * SR-IOV. While from hardware perspective, the range mapped by M64 * BAR should be size aligned. * + * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra + * powernv-specific hardware restriction is gone. But if just use the + * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with + * in one segment of M64 #15, which introduces the PE conflict between + * PF and VF. Based on this, the minimum alignment of an IOV BAR is + * m64_segsize. + * * This function returns the total IOV BAR size if M64 BAR is in * Shared PE mode or just VF BAR size if not. + * If the M64 BAR is in Single PE mode, return the VF BAR size or + * M64 segment size if IOV BAR size is less. */ align = pci_iov_resource_size(pdev, resno); if (!pdn->vfs_expanded) return align; + if (pdn->m64_single_mode) + return max(align, (resource_size_t)phb->ioda.m64_segsize); return pdn->vfs_expanded * align; } -- cgit v1.2.3 From f2dd0afeea0ed0e740c4b066c76a556a8b870e58 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 22 Oct 2015 09:22:17 +0800 Subject: powerpc/powernv: replace the hard coded boundary with gate At the moment 64bit-prefetchable window can be maximum 64GB, which is currently got from device tree. 
This means that in shared mode the maximum supported VF BAR size is 64GB/256=256MB. However, a VF BAR of this size could exhaust the whole 64bit-prefetchable window. It is a design decision to limit the VF BAR size to 64MB. Since a 64MB VF BAR would occupy a quarter of the 64bit-prefetchable window, this is affordable. This patch replaces the magic limit of 64MB with "gate", which is 1/4 of the M64 segment size (m64_segsize >> 2), and adds a comment to explain the reason for it. Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Acked-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 4004c0a842ca..b8c01fc1a6c1 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2798,8 +2798,9 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { } #ifdef CONFIG_PCI_IOV static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) { - struct pci_controller *hose; - struct pnv_phb *phb; + struct pci_controller *hose = pci_bus_to_host(pdev->bus); + struct pnv_phb *phb = hose->private_data; + const resource_size_t gate = phb->ioda.m64_segsize >> 2; struct resource *res; int i; resource_size_t size; @@ -2809,9 +2810,6 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) if (!pdev->is_physfn || pdev->is_added) return; - hose = pci_bus_to_host(pdev->bus); - phb = hose->private_data; - pdn = pci_get_pdn(pdev); pdn->vfs_expanded = 0; pdn->m64_single_mode = false; @@ -2832,10 +2830,22 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); - /* bigger than 64M */ - if (size > (1 << 26)) { - dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size is bigger than 64M, roundup power2\n", - i, res); + /* + * If
bigger than quarter of M64 segment size, just round up + * power of two. + * + * Generally, one M64 BAR maps one IOV BAR. To avoid conflict + * with other devices, IOV BAR size is expanded to be + * (total_pe * VF_BAR_size). When VF_BAR_size is half of M64 + * segment size , the expanded size would equal to half of the + * whole M64 space size, which will exhaust the M64 Space and + * limit the system flexibility. This is a design decision to + * set the boundary to quarter of the M64 segment size. + */ + if (size > gate) { + dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size " + "is bigger than %lld, roundup power2\n", + i, res, gate); mul = roundup_pow_of_two(total_vfs); pdn->m64_single_mode = true; break; -- cgit v1.2.3 From dfcc8d45c33baa670f20fe4860adb3ffde39cecf Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 22 Oct 2015 09:22:18 +0800 Subject: powerpc/powernv: boundary the total VF BAR size instead of the individual one Each VF could have 6 BARs at most. When the total BAR size exceeds the gate, after expanding it will also exhaust the M64 Window. This patch limits the boundary by checking the total VF BAR size instead of the individual BAR. 
Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Acked-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b8c01fc1a6c1..0c7e6ba80b07 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -2803,7 +2803,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) const resource_size_t gate = phb->ioda.m64_segsize >> 2; struct resource *res; int i; - resource_size_t size; + resource_size_t size, total_vf_bar_sz; struct pci_dn *pdn; int mul, total_vfs; @@ -2816,6 +2816,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) total_vfs = pci_sriov_get_totalvfs(pdev); mul = phb->ioda.total_pe; + total_vf_bar_sz = 0; for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { res = &pdev->resource[i + PCI_IOV_RESOURCES]; @@ -2828,7 +2829,8 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) goto truncate_iov; } - size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES); + total_vf_bar_sz += pci_iov_resource_size(pdev, + i + PCI_IOV_RESOURCES); /* * If bigger than quarter of M64 segment size, just round up @@ -2842,11 +2844,11 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev) * limit the system flexibility. This is a design decision to * set the boundary to quarter of the M64 segment size. 
*/ - if (size > gate) { - dev_info(&pdev->dev, "PowerNV: VF BAR%d: %pR IOV size " - "is bigger than %lld, roundup power2\n", - i, res, gate); + if (total_vf_bar_sz > gate) { mul = roundup_pow_of_two(total_vfs); + dev_info(&pdev->dev, + "VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n", + total_vf_bar_sz, gate, mul); pdn->m64_single_mode = true; break; } -- cgit v1.2.3 From be283eeb7f6d9165b3c50f5222123ac25cf0d417 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 22 Oct 2015 09:22:19 +0800 Subject: powerpc/powernv: allocate sparse PE# when using M64 BAR in Single PE mode When an M64 BAR is set to Single PE mode, the PE# assigned to the VFs could be sparse. This patch restructures the code to allocate sparse PE# for VFs when the M64 BAR is set to Single PE mode. It also renames offset to pe_num_map to reflect that the content is the PE number. Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Acked-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pci-bridge.h | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 81 +++++++++++++++++++++++-------- 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 11d3543a57f2..b0b43f5fbc5f 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -215,7 +215,7 @@ struct pci_dn { #ifdef CONFIG_PCI_IOV u16 vfs_expanded; /* number of VFs IOV BAR expanded */ u16 num_vfs; /* number of VFs enabled*/ - int offset; /* PE# for the first VF PE */ + int *pe_num_map; /* PE# for the first VF PE or array */ bool m64_single_mode; /* Use M64 BAR in Single Mode */ #define IODA_INVALID_M64 (-1) int (*m64_map)[PCI_SRIOV_NUM_BARS]; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 0c7e6ba80b07..dc868586315d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1285,7 +1285,7 @@ static
int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) /* Map the M64 here */ if (pdn->m64_single_mode) { - pe_num = pdn->offset + j; + pe_num = pdn->pe_num_map[j]; rc = opal_pci_map_pe_mmio_window(phb->opal_id, pe_num, OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0); @@ -1389,7 +1389,7 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) struct pnv_phb *phb; struct pci_dn *pdn; struct pci_sriov *iov; - u16 num_vfs; + u16 num_vfs, i; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1403,14 +1403,21 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev) if (phb->type == PNV_PHB_IODA2) { if (!pdn->m64_single_mode) - pnv_pci_vf_resource_shift(pdev, -pdn->offset); + pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map); /* Release M64 windows */ pnv_pci_vf_release_m64(pdev, num_vfs); /* Release PE numbers */ - bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); - pdn->offset = 0; + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + if (pdn->pe_num_map[i] != IODA_INVALID_PE) + pnv_ioda_free_pe(phb, pdn->pe_num_map[i]); + } + } else + bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); + /* Releasing pe_num_map */ + kfree(pdn->pe_num_map); } } @@ -1436,7 +1443,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { - pe_num = pdn->offset + vf_index; + if (pdn->m64_single_mode) + pe_num = pdn->pe_num_map[vf_index]; + else + pe_num = *pdn->pe_num_map + vf_index; pe = &phb->ioda.pe_array[pe_num]; pe->pe_number = pe_num; @@ -1478,6 +1488,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) struct pnv_phb *phb; struct pci_dn *pdn; int ret; + u16 i; bus = pdev->bus; hose = pci_bus_to_host(bus); @@ -1500,20 +1511,44 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) return -EBUSY; } + /* Allocating pe_num_map */ + if (pdn->m64_single_mode) + pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs, + GFP_KERNEL); + else + 
pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); + + if (!pdn->pe_num_map) + return -ENOMEM; + + if (pdn->m64_single_mode) + for (i = 0; i < num_vfs; i++) + pdn->pe_num_map[i] = IODA_INVALID_PE; + /* Calculate available PE for required VFs */ - mutex_lock(&phb->ioda.pe_alloc_mutex); - pdn->offset = bitmap_find_next_zero_area( - phb->ioda.pe_alloc, phb->ioda.total_pe, - 0, num_vfs, 0); - if (pdn->offset >= phb->ioda.total_pe) { + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + pdn->pe_num_map[i] = pnv_ioda_alloc_pe(phb); + if (pdn->pe_num_map[i] == IODA_INVALID_PE) { + ret = -EBUSY; + goto m64_failed; + } + } + } else { + mutex_lock(&phb->ioda.pe_alloc_mutex); + *pdn->pe_num_map = bitmap_find_next_zero_area( + phb->ioda.pe_alloc, phb->ioda.total_pe, + 0, num_vfs, 0); + if (*pdn->pe_num_map >= phb->ioda.total_pe) { + mutex_unlock(&phb->ioda.pe_alloc_mutex); + dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); + kfree(pdn->pe_num_map); + return -EBUSY; + } + bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs); mutex_unlock(&phb->ioda.pe_alloc_mutex); - dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); - pdn->offset = 0; - return -EBUSY; } - bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs); pdn->num_vfs = num_vfs; - mutex_unlock(&phb->ioda.pe_alloc_mutex); /* Assign M64 window accordingly */ ret = pnv_pci_vf_assign_m64(pdev, num_vfs); @@ -1528,7 +1563,7 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) * Otherwise, the PE# for the VF will conflict with others. */ if (!pdn->m64_single_mode) { - ret = pnv_pci_vf_resource_shift(pdev, pdn->offset); + ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map); if (ret) goto m64_failed; } @@ -1540,8 +1575,16 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) return 0; m64_failed: - bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); - pdn->offset = 0; + if (pdn->m64_single_mode) { + for (i = 0; i < num_vfs; i++) { + if