From 9859f0c7e05b424a7a42032e639a8c44dd537328 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 14 Dec 2016 10:09:45 +0530 Subject: powerpc/mm: Remove the debug hugepd_ok check We don't do this for other page table entries. So lets keep this simple and always return false for hugepd check on a 64K page size config. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable-64k.h | 5 ----- arch/powerpc/mm/hugetlbpage-hash64.c | 21 --------------------- 2 files changed, 26 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h index 0d2845b44763..198aff33c380 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h @@ -35,10 +35,6 @@ static inline int pgd_huge(pgd_t pgd) } #define pgd_huge pgd_huge -#ifdef CONFIG_DEBUG_VM -extern int hugepd_ok(hugepd_t hpd); -#define is_hugepd(hpd) (hugepd_ok(hpd)) -#else /* * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't * need to setup hugepage directory for them. Our pte and page directory format @@ -49,7 +45,6 @@ static inline int hugepd_ok(hugepd_t hpd) return 0; } #define is_hugepd(pdep) 0 -#endif /* CONFIG_DEBUG_VM */ #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 37b5f91e381b..a84bb44497f9 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -116,24 +116,3 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } - -#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_DEBUG_VM) -/* - * This enables us to catch the wrong page directory format - * Moved here so that we can use WARN() in the call. - */ -int hugepd_ok(hugepd_t hpd) -{ - bool is_hugepd; - unsigned long hpdval; - - hpdval = hpd_val(hpd); - - /* - * We should not find this format in page directory, warn otherwise. - */ - is_hugepd = (((hpdval & 0x3) == 0x0) && ((hpdval & HUGEPD_SHIFT_MASK) != 0)); - WARN(is_hugepd, "Found wrong page directory format\n"); - return 0; -} -#endif -- cgit v1.2.3 From 8ad43336b5c1ad9ac945148cb5e26a1200ccd45c Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 4 Jan 2017 08:19:12 +0530 Subject: powerpc/mm/4k: don't allocate larger pmd page table for 4k We now support THP with both 64k and 4K page size configuration for radix. (hash only support THP with 64K page size). Hence we will have CONFIG_TRANSPARENT_HUGEPAGE enabled for both PPC_64K and PPC_4K config. Since we only need large pmd page table with hash configuration (to store the slot information in the second half of the table) restrict the large pmd page table to THP and 64K configs. 
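To ground the "second half of the table" point above: with hash and THP, the pmd page table is allocated one power-of-two larger so that every pmd entry has a shadow slot holding the deposited page table pointer. A loose sketch of that layout, with names borrowed from the surrounding book3s code rather than taken from this patch:

static void sketch_deposit(pmd_t *pmdp, pgtable_t pgtable)
{
	/*
	 * Sketch only: the companion slot for a pmd entry lives
	 * PTRS_PER_PMD entries past it, i.e. in the second half of
	 * the doubled (H_PMD_INDEX_SIZE + 1) table.
	 */
	pgtable_t *slot = (pgtable_t *)pmdp + PTRS_PER_PMD;

	*slot = pgtable;
}
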
Signed-off-by: Aneesh Kumar K.V Reviewed-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 4c935f7504f7..f7b721bbf918 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -33,9 +33,9 @@ H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) #define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_PPC_64K_PAGES) /* - * only with hash we need to use the second half of pmd page table + * only with hash 64k we need to use the second half of pmd page table * to store pointer to deposited pgtable_t */ #define H_PMD_CACHE_INDEX (H_PMD_INDEX_SIZE + 1) -- cgit v1.2.3 From 14a41d6b7572026cf8fc88ee72e81b6b40db2ec0 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 13 Jan 2017 14:23:49 +1030 Subject: powerpc/powernv: Report size of OPAL memcons log The OPAL memory console is reported to be size zero, as we do not initialise the struct attr with any size information due to the size being variable. This leads users to think that the console is empty. Instead report the maximum size. Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-msglog.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c index 39d6ff9e5630..7a9cde0cfbd1 100644 --- a/arch/powerpc/platforms/powernv/opal-msglog.c +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -123,6 +123,10 @@ void __init opal_msglog_init(void) return; } + /* Report maximum size */ + opal_msglog_attr.size = be32_to_cpu(mc->ibuf_size) + + be32_to_cpu(mc->obuf_size); + opal_memcons = mc; } -- cgit v1.2.3 From b492f7e4e07a28e706db26cf4943bb0911435426 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 3 Nov 2016 16:10:55 +1100 Subject: powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold These functions compute an IP checksum by computing a 64-bit sum and folding it to 32 bits (the "nofold" in their names refers to folding down to 16 bits). However, doing (u32) (s + (s >> 32)) is not sufficient to fold a 64-bit sum to 32 bits correctly. The addition can produce a carry out from bit 31, which needs to be added in to the sum to produce the correct result. To fix this, we copy the from64to32() function from lib/checksum.c and use that. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/checksum.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 1e8fceb308a5..5b1a6e39afa7 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -53,17 +53,25 @@ static inline __sum16 csum_fold(__wsum sum) return (__force __sum16)(~((__force u32)sum + tmp) >> 16); } +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. 
*/ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { #ifdef __powerpc64__ - unsigned long s = (__force u32)sum; + u64 s = (__force u32)sum; s += (__force u32)saddr; s += (__force u32)daddr; s += proto + len; - s += (s >> 32); - return (__force __wsum) s; + return (__force __wsum) from64to32(s); #else __asm__("\n\ addc %0,%0,%1 \n\ @@ -123,8 +131,7 @@ static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl) for (i = 0; i < ihl - 1; i++, ptr++) s += *ptr; - s += (s >> 32); - return (__force __wsum)s; + return (__force __wsum)from64to32(s); #else __wsum sum, tmp; -- cgit v1.2.3 From d4fde568a34a93897dfb9ae64cfe9dda9d5c908c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 3 Nov 2016 16:15:42 +1100 Subject: powerpc/64: Use optimized checksum routines on little-endian Currently we have optimized hand-coded assembly checksum routines for big-endian 64-bit systems, but for little-endian we use the generic C routines. This modifies the optimized routines to work for little-endian. With this, we no longer need to enable CONFIG_GENERIC_CSUM. This also fixes a couple of comments in checksum_64.S so they accurately reflect what the associated instruction does. Signed-off-by: Paul Mackerras [mpe: Use the more common __BIG_ENDIAN__] Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/checksum.h | 4 ++++ arch/powerpc/lib/Makefile | 2 -- arch/powerpc/lib/checksum_64.S | 12 ++++++++++-- 4 files changed, 15 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a8ee573fe610..e022859340b7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -167,7 +167,7 @@ config PPC select HAVE_CC_STACKPROTECTOR config GENERIC_CSUM - def_bool CPU_LITTLE_ENDIAN + def_bool n config EARLY_PRINTK bool diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 5b1a6e39afa7..4e63787dc3be 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -70,7 +70,11 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, s += (__force u32)saddr; s += (__force u32)daddr; +#ifdef __BIG_ENDIAN__ s += proto + len; +#else + s += (proto + len) << 8; +#endif return (__force __wsum) from64to32(s); #else __asm__("\n\ diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 309361e86523..0e649d72fe8d 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -21,9 +21,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o -ifeq ($(CONFIG_GENERIC_CSUM),) obj-y += checksum_$(BITS).o checksum_wrappers.o -endif obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index d0d311e108ff..d7f1a966136e 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -36,7 +36,7 @@ _GLOBAL(__csum_partial) * work to calculate the correct checksum, we ignore that case * and take the potential slowdown of unaligned loads. */ - rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + rldicl. 
r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */ beq .Lcsum_aligned li r7,4 @@ -168,8 +168,12 @@ _GLOBAL(__csum_partial) beq .Lcsum_finish lbz r6,0(r3) +#ifdef __BIG_ENDIAN__ sldi r9,r6,8 /* Pad the byte out to 16 bits */ adde r0,r0,r9 +#else + adde r0,r0,r6 +#endif .Lcsum_finish: addze r0,r0 /* add in final carry */ @@ -224,7 +228,7 @@ _GLOBAL(csum_partial_copy_generic) * If the source and destination are relatively unaligned we only * align the source. This keeps things simple. */ - rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */ beq .Lcopy_aligned li r9,4 @@ -386,8 +390,12 @@ dstnr; sth r6,0(r4) beq .Lcopy_finish srcnr; lbz r6,0(r3) +#ifdef __BIG_ENDIAN__ sldi r9,r6,8 /* Pad the byte out to 16 bits */ adde r0,r0,r9 +#else + adde r0,r0,r6 +#endif dstnr; stb r6,0(r4) .Lcopy_finish: -- cgit v1.2.3 From 4ab2537c4204b976e4ca350bbdc193b4649cad28 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 8 Dec 2016 09:12:13 +0530 Subject: powerpc/mm: Fixup wrong LPCR_VRMASD value In commit a4b349540a26af ("powerpc/mm: Cleanup LPCR defines") we updated LPCR_VRMASD wrongly as below. -#define LPCR_VRMASD (0x1ful << (63-16)) +#define LPCR_VRMASD_SH 47 +#define LPCR_VRMASD (ASM_CONST(1) << LPCR_VRMASD_SH) We initialize the VRMA bits in LPCR to 0x00 in kvm. Hence using a different mask value as above while updating lpcr should not have any impact. This patch updates it to the correct value. Fixes: a4b349540a26 ("powerpc/mm: Cleanup LPCR defines") Reported-by: Ram Pai Signed-off-by: Aneesh Kumar K.V Signed-off-by: Jia He Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 0d4531aa2052..818c4e878e60 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -338,7 +338,7 @@ #define LPCR_DPFD_SH 52 #define LPCR_DPFD (ASM_CONST(7) << LPCR_DPFD_SH) #define LPCR_VRMASD_SH 47 -#define LPCR_VRMASD (ASM_CONST(1) << LPCR_VRMASD_SH) +#define LPCR_VRMASD (ASM_CONST(0x1f) << LPCR_VRMASD_SH) #define LPCR_VRMA_L ASM_CONST(0x0008000000000000) #define LPCR_VRMA_LP0 ASM_CONST(0x0001000000000000) #define LPCR_VRMA_LP1 ASM_CONST(0x0000800000000000) -- cgit v1.2.3 From fb37e12896c1ba0407012fe8cdc0b054da063b6f Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 24 Aug 2016 22:26:37 +0200 Subject: powerpc/powernv/pci: Use kmalloc_array() in two functions Use kmalloc_array(), which checks for overflow of the multiplication, rather than doing it by hand. 
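As a minimal sketch of why this matters (hypothetical helper, not taken from the patch): a large element count can wrap the open-coded multiplication and make kmalloc() return an undersized buffer, whereas kmalloc_array() fails cleanly.

#include <linux/slab.h>

static u32 *alloc_map(size_t n)
{
	/*
	 * Open-coded form: kmalloc(n * sizeof(u32), GFP_KERNEL) can
	 * silently under-allocate if n * sizeof(u32) overflows.
	 * kmalloc_array() performs the same allocation but returns
	 * NULL when the multiplication would overflow.
	 */
	return kmalloc_array(n, sizeof(u32), GFP_KERNEL);
}
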
Signed-off-by: Markus Elfring Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index b07680cd2518..939de0c2e00f 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1326,7 +1326,9 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs) else m64_bars = 1; - pdn->m64_map = kmalloc(sizeof(*pdn->m64_map) * m64_bars, GFP_KERNEL); + pdn->m64_map = kmalloc_array(m64_bars, + sizeof(*pdn->m64_map), + GFP_KERNEL); if (!pdn->m64_map) return -ENOMEM; /* Initialize the m64_map to IODA_INVALID_M64 */ @@ -1593,8 +1595,9 @@ int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) /* Allocating pe_num_map */ if (pdn->m64_single_mode) - pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map) * num_vfs, - GFP_KERNEL); + pdn->pe_num_map = kmalloc_array(num_vfs, + sizeof(*pdn->pe_num_map), + GFP_KERNEL); else pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL); -- cgit v1.2.3 From dbecd5093043faa9da83c720ed0e08ec1a5b410e Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 24 Jan 2017 09:49:52 +1100 Subject: powerpc/kernel: Remove nested if statements in rtas_initialize() This removes the unnecessary nested if statements in function rtas_initialize(), to simplify the code. No functional changes introduced. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtas.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 112cc3b2ee1a..9759dcbd055d 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1145,31 +1145,30 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) void __init rtas_initialize(void) { unsigned long rtas_region = RTAS_INSTANTIATE_MAX; + const __be32 *basep, *entryp, *sizep; /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. 
*/ rtas.dev = of_find_node_by_name(NULL, "rtas"); - if (rtas.dev) { - const __be32 *basep, *entryp, *sizep; - - basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); - sizep = of_get_property(rtas.dev, "rtas-size", NULL); - if (basep != NULL && sizep != NULL) { - rtas.base = __be32_to_cpu(*basep); - rtas.size = __be32_to_cpu(*sizep); - entryp = of_get_property(rtas.dev, - "linux,rtas-entry", NULL); - if (entryp == NULL) /* Ugh */ - rtas.entry = rtas.base; - else - rtas.entry = __be32_to_cpu(*entryp); - } else - rtas.dev = NULL; - } if (!rtas.dev) return; + basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); + sizep = of_get_property(rtas.dev, "rtas-size", NULL); + if (basep == NULL || sizep == NULL) { + rtas.dev = NULL; + return; + } + + rtas.base = __be32_to_cpu(*basep); + rtas.size = __be32_to_cpu(*sizep); + entryp = of_get_property(rtas.dev, "linux,rtas-entry", NULL); + if (entryp == NULL) /* Ugh */ + rtas.entry = rtas.base; + else + rtas.entry = __be32_to_cpu(*entryp); + /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any */ -- cgit v1.2.3 From de6d2d1b7bf2b3a8d5e57ebffad9f2688fe00a7a Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 24 Jan 2017 09:49:53 +1100 Subject: powerpc/kernel: Use of_property_read_u32() in rtas_initialize() This uses of_property_read_u32() in rtas_initialize() so that we needn't explicitly care the CPU's endian. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtas.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 9759dcbd055d..ba5a4cc0e5b6 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1145,7 +1145,8 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) void __init rtas_initialize(void) { unsigned long rtas_region = RTAS_INSTANTIATE_MAX; - const __be32 *basep, *entryp, *sizep; + u32 base, size, entry; + int no_base, no_size, no_entry; /* Get RTAS dev node and fill up our "rtas" structure with infos * about it. @@ -1154,20 +1155,17 @@ void __init rtas_initialize(void) if (!rtas.dev) return; - basep = of_get_property(rtas.dev, "linux,rtas-base", NULL); - sizep = of_get_property(rtas.dev, "rtas-size", NULL); - if (basep == NULL || sizep == NULL) { + no_base = of_property_read_u32(rtas.dev, "linux,rtas-base", &base); + no_size = of_property_read_u32(rtas.dev, "rtas-size", &size); + if (no_base || no_size) { rtas.dev = NULL; return; } - rtas.base = __be32_to_cpu(*basep); - rtas.size = __be32_to_cpu(*sizep); - entryp = of_get_property(rtas.dev, "linux,rtas-entry", NULL); - if (entryp == NULL) /* Ugh */ - rtas.entry = rtas.base; - else - rtas.entry = __be32_to_cpu(*entryp); + rtas.base = base; + rtas.size = size; + no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry); + rtas.entry = no_entry ? rtas.base : entry; /* If RTAS was found, allocate the RMO buffer for it and look for * the stop-self token if any -- cgit v1.2.3 From 8b2577832100706124fd6fe09f887992c8d7c0c6 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 24 Jan 2017 09:49:54 +1100 Subject: powerpc/kernel: Fix unbalanced refcount on RTAS device node The RTAS device-tree node's refcount has been increased by one in the function call of_find_node_by_name(), but it's missed to be decreased by one in the error path. It leads to unbalanced refcount on RTAS device-tree node. 
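Taken together, the three rtas_initialize() patches in this series reduce to a single idiom; a condensed sketch with a made-up helper name:

#include <linux/of.h>

static int rtas_probe_sketch(void)
{
	struct device_node *np;
	u32 base;

	/* of_find_node_by_name() returns the node with its refcount
	 * raised; every exit path must drop that reference. */
	np = of_find_node_by_name(NULL, "rtas");
	if (!np)
		return -ENODEV;

	/* of_property_read_u32() does the endian conversion and
	 * returns non-zero if the property is absent or malformed. */
	if (of_property_read_u32(np, "linux,rtas-base", &base)) {
		of_node_put(np);	/* balance the refcount on error */
		return -ENODEV;
	}

	/* success: the reference is intentionally kept with the node */
	return 0;
}
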
This fixes above issue by decreasing RTAS device-tree node's refcount in error path. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/rtas.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index ba5a4cc0e5b6..b8a4987f58cf 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1158,6 +1158,7 @@ void __init rtas_initialize(void) no_base = of_property_read_u32(rtas.dev, "linux,rtas-base", &base); no_size = of_property_read_u32(rtas.dev, "rtas-size", &size); if (no_base || no_size) { + of_node_put(rtas.dev); rtas.dev = NULL; return; } -- cgit v1.2.3 From 3c4b66a6d0d2b9f2418f9a09d528e42e2dc18acf Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 21 Jan 2017 15:30:15 +0100 Subject: powerpc/sstep: Return directly after a failed address_ok() in emulate_step() Setting err and going to ldst_done just returns 0, without using err, so just return 0 directly. We already do that for other call sites in this function. Signed-off-by: Markus Elfring [mpe: Rewrite change log] Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 06c7e9b88408..846dba2c6360 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1803,9 +1803,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ - err = -EFAULT; if (!address_ok(regs, op.ea, size)) - goto ldst_done; + return 0; err = 0; switch (size) { case 4: @@ -1828,9 +1827,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ - err = -EFAULT; if (!address_ok(regs, op.ea, size)) - goto ldst_done; + return 0; err = 0; switch (size) { case 4: -- cgit v1.2.3 From a967f161abc7c4a6936ceb15737f75c52bfd07f2 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 21 Jan 2017 16:10:50 +0100 Subject: powerpc/mm: Return directly after a failed __copy_from_user() in sys_subpage_prot() This function already has multiple exit points, so there's no harm adding another. Although it looks odd to return directly in a function which takes a lock, we've actually just dropped the mmap_sem in this code, so there's really no reason to go via a label. And it means we can drop the unhelpfully named out2 label. 
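The justification generalizes: an early return is safe exactly when nothing is held at that point. Condensed to its shape (a heavily trimmed sketch of the function above, not the full hunk):

	up_write(&mm->mmap_sem);	/* lock dropped before the copy */

	if (__copy_from_user(spp, map, nw * sizeof(u32)))
		return -EFAULT;		/* nothing held, return directly */

	down_write(&mm->mmap_sem);	/* retaken for the next chunk */
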
Signed-off-by: Markus Elfring [mpe: Rewrite change log] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/subpage-prot.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 5c096c01e8bd..94210940112f 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -248,9 +248,8 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) nw = (next - addr) >> PAGE_SHIFT; up_write(&mm->mmap_sem); - err = -EFAULT; if (__copy_from_user(spp, map, nw * sizeof(u32))) - goto out2; + return -EFAULT; map += nw; down_write(&mm->mmap_sem); @@ -262,6 +261,5 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) err = 0; out: up_write(&mm->mmap_sem); - out2: return err; } -- cgit v1.2.3 From 052de33ca4f840bf35587eacdf78b3bf8d347bb8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Jan 2017 22:40:00 +0530 Subject: powerpc/bpf: Remove redundant check for non-null image We have a check earlier to ensure we don't proceed if image is NULL. As such, the redundant check can be removed. Signed-off-by: Daniel Borkmann [Added similar changes for classic BPF JIT] Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Signed-off-by: Michael Ellerman --- arch/powerpc/net/bpf_jit_comp.c | 17 +++++++++-------- arch/powerpc/net/bpf_jit_comp64.c | 16 ++++++++-------- 2 files changed, 17 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 7e706f36e364..f9941b3b5770 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -662,16 +662,17 @@ void bpf_jit_compile(struct bpf_prog *fp) */ bpf_jit_dump(flen, proglen, pass, code_base); - if (image) { - bpf_flush_icache(code_base, code_base + (proglen/4)); + bpf_flush_icache(code_base, code_base + (proglen/4)); + #ifdef CONFIG_PPC64 - /* Function descriptor nastiness: Address + TOC */ - ((u64 *)image)[0] = (u64)code_base; - ((u64 *)image)[1] = local_paca->kernel_toc; + /* Function descriptor nastiness: Address + TOC */ + ((u64 *)image)[0] = (u64)code_base; + ((u64 *)image)[1] = local_paca->kernel_toc; #endif - fp->bpf_func = (void *)image; - fp->jited = 1; - } + + fp->bpf_func = (void *)image; + fp->jited = 1; + out: kfree(addrs); return; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 73a5cf18fd84..935a10f77cfe 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -1046,16 +1046,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) */ bpf_jit_dump(flen, proglen, pass, code_base); - if (image) { - bpf_flush_icache(bpf_hdr, image + alloclen); + bpf_flush_icache(bpf_hdr, image + alloclen); + #ifdef PPC64_ELF_ABI_v1 - /* Function descriptor nastiness: Address + TOC */ - ((u64 *)image)[0] = (u64)code_base; - ((u64 *)image)[1] = local_paca->kernel_toc; + /* Function descriptor nastiness: Address + TOC */ + ((u64 *)image)[0] = (u64)code_base; + ((u64 *)image)[1] = local_paca->kernel_toc; #endif - fp->bpf_func = (void *)image; - fp->jited = 1; - } + + fp->bpf_func = (void *)image; + fp->jited = 1; out: kfree(addrs); -- cgit v1.2.3 From 10528b9c45cfb9e8f45217ef2f5ef8b876bbd3f5 Mon Sep 17 00:00:00 2001 From: "Naveen N. 
Rao" Date: Fri, 13 Jan 2017 22:40:01 +0530 Subject: powerpc/bpf: Flush the entire JIT buffer With bpf_jit_binary_alloc(), we allocate at a page granularity and fill the rest of the space with illegal instructions to mitigate BPF spraying attacks, while having the actual JIT'ed BPF program at a random location within the allocated space. Under this scenario, it would be better to flush the entire allocated buffer rather than just the part containing the actual program. We already flush the buffer from start to the end of the BPF program. Extend this to include the illegal instructions after the BPF program. Signed-off-by: Naveen N. Rao Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Michael Ellerman --- arch/powerpc/net/bpf_jit_comp64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 935a10f77cfe..d4ed7a0872b1 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -1046,8 +1046,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) */ bpf_jit_dump(flen, proglen, pass, code_base); - bpf_flush_icache(bpf_hdr, image + alloclen); - #ifdef PPC64_ELF_ABI_v1 /* Function descriptor nastiness: Address + TOC */ ((u64 *)image)[0] = (u64)code_base; @@ -1057,6 +1055,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) fp->bpf_func = (void *)image; fp->jited = 1; + bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + out: kfree(addrs); -- cgit v1.2.3 From d3918e7fd4a27564f93ec46d0359a9739c5deb8d Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 22 Dec 2016 04:29:25 +1000 Subject: KVM: PPC: Book3S: Change interrupt call to reduce scratch space use on HV Change the calling convention to put the trap number together with CR in two halves of r12, which frees up HSTATE_SCRATCH2 in the HV handler. The 64-bit PR handler entry translates the calling convention back to match the previous call convention (i.e., shared with 32-bit), for simplicity. 
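The convention change is easiest to see in C; the following is an illustration only, the authoritative version being the assembly in the diff below:

#include <linux/types.h>

/* guest CR in the high 32 bits, trap vector in the low 32 bits */
static inline u64 pack_r12(u32 guest_cr, u32 trap)
{
	return ((u64)guest_cr << 32) | trap;	/* sldi + ori */
}

static inline u32 r12_guest_cr(u64 r12)		/* srdi r4,r12,32 */
{
	return r12 >> 32;
}

static inline u32 r12_trap(u64 r12)		/* clrldi r12,r12,32 */
{
	return (u32)r12;
}
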
Signed-off-by: Nicholas Piggin Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 24 +++++++++++------------- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 16 +++++++++------- arch/powerpc/kvm/book3s_segment.S | 25 ++++++++++++++++++------- 3 files changed, 38 insertions(+), 27 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 9a3eee661297..a02a268bde6b 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -233,7 +233,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #endif -#define __KVM_HANDLER_PROLOG(area, n) \ +#define __KVM_HANDLER(area, h, n) \ BEGIN_FTR_SECTION_NESTED(947) \ ld r10,area+EX_CFAR(r13); \ std r10,HSTATE_CFAR(r13); \ @@ -243,30 +243,28 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r10,HSTATE_PPR(r13); \ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ ld r10,area+EX_R10(r13); \ - stw r9,HSTATE_SCRATCH1(r13); \ - ld r9,area+EX_R9(r13); \ std r12,HSTATE_SCRATCH0(r13); \ - -#define __KVM_HANDLER(area, h, n) \ - __KVM_HANDLER_PROLOG(area, n) \ - li r12,n; \ + sldi r12,r9,32; \ + ori r12,r12,(n); \ + ld r9,area+EX_R9(r13); \ b kvmppc_interrupt #define __KVM_HANDLER_SKIP(area, h, n) \ cmpwi r10,KVM_GUEST_MODE_SKIP; \ - ld r10,area+EX_R10(r13); \ beq 89f; \ - stw r9,HSTATE_SCRATCH1(r13); \ BEGIN_FTR_SECTION_NESTED(948) \ - ld r9,area+EX_PPR(r13); \ - std r9,HSTATE_PPR(r13); \ + ld r10,area+EX_PPR(r13); \ + std r10,HSTATE_PPR(r13); \ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \ - ld r9,area+EX_R9(r13); \ + ld r10,area+EX_R10(r13); \ std r12,HSTATE_SCRATCH0(r13); \ - li r12,n; \ + sldi r12,r9,32; \ + ori r12,r12,(n); \ + ld r9,area+EX_R9(r13); \ b kvmppc_interrupt; \ 89: mtocrf 0x80,r9; \ ld r9,area+EX_R9(r13); \ + ld r10,area+EX_R10(r13); \ b kvmppc_skip_##h##interrupt #ifdef CONFIG_KVM_BOOK3S_64_HANDLER diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 9338a818e05c..11882aac8216 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1057,19 +1057,18 @@ hdec_soon: kvmppc_interrupt_hv: /* * Register contents: - * R12 = interrupt vector + * R12 = (guest CR << 32) | interrupt vector * R13 = PACA - * guest CR, R12 saved in shadow VCPU SCRATCH1/0 + * guest R12 saved in shadow VCPU SCRATCH0 * guest R13 saved in SPRN_SCRATCH0 */ - std r9, HSTATE_SCRATCH2(r13) - + std r9, HSTATE_SCRATCH1(r13) lbz r9, HSTATE_IN_GUEST(r13) cmpwi r9, KVM_GUEST_MODE_HOST_HV beq kvmppc_bad_host_intr #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE cmpwi r9, KVM_GUEST_MODE_GUEST - ld r9, HSTATE_SCRATCH2(r13) + ld r9, HSTATE_SCRATCH1(r13) beq kvmppc_interrupt_pr #endif /* We're now back in the host but in guest MMU context */ @@ -1089,13 +1088,14 @@ kvmppc_interrupt_hv: std r6, VCPU_GPR(R6)(r9) std r7, VCPU_GPR(R7)(r9) std r8, VCPU_GPR(R8)(r9) - ld r0, HSTATE_SCRATCH2(r13) + ld r0, HSTATE_SCRATCH1(r13) std r0, VCPU_GPR(R9)(r9) std r10, VCPU_GPR(R10)(r9) std r11, VCPU_GPR(R11)(r9) ld r3, HSTATE_SCRATCH0(r13) - lwz r4, HSTATE_SCRATCH1(r13) std r3, VCPU_GPR(R12)(r9) + /* CR is in the high half of r12 */ + srdi r4, r12, 32 stw r4, VCPU_CR(r9) BEGIN_FTR_SECTION ld r3, HSTATE_CFAR(r13) @@ -1114,6 +1114,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) mfspr r11, SPRN_SRR1 std r10, VCPU_SRR0(r9) std r11, VCPU_SRR1(r9) + /* trap is in the low half of r12, clear CR from the high half */ + clrldi r12, r12, 32 andi. 
r0, r12, 2 /* need to read HSRR0/1? */ beq 1f mfspr r10, SPRN_HSRR0 diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S index ca8f174289bb..68e45080cf93 100644 --- a/arch/powerpc/kvm/book3s_segment.S +++ b/arch/powerpc/kvm/book3s_segment.S @@ -167,20 +167,31 @@ kvmppc_handler_trampoline_enter_end: * * *****************************************************************************/ -.global kvmppc_handler_trampoline_exit -kvmppc_handler_trampoline_exit: - .global kvmppc_interrupt_pr kvmppc_interrupt_pr: + /* 64-bit entry. Register usage at this point: + * + * SPRG_SCRATCH0 = guest R13 + * R12 = (guest CR << 32) | exit handler id + * R13 = PACA + * HSTATE.SCRATCH0 = guest R12 + */ +#ifdef CONFIG_PPC64 + /* Match 32-bit entry */ + rotldi r12, r12, 32 /* Flip R12 halves for stw */ + stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */ + srdi r12, r12, 32 /* shift trap into low half */ +#endif +.global kvmppc_handler_trampoline_exit +kvmppc_handler_trampoline_exit: /* Register usage at this point: * - * SPRG_SCRATCH0 = guest R13 - * R12 = exit handler id - * R13 = shadow vcpu (32-bit) or PACA (64-bit) + * SPRG_SCRATCH0 = guest R13 + * R12 = exit handler id + * R13 = shadow vcpu (32-bit) or PACA (64-bit) * HSTATE.SCRATCH0 = guest R12 * HSTATE.SCRATCH1 = guest CR - * */ /* Save registers */ -- cgit v1.2.3 From 7ede531773ea69fa56b02a873ed83ce3507eb8d5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 22 Dec 2016 04:29:26 +1000 Subject: KVM: PPC: Book3S: Move 64-bit KVM interrupt handler out from alt section A subsequent patch to make KVM handlers relocation-safe makes them unusable from within alt section "else" cases (due to the way fixed addresses are taken from within fixed section head code). Stop open-coding the KVM handlers, and add them both as normal. A more optimal fix may be to allow some level of alternate feature patching in the exception macros themselves, but for now this will do. The TRAMP_KVM handlers must be moved to the "virt" fixed section area (name is arbitrary) in order to be closer to .text and avoid the dreaded "relocation truncated to fit" error. 
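Background on the error being avoided (an ISA fact, not something stated in the patch): the direct branch that reaches kvmppc_interrupt is an I-form branch with a 24-bit signed word displacement, so its target must lie within roughly 32MB; handlers placed in a distant fixed section can exceed that reach, which the linker reports as "relocation truncated to fit".

/* reach of a PowerPC I-form branch: 24-bit signed word offset << 2 */
#define LI_BITS	24
static const long b_reach_fwd  = ((1L << (LI_BITS - 1)) - 1) << 2;  /* ~+32 MB */
static const long b_reach_back = -((1L << (LI_BITS - 1)) << 2);     /*  -32 MB */
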
Signed-off-by: Nicholas Piggin Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/head-64.h | 2 +- arch/powerpc/kernel/exceptions-64s.S | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index fca7033839a9..9bd81619d090 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -218,7 +218,7 @@ name: #ifdef CONFIG_KVM_BOOK3S_64_HANDLER #define TRAMP_KVM_BEGIN(name) \ - TRAMP_REAL_BEGIN(name) + TRAMP_VIRT_BEGIN(name) #else #define TRAMP_KVM_BEGIN(name) #endif diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d39d6118c6e9..89b4f122aec6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -717,13 +717,9 @@ hardware_interrupt_hv: BEGIN_FTR_SECTION _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_HV, SOFTEN_TEST_HV) -do_kvm_H0x500: - KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502) FTR_SECTION_ELSE _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt_common, EXC_STD, SOFTEN_TEST_PR) -do_kvm_0x500: - KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500) ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) EXC_REAL_END(hardware_interrupt, 0x500, 0x600) @@ -737,6 +733,8 @@ hardware_interrupt_relon_hv: ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) EXC_VIRT_END(hardware_interrupt, 0x4500, 0x4600) +TRAMP_KVM(PACA_EXGEN, 0x500) +TRAMP_KVM_HV(PACA_EXGEN, 0x500) EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ) -- cgit v1.2.3 From 79270e0a3fd124388a0407f9edbd6ace75eacb69 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sat, 28 Jan 2017 21:18:40 +0530 Subject: powerpc/mm/hash: Properly mask the ESID bits when building proto VSID The proto VSID is built using both the MMU context id and effective segment ID (ESID). We should not have overlapping bits between those. That could result in us having a VSID collision. With the current code we missed masking the top bits of the ESID. This implies for kernel address we ended up using the top 4 bits of the ESID as part of the proto VSID, which is wrong. The current code use the top 4 context values (0x7fffc - 0x7ffff) for the kernel. With those context IDs used for the kernel, we don't run into VSID collisions because we get the same proto VSID irrespective of whether we mask the ESID bits or not. eg: ea = 0xf000000000000000 context = 0x7ffff w/out masking: proto_vsid = (0x7ffff << 6 | 0xf000000000000000 >> 40) = (0x1ffffc0 | 0xf00000) = 0x1ffffc0 with masking: proto_vsid = (0x7ffff << 6 | ((0xf000000000000000 >> 40) & 0x3f)) = (0x1ffffc0 | (0xf00000 & 0x3f)) = 0x1ffffc0 | 0) = 0x1ffffc0 So although there is no bug, the code is still overly subtle, so fix it to save ourselves pain in future. 
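The worked example above checks out mechanically; a small userspace sketch of the 1T-segment case, using the constants this patch defines:

#include <stdint.h>
#include <stdio.h>

#define SID_SHIFT_1T		40
#define ESID_BITS_1T		6
#define ESID_BITS_1T_MASK	((1UL << ESID_BITS_1T) - 1)

int main(void)
{
	uint64_t ea = 0xf000000000000000UL;
	uint64_t context = 0x7ffff;

	/* without masking, the top ESID bits bleed into the OR ... */
	uint64_t unmasked = (context << ESID_BITS_1T) |
			    (ea >> SID_SHIFT_1T);
	/* ... with masking, only the low ESID_BITS_1T bits survive */
	uint64_t masked = (context << ESID_BITS_1T) |
			  ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK);

	/* both print 0x1ffffc0 for the top kernel context, as argued */
	printf("%#lx %#lx\n", (unsigned long)unmasked,
	       (unsigned long)masked);
	return 0;
}
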
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 2e6a823fa502..823015cff149 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -525,6 +525,9 @@ extern void slb_set_size(u16 size); #define ESID_BITS 18 #define ESID_BITS_1T 6 +#define ESID_BITS_MASK ((1 << ESID_BITS) - 1) +#define ESID_BITS_1T_MASK ((1 << ESID_BITS_1T) - 1) + /* * 256MB segment * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments @@ -660,9 +663,9 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, if (ssize == MMU_SEGSIZE_256M) return vsid_scramble((context << ESID_BITS) - | (ea >> SID_SHIFT), 256M); + | ((ea >> SID_SHIFT) & ESID_BITS_MASK), 256M); return vsid_scramble((context << ESID_BITS_1T) - | (ea >> SID_SHIFT_1T), 1T); + | ((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK), 1T); } /* -- cgit v1.2.3 From f2a5e8f0023eba847ad2adb145b2f631934bb12b Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 24 Oct 2016 23:51:51 +0530 Subject: powerpc/fadump: Fix the race in crash_fadump(). There are chances that multiple CPUs can call crash_fadump() simultaneously and would start duplicating same info to vmcoreinfo ELF note section. This causes makedumpfile to fail during kdump capture. One example is, triggering dumprestart from HMC which sends system reset to all the CPUs at once. makedumpfile --dump-dmesg /proc/vmcore read_vmcoreinfo_basic_info: Invalid data in /tmp/vmcoreinfoyjgxlL: CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971 makedumpfile Failed. Running makedumpfile --dump-dmesg /proc/vmcore failed (1). makedumpfile -d 31 -l /proc/vmcore read_vmcoreinfo_basic_info: Invalid data in /tmp/vmcoreinfo1mmVdO: CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971CRASHTIME=1475605971 makedumpfile Failed. Running makedumpfile -d 31 -l /proc/vmcore failed (1). Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/fadump.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 8f0c7c5d93f2..8ff0dd4e77a7 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -406,12 +406,35 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) void crash_fadump(struct pt_regs *regs, const char *str) { struct fadump_crash_info_header *fdh = NULL; + int old_cpu, this_cpu; if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) return; + /* + * old_cpu == -1 means this is the first CPU which has come here, + * go ahead and trigger fadump. + * + * old_cpu != -1 means some other CPU has already on it's way + * to trigger fadump, just keep looping here. + */ + this_cpu = smp_processor_id(); + old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); + + if (old_cpu != -1) { + /* + * We can't loop here indefinitely. Wait as long as fadump + * is in force. If we race with fadump un-registration this + * loop will break and then we go down to normal panic path + * and reboot. 
If fadump is in force the first crashing + * cpu will definitely trigger fadump. + */ + while (fw_dump.dump_registered) + cpu_relax(); + return; + } + fdh = __va(fw_dump.fadumphdr_addr); - crashing_cpu = smp_processor_id(); fdh->crashing_cpu = crashing_cpu; crash_save_vmcoreinfo(); -- cgit v1.2.3 From 7656cd8e8e23ac4b059f4d96939cb73eb3121ae9 Mon Sep 17 00:00:00 2001 From: Reza Arbab Date: Thu, 13 Oct 2016 13:45:30 -0500 Subject: powerpc/mm: Simplify loop control in parse_numa_properties() The flow of the main loop in parse_numa_properties() is overly complicated. Simplify it to be less confusing and easier to read. No functional change. The end of the main loop in parse_numa_properties() looks like this: for_each_node_by_type(...) { ... if (!condition) { if (--ranges) goto new_range; else continue; } statement(); if (--ranges) goto new_range; /* else * continue; <- implicit, this is the end of the loop */ } The only effect of !condition is to skip execution of statement(). This can be rewritten in a simpler way: for_each_node_by_type(...) { ... if (condition) statement(); if (--ranges) goto new_range; } Signed-off-by: Reza Arbab Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b1099cb2f393..51fe1c5b6d71 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -786,14 +786,9 @@ new_range: fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid); node_set_online(nid); - if (!(size = numa_enforce_memory_limit(start, size))) { - if (--ranges) - goto new_range; - else - continue; - } - - memblock_set_node(start, size, &memblock.memory, nid); + size = numa_enforce_memory_limit(start, size); + if (size) + memblock_set_node(start, size, &memblock.memory, nid); if (--ranges) goto new_range; -- cgit v1.2.3 From 2a8628d41602dc9f988af051a657eef648eec5c0 Mon Sep 17 00:00:00 2001 From: Reza Arbab Date: Wed, 16 Nov 2016 10:45:03 -0600 Subject: powerpc/mm: Allow memory hotplug into an offline node Relax the check preventing us from hotplugging into an offline node. This limitation was added in commit 482ec7c403d2 ("[PATCH] powerpc numa: Support sparse online node map") to prevent adding resources to an uninitialized node. These days, there is no harm in doing so. The addition will actually cause the node to be initialized and onlined; add_memory_resource() calls hotadd_new_pgdat() (if necessary) and node_set_online(). Signed-off-by: Reza Arbab Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 51fe1c5b6d71..16267ff8c86c 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1093,7 +1093,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) nid = hot_add_node_scn_to_nid(scn_addr); } - if (nid < 0 || !node_online(nid)) + if (nid < 0 || !node_possible(nid)) nid = first_online_node; return nid; -- cgit v1.2.3 From 1d0761d2557d1540727723e4f05395d53321d555 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Wed, 14 Dec 2016 13:36:51 +1100 Subject: powerpc/powernv: Initialise nest mmu POWER9 contains an off core mmu called the nest mmu (NMMU). This is used by other hardware units on the chip to translate virtual addresses into real addresses. 
The unit attempting an address translation provides the majority of the context required for the translation request except for the base address of the partition table (ie. the PTCR) which needs to be programmed into the NMMU. This patch adds a call to OPAL to set the PTCR for the nest mmu in opal_init(). Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal-api.h | 3 ++- arch/powerpc/include/asm/opal.h | 1 + arch/powerpc/include/asm/powernv.h | 19 +++++++++++++++++++ arch/powerpc/mm/pgtable-radix.c | 2 ++ arch/powerpc/mm/pgtable_64.c | 6 +++++- arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/powernv/opal.c | 11 +++++++++++ 7 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/include/asm/powernv.h (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 0e2e57bcab50..a0aa285869b5 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -167,7 +167,8 @@ #define OPAL_INT_EOI 124 #define OPAL_INT_SET_MFRR 125 #define OPAL_PCI_TCE_KILL 126 -#define OPAL_LAST 126 +#define OPAL_NMMU_SET_PTCR 127 +#define OPAL_LAST 127 /* Device tree flags */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 5c7db0f1a708..08ddea966601 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -232,6 +232,7 @@ int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, int64_t opal_rm_pci_tce_kill(uint64_t phb_id, uint32_t kill_type, uint32_t pe_num, uint32_t tce_size, uint64_t dma_addr, uint32_t npages); +int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h new file mode 100644 index 000000000000..0e9c2402dd20 --- /dev/null +++ b/arch/powerpc/include/asm/powernv.h @@ -0,0 +1,19 @@ +/* + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _ASM_POWERNV_H +#define _ASM_POWERNV_H + +#ifdef CONFIG_PPC_POWERNV +extern void powernv_set_nmmu_ptcr(unsigned long ptcr); +#else +static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { } +#endif + +#endif /* _ASM_POWERNV_H */ diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index cfa53ccc8baf..086522b7c60f 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -438,6 +439,7 @@ void radix__mmu_cleanup_all(void) lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT); mtspr(SPRN_PTCR, 0); + powernv_set_nmmu_ptcr(0); radix__flush_tlb_all(); } } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 8bca7f58afc4..4ee9c9d18760 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "mmu_decl.h" @@ -436,6 +437,7 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) void __init mmu_partition_table_init(void) { unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; + unsigned long ptcr; BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large."); partition_tb = __va(memblock_alloc_base(patb_size, patb_size, @@ -448,7 +450,9 @@ void __init mmu_partition_table_init(void) * update partition table control register, * 64 K size. */ - mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); + mtspr(SPRN_PTCR, ptcr); + powernv_set_nmmu_ptcr(ptcr); } void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 3aa40f1b20f5..f7c19c9c57ed 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -311,4 +311,5 @@ OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI); OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR); OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR); OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL); +OPAL_CALL(opal_nmmu_set_ptcr, OPAL_NMMU_SET_PTCR); OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 282293572dc8..86d9fde93c17 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -875,6 +875,17 @@ int opal_error_code(int rc) } } +void powernv_set_nmmu_ptcr(unsigned long ptcr) +{ + int rc; + + if (firmware_has_feature(FW_FEATURE_OPAL)) { + rc = opal_nmmu_set_ptcr(-1UL, ptcr); + if (rc != OPAL_SUCCESS && rc != OPAL_UNSUPPORTED) + pr_warn("%s: Unable to set nest mmu ptcr\n", __func__); + } +} + EXPORT_SYMBOL_GPL(opal_poll_events); EXPORT_SYMBOL_GPL(opal_rtc_read); EXPORT_SYMBOL_GPL(opal_rtc_write); -- cgit v1.2.3 From 616badd2fb499320d3ac3b54462f55dededd0e0f Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 10 Jan 2017 15:41:44 +1100 Subject: powerpc/powernv: Use OPAL call for TCE kill on NVLink2 Add detection of NPU2 PHBs. NPU2/NVLink2 has a different register layout for the TCE kill register therefore TCE invalidation should be done via the OPAL call rather than using the register directly as it is for PHB3 and NVLink1. This changes TCE invalidation to use the OPAL call in the case of a NPU2 PHB model. 
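For reference, the NPU2 detection this relies on is a device-tree compatible match; restated as a standalone helper (the wrapper name is made up, the strings come from the pci-ioda.c hunk below):

#include <linux/of.h>

static enum pnv_phb_model pnv_phb_model_sketch(struct device_node *np)
{
	if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
		return PNV_PHB_MODEL_NPU;	/* NVLink1: TCE kill register */
	if (of_device_is_compatible(np, "ibm,power9-npu-pciex"))
		return PNV_PHB_MODEL_NPU2;	/* NVLink2: OPAL call */
	return PNV_PHB_MODEL_UNKNOWN;
}
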
Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 9 ++++++++- arch/powerpc/platforms/powernv/pci.c | 7 +++++++ arch/powerpc/platforms/powernv/pci.h | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 939de0c2e00f..ddfa069c5e7e 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1953,7 +1953,12 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl, struct pnv_phb *phb = pe->phb; unsigned int shift = tbl->it_page_shift; - if (phb->type == PNV_PHB_NPU) { + /* + * NVLink1 can use the TCE kill register directly as + * it's the same as PHB3. NVLink2 is different and + * should go via the OPAL call. + */ + if (phb->model == PNV_PHB_MODEL_NPU) { /* * The NVLink hardware does not support TCE kill * per TCE entry so we have to invalidate @@ -3674,6 +3679,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->model = PNV_PHB_MODEL_PHB3; else if (of_device_is_compatible(np, "ibm,power8-npu-pciex")) phb->model = PNV_PHB_MODEL_NPU; + else if (of_device_is_compatible(np, "ibm,power9-npu-pciex")) + phb->model = PNV_PHB_MODEL_NPU2; else phb->model = PNV_PHB_MODEL_UNKNOWN; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index c6d554fe585c..eb835e977e33 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -940,6 +940,13 @@ void __init pnv_pci_init(void) for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb") pnv_pci_init_npu_phb(np); + /* + * Look for NPU2 PHBs which we treat mostly as NPU PHBs with + * the exception of TCE kill which requires an OPAL call. + */ + for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb") + pnv_pci_init_npu_phb(np); + /* Configure IOMMU DMA hooks */ set_pci_dma_ops(&dma_iommu_ops); } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index e64df7894d6e..e1d3e5526b54 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -19,6 +19,7 @@ enum pnv_phb_model { PNV_PHB_MODEL_P7IOC, PNV_PHB_MODEL_PHB3, PNV_PHB_MODEL_NPU, + PNV_PHB_MODEL_NPU2, }; #define PNV_PCI_DIAG_BUF_SIZE 8192 -- cgit v1.2.3 From 823b7bd5156a93872d9561b3f033dfe5cb80204e Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 25 Jan 2017 14:06:25 +0530 Subject: powernv:idle: Add IDLE_STATE_ENTER_SEQ_NORET macro Currently all the low-power idle states are expected to wake up at reset vector 0x100. Which is why the macro IDLE_STATE_ENTER_SEQ that puts the CPU to an idle state and never returns. On ISA v3.0, when the ESL and EC bits in the PSSCR are zero, the CPU is expected to wake up at the next instruction of the idle instruction. This patch adds a new macro named IDLE_STATE_ENTER_SEQ_NORET for the no-return variant and reuses the name IDLE_STATE_ENTER_SEQ for a variant that allows resuming operation at the instruction next to the idle-instruction. Acked-by: Balbir Singh Signed-off-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 5 ++++- arch/powerpc/kernel/exceptions-64s.S | 6 +++--- arch/powerpc/kernel/idle_book3s.S | 10 +++++----- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 3919332965af..0a3255b12587 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -21,7 +21,7 @@ extern u64 pnv_first_deep_stop_state; /* Idle state entry routines */ #ifdef CONFIG_PPC_P7_NAP -#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ +#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ std r0,0(r1); \ ptesync; \ @@ -29,6 +29,9 @@ extern u64 pnv_first_deep_stop_state; 1: cmpd cr0,r0,r0; \ bne 1b; \ IDLE_INST; \ + +#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ + IDLE_STATE_ENTER_SEQ(IDLE_INST) \ b . #endif /* CONFIG_PPC_P7_NAP */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d39d6118c6e9..069aac8af909 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -381,12 +381,12 @@ EXC_COMMON_BEGIN(machine_check_handle_early) lbz r3,PACA_THREAD_IDLE_STATE(r13) cmpwi r3,PNV_THREAD_NAP bgt 10f - IDLE_STATE_ENTER_SEQ(PPC_NAP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) /* No return */ 10: cmpwi r3,PNV_THREAD_SLEEP bgt 2f - IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) /* No return */ 2: @@ -400,7 +400,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early) */ ori r13,r13,1 SET_PACA(r13) - IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) /* No return */ 4: #endif diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 72dac0b58061..be90e2f62bba 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -205,7 +205,7 @@ pnv_enter_arch207_idle_mode: stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f - IDLE_STATE_ENTER_SEQ(PPC_NAP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) /* No return */ 2: /* Sleep or winkle */ @@ -239,7 +239,7 @@ pnv_fastsleep_workaround_at_entry: common_enter: /* common code for all the threads entering sleep or winkle */ bgt cr3,enter_winkle - IDLE_STATE_ENTER_SEQ(PPC_SLEEP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP) fastsleep_workaround_at_entry: ori r15,r15,PNV_CORE_IDLE_LOCK_BIT @@ -261,7 +261,7 @@ fastsleep_workaround_at_entry: enter_winkle: bl save_sprs_to_stack - IDLE_STATE_ENTER_SEQ(PPC_WINKLE) + IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) /* * r3 - requested stop state @@ -280,7 +280,7 @@ power_enter_stop: ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 bge 2f - IDLE_STATE_ENTER_SEQ(PPC_STOP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) 2: /* * Entering deep idle state. @@ -302,7 +302,7 @@ lwarx_loop_stop: bl save_sprs_to_stack - IDLE_STATE_ENTER_SEQ(PPC_STOP) + IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) _GLOBAL(power7_idle) /* Now check if user or arch enabled NAP mode */ -- cgit v1.2.3 From dd34c74c97b6c3ed1ac7caec0b46267142659aff Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 25 Jan 2017 14:06:26 +0530 Subject: powernv:stop: Rename pnv_arch300_idle_init to pnv_power9_idle_init Balbir pointed out that the name of the function pnv_arch300_idle_init was inconsistent with the names of the variables and functions pertaining to POWER9 features in book3s_idle.S. This patch renames pnv_arch300_idle_init to pnv_power9_idle_init. 
This patch does not change any behaviour. Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 479c25601612..57bec031291b 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -298,7 +298,7 @@ u64 pnv_deepest_stop_state; * @dt_idle_states: Number of idle state entries * Returns 0 on success */ -static int __init pnv_arch300_idle_init(struct device_node *np, u32 *flags, +static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags, int dt_idle_states) { u64 *psscr_val = NULL; @@ -373,7 +373,7 @@ static void __init pnv_probe_idle_states(void) } if (cpu_has_feature(CPU_FTR_ARCH_300)) { - if (pnv_arch300_idle_init(np, flags, dt_idle_states)) + if (pnv_power9_idle_init(np, flags, dt_idle_states)) goto out; } -- cgit v1.2.3 From 09206b600c76f20984e80d99f3b5343c79332a97 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Wed, 25 Jan 2017 14:06:28 +0530 Subject: powernv: Pass PSSCR value and mask to power9_idle_stop The power9_idle_stop method currently takes only the requested stop level as a parameter and picks up the rest of the PSSCR bits from a hand-coded macro. This is not a very flexible design, especially when the firmware has the capability to communicate the psscr value and the mask associated with a particular stop state via device tree. This patch modifies the power9_idle_stop API to take as parameters the PSSCR value and the PSSCR mask corresponding to the stop state that needs to be set. These PSSCR value and mask are respectively obtained by parsing the "ibm,cpu-idle-state-psscr" and "ibm,cpu-idle-state-psscr-mask" fields from the device tree. In addition to this, the patch adds support for handling stop states for which ESL and EC bits in the PSSCR are zero. As per the architecture, a wakeup from these stop states resumes execution from the subsequent instruction as opposed to waking up at the System Vector. The older firmware sets only the Requested Level (RL) field in the psscr and psscr-mask exposed in the device tree. For older firmware where psscr-mask=0xf, this patch will set the default sane values that the set for for remaining PSSCR fields (i.e PSLL, MTL, ESL, EC, and TR). For the new firmware, the patch will validate that the invariants required by the ISA for the psscr values are maintained by the firmware. This skiboot patch that exports fully populated PSSCR values and the mask for all the stop states can be found here: https://lists.ozlabs.org/pipermail/skiboot/2016-September/004869.html [Optimize the number of instructions before entering STOP with ESL=EC=0, validate the PSSCR values provided by the firimware maintains the invariants required as per the ISA suggested by Balbir Singh] Acked-by: Balbir Singh Signed-off-by: Gautham R. 
Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 44 ++++++++++ arch/powerpc/include/asm/processor.h | 3 +- arch/powerpc/kernel/idle_book3s.S | 30 ++++--- arch/powerpc/platforms/powernv/idle.c | 138 ++++++++++++++++++++++++++++--- arch/powerpc/platforms/powernv/powernv.h | 3 +- arch/powerpc/platforms/powernv/smp.c | 14 ++-- 6 files changed, 200 insertions(+), 32 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 0a3255b12587..fd321eb423cb 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -10,11 +10,55 @@ #define PNV_CORE_IDLE_LOCK_BIT 0x100 #define PNV_CORE_IDLE_THREAD_BITS 0x0FF +/* + * ============================ NOTE ================================= + * The older firmware populates only the RL field in the psscr_val and + * sets the psscr_mask to 0xf. On such a firmware, the kernel sets the + * remaining PSSCR fields to default values as follows: + * + * - ESL and EC bits are to 1. So wakeup from any stop state will be + * at vector 0x100. + * + * - MTL and PSLL are set to the maximum allowed value as per the ISA, + * i.e. 15. + * + * - The Transition Rate, TR is set to the Maximum value 3. + */ +#define PSSCR_HV_DEFAULT_VAL (PSSCR_ESL | PSSCR_EC | \ + PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ + PSSCR_MTL_MASK) + +#define PSSCR_HV_DEFAULT_MASK (PSSCR_ESL | PSSCR_EC | \ + PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ + PSSCR_MTL_MASK | PSSCR_RL_MASK) +#define PSSCR_EC_SHIFT 20 +#define PSSCR_ESL_SHIFT 21 +#define GET_PSSCR_EC(x) (((x) & PSSCR_EC) >> PSSCR_EC_SHIFT) +#define GET_PSSCR_ESL(x) (((x) & PSSCR_ESL) >> PSSCR_ESL_SHIFT) +#define GET_PSSCR_RL(x) ((x) & PSSCR_RL_MASK) + +#define ERR_EC_ESL_MISMATCH -1 +#define ERR_DEEP_STATE_ESL_MISMATCH -2 + #ifndef __ASSEMBLY__ extern u32 pnv_fastsleep_workaround_at_entry[]; extern u32 pnv_fastsleep_workaround_at_exit[]; extern u64 pnv_first_deep_stop_state; + +int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags); +static inline void report_invalid_psscr_val(u64 psscr_val, int err) +{ + switch (err) { + case ERR_EC_ESL_MISMATCH: + pr_warn("Invalid psscr 0x%016llx : ESL,EC bits unequal", + psscr_val); + break; + case ERR_DEEP_STATE_ESL_MISMATCH: + pr_warn("Invalid psscr 0x%016llx : ESL cleared for deep stop-state", + psscr_val); + } +} #endif #endif diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 1ba814436c73..21e0b52685b5 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -454,7 +454,8 @@ extern int powersave_nap; /* set if nap mode can be used in idle loop */ extern unsigned long power7_nap(int check_irq); extern unsigned long power7_sleep(void); extern unsigned long power7_winkle(void); -extern unsigned long power9_idle_stop(unsigned long stop_level); +extern unsigned long power9_idle_stop(unsigned long stop_psscr_val, + unsigned long stop_psscr_mask); extern void flush_instruction_cache(void); extern void hard_reset_now(void); diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index be90e2f62bba..4f6cf5596235 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -40,9 +40,7 @@ #define _WORC GPR11 #define _PTCR GPR12 -#define PSSCR_HV_TEMPLATE PSSCR_ESL | PSSCR_EC | \ - PSSCR_PSLL_MASK | PSSCR_TR_MASK | \ - PSSCR_MTL_MASK +#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16 .text @@ -264,7 +262,7 @@ enter_winkle: 
IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE) /* - * r3 - requested stop state + * r3 - PS
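The series is cut off mid-hunk above, but the PSSCR invariants the commit message describes can still be sketched with the accessors the patch adds — a guess at the shape of validate_psscr_val_mask(), not its actual body:

/*
 * Sketch only: ESL and EC must be set or cleared together, and any
 * stop state deep enough to lose state (RL >= pnv_first_deep_stop_state)
 * must keep ESL set so the wakeup goes through vector 0x100 with a
 * full state restore.
 */
static int psscr_sanity(u64 psscr_val, u64 first_deep_rl)
{
	if (GET_PSSCR_ESL(psscr_val) != GET_PSSCR_EC(psscr_val))
		return ERR_EC_ESL_MISMATCH;

	if (GET_PSSCR_RL(psscr_val) >= first_deep_rl &&
	    !GET_PSSCR_ESL(psscr_val))
		return ERR_DEEP_STATE_ESL_MISMATCH;

	return 0;
}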