From e0f82bdf2dd04c08a167b5e9b47d65834ac84ba9 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:19 +0200 Subject: powerpc: unexport csum_tcpudp_magic csum_tcpudp_magic is now an inline function, so there is nothing to export Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/lib/ppc_ksyms.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c index c7f8e9586316..f5e427e7bc0d 100644 --- a/arch/powerpc/lib/ppc_ksyms.c +++ b/arch/powerpc/lib/ppc_ksyms.c @@ -20,7 +20,6 @@ EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy_generic); EXPORT_SYMBOL(ip_fast_csum); -EXPORT_SYMBOL(csum_tcpudp_magic); #endif EXPORT_SYMBOL(__copy_tofrom_user); -- cgit v1.2.3 From 11dfbf588ae697bc5362c24294c2605f09c2d3d4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:21 +0200 Subject: powerpc: mark xer clobbered in csum_add() addc uses carry so xer is clobbered in csum_add() Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index e8d9ef4755a4..d2ca07bb8837 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -141,7 +141,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #else asm("addc %0,%0,%1;" "addze %0,%0;" - : "+r" (csum) : "r" (addend)); + : "+r" (csum) : "r" (addend) : "xer"); return csum; #endif } -- cgit v1.2.3 From 03bc8b0fc87616c75c6dd060a2191e7fc8faacb6 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:23 +0200 Subject: powerpc32: checksum_wrappers_64 becomes checksum_wrappers The powerpc64 checksum wrapper functions adds csum_and_copy_to_user() which otherwise is implemented in include/net/checksum.h by using csum_partial() then copy_to_user() Those two wrapper fonctions are also applicable to powerpc32 as it is based on the use of csum_partial_copy_generic() which also exists on powerpc32 This patch renames arch/powerpc/lib/checksum_wrappers_64.c to arch/powerpc/lib/checksum_wrappers.c and makes it non-conditional to CONFIG_WORD_SIZE Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/checksum.h | 9 --- arch/powerpc/lib/Makefile | 3 +- arch/powerpc/lib/checksum_wrappers.c | 102 ++++++++++++++++++++++++++++++++ arch/powerpc/lib/checksum_wrappers_64.c | 102 -------------------------------- 4 files changed, 103 insertions(+), 113 deletions(-) create mode 100644 arch/powerpc/lib/checksum_wrappers.c delete mode 100644 arch/powerpc/lib/checksum_wrappers_64.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index d2ca07bb8837..afa6722cb7d2 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -47,21 +47,12 @@ extern __wsum csum_partial_copy_generic(const void *src, void *dst, int len, __wsum sum, int *src_err, int *dst_err); -#ifdef __powerpc64__ #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr); #define HAVE_CSUM_COPY_USER extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, __wsum sum, int *err_ptr); -#else -/* - * the same as csum_partial, but copies from src to dst while it - * checksums. - */ -#define csum_partial_copy_from_user(src, dst, len, sum, errp) \ - csum_partial_copy_generic((__force const void *)(src), (dst), (len), (sum), (errp), NULL) -#endif #define csum_partial_copy_nocheck(src, dst, len, sum) \ csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index a47e14277fd8..e46b06822bea 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -22,8 +22,7 @@ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o ifeq ($(CONFIG_GENERIC_CSUM),) -obj-y += checksum_$(CONFIG_WORD_SIZE).o -obj-$(CONFIG_PPC64) += checksum_wrappers_64.o +obj-y += checksum_$(CONFIG_WORD_SIZE).o checksum_wrappers.o endif obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c new file mode 100644 index 000000000000..08e3a3356c40 --- /dev/null +++ b/arch/powerpc/lib/checksum_wrappers.c @@ -0,0 +1,102 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2010 + * + * Author: Anton Blanchard + */ +#include +#include +#include +#include +#include + +__wsum csum_and_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr) +{ + unsigned int csum; + + might_sleep(); + + *err_ptr = 0; + + if (!len) { + csum = 0; + goto out; + } + + if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) { + *err_ptr = -EFAULT; + csum = (__force unsigned int)sum; + goto out; + } + + csum = csum_partial_copy_generic((void __force *)src, dst, + len, sum, err_ptr, NULL); + + if (unlikely(*err_ptr)) { + int missing = __copy_from_user(dst, src, len); + + if (missing) { + memset(dst + len - missing, 0, missing); + *err_ptr = -EFAULT; + } else { + *err_ptr = 0; + } + + csum = csum_partial(dst, len, sum); + } + +out: + return (__force __wsum)csum; +} +EXPORT_SYMBOL(csum_and_copy_from_user); + +__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, + __wsum sum, int *err_ptr) +{ + unsigned int csum; + + might_sleep(); + + *err_ptr = 0; + + if (!len) { + csum = 0; + goto out; + } + + if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) { + *err_ptr = -EFAULT; + csum = -1; /* invalid checksum */ + goto out; + } + + csum = csum_partial_copy_generic(src, (void __force *)dst, + len, sum, NULL, err_ptr); + + if (unlikely(*err_ptr)) { + csum = csum_partial(src, len, sum); + + if (copy_to_user(dst, src, len)) { + *err_ptr = -EFAULT; + csum = -1; /* invalid checksum */ + } + } + +out: + return (__force __wsum)csum; +} +EXPORT_SYMBOL(csum_and_copy_to_user); diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c deleted file mode 100644 index 08e3a3356c40..000000000000 --- a/arch/powerpc/lib/checksum_wrappers_64.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2010 - * - * Author: Anton Blanchard - */ -#include -#include -#include -#include -#include - -__wsum csum_and_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, int *err_ptr) -{ - unsigned int csum; - - might_sleep(); - - *err_ptr = 0; - - if (!len) { - csum = 0; - goto out; - } - - if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) { - *err_ptr = -EFAULT; - csum = (__force unsigned int)sum; - goto out; - } - - csum = csum_partial_copy_generic((void __force *)src, dst, - len, sum, err_ptr, NULL); - - if (unlikely(*err_ptr)) { - int missing = __copy_from_user(dst, src, len); - - if (missing) { - memset(dst + len - missing, 0, missing); - *err_ptr = -EFAULT; - } else { - *err_ptr = 0; - } - - csum = csum_partial(dst, len, sum); - } - -out: - return (__force __wsum)csum; -} -EXPORT_SYMBOL(csum_and_copy_from_user); - -__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, - __wsum sum, int *err_ptr) -{ - unsigned int csum; - - might_sleep(); - - *err_ptr = 0; - - if (!len) { - csum = 0; - goto out; - } - - if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) { - *err_ptr = -EFAULT; - csum = -1; /* invalid checksum */ - goto out; - } - - csum = csum_partial_copy_generic(src, (void __force *)dst, - len, sum, NULL, err_ptr); - - if (unlikely(*err_ptr)) { - csum = csum_partial(src, len, sum); - - if (copy_to_user(dst, src, len)) { - *err_ptr = -EFAULT; - csum = -1; /* invalid checksum */ - } - } - -out: - return (__force __wsum)csum; -} -EXPORT_SYMBOL(csum_and_copy_to_user); -- cgit v1.2.3 From 37e08cad8f177f7bf6226a9b3724234ac3d3c81d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:25 +0200 Subject: powerpc: inline ip_fast_csum() In several architectures, ip_fast_csum() is inlined There are functions like ip_send_check() which do nothing much more than calling ip_fast_csum(). Inlining ip_fast_csum() allows the compiler to optimise better Suggested-by: Eric Dumazet Signed-off-by: Christophe Leroy [scottwood: whitespace and cast fixes] Signed-off-by: Scott Wood --- arch/powerpc/include/asm/checksum.h | 45 +++++++++++++++++++++++++++++++------ arch/powerpc/lib/checksum_32.S | 21 ----------------- arch/powerpc/lib/checksum_64.S | 27 ---------------------- arch/powerpc/lib/ppc_ksyms.c | 1 - 4 files changed, 38 insertions(+), 56 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index afa6722cb7d2..1778f75bf769 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -9,16 +9,9 @@ * 2 of the License, or (at your option) any later version. */ -/* - * This is a version of ip_compute_csum() optimized for IP headers, - * which always checksum on 4 octet boundaries. ihl is the number - * of 32-bit words and is always >= 5. - */ #ifdef CONFIG_GENERIC_CSUM #include #else -extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); - /* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) @@ -137,6 +130,44 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #endif } +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. ihl is the number + * of 32-bit words and is always >= 5. + */ +static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl) +{ + const u32 *ptr = (const u32 *)iph + 1; +#ifdef __powerpc64__ + unsigned int i; + u64 s = *(const u32 *)iph; + + for (i = 0; i < ihl - 1; i++, ptr++) + s += *ptr; + s += (s >> 32); + return (__force __wsum)s; +#else + __wsum sum, tmp; + + asm("mtctr %3;" + "addc %0,%4,%5;" + "1: lwzu %1, 4(%2);" + "adde %0,%0,%1;" + "bdnz 1b;" + "addze %0,%0;" + : "=r" (sum), "=r" (tmp), "+b" (ptr) + : "r" (ihl - 2), "r" (*(const u32 *)iph), "r" (*ptr) + : "ctr", "xer", "memory"); + + return sum; +#endif +} + +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + return csum_fold(ip_fast_csum_nofold(iph, ihl)); +} + #endif #endif /* __KERNEL__ */ #endif diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 6d67e057f15e..0d7eba31d93f 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -19,27 +19,6 @@ .text -/* - * ip_fast_csum(buf, len) -- Optimized for IP header - * len is in words and is always >= 5. - */ -_GLOBAL(ip_fast_csum) - lwz r0,0(r3) - lwzu r5,4(r3) - addic. r4,r4,-2 - addc r0,r0,r5 - mtctr r4 - blelr- -1: lwzu r4,4(r3) - adde r0,r0,r4 - bdnz 1b - addze r0,r0 /* add in final carry */ - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - /* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index f3ef35436612..f53f4abb9282 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -17,33 +17,6 @@ #include #include -/* - * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header - * len is in words and is always >= 5. - * - * In practice len == 5, but this is not guaranteed. So this code does not - * attempt to use doubleword instructions. - */ -_GLOBAL(ip_fast_csum) - lwz r0,0(r3) - lwzu r5,4(r3) - addic. r4,r4,-2 - addc r0,r0,r5 - mtctr r4 - blelr- -1: lwzu r4,4(r3) - adde r0,r0,r4 - bdnz 1b - addze r0,r0 /* add in final carry */ - rldicl r4,r0,32,0 /* fold two 32-bit halves together */ - add r0,r0,r4 - srdi r0,r0,32 - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - /* * Computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit). diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c index f5e427e7bc0d..8cd5c0b2986c 100644 --- a/arch/powerpc/lib/ppc_ksyms.c +++ b/arch/powerpc/lib/ppc_ksyms.c @@ -19,7 +19,6 @@ EXPORT_SYMBOL(strncmp); #ifndef CONFIG_GENERIC_CSUM EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy_generic); -EXPORT_SYMBOL(ip_fast_csum); #endif EXPORT_SYMBOL(__copy_tofrom_user); -- cgit v1.2.3 From 7aef4136566b0539a1a98391181e188905e33401 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:27 +0200 Subject: powerpc32: rewrite csum_partial_copy_generic() based on copy_tofrom_user() csum_partial_copy_generic() does the same as copy_tofrom_user and also calculates the checksum during the copy. Unlike copy_tofrom_user(), the existing version of csum_partial_copy_generic() doesn't take benefit of the cache. This patch is a rewrite of csum_partial_copy_generic() based on copy_tofrom_user(). The previous version of csum_partial_copy_generic() was handling errors. Now we have the checksum wrapper functions to handle the error case like in powerpc64 so we can make the error case simple: just return -EFAULT. copy_tofrom_user() only has r12 available => we use it for the checksum r7 and r8 which contains pointers to error feedback are used, so we stack them. On a TCP benchmark using socklib on the loopback interface on which checksum offload and scatter/gather have been deactivated, we get about 20% performance increase. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/lib/checksum_32.S | 320 +++++++++++++++++++++++++++-------------- 1 file changed, 209 insertions(+), 111 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 0d7eba31d93f..347237253d1e 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -66,123 +67,220 @@ _GLOBAL(csum_partial) * * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) */ +#define CSUM_COPY_16_BYTES_WITHEX(n) \ +8 ## n ## 0: \ + lwz r7,4(r4); \ +8 ## n ## 1: \ + lwz r8,8(r4); \ +8 ## n ## 2: \ + lwz r9,12(r4); \ +8 ## n ## 3: \ + lwzu r10,16(r4); \ +8 ## n ## 4: \ + stw r7,4(r6); \ + adde r12,r12,r7; \ +8 ## n ## 5: \ + stw r8,8(r6); \ + adde r12,r12,r8; \ +8 ## n ## 6: \ + stw r9,12(r6); \ + adde r12,r12,r9; \ +8 ## n ## 7: \ + stwu r10,16(r6); \ + adde r12,r12,r10 + +#define CSUM_COPY_16_BYTES_EXCODE(n) \ +.section __ex_table,"a"; \ + .align 2; \ + .long 8 ## n ## 0b,src_error; \ + .long 8 ## n ## 1b,src_error; \ + .long 8 ## n ## 2b,src_error; \ + .long 8 ## n ## 3b,src_error; \ + .long 8 ## n ## 4b,dst_error; \ + .long 8 ## n ## 5b,dst_error; \ + .long 8 ## n ## 6b,dst_error; \ + .long 8 ## n ## 7b,dst_error; \ + .text + + .text + .stabs "arch/powerpc/lib/",N_SO,0,0,0f + .stabs "checksum_32.S",N_SO,0,0,0f +0: + +CACHELINE_BYTES = L1_CACHE_BYTES +LG_CACHELINE_BYTES = L1_CACHE_SHIFT +CACHELINE_MASK = (L1_CACHE_BYTES-1) + _GLOBAL(csum_partial_copy_generic) - addic r0,r6,0 - subi r3,r3,4 - subi r4,r4,4 - srwi. r6,r5,2 - beq 3f /* if we're doing < 4 bytes */ - andi. r9,r4,2 /* Align dst to longword boundary */ - beq+ 1f -81: lhz r6,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 - subi r5,r5,2 -91: sth r6,4(r4) - addi r4,r4,2 - addc r0,r0,r6 - srwi. r6,r5,2 /* # words to do */ - beq 3f -1: srwi. r6,r5,4 /* # groups of 4 words to do */ - beq 10f - mtctr r6 -71: lwz r6,4(r3) -72: lwz r9,8(r3) -73: lwz r10,12(r3) -74: lwzu r11,16(r3) - adde r0,r0,r6 -75: stw r6,4(r4) - adde r0,r0,r9 -76: stw r9,8(r4) - adde r0,r0,r10 -77: stw r10,12(r4) - adde r0,r0,r11 -78: stwu r11,16(r4) - bdnz 71b -10: rlwinm. r6,r5,30,30,31 /* # words left to do */ - beq 13f - mtctr r6 -82: lwzu r9,4(r3) -92: stwu r9,4(r4) - adde r0,r0,r9 - bdnz 82b -13: andi. r5,r5,3 -3: cmpwi 0,r5,2 - blt+ 4f -83: lhz r6,4(r3) - addi r3,r3,2 - subi r5,r5,2 -93: sth r6,4(r4) + stwu r1,-16(r1) + stw r7,12(r1) + stw r8,8(r1) + + andi. r0,r4,1 /* is destination address even ? */ + cmplwi cr7,r0,0 + addic r12,r6,0 + addi r6,r4,-4 + neg r0,r4 + addi r4,r3,-4 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + mtctr r8 + beq+ 61f + li r3,0 +70: lbz r9,4(r4) /* do some bytes */ + addi r4,r4,1 + slwi r3,r3,8 + rlwimi r3,r9,0,24,31 +71: stb r9,4(r6) + addi r6,r6,1 + bdnz 70b + adde r12,r12,r3 +61: subf r5,r0,r5 + srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ + adde r12,r12,r9 +73: stwu r9,4(r6) + bdnz 72b + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + beq 63f + + /* Here we decide how far ahead to prefetch the source */ + li r3,4 + cmpwi r0,1 + li r7,0 + ble 114f + li r7,1 +#if MAX_COPY_PREFETCH > 1 + /* Heuristically, for large transfers we prefetch + MAX_COPY_PREFETCH cachelines ahead. For small transfers + we prefetch 1 cacheline ahead. */ + cmpwi r0,MAX_COPY_PREFETCH + ble 112f + li r7,MAX_COPY_PREFETCH +112: mtctr r7 +111: dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES + bdnz 111b +#else + dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES +#endif /* MAX_COPY_PREFETCH > 1 */ + +114: subf r8,r7,r0 + mr r0,r7 + mtctr r8 + +53: dcbt r3,r4 +54: dcbz r11,r6 +/* the main body of the cacheline loop */ + CSUM_COPY_16_BYTES_WITHEX(0) +#if L1_CACHE_BYTES >= 32 + CSUM_COPY_16_BYTES_WITHEX(1) +#if L1_CACHE_BYTES >= 64 + CSUM_COPY_16_BYTES_WITHEX(2) + CSUM_COPY_16_BYTES_WITHEX(3) +#if L1_CACHE_BYTES >= 128 + CSUM_COPY_16_BYTES_WITHEX(4) + CSUM_COPY_16_BYTES_WITHEX(5) + CSUM_COPY_16_BYTES_WITHEX(6) + CSUM_COPY_16_BYTES_WITHEX(7) +#endif +#endif +#endif + bdnz 53b + cmpwi r0,0 + li r3,4 + li r7,0 + bne 114b + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) + adde r12,r12,r0 +31: stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,2 + beq+ 65f +40: lhz r0,4(r4) addi r4,r4,2 - adde r0,r0,r6 -4: cmpwi 0,r5,1 - bne+ 5f -84: lbz r6,4(r3) -94: stb r6,4(r4) - slwi r6,r6,8 /* Upper byte of word */ - adde r0,r0,r6 -5: addze r3,r0 /* add in final carry */ +41: sth r0,4(r6) + adde r12,r12,r0 + addi r6,r6,2 +65: andi. r0,r5,1 + beq+ 66f +50: lbz r0,4(r4) +51: stb r0,4(r6) + slwi r0,r0,8 + adde r12,r12,r0 +66: addze r3,r12 + addi r1,r1,16 + beqlr+ cr7 + rlwinm r3,r3,8,0,31 /* swap bytes for odd destination */ blr -/* These shouldn't go in the fixup section, since that would - cause the ex_table addresses to get out of order. */ - -src_error_4: - mfctr r6 /* update # bytes remaining from ctr */ - rlwimi r5,r6,4,0,27 - b 79f -src_error_1: - li r6,0 - subi r5,r5,2 -95: sth r6,4(r4) - addi r4,r4,2 -79: srwi. r6,r5,2 - beq 3f - mtctr r6 -src_error_2: - li r6,0 -96: stwu r6,4(r4) - bdnz 96b -3: andi. r5,r5,3 - beq src_error -src_error_3: - li r6,0 - mtctr r5 - addi r4,r4,3 -97: stbu r6,1(r4) - bdnz 97b +/* read fault */ src_error: - cmpwi 0,r7,0 - beq 1f - li r6,-EFAULT - stw r6,0(r7) -1: addze r3,r0 + lwz r7,12(r1) + addi r1,r1,16 + cmpwi cr0,r7,0 + beqlr + li r0,-EFAULT + stw r0,0(r7) blr - +/* write fault */ dst_error: - cmpwi 0,r8,0 - beq 1f - li r6,-EFAULT - stw r6,0(r8) -1: addze r3,r0 + lwz r8,8(r1) + addi r1,r1,16 + cmpwi cr0,r8,0 + beqlr + li r0,-EFAULT + stw r0,0(r8) blr -.section __ex_table,"a" - .long 81b,src_error_1 - .long 91b,dst_error - .long 71b,src_error_4 - .long 72b,src_error_4 - .long 73b,src_error_4 - .long 74b,src_error_4 - .long 75b,dst_error - .long 76b,dst_error - .long 77b,dst_error - .long 78b,dst_error - .long 82b,src_error_2 - .long 92b,dst_error - .long 83b,src_error_3 - .long 93b,dst_error - .long 84b,src_error_3 - .long 94b,dst_error - .long 95b,dst_error - .long 96b,dst_error - .long 97b,dst_error + .section __ex_table,"a" + .align 2 + .long 70b,src_error + .long 71b,dst_error + .long 72b,src_error + .long 73b,dst_error + .long 54b,dst_error + .text + +/* + * this stuff handles faults in the cacheline loop and branches to either + * src_error (if in read part) or dst_error (if in write part) + */ + CSUM_COPY_16_BYTES_EXCODE(0) +#if L1_CACHE_BYTES >= 32 + CSUM_COPY_16_BYTES_EXCODE(1) +#if L1_CACHE_BYTES >= 64 + CSUM_COPY_16_BYTES_EXCODE(2) + CSUM_COPY_16_BYTES_EXCODE(3) +#if L1_CACHE_BYTES >= 128 + CSUM_COPY_16_BYTES_EXCODE(4) + CSUM_COPY_16_BYTES_EXCODE(5) + CSUM_COPY_16_BYTES_EXCODE(6) + CSUM_COPY_16_BYTES_EXCODE(7) +#endif +#endif +#endif + + .section __ex_table,"a" + .align 2 + .long 30b,src_error + .long 31b,dst_error + .long 40b,src_error + .long 41b,dst_error + .long 50b,src_error + .long 51b,dst_error -- cgit v1.2.3 From 48821a34b1bdc5d89505cb814b3f7c166940f200 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:29 +0200 Subject: powerpc32: optimise a few instructions in csum_partial() r5 does contain the value to be updated, so lets use r5 all way long for that. It makes the code more readable. To avoid confusion, it is better to use adde instead of addc The first addition is useless. Its only purpose is to clear carry. As r4 is a signed int that is always positive, this can be done by using srawi instead of srwi Let's also remove the comment about bdnz having no overhead as it is not correct on all powerpc, at least on MPC8xx In the last part, in our situation, the remaining quantity of bytes to be proceeded is between 0 and 3. Therefore, we can base that part on the value of bit 31 and bit 30 of r4 instead of anding r4 with 3 then proceding on comparisons and substractions. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/lib/checksum_32.S | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 347237253d1e..9c126028ab9c 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -27,35 +27,32 @@ * csum_partial(buff, len, sum) */ _GLOBAL(csum_partial) - addic r0,r5,0 subi r3,r3,4 - srwi. r6,r4,2 + srawi. r6,r4,2 /* Divide len by 4 and also clear carry */ beq 3f /* if we're doing < 4 bytes */ - andi. r5,r3,2 /* Align buffer to longword boundary */ + andi. r0,r3,2 /* Align buffer to longword boundary */ beq+ 1f - lhz r5,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 + lhz r0,4(r3) /* do 2 bytes to get aligned */ subi r4,r4,2 - addc r0,r0,r5 + addi r3,r3,2 srwi. r6,r4,2 /* # words to do */ + adde r5,r5,r0 beq 3f 1: mtctr r6 -2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */ - adde r0,r0,r5 /* be unnecessary to unroll this loop */ +2: lwzu r0,4(r3) + adde r5,r5,r0 bdnz 2b - andi. r4,r4,3 -3: cmpwi 0,r4,2 - blt+ 4f - lhz r5,4(r3) +3: andi. r0,r4,2 + beq+ 4f + lhz r0,4(r3) addi r3,r3,2 - subi r4,r4,2 - adde r0,r0,r5 -4: cmpwi 0,r4,1 - bne+ 5f - lbz r5,4(r3) - slwi r5,r5,8 /* Upper byte of word */ - adde r0,r0,r5 -5: addze r3,r0 /* add in final carry */ + adde r5,r5,r0 +4: andi. r0,r4,1 + beq+ 5f + lbz r0,4(r3) + slwi r0,r0,8 /* Upper byte of word */ + adde r5,r5,r0 +5: addze r3,r5 /* add in final carry */ blr /* -- cgit v1.2.3 From f867d556dd8525fe6ff0d22a34249528e590f994 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:32 +0200 Subject: powerpc32: optimise csum_partial() loop On the 8xx, load latency is 2 cycles and taking branches also takes 2 cycles. So let's unroll the loop. This patch improves csum_partial() speed by around 10% on both: * 8xx (single issue processor with parallel execution) * 83xx (superscalar 6xx processor with dual instruction fetch and parallel execution) Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/lib/checksum_32.S | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 9c126028ab9c..0d34f47c8a5e 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -38,10 +38,24 @@ _GLOBAL(csum_partial) srwi. r6,r4,2 /* # words to do */ adde r5,r5,r0 beq 3f -1: mtctr r6 +1: andi. r6,r6,3 /* Prepare to handle words 4 by 4 */ + beq 21f + mtctr r6 2: lwzu r0,4(r3) adde r5,r5,r0 bdnz 2b +21: srwi. r6,r4,4 /* # blocks of 4 words to do */ + beq 3f + mtctr r6 +22: lwz r0,4(r3) + lwz r6,8(r3) + lwz r7,12(r3) + lwzu r8,16(r3) + adde r5,r5,r0 + adde r5,r5,r6 + adde r5,r5,r7 + adde r5,r5,r8 + bdnz 22b 3: andi. r0,r4,2 beq+ 4f lhz r0,4(r3) -- cgit v1.2.3 From 5a8847c83ce6072d6fdf0d15d9aa060c0b83537f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Sep 2015 16:34:34 +0200 Subject: powerpc: simplify csum_add(a, b) in case a or b is constant 0 Simplify csum_add(a, b) in case a or b is constant 0 Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/checksum.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 1778f75bf769..74cd8d82628a 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -119,7 +119,13 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) { #ifdef __powerpc64__ u64 res = (__force u64)csum; +#endif + if (__builtin_constant_p(csum) && csum == 0) + return addend; + if (__builtin_constant_p(addend) && addend == 0) + return csum; +#ifdef __powerpc64__ res += (__force u64)addend; return (__force __wsum)((u32)res + (res >> 32)); #else -- cgit v1.2.3 From ebb9d30a6a74f4ced5b7d35446ebb6362a724393 Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Thu, 24 Dec 2015 08:39:57 +0800 Subject: powerpc/mm: any thread in one core can be the first to setup TLB1 On e6500, in the case of cpu hotplug, either thread in one core may be the first thread initilzing the TLB1. The subsequent threads must not setup it again. The code is derived from the comment of Scott Wood. Signed-off-by: Chenhui Zhao Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cputhreads.h | 8 ++++++++ arch/powerpc/mm/tlb_nohash.c | 4 +--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index ba42e46ea58e..ea9623147b87 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -94,6 +94,14 @@ static inline int cpu_last_thread_sibling(int cpu) return cpu | (threads_per_core - 1); } +static inline u32 get_tensr(void) +{ +#ifdef CONFIG_BOOKE + if (cpu_has_feature(CPU_FTR_SMT)) + return mfspr(SPRN_TENSR); +#endif + return 1; +} #endif /* _ASM_POWERPC_CPUTHREADS_H */ diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index bb04e4df3100..f4668488512c 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -640,9 +640,7 @@ static void early_init_this_mmu(void) * transient mapping would cause problems. */ #ifdef CONFIG_SMP - if (cpu != boot_cpuid && - (cpu != cpu_first_thread_sibling(cpu) || - cpu == cpu_first_thread_sibling(boot_cpuid))) + if (hweight32(get_tensr()) > 1) map = false; #endif -- cgit v1.2.3 From e7affb1dba0e9068aeb3978e858f39753e0dc20a Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:13:58 +0800 Subject: powerpc/cache: add cache flush operation for various e500 Various e500 core have different cache architecture, so they need different cache flush operations. Therefore, add a callback function cpu_flush_caches to the struct cpu_spec. The cache flush operation for the specific kind of e500 is selected at init time. The callback function will flush all caches inside the current cpu. Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cacheflush.h | 2 - arch/powerpc/include/asm/cputable.h | 8 +++ arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/cpu_setup_fsl_booke.S | 112 ++++++++++++++++++++++++++++++ arch/powerpc/kernel/cputable.c | 4 ++ arch/powerpc/kernel/head_fsl_booke.S | 74 -------------------- arch/powerpc/platforms/85xx/smp.c | 5 +- 7 files changed, 128 insertions(+), 78 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 6229e6b6037b..47add2e2364a 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -30,8 +30,6 @@ extern void flush_dcache_page(struct page *page); #define flush_dcache_mmap_lock(mapping) do { } while (0) #define flush_dcache_mmap_unlock(mapping) do { } while (0) -extern void __flush_disable_L1(void); - extern void flush_icache_range(unsigned long, unsigned long); extern void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, unsigned long addr, diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 94ace9b4c4e1..df4fb5faba43 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -43,6 +43,11 @@ extern int machine_check_e500(struct pt_regs *regs); extern int machine_check_e200(struct pt_regs *regs); extern int machine_check_47x(struct pt_regs *regs); +extern void cpu_down_flush_e500v2(void); +extern void cpu_down_flush_e500mc(void); +extern void cpu_down_flush_e5500(void); +extern void cpu_down_flush_e6500(void); + /* NOTE WELL: Update identify_cpu() if fields are added or removed! */ struct cpu_spec { /* CPU is matched via (PVR & pvr_mask) == pvr_value */ @@ -59,6 +64,9 @@ struct cpu_spec { unsigned int icache_bsize; unsigned int dcache_bsize; + /* flush caches inside the current cpu */ + void (*cpu_down_flush)(void); + /* number of performance monitor counters */ unsigned int num_pmcs; enum powerpc_pmc_type pmc_type; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 10d5eab19458..0d0183d3180a 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -376,6 +376,7 @@ int main(void) DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore)); + DEFINE(CPU_DOWN_FLUSH, offsetof(struct cpu_spec, cpu_down_flush)); DEFINE(pbe_address, offsetof(struct pbe, address)); DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index dddba3e94260..462aed9bcf51 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -13,11 +13,13 @@ * */ +#include #include #include #include #include #include +#include _GLOBAL(__e500_icache_setup) mfspr r0, SPRN_L1CSR1 @@ -233,3 +235,113 @@ _GLOBAL(__setup_cpu_e5500) mtlr r5 blr #endif + +/* flush L1 date cache, it can apply to e500v2, e500mc and e5500 */ +_GLOBAL(flush_dcache_L1) + mfmsr r10 + wrteei 0 + + mfspr r3,SPRN_L1CFG0 + rlwinm r5,r3,9,3 /* Extract cache block size */ + twlgti r5,1 /* Only 32 and 64 byte cache blocks + * are currently defined. + */ + li r4,32 + subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) - + * log2(number of ways) + */ + slw r5,r4,r5 /* r5 = cache block size */ + + rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */ + mulli r7,r7,13 /* An 8-way cache will require 13 + * loads per set. + */ + slw r7,r7,r6 + + /* save off HID0 and set DCFA */ + mfspr r8,SPRN_HID0 + ori r9,r8,HID0_DCFA@l + mtspr SPRN_HID0,r9 + isync + + LOAD_REG_IMMEDIATE(r6, KERNELBASE) + mr r4, r6 + mtctr r7 + +1: lwz r3,0(r4) /* Load... */ + add r4,r4,r5 + bdnz 1b + + msync + mr r4, r6 + mtctr r7 + +1: dcbf 0,r4 /* ...and flush. */ + add r4,r4,r5 + bdnz 1b + + /* restore HID0 */ + mtspr SPRN_HID0,r8 + isync + + wrtee r10 + + blr + +has_L2_cache: + /* skip L2 cache on P2040/P2040E as they have no L2 cache */ + mfspr r3, SPRN_SVR + /* shift right by 8 bits and clear E bit of SVR */ + rlwinm r4, r3, 24, ~0x800 + + lis r3, SVR_P2040@h + ori r3, r3, SVR_P2040@l + cmpw r4, r3 + beq 1f + + li r3, 1 + blr +1: + li r3, 0 + blr + +/* flush backside L2 cache */ +flush_backside_L2_cache: + mflr r10 + bl has_L2_cache + mtlr r10 + cmpwi r3, 0 + beq 2f + + /* Flush the L2 cache */ + mfspr r3, SPRN_L2CSR0 + ori r3, r3, L2CSR0_L2FL@l + msync + isync + mtspr SPRN_L2CSR0,r3 + isync + + /* check if it is complete */ +1: mfspr r3,SPRN_L2CSR0 + andi. r3, r3, L2CSR0_L2FL@l + bne 1b +2: + blr + +_GLOBAL(cpu_down_flush_e500v2) + mflr r0 + bl flush_dcache_L1 + mtlr r0 + blr + +_GLOBAL(cpu_down_flush_e500mc) +_GLOBAL(cpu_down_flush_e5500) + mflr r0 + bl flush_dcache_L1 + bl flush_backside_L2_cache + mtlr r0 + blr + +/* L1 Data Cache of e6500 contains no modified data, no flush is required */ +_GLOBAL(cpu_down_flush_e6500) + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index be4d73053bed..6c662b8de90d 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2050,6 +2050,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_e500v2, .machine_check = machine_check_e500, .platform = "ppc8548", + .cpu_down_flush = cpu_down_flush_e500v2, }, #else { /* e500mc */ @@ -2069,6 +2070,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_setup = __setup_cpu_e500mc, .machine_check = machine_check_e500mc, .platform = "ppce500mc", + .cpu_down_flush = cpu_down_flush_e500mc, }, #endif /* CONFIG_PPC_E500MC */ #endif /* CONFIG_PPC32 */ @@ -2093,6 +2095,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif .machine_check = machine_check_e500mc, .platform = "ppce5500", + .cpu_down_flush = cpu_down_flush_e5500, }, { /* e6500 */ .pvr_mask = 0xffff0000, @@ -2115,6 +2118,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif .machine_check = machine_check_e500mc, .platform = "ppce6500", + .cpu_down_flush = cpu_down_flush_e6500, }, #endif /* CONFIG_PPC_E500MC */ #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index f705171b924b..3bfa3150911f 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -1037,80 +1037,6 @@ _GLOBAL(set_context) isync /* Force context change */ blr -_GLOBAL(flush_dcache_L1) - mfspr r3,SPRN_L1CFG0 - - rlwinm r5,r3,9,3 /* Extract cache block size */ - twlgti r5,1 /* Only 32 and 64 byte cache blocks - * are currently defined. - */ - li r4,32 - subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) - - * log2(number of ways) - */ - slw r5,r4,r5 /* r5 = cache block size */ - - rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */ - mulli r7,r7,13 /* An 8-way cache will require 13 - * loads per set. - */ - slw r7,r7,r6 - - /* save off HID0 and set DCFA */ - mfspr r8,SPRN_HID0 - ori r9,r8,HID0_DCFA@l - mtspr SPRN_HID0,r9 - isync - - lis r4,KERNELBASE@h - mtctr r7 - -1: lwz r3,0(r4) /* Load... */ - add r4,r4,r5 - bdnz 1b - - msync - lis r4,KERNELBASE@h - mtctr r7 - -1: dcbf 0,r4 /* ...and flush. */ - add r4,r4,r5 - bdnz 1b - - /* restore HID0 */ - mtspr SPRN_HID0,r8 - isync - - blr - -/* Flush L1 d-cache, invalidate and disable d-cache and i-cache */ -_GLOBAL(__flush_disable_L1) - mflr r10 - bl flush_dcache_L1 /* Flush L1 d-cache */ - mtlr r10 - - mfspr r4, SPRN_L1CSR0 /* Invalidate and disable d-cache */ - li r5, 2 - rlwimi r4, r5, 0, 3 - - msync - isync - mtspr SPRN_L1CSR0, r4 - isync - -1: mfspr r4, SPRN_L1CSR0 /* Wait for the invalidate to finish */ - andi. r4, r4, 2 - bne 1b - - mfspr r4, SPRN_L1CSR1 /* Invalidate and disable i-cache */ - li r5, 2 - rlwimi r4, r5, 0, 3 - - mtspr SPRN_L1CSR1, r4 - isync - - blr - #ifdef CONFIG_SMP /* When we get here, r24 needs to hold the CPU # */ .globl __secondary_start diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 6b107cea1c08..4a7841616c7f 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -139,7 +139,8 @@ static void smp_85xx_mach_cpu_die(void) mtspr(SPRN_TCR, 0); - __flush_disable_L1(); + cur_cpu_spec->cpu_down_flush(); + tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP; mtspr(SPRN_HID0, tmp); isync(); @@ -359,7 +360,7 @@ void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) local_irq_disable(); if (secondary) { - __flush_disable_L1(); + cur_cpu_spec->cpu_down_flush(); atomic_inc(&kexec_down_cpus); /* loop forever */ while (1); -- cgit v1.2.3 From d17799f9c10e283cccd4d598d3416e6fac336ab9 Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:13:59 +0800 Subject: powerpc/rcpm: add RCPM driver There is a RCPM (Run Control/Power Management) in Freescale QorIQ series processors. The device performs tasks associated with device run control and power management. The driver implements some features: mask/unmask irq, enter/exit low power states, freeze time base, etc. Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian [scottwood: remove __KERNEL__ ifdef] Signed-off-by: Scott Wood --- arch/powerpc/include/asm/cputhreads.h | 1 + arch/powerpc/include/asm/fsl_pm.h | 51 +++++ arch/powerpc/kernel/head_64.S | 19 ++ arch/powerpc/platforms/85xx/Kconfig | 1 + arch/powerpc/platforms/85xx/common.c | 3 + arch/powerpc/sysdev/Kconfig | 5 + arch/powerpc/sysdev/Makefile | 1 + arch/powerpc/sysdev/fsl_rcpm.c | 385 ++++++++++++++++++++++++++++++++++ 8 files changed, 466 insertions(+) create mode 100644 arch/powerpc/include/asm/fsl_pm.h create mode 100644 arch/powerpc/sysdev/fsl_rcpm.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h index ea9623147b87..81f5b338c76d 100644 --- a/arch/powerpc/include/asm/cputhreads.h +++ b/arch/powerpc/include/asm/cputhreads.h @@ -103,6 +103,7 @@ static inline u32 get_tensr(void) return 1; } +void book3e_stop_thread(int thread); #endif /* _ASM_POWERPC_CPUTHREADS_H */ diff --git a/arch/powerpc/include/asm/fsl_pm.h b/arch/powerpc/include/asm/fsl_pm.h new file mode 100644 index 000000000000..47df55e36d4f --- /dev/null +++ b/arch/powerpc/include/asm/fsl_pm.h @@ -0,0 +1,51 @@ +/* + * Support Power Management + * + * Copyright 2014-2015 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#ifndef __PPC_FSL_PM_H +#define __PPC_FSL_PM_H + +#define E500_PM_PH10 1 +#define E500_PM_PH15 2 +#define E500_PM_PH20 3 +#define E500_PM_PH30 4 +#define E500_PM_DOZE E500_PM_PH10 +#define E500_PM_NAP E500_PM_PH15 + +#define PLAT_PM_SLEEP 20 +#define PLAT_PM_LPM20 30 + +#define FSL_PM_SLEEP (1 << 0) +#define FSL_PM_DEEP_SLEEP (1 << 1) + +struct fsl_pm_ops { + /* mask pending interrupts to the RCPM from MPIC */ + void (*irq_mask)(int cpu); + + /* unmask pending interrupts to the RCPM from MPIC */ + void (*irq_unmask)(int cpu); + void (*cpu_enter_state)(int cpu, int state); + void (*cpu_exit_state)(int cpu, int state); + void (*cpu_up_prepare)(int cpu); + void (*cpu_die)(int cpu); + int (*plat_enter_sleep)(void); + void (*freeze_time_base)(bool freeze); + + /* keep the power of IP blocks during sleep/deep sleep */ + void (*set_ip_power)(bool enable, u32 mask); + + /* get platform supported power management modes */ + unsigned int (*get_pm_modes)(void); +}; + +extern const struct fsl_pm_ops *qoriq_pm_ops; + +int __init fsl_rcpm_init(void); + +#endif /* __PPC_FSL_PM_H */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1b779560728f..603625368ceb 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -181,6 +181,25 @@ exception_marker: #endif #ifdef CONFIG_PPC_BOOK3E +/* + * stop a thread in the same core + * input parameter: + * r3 = the thread physical id + */ +_GLOBAL(book3e_stop_thread) + cmpi 0, r3, 0 + beq 10f + cmpi 0, r3, 1 + beq 10f + /* If the thread id is invalid, just exit. */ + b 13f +10: + li r4, 1 + sld r4, r4, r3 + mtspr SPRN_TENC, r4 +13: + blr + _GLOBAL(fsl_secondary_thread_init) mfspr r4,SPRN_BUCSR diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 97915feffd42..e626461a63bd 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -8,6 +8,7 @@ menuconfig FSL_SOC_BOOKE select FSL_PCI if PCI select SERIAL_8250_EXTENDED if SERIAL_8250 select SERIAL_8250_SHARE_IRQ if SERIAL_8250 + select FSL_CORENET_RCPM if PPC_E500MC default y if FSL_SOC_BOOKE diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index 949f22c86e61..28720a4ded7b 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -9,11 +9,14 @@ #include #include +#include #include #include #include "mpc85xx.h" +const struct fsl_pm_ops *qoriq_pm_ops; + static const struct of_device_id mpc85xx_common_ids[] __initconst = { { .type = "soc", }, { .compatible = "soc", }, diff --git a/arch/powerpc/sysdev/Kconfig b/arch/powerpc/sysdev/Kconfig index a19332a38715..52dc165c0efb 100644 --- a/arch/powerpc/sysdev/Kconfig +++ b/arch/powerpc/sysdev/Kconfig @@ -40,3 +40,8 @@ config SCOM_DEBUGFS config GE_FPGA bool default n + +config FSL_CORENET_RCPM + bool + help + This option enables support for RCPM (Run Control/Power Management). diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index bd6bd729969c..a254824719f1 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_MMIO_NVRAM) += mmio_nvram.o obj-$(CONFIG_FSL_SOC) += fsl_soc.o fsl_mpic_err.o obj-$(CONFIG_FSL_PCI) += fsl_pci.o $(fsl-msi-obj-y) obj-$(CONFIG_FSL_PMC) += fsl_pmc.o +obj-$(CONFIG_FSL_CORENET_RCPM) += fsl_rcpm.o obj-$(CONFIG_FSL_LBC) += fsl_lbc.o obj-$(CONFIG_FSL_GTM) += fsl_gtm.o obj-$(CONFIG_FSL_85XX_CACHE_SRAM) += fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o diff --git a/arch/powerpc/sysdev/fsl_rcpm.c b/arch/powerpc/sysdev/fsl_rcpm.c new file mode 100644 index 000000000000..656d9ca057e5 --- /dev/null +++ b/arch/powerpc/sysdev/fsl_rcpm.c @@ -0,0 +1,385 @@ +/* + * RCPM(Run Control/Power Management) support + * + * Copyright 2012-2015 Freescale Semiconductor Inc. + * + * Author: Chenhui Zhao + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include + +#include +#include +#include +#include + +static struct ccsr_rcpm_v1 __iomem *rcpm_v1_regs; +static struct ccsr_rcpm_v2 __iomem *rcpm_v2_regs; +static unsigned int fsl_supported_pm_modes; + +static void rcpm_v1_irq_mask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + setbits32(&rcpm_v1_regs->cpmimr, mask); + setbits32(&rcpm_v1_regs->cpmcimr, mask); + setbits32(&rcpm_v1_regs->cpmmcmr, mask); + setbits32(&rcpm_v1_regs->cpmnmimr, mask); +} + +static void rcpm_v2_irq_mask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + setbits32(&rcpm_v2_regs->tpmimr0, mask); + setbits32(&rcpm_v2_regs->tpmcimr0, mask); + setbits32(&rcpm_v2_regs->tpmmcmr0, mask); + setbits32(&rcpm_v2_regs->tpmnmimr0, mask); +} + +static void rcpm_v1_irq_unmask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + clrbits32(&rcpm_v1_regs->cpmimr, mask); + clrbits32(&rcpm_v1_regs->cpmcimr, mask); + clrbits32(&rcpm_v1_regs->cpmmcmr, mask); + clrbits32(&rcpm_v1_regs->cpmnmimr, mask); +} + +static void rcpm_v2_irq_unmask(int cpu) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + clrbits32(&rcpm_v2_regs->tpmimr0, mask); + clrbits32(&rcpm_v2_regs->tpmcimr0, mask); + clrbits32(&rcpm_v2_regs->tpmmcmr0, mask); + clrbits32(&rcpm_v2_regs->tpmnmimr0, mask); +} + +static void rcpm_v1_set_ip_power(bool enable, u32 mask) +{ + if (enable) + setbits32(&rcpm_v1_regs->ippdexpcr, mask); + else + clrbits32(&rcpm_v1_regs->ippdexpcr, mask); +} + +static void rcpm_v2_set_ip_power(bool enable, u32 mask) +{ + if (enable) + setbits32(&rcpm_v2_regs->ippdexpcr[0], mask); + else + clrbits32(&rcpm_v2_regs->ippdexpcr[0], mask); +} + +static void rcpm_v1_cpu_enter_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + switch (state) { + case E500_PM_PH10: + setbits32(&rcpm_v1_regs->cdozcr, mask); + break; + case E500_PM_PH15: + setbits32(&rcpm_v1_regs->cnapcr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + break; + } +} + +static void rcpm_v2_cpu_enter_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + u32 mask = 1 << cpu_core_index_of_thread(cpu); + + switch (state) { + case E500_PM_PH10: + /* one bit corresponds to one thread for PH10 of 6500 */ + setbits32(&rcpm_v2_regs->tph10setr0, 1 << hw_cpu); + break; + case E500_PM_PH15: + setbits32(&rcpm_v2_regs->pcph15setr, mask); + break; + case E500_PM_PH20: + setbits32(&rcpm_v2_regs->pcph20setr, mask); + break; + case E500_PM_PH30: + setbits32(&rcpm_v2_regs->pcph30setr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + } +} + +static void rcpm_v1_cpu_die(int cpu) +{ + rcpm_v1_cpu_enter_state(cpu, E500_PM_PH15); +} + +#ifdef CONFIG_PPC64 +static void qoriq_disable_thread(int cpu) +{ + int thread = cpu_thread_in_core(cpu); + + book3e_stop_thread(thread); +} +#endif + +static void rcpm_v2_cpu_die(int cpu) +{ +#ifdef CONFIG_PPC64 + int primary; + + if (threads_per_core == 2) { + primary = cpu_first_thread_sibling(cpu); + if (cpu_is_offline(primary) && cpu_is_offline(primary + 1)) { + /* if both threads are offline, put the cpu in PH20 */ + rcpm_v2_cpu_enter_state(cpu, E500_PM_PH20); + } else { + /* if only one thread is offline, disable the thread */ + qoriq_disable_thread(cpu); + } + } +#endif + + if (threads_per_core == 1) + rcpm_v2_cpu_enter_state(cpu, E500_PM_PH20); +} + +static void rcpm_v1_cpu_exit_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + unsigned int mask = 1 << hw_cpu; + + switch (state) { + case E500_PM_PH10: + clrbits32(&rcpm_v1_regs->cdozcr, mask); + break; + case E500_PM_PH15: + clrbits32(&rcpm_v1_regs->cnapcr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + break; + } +} + +static void rcpm_v1_cpu_up_prepare(int cpu) +{ + rcpm_v1_cpu_exit_state(cpu, E500_PM_PH15); + rcpm_v1_irq_unmask(cpu); +} + +static void rcpm_v2_cpu_exit_state(int cpu, int state) +{ + int hw_cpu = get_hard_smp_processor_id(cpu); + u32 mask = 1 << cpu_core_index_of_thread(cpu); + + switch (state) { + case E500_PM_PH10: + setbits32(&rcpm_v2_regs->tph10clrr0, 1 << hw_cpu); + break; + case E500_PM_PH15: + setbits32(&rcpm_v2_regs->pcph15clrr, mask); + break; + case E500_PM_PH20: + setbits32(&rcpm_v2_regs->pcph20clrr, mask); + break; + case E500_PM_PH30: + setbits32(&rcpm_v2_regs->pcph30clrr, mask); + break; + default: + pr_warn("Unknown cpu PM state (%d)\n", state); + } +} + +static void rcpm_v2_cpu_up_prepare(int cpu) +{ + rcpm_v2_cpu_exit_state(cpu, E500_PM_PH20); + rcpm_v2_irq_unmask(cpu); +} + +static int rcpm_v1_plat_enter_state(int state) +{ + u32 *pmcsr_reg = &rcpm_v1_regs->powmgtcsr; + int ret = 0; + int result; + + switch (state) { + case PLAT_PM_SLEEP: + setbits32(pmcsr_reg, RCPM_POWMGTCSR_SLP); + + /* Upon resume, wait for RCPM_POWMGTCSR_SLP bit to be clear. */ + result = spin_event_timeout( + !(in_be32(pmcsr_reg) & RCPM_POWMGTCSR_SLP), 10000, 10); + if (!result) { + pr_err("timeout waiting for SLP bit to be cleared\n"); + ret = -ETIMEDOUT; + } + break; + default: + pr_warn("Unknown platform PM state (%d)", state); + ret = -EINVAL; + } + + return ret; +} + +static int rcpm_v2_plat_enter_state(int state) +{ + u32 *pmcsr_reg = &rcpm_v2_regs->powmgtcsr; + int ret = 0; + int result; + + switch (state) { + case PLAT_PM_LPM20: + /* clear previous LPM20 status */ + setbits32(pmcsr_reg, RCPM_POWMGTCSR_P_LPM20_ST); + /* enter LPM20 status */ + setbits32(pmcsr_reg, RCPM_POWMGTCSR_LPM20_RQ); + + /* At this point, the device is in LPM20 status. */ + + /* resume ... */ + result = spin_event_timeout( + !(in_be32(pmcsr_reg) & RCPM_POWMGTCSR_LPM20_ST), 10000, 10); + if (!result) { + pr_err("timeout waiting for LPM20 bit to be cleared\n"); + ret = -ETIMEDOUT; + } + break; + default: + pr_warn("Unknown platform PM state (%d)\n", state); + ret = -EINVAL; + } + + return ret; +} + +static int rcpm_v1_plat_enter_sleep(void) +{ + return rcpm_v1_plat_enter_state(PLAT_PM_SLEEP); +} + +static int rcpm_v2_plat_enter_sleep(void) +{ + return rcpm_v2_plat_enter_state(PLAT_PM_LPM20); +} + +static void rcpm_common_freeze_time_base(u32 *tben_reg, int freeze) +{ + static u32 mask; + + if (freeze) { + mask = in_be32(tben_reg); + clrbits32(tben_reg, mask); + } else { + setbits32(tben_reg, mask); + } + + /* read back to push the previous write */ + in_be32(tben_reg); +} + +static void rcpm_v1_freeze_time_base(bool freeze) +{ + rcpm_common_freeze_time_base(&rcpm_v1_regs->ctbenr, freeze); +} + +static void rcpm_v2_freeze_time_base(bool freeze) +{ + rcpm_common_freeze_time_base(&rcpm_v2_regs->pctbenr, freeze); +} + +static unsigned int rcpm_get_pm_modes(void) +{ + return fsl_supported_pm_modes; +} + +static const struct fsl_pm_ops qoriq_rcpm_v1_ops = { + .irq_mask = rcpm_v1_irq_mask, + .irq_unmask = rcpm_v1_irq_unmask, + .cpu_enter_state = rcpm_v1_cpu_enter_state, + .cpu_exit_state = rcpm_v1_cpu_exit_state, + .cpu_up_prepare = rcpm_v1_cpu_up_prepare, + .cpu_die = rcpm_v1_cpu_die, + .plat_enter_sleep = rcpm_v1_plat_enter_sleep, + .set_ip_power = rcpm_v1_set_ip_power, + .freeze_time_base = rcpm_v1_freeze_time_base, + .get_pm_modes = rcpm_get_pm_modes, +}; + +static const struct fsl_pm_ops qoriq_rcpm_v2_ops = { + .irq_mask = rcpm_v2_irq_mask, + .irq_unmask = rcpm_v2_irq_unmask, + .cpu_enter_state = rcpm_v2_cpu_enter_state, + .cpu_exit_state = rcpm_v2_cpu_exit_state, + .cpu_up_prepare = rcpm_v2_cpu_up_prepare, + .cpu_die = rcpm_v2_cpu_die, + .plat_enter_sleep = rcpm_v2_plat_enter_sleep, + .set_ip_power = rcpm_v2_set_ip_power, + .freeze_time_base = rcpm_v2_freeze_time_base, + .get_pm_modes = rcpm_get_pm_modes, +}; + +static const struct of_device_id rcpm_matches[] = { + { + .compatible = "fsl,qoriq-rcpm-1.0", + .data = &qoriq_rcpm_v1_ops, + }, + { + .compatible = "fsl,qoriq-rcpm-2.0", + .data = &qoriq_rcpm_v2_ops, + }, + { + .compatible = "fsl,qoriq-rcpm-2.1", + .data = &qoriq_rcpm_v2_ops, + }, + {}, +}; + +int __init fsl_rcpm_init(void) +{ + struct device_node *np; + const struct of_device_id *match; + void __iomem *base; + + np = of_find_matching_node_and_match(NULL, rcpm_matches, &match); + if (!np) + return 0; + + base = of_iomap(np, 0); + of_node_put(np); + if (!base) { + pr_err("of_iomap() error.\n"); + return -ENOMEM; + } + + rcpm_v1_regs = base; + rcpm_v2_regs = base; + + /* support sleep by default */ + fsl_supported_pm_modes = FSL_PM_SLEEP; + + qoriq_pm_ops = match->data; + + return 0; +} -- cgit v1.2.3 From 56f1ba280719469bffc870b6b2d935f3a3019ea4 Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:14:00 +0800 Subject: powerpc/mpc85xx: refactor the PM operations Freescale CoreNet-based and Non-CoreNet-based platforms require different PM operations. This patch extracted existing PM operations on Non-CoreNet-based platforms to a new file which can accommodate both platforms. In this way, PM operation codes are clearer structurally. Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian Signed-off-by: Scott Wood --- arch/powerpc/platforms/85xx/Makefile | 1 + arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c | 106 +++++++++++++++++++++++++++ arch/powerpc/platforms/85xx/smp.c | 73 +++++------------- arch/powerpc/platforms/85xx/smp.h | 1 + 4 files changed, 127 insertions(+), 54 deletions(-) create mode 100644 arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index 1fe7fb95175a..7bc86dae9517 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -2,6 +2,7 @@ # Makefile for the PowerPC 85xx linux kernel. # obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_FSL_PMC) += mpc85xx_pm_ops.o obj-y += common.o diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c new file mode 100644 index 000000000000..f05325f0cc03 --- /dev/null +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -0,0 +1,106 @@ +/* + * MPC85xx PM operators + * + * Copyright 2015 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#include +#include +#include +#include + +#include +#include + +static struct ccsr_guts __iomem *guts; + +static void mpc85xx_irq_mask(int cpu) +{ + +} + +static void mpc85xx_irq_unmask(int cpu) +{ + +} + +static void mpc85xx_cpu_die(int cpu) +{ + u32 tmp; + + tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP; + mtspr(SPRN_HID0, tmp); + + /* Enter NAP mode. */ + tmp = mfmsr(); + tmp |= MSR_WE; + asm volatile( + "msync\n" + "mtmsr %0\n" + "isync\n" + : + : "r" (tmp)); +} + +static void mpc85xx_cpu_up_prepare(int cpu) +{ + +} + +static void mpc85xx_freeze_time_base(bool freeze) +{ + uint32_t mask; + + mask = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1; + if (freeze) + setbits32(&guts->devdisr, mask); + else + clrbits32(&guts->devdisr, mask); + + in_be32(&guts->devdisr); +} + +static const struct of_device_id mpc85xx_smp_guts_ids[] = { + { .compatible = "fsl,mpc8572-guts", }, + { .compatible = "fsl,p1020-guts", }, + { .compatible = "fsl,p1021-guts", }, + { .compatible = "fsl,p1022-guts", }, + { .compatible = "fsl,p1023-guts", }, + { .compatible = "fsl,p2020-guts", }, + { .compatible = "fsl,bsc9132-guts", }, + {}, +}; + +static const struct fsl_pm_ops mpc85xx_pm_ops = { + .freeze_time_base = mpc85xx_freeze_time_base, + .irq_mask = mpc85xx_irq_mask, + .irq_unmask = mpc85xx_irq_unmask, + .cpu_die = mpc85xx_cpu_die, + .cpu_up_prepare = mpc85xx_cpu_up_prepare, +}; + +int __init mpc85xx_setup_pmc(void) +{ + struct device_node *np; + + np = of_find_matching_node(NULL, mpc85xx_smp_guts_ids); + if (np) { + guts = of_iomap(np, 0); + of_node_put(np); + if (!guts) { + pr_err("Could not map guts node address\n"); + return -ENOMEM; + } + } + + qoriq_pm_ops = &mpc85xx_pm_ops; + + return 0; +} diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 4a7841616c7f..ab0459d904ff 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -2,7 +2,7 @@ * Author: Andy Fleming * Kumar Gala * - * Copyright 2006-2008, 2011-2012 Freescale Semiconductor Inc. + * Copyright 2006-2008, 2011-2012, 2015 Freescale Semiconductor Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -29,6 +28,7 @@ #include #include #include +#include #include #include @@ -43,24 +43,11 @@ struct epapr_spin_table { u32 pir; }; -static struct ccsr_guts __iomem *guts; +#ifdef CONFIG_HOTPLUG_CPU static u64 timebase; static int tb_req; static int tb_valid; -static void mpc85xx_timebase_freeze(int freeze) -{ - uint32_t mask; - - mask = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1; - if (freeze) - setbits32(&guts->devdisr, mask); - else - clrbits32(&guts->devdisr, mask); - - in_be32(&guts->devdisr); -} - static void mpc85xx_give_timebase(void) { unsigned long flags; @@ -71,7 +58,7 @@ static void mpc85xx_give_timebase(void) barrier(); tb_req = 0; - mpc85xx_timebase_freeze(1); + qoriq_pm_ops->freeze_time_base(true); #ifdef CONFIG_PPC64 /* * e5500/e6500 have a workaround for erratum A-006958 in place @@ -104,7 +91,7 @@ static void mpc85xx_give_timebase(void) while (tb_valid) barrier(); - mpc85xx_timebase_freeze(0); + qoriq_pm_ops->freeze_time_base(false); local_irq_restore(flags); } @@ -126,31 +113,25 @@ static void mpc85xx_take_timebase(void) local_irq_restore(flags); } -#ifdef CONFIG_HOTPLUG_CPU static void smp_85xx_mach_cpu_die(void) { unsigned int cpu = smp_processor_id(); - u32 tmp; local_irq_disable(); + hard_irq_disable(); + /* mask all irqs to prevent cpu wakeup */ + qoriq_pm_ops->irq_mask(cpu); + idle_task_exit(); - generic_set_cpu_dead(cpu); - mb(); mtspr(SPRN_TCR, 0); + mtspr(SPRN_TSR, mfspr(SPRN_TSR)); - cur_cpu_spec->cpu_down_flush(); + generic_set_cpu_dead(cpu); - tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP; - mtspr(SPRN_HID0, tmp); - isync(); + cur_cpu_spec->cpu_down_flush(); - /* Enter NAP mode. */ - tmp = mfmsr(); - tmp |= MSR_WE; - mb(); - mtmsr(tmp); - isync(); + qoriq_pm_ops->cpu_die(cpu); while (1) ; @@ -468,16 +449,6 @@ static void smp_85xx_setup_cpu(int cpu_nr) smp_85xx_basic_setup(cpu_nr); } -static const struct of_device_id mpc85xx_smp_guts_ids[] = { - { .compatible = "fsl,mpc8572-guts", }, - { .compatible = "fsl,p1020-guts", }, - { .compatible = "fsl,p1021-guts", }, - { .compatible = "fsl,p1022-guts", }, - { .compatible = "fsl,p1023-guts", }, - { .compatible = "fsl,p2020-guts", }, - {}, -}; - void __init mpc85xx_smp_init(void) { struct device_node *np; @@ -501,22 +472,16 @@ void __init mpc85xx_smp_init(void) smp_85xx_ops.probe = NULL; } - np = of_find_matching_node(NULL, mpc85xx_smp_guts_ids); - if (np) { - guts = of_iomap(np, 0); - of_node_put(np); - if (!guts) { - pr_err("%s: Could not map guts node address\n", - __func__); - return; - } +#ifdef CONFIG_HOTPLUG_CPU +#ifdef CONFIG_FSL_PMC + mpc85xx_setup_pmc(); +#endif + if (qoriq_pm_ops) { smp_85xx_ops.give_timebase = mpc85xx_give_timebase; smp_85xx_ops.take_timebase = mpc85xx_take_timebase; -#ifdef CONFIG_HOTPLUG_CPU ppc_md.cpu_die = smp_85xx_mach_cpu_die; -#endif } - +#endif smp_ops = &smp_85xx_ops; #ifdef CONFIG_KEXEC diff --git a/arch/powerpc/platforms/85xx/smp.h b/arch/powerpc/platforms/85xx/smp.h index e2b44933ff19..0b20ae315c53 100644 --- a/arch/powerpc/platforms/85xx/smp.h +++ b/arch/powerpc/platforms/85xx/smp.h @@ -5,6 +5,7 @@ #ifdef CONFIG_SMP void __init mpc85xx_smp_init(void); +int __init mpc85xx_setup_pmc(void); #else static inline void mpc85xx_smp_init(void) { -- cgit v1.2.3 From 2f4f1f815bc6d03ea42d4f67dd1e284525e7524e Mon Sep 17 00:00:00 2001 From: chenhui zhao Date: Fri, 20 Nov 2015 17:14:01 +0800 Subject: powerpc/mpc85xx: Add hotplug support on E5500 and E500MC cores Freescale E500MC and E5500 core-based platforms, like P4080, T1040, support disabling/enabling CPU dynamically. This patch adds this feature on those platforms. Signed-off-by: Chenhui Zhao Signed-off-by: Tang Yuantian [scottwood: removed unused pr_fmt] Signed-off-by: Scott Wood --- arch/powerpc/Kconfig | 2 +- arch/powerpc/include/asm/smp.h | 3 + arch/powerpc/kernel/smp.c | 7 +- arch/powerpc/platforms/85xx/smp.c | 196 +++++++++++++++++++++----------------- 4 files changed, 118 insertions(+), 90 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7efddd18d700..d500772a1d9b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -389,7 +389,7 @@ config SWIOTLB config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ - PPC_PMAC || PPC_POWERNV || (PPC_85xx && !PPC_E500MC)) + PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE) ---help--- Sa