summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRobin Murphy <robin.murphy@arm.com>2020-01-20 18:52:29 +0000
committerCatalin Marinas <catalin.marinas@arm.com>2020-03-09 18:08:25 +0000
commite9c7ddbf8b4b6a291bf3b5bfa7c883235164d9be (patch)
treeaa52e43a553143c92273950d372f427607c06c2c
parent27afb236fe5adaa3911e47c91057ba783549226f (diff)
arm64: csum: Optimise IPv6 header checksum
Throwing our __uint128_t idioms at csum_ipv6_magic() makes it about 1.3x-2x faster across a range of microarchitecture/compiler combinations. Not much in absolute terms, but every little helps. Signed-off-by: Robin Murphy <robin.murphy@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
-rw-r--r--arch/arm64/include/asm/checksum.h7
-rw-r--r--arch/arm64/lib/csum.c27
2 files changed, 33 insertions, 1 deletions
diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h
index 8d2a7de39744..b6f7bc6da5fb 100644
--- a/arch/arm64/include/asm/checksum.h
+++ b/arch/arm64/include/asm/checksum.h
@@ -5,7 +5,12 @@
#ifndef __ASM_CHECKSUM_H
#define __ASM_CHECKSUM_H
-#include <linux/types.h>
+#include <linux/in6.h>
+
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum sum);
static inline __sum16 csum_fold(__wsum csum)
{
diff --git a/arch/arm64/lib/csum.c b/arch/arm64/lib/csum.c
index 1f82c66b32ea..60eccae2abad 100644
--- a/arch/arm64/lib/csum.c
+++ b/arch/arm64/lib/csum.c
@@ -124,3 +124,30 @@ unsigned int do_csum(const unsigned char *buff, int len)
return sum >> 16;
}
+
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum csum)
+{
+ __uint128_t src, dst;
+ u64 sum = (__force u64)csum;
+
+ src = *(const __uint128_t *)saddr->s6_addr;
+ dst = *(const __uint128_t *)daddr->s6_addr;
+
+ sum += (__force u32)htonl(len);
+#ifdef __LITTLE_ENDIAN
+ sum += (u32)proto << 24;
+#else
+ sum += proto;
+#endif
+ src += (src >> 64) | (src << 64);
+ dst += (dst >> 64) | (dst << 64);
+
+ sum = accumulate(sum, src >> 64);
+ sum = accumulate(sum, dst >> 64);
+
+ sum += ((sum >> 32) | (sum << 32));
+ return csum_fold((__force __wsum)(sum >> 32));
+}
+EXPORT_SYMBOL(csum_ipv6_magic);