summary | refs | log | tree | commit | diff | stats
path: root/arch/arm/crypto/chacha-scalar-core.S
diff options
context:
space:
mode:
author: Ard Biesheuvel <ardb@kernel.org>, 2019-11-08 13:22:14 +0100
committer: Herbert Xu <herbert@gondor.apana.org.au>, 2019-11-17 09:02:40 +0800
commit: b36d8c09e710c71f6a9690b6586fea2d1c9e1e27 (patch)
tree: 28964932a4baebfbdb1f8bfc741fedfb371a41b7 /arch/arm/crypto/chacha-scalar-core.S
parent: 29621d099f9c642b22a69dc8e7e20c108473a392 (diff)
crypto: arm/chacha - remove dependency on generic ChaCha driver
Instead of falling back to the generic ChaCha skcipher driver for non-SIMD cases, use a fast scalar implementation for ARM authored by Eric Biggers. This removes the module dependency on chacha-generic altogether, which also simplifies things when we expose the ChaCha library interface from this module.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/arm/crypto/chacha-scalar-core.S')
-rw-r--r-- arch/arm/crypto/chacha-scalar-core.S | 65
1 files changed, 32 insertions, 33 deletions
diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
index 2140319b64a0..2985b80a45b5 100644
--- a/arch/arm/crypto/chacha-scalar-core.S
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -41,14 +41,6 @@
X14 .req r12
X15 .req r14
-.Lexpand_32byte_k:
- // "expand 32-byte k"
- .word 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
-
-#ifdef __thumb2__
-# define adrl adr
-#endif
-
.macro __rev out, in, t0, t1, t2
.if __LINUX_ARM_ARCH__ >= 6
rev \out, \in
@@ -391,61 +383,65 @@
.endm // _chacha
/*
- * void chacha20_arm(u8 *out, const u8 *in, size_t len, const u32 key[8],
- * const u32 iv[4]);
+ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ * const u32 *state, int nrounds);
*/
-ENTRY(chacha20_arm)
+ENTRY(chacha_doarm)
cmp r2, #0 // len == 0?
reteq lr
+ ldr ip, [sp]
+ cmp ip, #12
+
push {r0-r2,r4-r11,lr}
// Push state x0-x15 onto stack.
// Also store an extra copy of x10-x11 just before the state.
- ldr r4, [sp, #48] // iv
- mov r0, sp
- sub sp, #80
-
- // iv: x12-x15
- ldm r4, {X12,X13,X14,X15}
- stmdb r0!, {X12,X13,X14,X15}
+ add X12, r3, #48
+ ldm X12, {X12,X13,X14,X15}
+ push {X12,X13,X14,X15}
+ sub sp, sp, #64
- // key: x4-x11
- __ldrd X8_X10, X9_X11, r3, 24
+ __ldrd X8_X10, X9_X11, r3, 40
__strd X8_X10, X9_X11, sp, 8
- stmdb r0!, {X8_X10, X9_X11}
- ldm r3, {X4-X9_X11}
- stmdb r0!, {X4-X9_X11}
-
- // constants: x0-x3
- adrl X3, .Lexpand_32byte_k
- ldm X3, {X0-X3}
+ __strd X8_X10, X9_X11, sp, 56
+ ldm r3, {X0-X9_X11}
__strd X0, X1, sp, 16
__strd X2, X3, sp, 24
+ __strd X4, X5, sp, 32
+ __strd X6, X7, sp, 40
+ __strd X8_X10, X9_X11, sp, 48
+ beq 1f
_chacha 20
- add sp, #76
+0: add sp, #76
pop {r4-r11, pc}
-ENDPROC(chacha20_arm)
+
+1: _chacha 12
+ b 0b
+ENDPROC(chacha_doarm)
/*
- * void hchacha20_arm(const u32 state[16], u32 out[8]);
+ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
*/
-ENTRY(hchacha20_arm)
+ENTRY(hchacha_block_arm)
push {r1,r4-r11,lr}
+ cmp r2, #12 // ChaCha12 ?
+
mov r14, r0
ldmia r14!, {r0-r11} // load x0-x11
push {r10-r11} // store x10-x11 to stack
ldm r14, {r10-r12,r14} // load x12-x15
sub sp, #8
+ beq 1f
_chacha_permute 20
// Skip over (unused0-unused1, x10-x11)
- add sp, #16
+0: add sp, #16
// Fix up rotations of x12-x15
ror X12, X12, #drot
@@ -458,4 +454,7 @@ ENTRY(hchacha20_arm)
stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
pop {r4-r11,pc}
-ENDPROC(hchacha20_arm)
+
+1: _chacha_permute 12
+ b 0b
+ENDPROC(hchacha_block_arm)