diff options
author | Pauli <pauli@openssl.org> | 2021-05-20 13:51:59 +1000 |
---|---|---|
committer | Matt Caswell <matt@openssl.org> | 2021-05-20 08:51:30 +0100 |
commit | e3884ec5c37334e585e9208ce69d7e5b3cad4624 (patch) | |
tree | 08ade3022fda3a64cd84b629736c0c9ac051833b /crypto/aes | |
parent | b7140b0604bdfaa034452d97648a9c23a97568e4 (diff) |
Revert "ARM assembly pack: translate bit-sliced AES implementation to AArch64"
This reverts commit da51566b256e0c0536d5b986e676863b0526bf5e.
Fixes #15321
Reviewed-by: Tim Hudson <tjh@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/15364)
Diffstat (limited to 'crypto/aes')
-rw-r--r-- | crypto/aes/asm/bsaes-armv8.S | 2338 | ||||
-rw-r--r-- | crypto/aes/build.info | 5 |
2 files changed, 2 insertions, 2341 deletions
diff --git a/crypto/aes/asm/bsaes-armv8.S b/crypto/aes/asm/bsaes-armv8.S deleted file mode 100644 index 9bd02d0c8a..0000000000 --- a/crypto/aes/asm/bsaes-armv8.S +++ /dev/null @@ -1,2338 +0,0 @@ -// Copyright 2021 The OpenSSL Project Authors. All Rights Reserved. -// -// Licensed under the OpenSSL license (the "License"). You may not use -// this file except in compliance with the License. You can obtain a copy -// in the file LICENSE in the source distribution or at -// https://www.openssl.org/source/license.html -// -// ==================================================================== -// Written by Ben Avison <bavison@riscosopen.org> for the OpenSSL -// project. Rights for redistribution and usage in source and binary -// forms are granted according to the OpenSSL license. -// ==================================================================== -// -// This implementation is a translation of bsaes-armv7 for AArch64. -// No attempt has been made to carry across the build switches for -// kernel targets, since the Linux kernel crypto support has moved on -// from when it was based on OpenSSL. - -// A lot of hand-scheduling has been performed. Consequently, this code -// doesn't factor out neatly into macros in the same way that the -// AArch32 version did, and there is little to be gained by wrapping it -// up in Perl, and it is presented as pure assembly. - - -#include "crypto/arm_arch.h" - -.text - -.type _bsaes_decrypt8,%function -.align 4 -// On entry: -// x9 -> key (previously expanded using _bsaes_key_convert) -// x10 = number of rounds -// v0-v7 input data -// On exit: -// x9-x11 corrupted -// other general-purpose registers preserved -// v0-v7 output data -// v11-v15 preserved -// other SIMD registers corrupted -_bsaes_decrypt8: - ldr q8, [x9], #16 - adr x11, .LM0ISR - movi v9.16b, #0x55 - ldr q10, [x11], #16 - movi v16.16b, #0x33 - movi v17.16b, #0x0f - sub x10, x10, #1 - eor v0.16b, v0.16b, v8.16b - eor v1.16b, v1.16b, v8.16b - eor v2.16b, v2.16b, v8.16b - eor v4.16b, v4.16b, v8.16b - eor v3.16b, v3.16b, v8.16b - eor v5.16b, v5.16b, v8.16b - tbl v0.16b, {v0.16b}, v10.16b - tbl v1.16b, {v1.16b}, v10.16b - tbl v2.16b, {v2.16b}, v10.16b - tbl v4.16b, {v4.16b}, v10.16b - eor v6.16b, v6.16b, v8.16b - eor v7.16b, v7.16b, v8.16b - tbl v3.16b, {v3.16b}, v10.16b - tbl v5.16b, {v5.16b}, v10.16b - tbl v6.16b, {v6.16b}, v10.16b - ushr v8.2d, v0.2d, #1 - tbl v7.16b, {v7.16b}, v10.16b - ushr v10.2d, v4.2d, #1 - ushr v18.2d, v2.2d, #1 - eor v8.16b, v8.16b, v1.16b - ushr v19.2d, v6.2d, #1 - eor v10.16b, v10.16b, v5.16b - eor v18.16b, v18.16b, v3.16b - and v8.16b, v8.16b, v9.16b - eor v19.16b, v19.16b, v7.16b - and v10.16b, v10.16b, v9.16b - and v18.16b, v18.16b, v9.16b - eor v1.16b, v1.16b, v8.16b - shl v8.2d, v8.2d, #1 - and v9.16b, v19.16b, v9.16b - eor v5.16b, v5.16b, v10.16b - shl v10.2d, v10.2d, #1 - eor v3.16b, v3.16b, v18.16b - shl v18.2d, v18.2d, #1 - eor v0.16b, v0.16b, v8.16b - shl v8.2d, v9.2d, #1 - eor v7.16b, v7.16b, v9.16b - eor v4.16b, v4.16b, v10.16b - eor v2.16b, v2.16b, v18.16b - ushr v9.2d, v1.2d, #2 - eor v6.16b, v6.16b, v8.16b - ushr v8.2d, v0.2d, #2 - ushr v10.2d, v5.2d, #2 - ushr v18.2d, v4.2d, #2 - eor v9.16b, v9.16b, v3.16b - eor v8.16b, v8.16b, v2.16b - eor v10.16b, v10.16b, v7.16b - eor v18.16b, v18.16b, v6.16b - and v9.16b, v9.16b, v16.16b - and v8.16b, v8.16b, v16.16b - and v10.16b, v10.16b, v16.16b - and v16.16b, v18.16b, v16.16b - eor v3.16b, v3.16b, v9.16b - shl v9.2d, v9.2d, #2 - eor v2.16b, v2.16b, v8.16b - shl v8.2d, v8.2d, #2 - eor v7.16b, v7.16b, v10.16b - shl v10.2d, v10.2d, #2 - eor v6.16b, v6.16b, v16.16b - shl v16.2d, v16.2d, #2 - eor v1.16b, v1.16b, v9.16b - eor v0.16b, v0.16b, v8.16b - eor v5.16b, v5.16b, v10.16b - eor v4.16b, v4.16b, v16.16b - ushr v8.2d, v3.2d, #4 - ushr v9.2d, v2.2d, #4 - ushr v10.2d, v1.2d, #4 - ushr v16.2d, v0.2d, #4 - eor v8.16b, v8.16b, v7.16b - eor v9.16b, v9.16b, v6.16b - eor v10.16b, v10.16b, v5.16b - eor v16.16b, v16.16b, v4.16b - and v8.16b, v8.16b, v17.16b - and v9.16b, v9.16b, v17.16b - and v10.16b, v10.16b, v17.16b - and v16.16b, v16.16b, v17.16b - eor v7.16b, v7.16b, v8.16b - shl v8.2d, v8.2d, #4 - eor v6.16b, v6.16b, v9.16b - shl v9.2d, v9.2d, #4 - eor v5.16b, v5.16b, v10.16b - shl v10.2d, v10.2d, #4 - eor v4.16b, v4.16b, v16.16b - shl v16.2d, v16.2d, #4 - eor v3.16b, v3.16b, v8.16b - eor v2.16b, v2.16b, v9.16b - eor v1.16b, v1.16b, v10.16b - eor v0.16b, v0.16b, v16.16b - b .Ldec_sbox -.align 4 -.Ldec_loop: - ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64 - ldp q8, q9, [x9], #32 - eor v0.16b, v16.16b, v0.16b - ldr q10, [x9], #16 - eor v1.16b, v17.16b, v1.16b - ldr q16, [x9], #16 - eor v2.16b, v18.16b, v2.16b - eor v3.16b, v19.16b, v3.16b - eor v4.16b, v8.16b, v4.16b - eor v5.16b, v9.16b, v5.16b - eor v6.16b, v10.16b, v6.16b - eor v7.16b, v16.16b, v7.16b - tbl v0.16b, {v0.16b}, v28.16b - tbl v1.16b, {v1.16b}, v28.16b - tbl v2.16b, {v2.16b}, v28.16b - tbl v3.16b, {v3.16b}, v28.16b - tbl v4.16b, {v4.16b}, v28.16b - tbl v5.16b, {v5.16b}, v28.16b - tbl v6.16b, {v6.16b}, v28.16b - tbl v7.16b, {v7.16b}, v28.16b -.Ldec_sbox: - eor v1.16b, v1.16b, v4.16b - eor v3.16b, v3.16b, v4.16b - subs x10, x10, #1 - eor v4.16b, v4.16b, v7.16b - eor v2.16b, v2.16b, v7.16b - eor v1.16b, v1.16b, v6.16b - eor v6.16b, v6.16b, v4.16b - eor v2.16b, v2.16b, v5.16b - eor v0.16b, v0.16b, v1.16b - eor v7.16b, v7.16b, v6.16b - eor v8.16b, v6.16b, v2.16b - and v9.16b, v4.16b, v6.16b - eor v10.16b, v2.16b, v6.16b - eor v3.16b, v3.16b, v0.16b - eor v5.16b, v5.16b, v0.16b - eor v16.16b, v7.16b, v4.16b - eor v17.16b, v4.16b, v0.16b - and v18.16b, v0.16b, v2.16b - eor v19.16b, v7.16b, v4.16b - eor v1.16b, v1.16b, v3.16b - eor v20.16b, v3.16b, v0.16b - eor v21.16b, v5.16b, v2.16b - eor v22.16b, v3.16b, v7.16b - and v8.16b, v17.16b, v8.16b - orr v17.16b, v3.16b, v5.16b - eor v23.16b, v1.16b, v6.16b - eor v24.16b, v20.16b, v16.16b - eor v25.16b, v1.16b, v5.16b - orr v26.16b, v20.16b, v21.16b - and v20.16b, v20.16b, v21.16b - and v27.16b, v7.16b, v1.16b - eor v21.16b, v21.16b, v23.16b - orr v28.16b, v16.16b, v23.16b - orr v29.16b, v22.16b, v25.16b - eor v26.16b, v26.16b, v8.16b - and v16.16b, v16.16b, v23.16b - and v22.16b, v22.16b, v25.16b - and v21.16b, v24.16b, v21.16b - eor v8.16b, v28.16b, v8.16b - eor v23.16b, v5.16b, v2.16b - eor v24.16b, v1.16b, v6.16b - eor v16.16b, v16.16b, v22.16b - eor v22.16b, v3.16b, v0.16b - eor v25.16b, v29.16b, v21.16b - eor v21.16b, v26.16b, v21.16b - eor v8.16b, v8.16b, v20.16b - eor v26.16b, v23.16b, v24.16b - eor v16.16b, v16.16b, v20.16b - eor v28.16b, v22.16b, v19.16b - eor v20.16b, v25.16b, v20.16b - eor v9.16b, v21.16b, v9.16b - eor v8.16b, v8.16b, v18.16b - eor v18.16b, v5.16b, v1.16b - eor v21.16b, v16.16b, v17.16b - eor v16.16b, v16.16b, v17.16b - eor v17.16b, v20.16b, v27.16b - eor v20.16b, v3.16b, v7.16b - eor v25.16b, v9.16b, v8.16b - eor v27.16b, v0.16b, v4.16b - and v29.16b, v9.16b, v17.16b - eor v30.16b, v8.16b, v29.16b - eor v31.16b, v21.16b, v29.16b - eor v29.16b, v21.16b, v29.16b - bsl v30.16b, v17.16b, v21.16b - bsl v31.16b, v9.16b, v8.16b - bsl v16.16b, v30.16b, v29.16b - bsl v21.16b, v29.16b, v30.16b - eor v8.16b, v31.16b, v30.16b - and v1.16b, v1.16b, v31.16b - and v9.16b, v16.16b, v31.16b - and v6.16b, v6.16b, v30.16b - eor v16.16b, v17.16b, v21.16b - and v4.16b, v4.16b, v30.16b - eor v17.16b, v8.16b, v30.16b - and v21.16b, v24.16b, v8.16b - eor v9.16b, v9.16b, v25.16b - and v19.16b, v19.16b, v8.16b - eor v24.16b, v30.16b, v16.16b - eor v25.16b, v30.16b, v16.16b - and v7.16b, v7.16b, v17.16b - and v10.16b, v10.16b, v16.16b - eor v29.16b, v9.16b, v16.16b - eor v30.16b, v31.16b, v9.16b - and v0.16b, v24.16b, v0.16b - and v9.16b, v18.16b, v9.16b - and v2.16b, v25.16b, v2.16b - eor v10.16b, v10.16b, v6.16b - eor v18.16b, v29.16b, v16.16b - and v5.16b, v30.16b, v5.16b - eor v24.16b, v8.16b, v29.16b - and v25.16b, v26.16b, v29.16b - and v26.16b, v28.16b, v29.16b - eor v8.16b, v8.16b, v29.16b - eor v17.16b, v17.16b, v18.16b - eor v5.16b, v1.16b, v5.16b - and v23.16b, v24.16b, v23.16b - eor v21.16b, v21.16b, v25.16b - eor v19.16b, v19.16b, v26.16b - eor v0.16b, v4.16b, v0.16b - and v3.16b, v17.16b, v3.16b - eor v1.16b, v9.16b, v1.16b - eor v9.16b, v25.16b, v23.16b - eor v5.16b, v5.16b, v21.16b - eor v2.16b, v6.16b, v2.16b - and v6.16b, v8.16b, v22.16b - eor v3.16b, v7.16b, v3.16b - and v8.16b, v20.16b, v18.16b - eor v10.16b, v10.16b, v9.16b - eor v0.16b, v0.16b, v19.16b - eor v9.16b, v1.16b, v9.16b - eor v1.16b, v2.16b, v21.16b - eor v3.16b, v3.16b, v19.16b - and v16.16b, v27.16b, v16.16b - eor v17.16b, v26.16b, v6.16b - eor v6.16b, v8.16b, v7.16b - eor v7.16b, v1.16b, v9.16b - eor v1.16b, v5.16b, v3.16b - eor v2.16b, v10.16b, v3.16b - eor v4.16b, v16.16b, v4.16b - eor v8.16b, v6.16b, v17.16b - eor v5.16b, v9.16b, v3.16b - eor v9.16b, v0.16b, v1.16b - eor v6.16b, v7.16b, v1.16b - eor v0.16b, v4.16b, v17.16b - eor v4.16b, v8.16b, v7.16b - eor v7.16b, v9.16b, v2.16b - eor v8.16b, v3.16b, v0.16b - eor v7.16b, v7.16b, v5.16b - eor v3.16b, v4.16b, v7.16b - eor v4.16b, v7.16b, v0.16b - eor v7.16b, v8.16b, v3.16b - bcc .Ldec_done - ext v8.16b, v0.16b, v0.16b, #8 - ext v9.16b, v1.16b, v1.16b, #8 - ldr q28, [x11] // load from .LISR in common case (x10 > 0) - ext v10.16b, v6.16b, v6.16b, #8 - ext v16.16b, v3.16b, v3.16b, #8 - ext v17.16b, v5.16b, v5.16b, #8 - ext v18.16b, v4.16b, v4.16b, #8 - eor v8.16b, v8.16b, v0.16b - eor v9.16b, v9.16b, v1.16b - eor v10.16b, v10.16b, v6.16b - eor v16.16b, v16.16b, v3.16b - eor v17.16b, v17.16b, v5.16b - ext v19.16b, v2.16b, v2.16b, #8 - ext v20.16b, v7.16b, v7.16b, #8 - eor v18.16b, v18.16b, v4.16b - eor v6.16b, v6.16b, v8.16b - eor v8.16b, v2.16b, v10.16b - eor v4.16b, v4.16b, v9.16b - eor v2.16b, v19.16b, v2.16b - eor v9.16b, v20.16b, v7.16b - eor v0.16b, v0.16b, v16.16b - eor v1.16b, v1.16b, v16.16b - eor v6.16b, v6.16b, v17.16b - eor v8.16b, v8.16b, v16.16b - eor v7.16b, v7.16b, v18.16b - eor v4.16b, v4.16b, v16.16b - eor v2.16b, v3.16b, v2.16b - eor v1.16b, v1.16b, v17.16b - eor v3.16b, v5.16b, v9.16b - eor v5.16b, v8.16b, v17.16b - eor v7.16b, v7.16b, v17.16b - ext v8.16b, v0.16b, v0.16b, #12 - ext v9.16b, v6.16b, v6.16b, #12 - ext v10.16b, v4.16b, v4.16b, #12 - ext v16.16b, v1.16b, v1.16b, #12 - ext v17.16b, v5.16b, v5.16b, #12 - ext v18.16b, v7.16b, v7.16b, #12 - eor v0.16b, v0.16b, v8.16b - eor v6.16b, v6.16b, v9.16b - eor v4.16b, v4.16b, v10.16b - ext v19.16b, v2.16b, v2.16b, #12 - ext v20.16b, v3.16b, v3.16b, #12 - eor v1.16b, v1.16b, v16.16b - eor v5.16b, v5.16b, v17.16b - eor v7.16b, v7.16b, v18.16b - eor v2.16b, v2.16b, v19.16b - eor v16.16b, v16.16b, v0.16b - eor v3.16b, v3.16b, v20.16b - eor v17.16b, v17.16b, v4.16b - eor v10.16b, v10.16b, v6.16b - ext v0.16b, v0.16b, v0.16b, #8 - eor v9.16b, v9.16b, v1.16b - ext v1.16b, v1.16b, v1.16b, #8 - eor v8.16b, v8.16b, v3.16b - eor v16.16b, v16.16b, v3.16b - eor v18.16b, v18.16b, v5.16b - eor v19.16b, v19.16b, v7.16b - ext v21.16b, v5.16b, v5.16b, #8 - ext v5.16b, v7.16b, v7.16b, #8 - eor v7.16b, v20.16b, v2.16b - ext v4.16b, v4.16b, v4.16b, #8 - ext v20.16b, v3.16b, v3.16b, #8 - eor v17.16b, v17.16b, v3.16b - ext v2.16b, v2.16b, v2.16b, #8 - eor v3.16b, v10.16b, v3.16b - ext v10.16b, v6.16b, v6.16b, #8 - eor v0.16b, v0.16b, v8.16b - eor v1.16b, v1.16b, v16.16b - eor v5.16b, v5.16b, v18.16b - eor v3.16b, v3.16b, v4.16b - eor v7.16b, v20.16b, v7.16b - eor v6.16b, v2.16b, v19.16b - eor v4.16b, v21.16b, v17.16b - eor v2.16b, v10.16b, v9.16b - bne .Ldec_loop - ldr q28, [x11, #16]! // load from .LISRM0 on last round (x10 == 0) - b .Ldec_loop -.align 4 -.Ldec_done: - ushr v8.2d, v0.2d, #1 - movi v9.16b, #0x55 - ldr q10, [x9] - ushr v16.2d, v2.2d, #1 - movi v17.16b, #0x33 - ushr v18.2d, v6.2d, #1 - movi v19.16b, #0x0f - eor v8.16b, v8.16b, v1.16b - ushr v20.2d, v3.2d, #1 - eor v16.16b, v16.16b, v7.16b - eor v18.16b, v18.16b, v4.16b - and v8.16b, v8.16b, v9.16b - eor v20.16b, v20.16b, v5.16b - and v16.16b, v16.16b, v9.16b - and v18.16b, v18.16b, v9.16b - shl v21.2d, v8.2d, #1 - eor v1.16b, v1.16b, v8.16b - and v8.16b, v20.16b, v9.16b - eor v7.16b, v7.16b, v16.16b - shl v9.2d, v16.2d, #1 - eor v4.16b, v4.16b, v18.16b - shl v16.2d, v18.2d, #1 - eor v0.16b, v0.16b, v21.16b - shl v18.2d, v8.2d, #1 - eor v5.16b, v5.16b, v8.16b - eor v2.16b, v2.16b, v9.16b - eor v6.16b, v6.16b, v16.16b - ushr v8.2d, v1.2d, #2 - eor v3.16b, v3.16b, v18.16b - ushr v9.2d, v0.2d, #2 - ushr v16.2d, v7.2d, #2 - ushr v18.2d, v2.2d, #2 - eor v8.16b, v8.16b, v4.16b - eor v9.16b, v9.16b, v6.16b - eor v16.16b, v16.16b, v5.16b - eor v18.16b, v18.16b, v3.16b - and v8.16b, v8.16b, v17.16b - and v9.16b, v9.16b, v17.16b - and v16.16b, v16.16b, v17.16b - and v17.16b, v18.16b, v17.16b - eor v4.16b, v4.16b, v8.16b - shl v8.2d, v8.2d, #2 - eor v6.16b, v6.16b, v9.16b - shl v9.2d, v9.2d, #2 - eor v5.16b, v5.16b, v16.16b - shl v16.2d, v16.2d, #2 - eor v3.16b, v3.16b, v17.16b - shl v17.2d, v17.2d, #2 - eor v1.16b, v1.16b, v8.16b - eor v0.16b, v0.16b, v9.16b - eor v7.16b, v7.16b, v16.16b - eor v2.16b, v2.16b, v17.16b - ushr v8.2d, v4.2d, #4 - ushr v9.2d, v6.2d, #4 - ushr v16.2d, v1.2d, #4 - ushr v17.2d, v0.2d, #4 - eor v8.16b, v8.16b, v5.16b - eor v9.16b, v9.16b, v3.16b - eor v16.16b, v16.16b, v7.16b - eor v17.16b, v17.16b, v2.16b - and v8.16b, v8.16b, v19.16b - and v9.16b, v9.16b, v19.16b - and v16.16b, v16.16b, v19.16b - and v17.16b, v17.16b, v19.16b - eor v5.16b, v5.16b, v8.16b - shl v8.2d, v8.2d, #4 - eor v3.16b, v3.16b, v9.16b - shl v9.2d, v9.2d, #4 - eor v7.16b, v7.16b, v16.16b - shl v16.2d, v16.2d, #4 - eor v2.16b, v2.16b, v17.16b - shl v17.2d, v17.2d, #4 - eor v4.16b, v4.16b, v8.16b - eor v6.16b, v6.16b, v9.16b - eor v7.16b, v7.16b, v10.16b - eor v1.16b, v1.16b, v16.16b - eor v2.16b, v2.16b, v10.16b - eor v0.16b, v0.16b, v17.16b - eor v4.16b, v4.16b, v10.16b - eor v6.16b, v6.16b, v10.16b - eor v3.16b, v3.16b, v10.16b - eor v5.16b, v5.16b, v10.16b - eor v1.16b, v1.16b, v10.16b - eor v0.16b, v0.16b, v10.16b - ret -.size _bsaes_decrypt8,.-_bsaes_decrypt8 - -.type _bsaes_const,%object -.align 6 -_bsaes_const: -// InvShiftRows constants -// Used in _bsaes_decrypt8, which assumes contiguity -// .LM0ISR used with round 0 key -// .LISR used with middle round keys -// .LISRM0 used with final round key -.LM0ISR: -.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 -.LISR: -.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 -.LISRM0: -.quad 0x01040b0e0205080f, 0x0306090c00070a0d - -// ShiftRows constants -// Used in _bsaes_encrypt8, which assumes contiguity -// .LM0SR used with round 0 key -// .LSR used with middle round keys -// .LSRM0 used with final round key -.LM0SR: -.quad 0x0a0e02060f03070b, 0x0004080c05090d01 -.LSR: -.quad 0x0504070600030201, 0x0f0e0d0c0a09080b -.LSRM0: -.quad 0x0304090e00050a0f, 0x01060b0c0207080d - -.LM0_bigendian: -.quad 0x02060a0e03070b0f, 0x0004080c0105090d -.LM0_littleendian: -.quad 0x0105090d0004080c, 0x03070b0f02060a0e - -// Used in bsaes_ctr32_encrypt_blocks, prior to dropping into -// _bsaes_encrypt8_alt, for round 0 key in place of .LM0SR -.LREVM0SR: -.quad 0x090d01050c000408, 0x03070b0f060a0e02 - -.align 6 -.size _bsaes_const,.-_bsaes_const - -.type _bsaes_encrypt8,%function -.align 4 -// On entry: -// x9 -> key (previously expanded using _bsaes_key_convert) -// x10 = number of rounds -// v0-v7 input data -// On exit: -// x9-x11 corrupted -// other general-purpose registers preserved -// v0-v7 output data -// v11-v15 preserved -// other SIMD registers corrupted -_bsaes_encrypt8: - ldr q8, [x9], #16 - adr x11, .LM0SR - ldr q9, [x11], #16 -_bsaes_encrypt8_alt: - eor v0.16b, v0.16b, v8.16b - eor v1.16b, v1.16b, v8.16b - sub x10, x10, #1 - eor v2.16b, v2.16b, v8.16b - eor v4.16b, v4.16b, v8.16b - eor v3.16b, v3.16b, v8.16b - eor v5.16b, v5.16b, v8.16b - tbl v0.16b, {v0.16b}, v9.16b - tbl v1.16b, {v1.16b}, v9.16b - tbl v2.16b, {v2.16b}, v9.16b - tbl v4.16b, {v4.16b}, v9.16b - eor v6.16b, v6.16b, v8.16b - eor v7.16b, v7.16b, v8.16b - tbl v3.16b, {v3.16b}, v9.16b - tbl v5.16b, {v5.16b}, v9.16b - tbl v6.16b, {v6.16b}, v9.16b - ushr v8.2d, v0.2d, #1 - movi v10.16b, #0x55 - tbl v7.16b, {v7.16b}, v9.16b - ushr v9.2d, v4.2d, #1 - movi v16.16b, #0x33 - ushr v17.2d, v2.2d, #1 - eor v8.16b, v8.16b, v1.16b - movi v18.16b, #0x0f - ushr v19.2d, v6.2d, #1 - eor v9.16b, v9.16b, v5.16b - eor v17.16b, v17.16b, v3.16b - and v8.16b, v8.16b, v10.16b - eor v19.16b, v19.16b, v7.16b - and v9.16b, v9.16b, v10.16b - and v17.16b, v17.16b, v10.16b - eor v1.16b, v1.16b, v8.16b - shl v8.2d, v8.2d, #1 - and v10.16b, v19.16b, v10.16b - eor v5.16b, v5.16b, v9.16b - shl v9.2d, v9.2d, #1 - eor v3.16b, v3.16b, v17.16b - shl v17.2d, v17.2d, #1 - eor v0.16b, v0.16b, v8.16b - shl v8.2d, v10.2d, #1 - eor v7.16b, v7.16b, v10.16b - eor v4.16b, v4.16b, v9.16b - eor v2.16b, v2.16b, v17.16b - ushr v9.2d, v1.2d, #2 - eor v6.16b, v6.16b, v8.16b - ushr v8.2d, v0.2d, #2 - ushr v10.2d, v5.2d, #2 - ushr v17.2d, v4.2d, #2 - eor v9.16b, v9.16b, v3.16b - eor v8.16b, v8.16b, v2.16b - eor v10.16b, v10.16b, v7.16b - eor v17.16b, v17.16b, v6.16b - and v9.16b, v9.16b, v16.16b - and v8.16b, v8.16b, v16.16b - and v10.16b, v10.16b, v16.16b - and v16.16b, v17.16b, v16.16b - eor v3.16b, v3.16b, v9.16b - shl v9.2d, v9.2d, #2 - eor v2.16b, v2.16b, v8.16b - shl v8.2d, v8.2d, #2 - eor v7.16b, v7.16b, v10.16b - shl v10.2d, v10.2d, #2 - eor v6.16b, v6.16b, v16.16b - shl v16.2d, v16.2d, #2 - eor v1.16b, v1.16b, v9.16b - eor v0.16b, v0.16b, v8.16b - eor v5.16b, v5.16b, v10.16b - eor v4.16b, v4.16b, v16.16b - ushr v8.2d, v3.2d, #4 - ushr v9.2d, v2.2d, #4 - ushr v10.2d, v1.2d, #4 - ushr v16.2d, v0.2d, #4 - eor v8.16b, v8.16b, v7.16b - eor v9.16b, v9.16b, v6.16b - eor v10.16b, v10.16b, v5.16b - eor v16.16b, v16.16b, v4.16b - and v8.16b, v8.16b, v18.16b - and v9.16b, v9.16b, v18.16b - and v10.16b, v10.16b, v18.16b - and v16.16b, v16.16b, v18.16b - eor v7.16b, v7.16b, v8.16b - shl v8.2d, v8.2d, #4 - eor v6.16b, v6.16b, v9.16b - shl v9.2d, v9.2d, #4 - eor v5.16b, v5.16b, v10.16b - shl v10.2d, v10.2d, #4 - eor v4.16b, v4.16b, v16.16b - shl v16.2d, v16.2d, #4 - eor v3.16b, v3.16b, v8.16b - eor v2.16b, v2.16b, v9.16b - eor v1.16b, v1.16b, v10.16b - eor v0.16b, v0.16b, v16.16b - b .Lenc_sbox -.align 4 -.Lenc_loop: - ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [x9], #64 - ldp q8, q9, [x9], #32 - eor v0.16b, v16.16b, v0.16b - ldr q10, [x9], #16 - eor v1.16b, v17.16b, v1.16b - ldr q16, [x9], #16 - eor v2.16b, v18.16b, v2.16b - eor v3.16b, v19.16b, v3.16b - eor v4.16b, v8.16b, v4.16b - eor v5.16b, v9.16b, v5.16b - eor v6.16b, v10.16b, v6.16b - eor v7.16b, v16.16b, v7.16b - tbl v0.16b, {v0.16b}, v28.16b - tbl v1.16b, {v1.16b}, v28.16b - tbl v2.16b, {v2.16b}, v28.16b - tbl v3.16b, {v3.16b}, v28.16b - tbl v4.16b, {v4.16b}, v28.16b - tbl v5.16b, {v5.16b}, v28.16b - tbl v6.16b, {v6.16b}, v28.16b - tbl v7.16b, {v7.16b}, v28.16b -.Lenc_sbox: - eor v5.16b, v5.16b, v6.16b - eor v3.16b, v3.16b, v0.16b - subs x10, x10, #1 - eor v2.16b, v2.16b, v1.16b - eor v5.16b, v5.16b, v0.16b - eor v8.16b, v3.16b, v7.16b - eor v6.16b, v6.16b, v2.16b - eor v7.16b, v7.16b, v5.16b - eor v8.16b, v8.16b, v4.16b - eor v3.16b, v6.16b, v3.16b - eor v4.16b, v4.16b, v5.16b - eor v6.16b, v1.16b, v5.16b - eor v2.16b, v2.16b, v7.16b - eor v1.16b, v8.16b, v1.16b - eor v8.16b, v7.16b, v4.16b - eor v9.16b, v3.16b, v0.16b - eor v10.16b, v7.16b, v6.16b - eor v16.16b, v5.16b, v3.16b - eor v17.16b, v6.16b, v2.16b - eor v18.16b, v5.16b, v1.16b - eor v19.16b, v2.16b, v4.16b - eor v20.16b, v1.16b, v0.16b - orr v21.16b, v8.16b, v9.16b - orr v22.16b, v10.16b, v16.16b - eor v23.16b, v8.16b, v17.16b - eor v24.16b, v9.16b, v18.16b - and v19.16b, v19.16b, v20.16b - orr v20.16b, v17.16b, v18.16b - and v8.16b, v8.16b, v9.16b - and v9.16b, v17.16b, v18.16b - and v17.16b, v23.16b, v24.16b - and v10.16b, v10.16b, v16.16b - eor v16.16b, v21.16b, v19.16b - eor v18.16b, v20.16b, v19.16b - and v19.16b, v2.16b, v1.16b - and v20.16b, v6.16b, v5.16b - eor v21.16b, v22.16b, v17.16b - eor v9.16b, v9.16b, v10.16b - eor v10.16b, v16.16b, v17.16b - eor v16.16b, v18.16b, v8.16b - and v17.16b, v4.16b, v0.16b - orr v18.16b, v7.16b, v3.16b - eor v21.16b, v21.16b, v8.16b - eor v8.16b, v9.16b, v8.16b - eor v9.16b, v10.16b, v19.16b - eor v10.16b, v3.16b, v0.16b - eor v16.16b, v16.16b, v17.16b - eor v17.16b, v5.16b, v1.16b - eor v19.16b, v21.16b, v20.16b - eor v20.16b, v8.16b, v18.16b - eor v8.16b, v8.16b, v18.16b - eor v18.16b, v7.16b, v4.16b - eor v21.16b, v9.16b, v16.16b - eor v22.16b, v6.16b, v2.16b - and v23.16b, v9.16b, v19.16b - eor v24.16b, v10.16b, v17.16b - eor v25.16b, v0.16b, v1.16b - eor v26.16b, v7.16b, v6.16b - eor v27.16b, v18.16b, v22.16b - eor v28.16b, v3.16b, v5.16b - eor v29.16b, v16.16b, v23.16b - eor v30.16b, v20.16b, v23.16b - eor v23.16b, v20.16b, v23.16b - eor v31.16b, v4.16b, v2.16b - bsl v29.16b, v19.16b, v20.16b - bsl v30.16b, v9.16b, v16.16b - bsl v8.16b, v29.16b, v23.16b - bsl v20.16b, v23.16b, v29.16b - eor v9.16b, v30.16b, v29.16b - and v5.16b, v5.16b, v30.16b - and v8.16b, v8.16b, v30.16b - and v1.16b, v1.16b, v29.16b - eor v16.16b, v19.16b, v20.16b - and v2.16b, v2.16b, v29.16b - eor v19.16b, v9.16b, v29.16b - and v17.16b, v17.16b, v9.16b - eor v8.16b, v8.16b, v21.16b - and v20.16b, v22.16b, v9.16b - eor v21.16b, v29.16b, v16.16b - eor v22.16b, v29.16b, v16.16b - and v23.16b, v25.16b, v16.16b - and v6.16b, v6.16b, v19.16b - eor v25.16b, v8.16b, v16.16b - eor v29.16b, v30.16b, v8.16b - and v4.16b, v21.16b, v4.16b - and v8.16b, v28.16b, v8.16b - and v0.16b, v22.16b, v0.16b - eor v21.16b, v23.16b, v1.16b - eor v22.16b, v9.16b, v25.16b - eor v9.16b, v9.16b, v25.16b - eor v23.16b, v25.16b, v16.16b - and v3.16b, v29.16b, v3.16b - and v24.16b, v24.16b, v25.16b - and v25.16b, v27.16b, v25.16b - and v10.16b, v22.16b, v10.16b - and v9.16b, v9.16b, v18.16b - eor v18.16b, v19.16b, v23.16b - and v19.16b, v26.16b, v23.16b - eor v3.16b, v5.16b, v3.16b - eor v17.16b, v17.16b, v24.16b - eor v10.16b, v24.16b, v10.16b - and v16.16b, v31.16b, v16.16b - eor v20.16b, v20.16b, v25.16b - eor v9.16b, v25.16b, v9.16b - eor v4.16b, v2.16b, v4.16b - and v7.16b, v18.16b, v7.16b - eor v18.16b, v19.16b, v6.16b - eor v5.16b, v8.16b, v5.16b - eor v0.16b, v1.16b, v0.16b - eor v1.16b, v21.16b, v10.16b - eor v8.16b, v3.16b, v17.16b - eor v2.16b, v16.16b, v2.16b - eor v3.16b, v6.16b, v7.16b - eor v6.16b, v18.16b, v9.16b - eor v4.16b, v4.16b, v20.16b - eor v10.16b, v5.16b, v10.16b - eor v0.16b, v0.16b, v17.16b - eor v9.16b, v2.16b, v9.16b - eor v3.16b, v3.16b, v20.16b - eor v7.16b, v6.16b, v1.16b - eor v5.16b, v8.16b, v4.16b - eor v6.16b, v10.16b, v1.16b - eor v2.16b, v4.16b, v0.16b - eor v4.16b, v3.16b, v10.16b - eor v9.16b, v9.16b, v7.16b - eor v3.16b, v0.16b, v5.16b - eor v0.16b, v1.16b, v4.16b - eor v1.16b, v4.16b, v8.16b - eor v4.16b, v9.16b, v5.16b - eor v6.16b, v6.16b, v3.16b - bcc .Lenc_done - ext v8.16b, v0.16b, v0.16b, #12 - ext v9.16b, v4.16b, v4.16b, #12 - ldr q28, [x11] - ext v10.16b, v6.16b, v6.16b, #12 - ext v16.16b, v1.16b, v1.16b, #12 - ext v17.16b, v3.16b, v3.16b, #12 - ext v18.16b, v7.16b, v7.16b, #12 - eor v0.16b, v0.16b, v8.16b - eor v4.16b, v4.16b, v9.16b - eor v6.16b, v6.16b, v10.16b - ext v19.16b, v2.16b, v2.16b, #12 - ext v20.16b, v5.16b, v5.16b, #12 - eor v1.16b, v1.16b, v16.16b - eor v3.16b, v3.16b, v17.16b - eor v7.16b, v7.16b, v18.16b - eor v2.16b, v2.16b, v19.16b - eor v16.16b, v16.16b, v0.16b - eor v5.16b, v5.16b, v20.16b - eor v17.16b, v17.16b, v6.16b - eor v10.16b, v10.16b, v4.16b - ext v0.16b, v0.16b, v0.16b, #8 - eor v9.16b, v9.16b, v1.16b - ext v1.16b, v1.16b, v1.16b, #8 - eor v8.16b, v8.16b, v5.16b - eor v16.16b, v16.16b, v5.16b - eor v18.16b, v18.16b, v3.16b - eor v19.16b, v19.16b, v7.16b - ext v3.16b, v3.16b, v3.16b, #8 - ext v7.16b, v7.16b, v7.16b, #8 - eor v20.16b, v20.16b, v2.16b - ext v6.16b, v6.16b, v6.16b, #8 - ext v21.16b, v5.16b, v5.16b, #8 - eor v17.16b, v17.16b, v5.16b - ext v2.16b, v2.16b, v2.16b, #8 - eor v10.16b, v10.16b, v5.16b - ext v22.16b, v4.16b, v4.16b, #8 - eor v0.16b, v0.16b, v8.16b - eor v1.16b, v1.16b, v16.16b - eor v5.16b, v7.16b, v18.16b - eor v4.16b, v3.16b, v17.16b - eor v3.16b, v6.16b, v10.16b - eor v7.16b, v21.16b, v20.16b - eor v6.16b, v2.16b, v19.16b - eor v2.16b, v22.16b, v9.16b - bne .Lenc_loop - ldr q28, [x11, #16]! // load from .LSRM0 on last round (x10 == 0) - b .Lenc_loop -.align 4 -.Lenc_done: - ushr v8.2d, v0.2d, #1 - movi v9.16b, #0x55 - ldr q10, [x9] - ushr v16.2d, v3.2d, #1 - movi v17.16b, #0x33 - ushr v18.2d, v4.2d, #1 - movi v19.16b, #0x0f - eor v8.16b, v8.16b, v1.16b - ushr v20.2d, v2.2d, #1 - eor v16.16b, v16.16b, v7.16b - eor v18.16b, v18.16b, v6.16b - and v8.16b, v8.16b, v9.16b - eor v20.16b, v20.16b, v5.16b - and v16.16b, v16.16b, v9.16b - and v18.16b, v18.16b, v9.16b - shl v21.2d, v8.2d, #1 - eor v1.16b, v1.16b, v8.16b - and v8.16b, v20.16b, v9.16b - eor v7.16b, v7.16b, v16.16b - shl v9.2d, v16.2d, #1 - eor v6.16b, v6.16b, v18.16b - shl v16.2d, v18.2d, #1 - eor v0.16b, v0.16b, v21.16b - shl v18.2d, v8.2d, #1 - eor v5.16b, v5.16b, v8.16b - eor v3.16b, v3.16b, v9.16b - eor v4.16b, v4.16b, v16.16b - ushr v8.2d, v1.2d, #2 - eor v2.16b, v2.16b, v18.16b - ushr v9.2d, v0.2d, #2 - ushr v16.2d, v7.2d, #2 - ushr v18.2d, v3.2d, #2 - eor v8.16b, v8.16b, v6.16b - eor v9.16b, v9.16b, v4.16b - eor v16.16b, v16.16b, v5.16b - eor v18.16b, v18.16b, v2.16b - and v8.16b, v8.16b, v17.16b - and v9.16b, v9.16b, v17.16b - and v16.16b, v16.16b, v17.16b - and v17.16b, v18.16b, v17.16b - eor v6.16b, v6.16b, v8.16b - shl v8.2d, v8.2d, #2 - eor v4.16b, v4.16b, v9.16b - shl v9.2d, v9.2d, #2 - eor v5.16b, v5.16b, v16.16b - shl v16.2d, v16.2d, #2 - eor v2.16b, v2.16b, v17.16b - shl v17.2d, v17.2d, #2 - eor v1.16b, v1.16b, v8.16b - eor v0.16b, v0.16b, v9.16b - eor v7.16b, v7.16b, v16.16b - eor v3.16b, v3.16b, v17.16b - ushr v8.2d, v6.2d, #4 - ushr v9.2d, v4.2d, #4 - ushr v16.2d, v1.2d, #4 - ushr v17.2d, v0.2d, #4 - eor v8.16b, v8.16b, v5.16b - eor v9.16b, v9.16b, v2.16b - eor v16.16b, v16.16b, v7.16b - eor v17.16b, v17.16b, v3.16b - and v8.16b, v8.16b, v19.16b - and v9.16b, v9.16b, v19.16b - and v16.16b, v16.16b, v19.16b - and v17.16b, v17.16b, v19.16b - eor v5.16b, v5.16b, v8.16b - shl v8.2d, v8.2d, #4 - eor v2.16b, v2.16b, v9.16b - shl v9.2d, v9.2d, #4 - eor v7.16b, v7.16b, v16.16b - shl v16.2d, v16.2d, #4 - eor v3.16b, v3.16b, v17.16b - shl v17.2d, v17.2d, #4 - eor v6.16b, v6.16b, v8.16b - eor v4.16b, v4.16b, v9.16b - eor v7.16b, v7.16b, v10.16b - eor v1.16b, v1.16b, v16.16b - eor v3.16b, v3.16b, v10.16b - eor v0.16b, v0.16b, v17.16b - eor v6.16b, v6.16b, v10.16b - eor v4.16b, v4.16b, v10.16b - eor v2.16b, v2.16b, v10.16b - eor v5.16b, v5.16b, v10.16b - eor v1.16b, v1.16b, v10.16b - eor v0.16b, v0.16b, v10.16b - ret -.size _bsaes_encrypt8,.-_bsaes_encrypt8 - -.type _bsaes_key_convert,%function -.align 4 -// On entry: -// x9 -> input key (big-endian) -// x10 = number of rounds -// x17 -> output key (native endianness) -// On exit: -// x9, x10 corrupted -// x11 -> .LM0_bigendian -// x17 -> last quadword of output key -// other general-purpose registers preserved -// v2-v6 preserved -// v7.16b[] = 0x63 -// v8-v14 preserved -// v15 = last round key (converted to native endianness) -// other SIMD registers corrupted -_bsaes_key_convert: -#ifdef __ARMEL__ - adr |