summaryrefslogtreecommitdiffstats
path: root/arch/sparc
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 12:57:42 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-10-02 12:57:42 -0700
commita20acf99f75e49271381d65db097c9763060a1e8 (patch)
tree3cf661125e86b7625171b96b885bf5395f62e684 /arch/sparc
parent437589a74b6a590d175f86cf9f7b2efcee7765e7 (diff)
parent42a4172b6ebb4a419085c6caee7c135e51cae5ea (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next
Pull sparc updates from David Miller: "Largely this is simply adding support for the Niagara 4 cpu. Major areas are perf events (chip now supports 4 counters and can monitor any event on each counter), crypto (opcodes are availble for sha1, sha256, sha512, md5, crc32c, AES, DES, CAMELLIA, and Kasumi although the last is unsupported since we lack a generic crypto layer Kasumi implementation), and an optimized memcpy. Finally some cleanups by Peter Senna Tschudin." * git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next: (47 commits) sparc64: Fix trailing whitespace in NG4 memcpy. sparc64: Fix comment type in NG4 copy from user. sparc64: Add SPARC-T4 optimized memcpy. drivers/sbus/char: removes unnecessary semicolon arch/sparc/kernel/pci_sun4v.c: removes unnecessary semicolon sparc64: Fix function argument comment in camellia_sparc64_key_expand asm. sparc64: Fix IV handling bug in des_sparc64_cbc_decrypt sparc64: Add auto-loading mechanism to crypto-opcode drivers. sparc64: Add missing pr_fmt define to crypto opcode drivers. sparc64: Adjust crypto priorities. sparc64: Use cpu_pgsz_mask for linear kernel mapping config. sparc64: Probe cpu page size support more portably. sparc64: Support 2GB and 16GB page sizes for kernel linear mappings. sparc64: Fix bugs in unrolled 256-bit loops. sparc64: Avoid code duplication in crypto assembler. sparc64: Unroll CTR crypt loops in AES driver. sparc64: Unroll ECB decryption loops in AES driver. sparc64: Unroll ECB encryption loops in AES driver. sparc64: Add ctr mode support to AES driver. sparc64: Move AES driver over to a methods based implementation. ...
Diffstat (limited to 'arch/sparc')
-rw-r--r--arch/sparc/Kbuild1
-rw-r--r--arch/sparc/crypto/Makefile25
-rw-r--r--arch/sparc/crypto/aes_asm.S1535
-rw-r--r--arch/sparc/crypto/aes_glue.c477
-rw-r--r--arch/sparc/crypto/camellia_asm.S563
-rw-r--r--arch/sparc/crypto/camellia_glue.c322
-rw-r--r--arch/sparc/crypto/crc32c_asm.S20
-rw-r--r--arch/sparc/crypto/crc32c_glue.c179
-rw-r--r--arch/sparc/crypto/crop_devid.c14
-rw-r--r--arch/sparc/crypto/des_asm.S418
-rw-r--r--arch/sparc/crypto/des_glue.c529
-rw-r--r--arch/sparc/crypto/md5_asm.S70
-rw-r--r--arch/sparc/crypto/md5_glue.c188
-rw-r--r--arch/sparc/crypto/opcodes.h99
-rw-r--r--arch/sparc/crypto/sha1_asm.S72
-rw-r--r--arch/sparc/crypto/sha1_glue.c183
-rw-r--r--arch/sparc/crypto/sha256_asm.S78
-rw-r--r--arch/sparc/crypto/sha256_glue.c241
-rw-r--r--arch/sparc/crypto/sha512_asm.S102
-rw-r--r--arch/sparc/crypto/sha512_glue.c226
-rw-r--r--arch/sparc/include/asm/asi.h4
-rw-r--r--arch/sparc/include/asm/elf_64.h9
-rw-r--r--arch/sparc/include/asm/hypervisor.h11
-rw-r--r--arch/sparc/include/asm/mdesc.h1
-rw-r--r--arch/sparc/include/asm/pcr.h36
-rw-r--r--arch/sparc/include/asm/perfctr.h30
-rw-r--r--arch/sparc/include/asm/pstate.h14
-rw-r--r--arch/sparc/kernel/head_64.S14
-rw-r--r--arch/sparc/kernel/hvapi.c1
-rw-r--r--arch/sparc/kernel/hvcalls.S16
-rw-r--r--arch/sparc/kernel/ktlb.S25
-rw-r--r--arch/sparc/kernel/mdesc.c24
-rw-r--r--arch/sparc/kernel/nmi.c21
-rw-r--r--arch/sparc/kernel/pci_sun4v.c2
-rw-r--r--arch/sparc/kernel/pcr.c172
-rw-r--r--arch/sparc/kernel/perf_event.c516
-rw-r--r--arch/sparc/kernel/setup_64.c67
-rw-r--r--arch/sparc/lib/Makefile3
-rw-r--r--arch/sparc/lib/NG4copy_from_user.S30
-rw-r--r--arch/sparc/lib/NG4copy_page.S57
-rw-r--r--arch/sparc/lib/NG4copy_to_user.S39
-rw-r--r--arch/sparc/lib/NG4memcpy.S360
-rw-r--r--arch/sparc/lib/NG4patch.S43
-rw-r--r--arch/sparc/lib/NGpage.S2
-rw-r--r--arch/sparc/lib/ksyms.c4
-rw-r--r--arch/sparc/mm/init_64.c230
-rw-r--r--arch/sparc/mm/init_64.h4
47 files changed, 6814 insertions, 263 deletions
diff --git a/arch/sparc/Kbuild b/arch/sparc/Kbuild
index 5cd01161fd00..675afa285ddb 100644
--- a/arch/sparc/Kbuild
+++ b/arch/sparc/Kbuild
@@ -6,3 +6,4 @@ obj-y += kernel/
obj-y += mm/
obj-y += math-emu/
obj-y += net/
+obj-y += crypto/
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
new file mode 100644
index 000000000000..6ae1ad5e502b
--- /dev/null
+++ b/arch/sparc/crypto/Makefile
@@ -0,0 +1,25 @@
+#
+# Arch-specific CryptoAPI modules.
+#
+
+obj-$(CONFIG_CRYPTO_SHA1_SPARC64) += sha1-sparc64.o
+obj-$(CONFIG_CRYPTO_SHA256_SPARC64) += sha256-sparc64.o
+obj-$(CONFIG_CRYPTO_SHA512_SPARC64) += sha512-sparc64.o
+obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o
+
+obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o
+obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
+obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o
+
+obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o
+
+sha1-sparc64-y := sha1_asm.o sha1_glue.o crop_devid.o
+sha256-sparc64-y := sha256_asm.o sha256_glue.o crop_devid.o
+sha512-sparc64-y := sha512_asm.o sha512_glue.o crop_devid.o
+md5-sparc64-y := md5_asm.o md5_glue.o crop_devid.o
+
+aes-sparc64-y := aes_asm.o aes_glue.o crop_devid.o
+des-sparc64-y := des_asm.o des_glue.o crop_devid.o
+camellia-sparc64-y := camellia_asm.o camellia_glue.o crop_devid.o
+
+crc32c-sparc64-y := crc32c_asm.o crc32c_glue.o crop_devid.o
diff --git a/arch/sparc/crypto/aes_asm.S b/arch/sparc/crypto/aes_asm.S
new file mode 100644
index 000000000000..23f6cbb910d3
--- /dev/null
+++ b/arch/sparc/crypto/aes_asm.S
@@ -0,0 +1,1535 @@
+#include <linux/linkage.h>
+#include <asm/visasm.h>
+
+#include "opcodes.h"
+
+#define ENCRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
+ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
+ AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
+ AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \
+ AES_EROUND23(KEY_BASE + 6, T0, T1, I1)
+
+#define ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
+ AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
+ AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \
+ AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \
+ AES_EROUND01(KEY_BASE + 4, T0, T1, I0) \
+ AES_EROUND23(KEY_BASE + 6, T0, T1, I1) \
+ AES_EROUND01(KEY_BASE + 4, T2, T3, I2) \
+ AES_EROUND23(KEY_BASE + 6, T2, T3, I3)
+
+#define ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
+ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
+ AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
+ AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \
+ AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1)
+
+#define ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ AES_EROUND01(KEY_BASE + 0, I0, I1, T0) \
+ AES_EROUND23(KEY_BASE + 2, I0, I1, T1) \
+ AES_EROUND01(KEY_BASE + 0, I2, I3, T2) \
+ AES_EROUND23(KEY_BASE + 2, I2, I3, T3) \
+ AES_EROUND01_L(KEY_BASE + 4, T0, T1, I0) \
+ AES_EROUND23_L(KEY_BASE + 6, T0, T1, I1) \
+ AES_EROUND01_L(KEY_BASE + 4, T2, T3, I2) \
+ AES_EROUND23_L(KEY_BASE + 6, T2, T3, I3)
+
+ /* 10 rounds */
+#define ENCRYPT_128(KEY_BASE, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
+
+#define ENCRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
+
+ /* 12 rounds */
+#define ENCRYPT_192(KEY_BASE, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
+
+#define ENCRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
+ ENCRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
+
+ /* 14 rounds */
+#define ENCRYPT_256(KEY_BASE, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
+ ENCRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
+
+#define ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
+ ENCRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
+ TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
+
+#define ENCRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \
+ ldd [%o0 + 0xd0], %f56; \
+ ldd [%o0 + 0xd8], %f58; \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \
+ ldd [%o0 + 0xe0], %f60; \
+ ldd [%o0 + 0xe8], %f62; \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \
+ ENCRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \
+ AES_EROUND01(KEY_BASE + 48, I0, I1, KEY_BASE + 0) \
+ AES_EROUND23(KEY_BASE + 50, I0, I1, KEY_BASE + 2) \
+ AES_EROUND01(KEY_BASE + 48, I2, I3, KEY_BASE + 4) \
+ AES_EROUND23(KEY_BASE + 50, I2, I3, KEY_BASE + 6) \
+ AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I0) \
+ AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I1) \
+ ldd [%o0 + 0x10], %f8; \
+ ldd [%o0 + 0x18], %f10; \
+ AES_EROUND01_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I2) \
+ AES_EROUND23_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I3) \
+ ldd [%o0 + 0x20], %f12; \
+ ldd [%o0 + 0x28], %f14;
+
+#define DECRYPT_TWO_ROUNDS(KEY_BASE, I0, I1, T0, T1) \
+ AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
+ AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
+ AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \
+ AES_DROUND01(KEY_BASE + 6, T0, T1, I0)
+
+#define DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
+ AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
+ AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \
+ AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \
+ AES_DROUND23(KEY_BASE + 4, T0, T1, I1) \
+ AES_DROUND01(KEY_BASE + 6, T0, T1, I0) \
+ AES_DROUND23(KEY_BASE + 4, T2, T3, I3) \
+ AES_DROUND01(KEY_BASE + 6, T2, T3, I2)
+
+#define DECRYPT_TWO_ROUNDS_LAST(KEY_BASE, I0, I1, T0, T1) \
+ AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
+ AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
+ AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \
+ AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0)
+
+#define DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ AES_DROUND23(KEY_BASE + 0, I0, I1, T1) \
+ AES_DROUND01(KEY_BASE + 2, I0, I1, T0) \
+ AES_DROUND23(KEY_BASE + 0, I2, I3, T3) \
+ AES_DROUND01(KEY_BASE + 2, I2, I3, T2) \
+ AES_DROUND23_L(KEY_BASE + 4, T0, T1, I1) \
+ AES_DROUND01_L(KEY_BASE + 6, T0, T1, I0) \
+ AES_DROUND23_L(KEY_BASE + 4, T2, T3, I3) \
+ AES_DROUND01_L(KEY_BASE + 6, T2, T3, I2)
+
+ /* 10 rounds */
+#define DECRYPT_128(KEY_BASE, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 32, I0, I1, T0, T1)
+
+#define DECRYPT_128_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3)
+
+ /* 12 rounds */
+#define DECRYPT_192(KEY_BASE, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 40, I0, I1, T0, T1)
+
+#define DECRYPT_192_2(KEY_BASE, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, T0, T1, T2, T3) \
+ DECRYPT_TWO_ROUNDS_LAST_2(KEY_BASE + 40, I0, I1, I2, I3, T0, T1, T2, T3)
+
+ /* 14 rounds */
+#define DECRYPT_256(KEY_BASE, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 0, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 8, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 16, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 24, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 32, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS(KEY_BASE + 40, I0, I1, T0, T1) \
+ DECRYPT_TWO_ROUNDS_LAST(KEY_BASE + 48, I0, I1, T0, T1)
+
+#define DECRYPT_256_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, TMP_BASE) \
+ DECRYPT_TWO_ROUNDS_2(KEY_BASE, I0, I1, I2, I3, \
+ TMP_BASE + 0, TMP_BASE + 2, TMP_BASE + 4, TMP_BASE + 6)
+
+#define DECRYPT_256_2(KEY_BASE, I0, I1, I2, I3) \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 0, I0, I1, I2, I3, KEY_BASE + 48) \
+ ldd [%o0 + 0x18], %f56; \
+ ldd [%o0 + 0x10], %f58; \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 8, I0, I1, I2, I3, KEY_BASE + 0) \
+ ldd [%o0 + 0x08], %f60; \
+ ldd [%o0 + 0x00], %f62; \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 16, I0, I1, I2, I3, KEY_BASE + 0) \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 24, I0, I1, I2, I3, KEY_BASE + 0) \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 32, I0, I1, I2, I3, KEY_BASE + 0) \
+ DECRYPT_256_TWO_ROUNDS_2(KEY_BASE + 40, I0, I1, I2, I3, KEY_BASE + 0) \
+ AES_DROUND23(KEY_BASE + 48, I0, I1, KEY_BASE + 2) \
+ AES_DROUND01(KEY_BASE + 50, I0, I1, KEY_BASE + 0) \
+ AES_DROUND23(KEY_BASE + 48, I2, I3, KEY_BASE + 6) \
+ AES_DROUND01(KEY_BASE + 50, I2, I3, KEY_BASE + 4) \
+ AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 0, KEY_BASE + 2, I1) \
+ AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 0, KEY_BASE + 2, I0) \
+ ldd [%o0 + 0xd8], %f8; \
+ ldd [%o0 + 0xd0], %f10; \
+ AES_DROUND23_L(KEY_BASE + 52, KEY_BASE + 4, KEY_BASE + 6, I3) \
+ AES_DROUND01_L(KEY_BASE + 54, KEY_BASE + 4, KEY_BASE + 6, I2) \
+ ldd [%o0 + 0xc8], %f12; \
+ ldd [%o0 + 0xc0], %f14;
+
+ .align 32
+ENTRY(aes_sparc64_key_expand)
+ /* %o0=input_key, %o1=output_key, %o2=key_len */
+ VISEntry
+ ld [%o0 + 0x00], %f0
+ ld [%o0 + 0x04], %f1
+ ld [%o0 + 0x08], %f2
+ ld [%o0 + 0x0c], %f3
+
+ std %f0, [%o1 + 0x00]
+ std %f2, [%o1 + 0x08]
+ add %o1, 0x10, %o1
+
+ cmp %o2, 24
+ bl 2f
+ nop
+
+ be 1f
+ nop
+
+ /* 256-bit key expansion */
+ ld [%o0 + 0x10], %f4
+ ld [%o0 + 0x14], %f5
+ ld [%o0 + 0x18], %f6
+ ld [%o0 + 0x1c], %f7
+
+ std %f4, [%o1 + 0x00]
+ std %f6, [%o1 + 0x08]
+ add %o1, 0x10, %o1
+
+ AES_KEXPAND1(0, 6, 0x0, 8)
+ AES_KEXPAND2(2, 8, 10)
+ AES_KEXPAND0(4, 10, 12)
+ AES_KEXPAND2(6, 12, 14)
+ AES_KEXPAND1(8, 14, 0x1, 16)
+ AES_KEXPAND2(10, 16, 18)
+ AES_KEXPAND0(12, 18, 20)
+ AES_KEXPAND2(14, 20, 22)
+ AES_KEXPAND1(16, 22, 0x2, 24)
+ AES_KEXPAND2(18, 24, 26)
+ AES_KEXPAND0(20, 26, 28)
+ AES_KEXPAND2(22, 28, 30)
+ AES_KEXPAND1(24, 30, 0x3, 32)
+ AES_KEXPAND2(26, 32, 34)
+ AES_KEXPAND0(28, 34, 36)
+ AES_KEXPAND2(30, 36, 38)
+ AES_KEXPAND1(32, 38, 0x4, 40)
+ AES_KEXPAND2(34, 40, 42)
+ AES_KEXPAND0(36, 42, 44)
+ AES_KEXPAND2(38, 44, 46)
+ AES_KEXPAND1(40, 46, 0x5, 48)
+ AES_KEXPAND2(42, 48, 50)
+ AES_KEXPAND0(44, 50, 52)
+ AES_KEXPAND2(46, 52, 54)
+ AES_KEXPAND1(48, 54, 0x6, 56)
+ AES_KEXPAND2(50, 56, 58)
+
+ std %f8, [%o1 + 0x00]
+ std %f10, [%o1 + 0x08]
+ std %f12, [%o1 + 0x10]
+ std %f14, [%o1 + 0x18]
+ std %f16, [%o1 + 0x20]
+ std %f18, [%o1 + 0x28]
+ std %f20, [%o1 + 0x30]
+ std %f22, [%o1 + 0x38]
+ std %f24, [%o1 + 0x40]
+ std %f26, [%o1 + 0x48]
+ std %f28, [%o1 + 0x50]
+ std %f30, [%o1 + 0x58]
+ std %f32, [%o1 + 0x60]
+ std %f34, [%o1 + 0x68]
+ std %f36, [%o1 + 0x70]
+ std %f38, [%o1 + 0x78]
+ std %f40, [%o1 + 0x80]
+ std %f42, [%o1 + 0x88]
+ std %f44, [%o1 + 0x90]
+ std %f46, [%o1 + 0x98]
+ std %f48, [%o1 + 0xa0]
+ std %f50, [%o1 + 0xa8]
+ std %f52, [%o1 + 0xb0]
+ std %f54, [%o1 + 0xb8]
+ std %f56, [%o1 + 0xc0]
+ ba,pt %xcc, 80f
+ std %f58, [%o1 + 0xc8]
+
+1:
+ /* 192-bit key expansion */
+ ld [%o0 + 0x10], %f4
+ ld [%o0 + 0x14], %f5
+
+ std %f4, [%o1 + 0x00]
+ add %o1, 0x08, %o1
+
+ AES_KEXPAND1(0, 4, 0x0, 6)
+ AES_KEXPAND2(2, 6, 8)
+ AES_KEXPAND2(4, 8, 10)
+ AES_KEXPAND1(6, 10, 0x1, 12)
+ AES_KEXPAND2(8, 12, 14)
+ AES_KEXPAND2(10, 14, 16)
+ AES_KEXPAND1(12, 16, 0x2, 18)
+ AES_KEXPAND2(14, 18, 20)
+ AES_KEXPAND2(16, 20, 22)
+ AES_KEXPAND1(18, 22, 0x3, 24)
+ AES_KEXPAND2(20, 24, 26)
+ AES_KEXPAND2(22, 26, 28)
+ AES_KEXPAND1(24, 28, 0x4, 30)
+ AES_KEXPAND2(26, 30, 32)
+ AES_KEXPAND2(28, 32, 34)
+ AES_KEXPAND1(30, 34, 0x5, 36)
+ AES_KEXPAND2(32, 36, 38)
+ AES_KEXPAND2(34, 38, 40)
+ AES_KEXPAND1(36, 40, 0x6, 42)
+ AES_KEXPAND2(38, 42, 44)
+ AES_KEXPAND2(40, 44, 46)
+ AES_KEXPAND1(42, 46, 0x7, 48)
+ AES_KEXPAND2(44, 48, 50)
+
+ std %f6, [%o1 + 0x00]
+ std %f8, [%o1 + 0x08]
+ std %f10, [%o1 + 0x10]
+ std %f12, [%o1 + 0x18]
+ std %f14, [%o1 + 0x20]
+ std %f16, [%o1 + 0x28]
+ std %f18, [%o1 + 0x30]
+ std %f20, [%o1 + 0x38]
+ std %f22, [%o1 + 0x40]
+ std %f24, [%o1 + 0x48]
+ std %f26, [%o1 + 0x50]
+ std %f28, [%o1 + 0x58]
+ std %f30, [%o1 + 0x60]
+ std %f32, [%o1 + 0x68]
+ std %f34, [%o1 + 0x70]
+ std %f36, [%o1 + 0x78]
+ std %f38, [%o1 + 0x80]
+ std %f40, [%o1 + 0x88]
+ std %f42, [%o1 + 0x90]
+ std %f44, [%o1 + 0x98]
+ std %f46, [%o1 + 0xa0]
+ std %f48, [%o1 + 0xa8]
+ ba,pt %xcc, 80f
+ std %f50, [%o1 + 0xb0]
+
+2:
+ /* 128-bit key expansion */
+ AES_KEXPAND1(0, 2, 0x0, 4)
+ AES_KEXPAND2(2, 4, 6)
+ AES_KEXPAND1(4, 6, 0x1, 8)
+ AES_KEXPAND2(6, 8, 10)
+ AES_KEXPAND1(8, 10, 0x2, 12)
+ AES_KEXPAND2(10, 12, 14)
+ AES_KEXPAND1(12, 14, 0x3, 16)
+ AES_KEXPAND2(14, 16, 18)
+ AES_KEXPAND1(16, 18, 0x4, 20)
+ AES_KEXPAND2(18, 20, 22)
+ AES_KEXPAND1(20, 22, 0x5, 24)
+ AES_KEXPAND2(22, 24, 26)
+ AES_KEXPAND1(24, 26, 0x6, 28)
+ AES_KEXPAND2(26, 28, 30)
+ AES_KEXPAND1(28, 30, 0x7, 32)
+ AES_KEXPAND2(30, 32, 34)
+ AES_KEXPAND1(32, 34, 0x8, 36)
+ AES_KEXPAND2(34, 36, 38)
+ AES_KEXPAND1(36, 38, 0x9, 40)
+ AES_KEXPAND2(38, 40, 42)
+
+ std %f4, [%o1 + 0x00]
+ std %f6, [%o1 + 0x08]
+ std %f8, [%o1 + 0x10]
+ std %f10, [%o1 + 0x18]
+ std %f12, [%o1 + 0x20]
+ std %f14, [%o1 + 0x28]
+ std %f16, [%o1 + 0x30]
+ std %f18, [%o1 + 0x38]
+ std %f20, [%o1 + 0x40]
+ std %f22, [%o1 + 0x48]
+ std %f24, [%o1 + 0x50]
+ std %f26, [%o1 + 0x58]
+ std %f28, [%o1 + 0x60]
+ std %f30, [%o1 + 0x68]
+ std %f32, [%o1 + 0x70]
+ std %f34, [%o1 + 0x78]
+ std %f36, [%o1 + 0x80]
+ std %f38, [%o1 + 0x88]
+ std %f40, [%o1 + 0x90]
+ std %f42, [%o1 + 0x98]
+80:
+ retl
+ VISExit
+ENDPROC(aes_sparc64_key_expand)
+
+ .align 32
+ENTRY(aes_sparc64_encrypt_128)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+ ldd [%o0 + 0x00], %f8
+ ldd [%o0 + 0x08], %f10
+ ldd [%o0 + 0x10], %f12
+ ldd [%o0 + 0x18], %f14
+ ldd [%o0 + 0x20], %f16
+ ldd [%o0 + 0x28], %f18
+ ldd [%o0 + 0x30], %f20
+ ldd [%o0 + 0x38], %f22
+ ldd [%o0 + 0x40], %f24
+ ldd [%o0 + 0x48], %f26
+ ldd [%o0 + 0x50], %f28
+ ldd [%o0 + 0x58], %f30
+ ldd [%o0 + 0x60], %f32
+ ldd [%o0 + 0x68], %f34
+ ldd [%o0 + 0x70], %f36
+ ldd [%o0 + 0x78], %f38
+ ldd [%o0 + 0x80], %f40
+ ldd [%o0 + 0x88], %f42
+ ldd [%o0 + 0x90], %f44
+ ldd [%o0 + 0x98], %f46
+ ldd [%o0 + 0xa0], %f48
+ ldd [%o0 + 0xa8], %f50
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+ ENCRYPT_128(12, 4, 6, 0, 2)
+ st %f4, [%o2 + 0x00]
+ st %f5, [%o2 + 0x04]
+ st %f6, [%o2 + 0x08]
+ st %f7, [%o2 + 0x0c]
+ retl
+ VISExit
+ENDPROC(aes_sparc64_encrypt_128)
+
+ .align 32
+ENTRY(aes_sparc64_encrypt_192)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+
+ ldd [%o0 + 0x00], %f8
+ ldd [%o0 + 0x08], %f10
+
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+
+ ldd [%o0 + 0x10], %f8
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ add %o0, 0x20, %o0
+
+ ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
+
+ ldd [%o0 + 0x10], %f12
+ ldd [%o0 + 0x18], %f14
+ ldd [%o0 + 0x20], %f16
+ ldd [%o0 + 0x28], %f18
+ ldd [%o0 + 0x30], %f20
+ ldd [%o0 + 0x38], %f22
+ ldd [%o0 + 0x40], %f24
+ ldd [%o0 + 0x48], %f26
+ ldd [%o0 + 0x50], %f28
+ ldd [%o0 + 0x58], %f30
+ ldd [%o0 + 0x60], %f32
+ ldd [%o0 + 0x68], %f34
+ ldd [%o0 + 0x70], %f36
+ ldd [%o0 + 0x78], %f38
+ ldd [%o0 + 0x80], %f40
+ ldd [%o0 + 0x88], %f42
+ ldd [%o0 + 0x90], %f44
+ ldd [%o0 + 0x98], %f46
+ ldd [%o0 + 0xa0], %f48
+ ldd [%o0 + 0xa8], %f50
+
+
+ ENCRYPT_128(12, 4, 6, 0, 2)
+
+ st %f4, [%o2 + 0x00]
+ st %f5, [%o2 + 0x04]
+ st %f6, [%o2 + 0x08]
+ st %f7, [%o2 + 0x0c]
+
+ retl
+ VISExit
+ENDPROC(aes_sparc64_encrypt_192)
+
+ .align 32
+ENTRY(aes_sparc64_encrypt_256)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+
+ ldd [%o0 + 0x00], %f8
+ ldd [%o0 + 0x08], %f10
+
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+
+ ldd [%o0 + 0x10], %f8
+
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ add %o0, 0x20, %o0
+
+ ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
+
+ ldd [%o0 + 0x10], %f8
+
+ ldd [%o0 + 0x18], %f10
+ ldd [%o0 + 0x20], %f12
+ ldd [%o0 + 0x28], %f14
+ add %o0, 0x20, %o0
+
+ ENCRYPT_TWO_ROUNDS(8, 4, 6, 0, 2)
+
+ ldd [%o0 + 0x10], %f12
+ ldd [%o0 + 0x18], %f14
+ ldd [%o0 + 0x20], %f16
+ ldd [%o0 + 0x28], %f18
+ ldd [%o0 + 0x30], %f20
+ ldd [%o0 + 0x38], %f22
+ ldd [%o0 + 0x40], %f24
+ ldd [%o0 + 0x48], %f26
+ ldd [%o0 + 0x50], %f28
+ ldd [%o0 + 0x58], %f30
+ ldd [%o0 + 0x60], %f32
+ ldd [%o0 + 0x68], %f34
+ ldd [%o0 + 0x70], %f36
+ ldd [%o0 + 0x78], %f38
+ ldd [%o0 + 0x80], %f40
+ ldd [%o0 + 0x88], %f42
+ ldd [%o0 + 0x90], %f44
+ ldd [%o0 + 0x98], %f46
+ ldd [%o0 + 0xa0], %f48
+ ldd [%o0 + 0xa8], %f50
+
+ ENCRYPT_128(12, 4, 6, 0, 2)
+
+ st %f4, [%o2 + 0x00]
+ st %f5, [%o2 + 0x04]
+ st %f6, [%o2 + 0x08]
+ st %f7, [%o2 + 0x0c]
+
+ retl
+ VISExit
+ENDPROC(aes_sparc64_encrypt_256)
+
+ .align 32
+ENTRY(aes_sparc64_decrypt_128)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+ ldd [%o0 + 0xa0], %f8
+ ldd [%o0 + 0xa8], %f10
+ ldd [%o0 + 0x98], %f12
+ ldd [%o0 + 0x90], %f14
+ ldd [%o0 + 0x88], %f16
+ ldd [%o0 + 0x80], %f18
+ ldd [%o0 + 0x78], %f20
+ ldd [%o0 + 0x70], %f22
+ ldd [%o0 + 0x68], %f24
+ ldd [%o0 + 0x60], %f26
+ ldd [%o0 + 0x58], %f28
+ ldd [%o0 + 0x50], %f30
+ ldd [%o0 + 0x48], %f32
+ ldd [%o0 + 0x40], %f34
+ ldd [%o0 + 0x38], %f36
+ ldd [%o0 + 0x30], %f38
+ ldd [%o0 + 0x28], %f40
+ ldd [%o0 + 0x20], %f42
+ ldd [%o0 + 0x18], %f44
+ ldd [%o0 + 0x10], %f46
+ ldd [%o0 + 0x08], %f48
+ ldd [%o0 + 0x00], %f50
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+ DECRYPT_128(12, 4, 6, 0, 2)
+ st %f4, [%o2 + 0x00]
+ st %f5, [%o2 + 0x04]
+ st %f6, [%o2 + 0x08]
+ st %f7, [%o2 + 0x0c]
+ retl
+ VISExit
+ENDPROC(aes_sparc64_decrypt_128)
+
+ .align 32
+ENTRY(aes_sparc64_decrypt_192)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+ ldd [%o0 + 0xc0], %f8
+ ldd [%o0 + 0xc8], %f10
+ ldd [%o0 + 0xb8], %f12
+ ldd [%o0 + 0xb0], %f14
+ ldd [%o0 + 0xa8], %f16
+ ldd [%o0 + 0xa0], %f18
+ fxor %f8, %f4, %f4
+ fxor %f10, %f6, %f6
+ ldd [%o0 + 0x98], %f20
+ ldd [%o0 + 0x90], %f22
+ ldd [%o0 + 0x88], %f24
+ ldd [%o0 + 0x80], %f26
+ DECRYPT_TWO_ROUNDS(12, 4, 6, 0, 2)
+ ldd [%o0 + 0x78], %f28
+ ldd [%o0 + 0x70], %f30
+ ldd [%o0 + 0x68], %f32
+ ldd [%o0 + 0x60], %f34
+ ldd [%o0 + 0x58], %f36
+ ldd [%o0 + 0x50], %f38
+ ldd [%o0 + 0x48], %f40
+ ldd [%o0 + 0x40], %f42
+ ldd [%o0 + 0x38], %f44
+ ldd [%o0 + 0x30], %f46
+ ldd [%o0 + 0x28], %f48
+ ldd [%o0 + 0x20], %f50
+ ldd [%o0 + 0x18], %f52
+ ldd [%o0 + 0x10], %f54
+ ldd [%o0 + 0x08], %f56
+ ldd [%o0 + 0x00], %f58
+ DECRYPT_128(20, 4, 6, 0, 2)
+ st %f4, [%o2 + 0x00]
+ st %f5, [%o2 + 0x04]
+ st %f6, [%o2 + 0x08]
+ st %f7, [%o2 + 0x0c]
+ retl
+ VISExit
+ENDPROC(aes_sparc64_decrypt_192)
+
+ .align 32
+ENTRY(aes_sparc64_decrypt_256)
+ /* %o0=key, %o1=input, %o2=output */
+ VISEntry
+ ld [%o1 + 0x00], %f4
+ ld [%o1 + 0x04], %f5
+ ld [%o1 + 0x08], %f6
+ ld [%o1 + 0x0c], %f7
+ ldd [%o0 + 0xe0], %f8
+ ldd [%o0 + 0xe8], %f10
+ ldd [%o0 + 0xd8], %f12
+ ldd [%o0 + 0xd0], %f14
+ ldd [%o0 + 0xc8], %f16
+ fxor %f8, %f4, %f4
+ ldd [%o0 + 0xc0], %f18
+ fxor %f10, %f6, %f6
+ ldd [%o0 + 0xb8], %f20
+ AES_DROUND23(12, 4, 6, 2)
+ ldd [%o0 + 0xb0], %f22
+ AES_DROUND01(14, 4, 6, 0)
+ ldd [%o0 + 0xa8], %f24
+ AES_DROUND23(16, 0, 2, 6)
+ ldd [%o0 + 0xa0], %f26
+ AES_DROUND01(18, 0, 2, 4)
+ ldd [%o0 + 0x98], %f12
+ AES_DROUND23(20, 4, 6, 2)