/*
* Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
/**
* rijndael-alg-fst.c
*
* @version 3.0 (December 2000)
*
* Optimised ANSI C code for the Rijndael cipher (now AES)
*
* @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
* @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
* @author Paulo Barreto <paulo.barreto@terra.com.br>
*
* This code is hereby placed in the public domain.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
* OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* This is an experimental x86[_64] derivative. It assumes little-endian
* byte order and expects the CPU to sustain unaligned memory references.
* It is used as a playground for cache-timing attack mitigations and
* serves as a reference C implementation for the x86[_64] assembler.
*
* <appro@fy.chalmers.se>
*/
#include <assert.h>
#include <stdlib.h>
#include <openssl/aes.h>
#include "aes_locl.h"
/*
* These two parameters control which table, 256-byte or 2KB, is
* referenced in the outer and inner rounds, respectively.
*/
/* The outer-round option is switched on unconditionally at this point. */
#define AES_COMPACT_IN_OUTER_ROUNDS
#ifdef AES_COMPACT_IN_OUTER_ROUNDS
/*
 * AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while
 * additionally enabling AES_COMPACT_IN_INNER_ROUNDS slows the benchmark
 * down *further*, by a factor of ~2. The #undef below forces the
 * inner-round option off whenever the outer-round option is on, even if
 * it had been defined earlier (e.g. on the compiler command line).
 */
# undef AES_COMPACT_IN_INNER_ROUNDS
#endif
#if 1
/*
 * Prefetch a 256-byte table by reading one word out of every 32-byte
 * stretch of it. The reads go through a pointer-to-volatile and the
 * XOR of all words read is finally stored into a volatile local, so the
 * compiler is obliged to actually perform the memory accesses.
 *
 * table: start of the 256-byte table to walk; it is only read.
 */
static void prefetch256(const void *table)
{
    volatile unsigned long *words = (void *)table;
    volatile unsigned long sink;
    unsigned long folded = 0;
    int pos = 0;

    /* 32 is common least cache-line size */
    while (pos < 256 / sizeof(words[0])) {
        folded ^= words[pos];
        pos += 32 / sizeof(words[0]);
    }

    sink = folded;
}
#else
/* Prefetching disabled: calls expand to nothing. */
# define prefetch256(t)
#endif
/*
 * Replace any earlier GETU32 with a plain 32-bit load through a u32
 * pointer. No byte swapping or alignment handling is performed, in line
 * with the little-endian / unaligned-access assumptions stated at the
 * top of this file.
 */
#ifdef GETU32
# undef GETU32
#endif
#define GETU32(p) (*(u32 *)(p))
/*
 * u64 is the unsigned integer type used for the 64-bit table entries and
 * U64(C) pastes the matching literal suffix onto the constant C:
 *   - Windows toolchains other than MinGW: unsigned __int64 / UI64
 *   - targets defining __arch64__:         unsigned long    / UL
 *   - everything else:                     unsigned long long / ULL
 */
#if !defined(__MINGW32__) && (defined(_WIN32) || defined(_WIN64))
typedef unsigned __int64 u64;
# define U64(C) C##UI64
#elif defined(__arch64__)
typedef unsigned long u64;
# define U64(C) C##UL
#else
typedef unsigned long long u64;
# define U64(C) C##ULL
#endif
/*
 * ROTATE(a,n): rotate a left by n bits.
 *
 * Any earlier ROTATE definition is discarded first. The replacement maps
 * to the _lrotl intrinsic under MSVC, to _rotl under ICC, and to an
 * inline-asm "roll" instruction for GCC-compatible compilers (version 2
 * or later) targeting x86 or x86_64. For every other compiler/target
 * combination this block leaves ROTATE undefined.
 */
#undef ROTATE
#if defined(_MSC_VER)
# define ROTATE(a,n) _lrotl(a,n)
#elif defined(__ICC)
# define ROTATE(a,n) _rotl(a,n)
#elif defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
/*
 * GNU statement expression whose value is ret. The "0" constraint puts a
 * into the same register as the output ret, so "roll" rotates it in
 * place; the "I" constraint requires n to be a compile-time constant
 * suitable as an immediate shift count; "cc" records that the flags are
 * modified.
 */
# define ROTATE(a,n) ({ register unsigned int ret; \
asm ( \
"roll %1,%0" \
: "=r"(ret) \
: "I"(n), "0"(a) \
: "cc"); \
ret; \
})
# endif
#endif
/*-
Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
Te0[x] = S [x].[02, 01, 01, 03];
Te1[x] = S [x].[03, 02, 01, 01];
Te2[x] = S [x].[01, 03, 02, 01];
Te3[x] = S [x].[01, 01, 03, 02];
*/
/*
 * Te0..Te3 are not separate tables but byte-offset views into the single
 * u64 table Te. Every Te entry holds its 4-byte pattern twice (see the
 * layout above, listed lowest address first), so starting the read 0, 3,
 * 2 or 1 bytes into an entry yields the four rotations of that pattern.
 *
 * The macros have no outer parentheses: when followed by a subscript,
 * e.g. Te1[i], the expression parses as (u32)(((u64*)((u8*)Te+3))[i]),
 * i.e. an 8-byte-stride u64 load truncated to u32. With a non-zero byte
 * offset that load is unaligned.
 *
 * NOTE(review): with a non-zero offset, subscript 255 nominally reads up
 * to 3 bytes past the end of the 256-entry Te array -- confirm this is
 * acceptable on the intended targets.
 */
#define Te0 (u32)((u64*)((u8*)Te+0))
#define Te1 (u32)((u64*)((u8*)Te+3))
#define Te2 (u32)((u64*)((u8*)Te+2))
#define Te3 (u32)((u64*)((u8*)Te+1))
/*-
Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
Td0[x] = Si[x].[0e, 09, 0d, 0b];
Td1[x] = Si[x].[0b, 0e, 09, 0d];
Td2[x] = Si[x].[0d, 0b, 0e, 09];
Td3[x] = Si[x].[09, 0d, 0b, 0e];
Td4[x] = Si[x].[01];
*/
/*
 * Td0..Td3 are byte-offset views (0, 3, 2 and 1 bytes) into the table Td,
 * built the same way as the Te0..Te3 macros: the layout above lists each
 * Td entry as its 4-byte pattern repeated twice, so the shifted reads
 * give the four rotations of that pattern.
 *
 * The macros have no outer parentheses: when followed by a subscript,
 * e.g. Td1[i], the (u32) cast applies to the subscripted u64 load, which
 * is unaligned for the non-zero offsets.
 *
 * NOTE(review): Td is not defined before this point; presumably it is a
 * u64[256] table declared later in the file -- confirm. No Td4 macro is
 * defined here although the layout comment mentions Td4.
 */
#define Td0 (u32)((u64*)((u8*)Td+0))
#define Td1 (u32)((u64*)((u8*)Td+3))
#define Td2 (u32)((u64*)((u8*)Td+2))
#define Td3 (u32)((u64*)((u8*)Td+1))
static const u64 Te[256] = {
U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
U64(0x5030306050303060), U64(0x0301010203010102),
U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
U64(0x5331316253313162), U64(0x3f15152a3f15152a),
U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
U64(0x2818183028181830), U64(0xa1969637a1969637),
U64(0x0f05050a0f05050a), U64(