#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# Needs more work: key setup, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reason. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.
# February 2010
#
# Rescheduling instructions to favour Power6 pipeline gave 10%
# performance improvement on the platfrom in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As result non-compact
# block subroutines are 25% slower than one would expect. Compact
# functions scale better, because they have pure computational part,
# which scales perfectly with clock frequency. To be specific
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
$flavour = shift;
if ($flavour =~ /64/) {
$SIZE_T =8;
$LRSAVE =2*$SIZE_T;
$STU ="stdu";
$POP ="ld";
$PUSH ="std";
} elsif ($flavour =~ /32/) {
$SIZE_T =4;
$LRSAVE =$SIZE_T;
$STU ="stwu";
$POP ="lwz";
$PUSH ="stw";
} else { die "nonsense $flavour"; }
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
$FRAME=32*$SIZE_T;
sub _data_word()
{ my $i;
while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i