#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# Multi-buffer AES-NI procedures process several independent buffers
# in parallel by interleaving independent instructions.
#
# Cycles per byte for interleave factor 4:
#
# asymptotic measured
# ---------------------------
# Westmere 5.00/4=1.25 5.13/4=1.28
# Atom 15.0/4=3.75 ?15.7/4=3.93
# Sandy Bridge 5.06/4=1.27 5.18/4=1.29
# Ivy Bridge 5.06/4=1.27 5.14/4=1.29
# Haswell 4.44/4=1.11 4.44/4=1.11
# Bulldozer 5.75/4=1.44 5.76/4=1.44
#
# Cycles per byte for interleave factor 8 (not implemented for
# pre-AVX processors, where higher interleave factor incidentally
# doesn't result in improvement):
#
# asymptotic measured
# ---------------------------
# Sandy Bridge 5.06/8=0.64 7.10/8=0.89(*)
# Ivy Bridge 5.06/8=0.64 7.14/8=0.89(*)
# Haswell 5.00/8=0.63 5.00/8=0.63
# Bulldozer 5.75/8=0.72 5.77/8=0.72
#
# (*) Sandy/Ivy Bridge are known to handle high interleave factors
# suboptimally;
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
$avx=0;
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
=~ /GNU assembler version ([2-9]\.[0-9]+)/) {
$avx = ($1>=2.19) + ($1>=2.22);
}
if (!$avx &&