#! /usr/bin/env perl
# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# December 2014
#
# ChaCha20 for ARMv4.
#
# Performance in cycles per byte out of large buffer.
#
# IALU/gcc-4.4 1xNEON 3xNEON+1xIALU
#
# Cortex-A5 19.3(*)/+95% 21.8 14.1
# Cortex-A8 10.5(*)/+160% 13.9 6.35
# Cortex-A9 12.9(**)/+110% 14.3 6.50
# Cortex-A15 11.0/+40% 16.0 5.00
# Snapdragon S4 11.5/+125% 13.6 4.90
#
# (*) most "favourable" result for aligned data on little-endian
# processor, result for misaligned data is 10-15% lower;
# (**) this result is a trade-off: it can be improved by 20%,
# but then Snapdragon S4 and Cortex-A8 results get
# 20-25% worse;
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour && $flavour ne "void") {
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open STDOUT,"| \"$^X\" $xlate $flavour $output"
or die "can't call $xlate: $!";
} else {
$output and open STDOUT,">$output";
}
sub AUTOLOAD() # thunk [simplified] x86-style perlasm
{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
my $arg = pop;
$arg = "#$arg" if ($arg*1 eq $arg);
$code .= "\t$opcode\t".join(',',@_,$arg)."\n";
}
my @x=map("r$_",(0..7,"x","x","x","x",12,"x",14,"x"));
my @t=map("r$_",(8..11));
sub ROUND {
my ($a0,$b0,$c0,$d0)=@_;
my ($a1,$b1,$c1,$d1)=map(($_&~3)+(($_+1)&3),($a0,$b0,$c0,$d0));
my ($a2,$b2,$c2,$d2)=map(($_&~3)+(($_+1)&3),($a1,$b1,$c1,$d1));
my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));
my $odd = $d0&1;
my ($xc,$xc_) = (@t[0..1]);
my ($xd,$xd_) = $odd ? (@t[2],@x[$d1]) : (@x[$d0],