#!/usr/bin/env perl
# Ascetic x86_64 AT&T to MASM/NASM assembler translator by <appro>.
#
# Why AT&T to MASM and not vice versa? Several reasons. Because AT&T
# format is way easier to parse. Because it's simpler to "gear" from
# Unix ABI to Windows one [see cross-reference "card" at the end of
# file]. Because Linux targets were available first...
#
# In addition the script also "distills" code suitable for GNU
# assembler, so that it can be compiled with more rigid assemblers,
# such as Solaris /usr/ccs/bin/as.
#
# This translator is not designed to convert *arbitrary* assembler
# code from AT&T format to MASM one. It's designed to convert just
# enough to provide for dual-ABI OpenSSL modules development...
# There *are* limitations and you might have to modify your assembler
# code or this script to achieve the desired result...
#
# Currently recognized limitations:
#
# - can't use multiple ops per line;
#
# Dual-ABI styling rules.
#
# 1. Adhere to Unix register and stack layout [see cross-reference
# ABI "card" at the end for explanation].
# 2. Forget about "red zone," stick to more traditional blended
# stack frame allocation. If volatile storage is actually required
# that is. If not, just leave the stack as is.
# 3. Functions tagged with ".type name,@function" get crafted with
# unified Win64 prologue and epilogue automatically. If you want
# to take care of ABI differences yourself, tag functions as
# ".type name,@abi-omnipotent" instead.
# 4. To optimize the Win64 prologue you can specify number of input
# arguments as ".type name,@function,N." Keep in mind that if N is
# larger than 6, then you *have to* write "abi-omnipotent" code,
# because >6 cases can't be addressed with unified prologue.
# 5. Name local labels as .L*, do *not* use dynamic labels such as 1:
# (sorry about the latter).
# 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is
# required to identify the spots, where to inject Win64 epilogue!
# But on the pros, it's then prefixed with rep automatically:-)
# 7. Stick to explicit ip-relative addressing. If you have to use
# GOTPCREL addressing, stick to mov symbol@GOTPCREL(%rip),%r??.
# Both are recognized and translated to proper Win64 addressing
# modes. To support legacy code a synthetic directive, .picmeup,
# is implemented. It puts address of the *next* instruction into
# target register, e.g.:
#
# .picmeup %rax
# lea .Label-.(%rax),%rax
#
# 8. In order to provide for structured exception handling unified
# Win64 prologue copies %rsp value to %rax. For further details
# see SEH paragraph at the end.
# 9. .init segment is allowed to contain calls to functions only.
# a. If function accepts more than 4 arguments *and* >4th argument
# is declared as non 64-bit value, do clear its upper part.
# Command line is "[flavour] [output]". If the first argument contains a
# dot it is taken to be the output file name and the flavour is left
# undefined.
my $flavour = shift;
my $output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Redirect STDOUT to $output, unless no output file was named or STDOUT
# already refers to that very file (same device and inode), in which case
# re-opening it would truncate what the caller has already set up.
if (defined($output) && length($output)) {
    my ($stddev,$stdino) = stat(STDOUT);
    my ($outdev,$outino) = stat($output);
    # Either stat() may fail (e.g. $output does not exist yet); only treat
    # the two as identical when both calls actually returned data.
    my $already_there = defined($stddev) && defined($outdev)
                        && $stddev==$outdev && $stdino==$outino;
    unless ($already_there) {
        # Low-precedence "or" is essential here: with "||" the die would
        # bind to the file name operand and open failures went unnoticed.
        open(STDOUT,">",$output) or die "can't open $output: $!";
    }
}
# Default dialect settings; the flavour selection that follows may
# override any of them.
# GNU-assembler style output is assumed unless the output name ends in .asm;
# ELF is assumed exactly when GNU-assembler style is.
my $gas = ($output =~ /\.asm$/) ? 0 : 1;
my $elf = $gas ? 1 : 0;
my ($win64, $masm, $nasm) = (0, 0, 0);
my ($prefix, $decor, $PTR) = ("", ".L", " PTR");
# Reference assembler versions, assigned to $masm/$nasm when the
# corresponding flavour is requested explicitly.
my $masmref = 8 + 50727*2**-32;	# 8.00.50727 shipped with VS2005
my $nasmref = 2.03;
# Pick the output dialect from $flavour. When no known flavour is given and
# the output is not GNU-assembler style (!$gas), probe for an installed
# nasm or ml64 and derive a numeric version from its banner.
if ($flavour eq "mingw64") {
    $gas=1; $elf=0; $win64=1;
    # Ask the C preprocessor named by $ENV{CC} for its user label prefix.
    $prefix=`echo __USER_LABEL_PREFIX__ | $ENV{CC} -E -P -`;
    # Strip the line terminator. A plain chomp() would leave a stray "\r"
    # behind when the preprocessor emits CRLF, corrupting the prefix.
    $prefix =~ s/[\r\n]+\z//;
}
elsif ($flavour eq "macosx") { $gas=1; $elf=0; $prefix="_"; $decor="L\$"; }
elsif ($flavour eq "masm") { $gas=0; $elf=0; $masm=$masmref; $win64=1; $decor="\$L\$"; }
elsif ($flavour eq "nasm") { $gas=0; $elf=0; $nasm=$nasmref; $win64=1; $decor="\$L\$"; $PTR=""; }
elsif (!$gas) {
    if ($ENV{ASM} =~ m/nasm/ && `nasm -v` =~ m/version ([0-9]+)\.([0-9]+)/i) {
        # major + minor/100, e.g. 2.03
        $nasm = $1 + $2*0.01; $PTR="";
    }
    elsif (`ml64 2>&1` =~ m/Version ([0-9]+)\.([0-9]+)(\.([0-9]+))?/) {
        # Same fixed-point encoding as $masmref: major + minor*2^-16 + build*2^-32.
        $masm = $1 + $2*2**-16 + $4*2**-32;
    }
    die "no assembler found on %PATH" if (!($nasm || $masm));
    $win64=1;
    $elf=0;
    $decor="\$L\$";
}
# File-scope state, declared empty here.
# NOTE(review): presumably filled in by the translation code further down
# the file (not visible in this section) -- confirm against its users.
my ($current_segment, $current_function, %globals);
{ package opcode; # pick up opcodes
sub re {
my $self = shift; # single instance in enough...
local *line = shift;