extend x86_64-xlate.pl perlasm so it can handle .rodata sections properly

For the nasm/masm assembler flavors the xlate script must make sure the code won't land in the .rodata section along with the data. For masm we also need to introduce an align option which can be passed along with the section header. It's a hint for masm to align the rodata/rdata section properly. The macos-x flavor also requires a small tweak to emit the proper section header for its assembler style. Changes for the masm flavor are based on the SEGMENT description [1] in the MASM reference manual. Changes for the nasm flavor are based on chapter 7 of the nasm 2.14 manual [2]. Details behind the macos-x changes can be found in 'Overview of the Mach-O Executable Format' [3]. [1] https://learn.microsoft.com/en-us/cpp/assembler/masm/segment?view=msvc-170 [2] https://nasm.us/xdoc/2.14rc0/html/nasmdoc7.html [3] https://developer.apple.com/library/archive/documentation/Performance/Conceptual/CodeFootprint/Articles/MachOOverview.html Reviewed-by: Richard Levitte <levitte@openssl.org> Reviewed-by: Tomas Mraz <tomas@openssl.org> (Merged from https://github.com/openssl/openssl/pull/23997)
author: Alexandr Nedvedicky <sashan@openssl.org> 2024-04-11 09:06:47 +0200
committer: Tomas Mraz <tomas@openssl.org> 2024-04-17 09:33:57 +0200
commit: fc807a0349bbddb00273465097177025d9b4e25e (patch)
tree: 15888f716d5602b6571a90775c7ea9b151f886c4
parent: 8d8a0144303374f69f73fc944dd55c68600d15e5 (diff)
1 files changed, 162 insertions, 7 deletions
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl
index 6b93cfb84f..80fe1ff197 100755
--- a/crypto/perlasm/x86_64-xlate.pl
+++ b/crypto/perlasm/x86_64-xlate.pl
@@ -156,6 +156,65 @@ _____
 }
 
 my $current_segment;
+#
+# I could not find an equivalent of the .previous directive for MASM
+# (Microsoft assembler ML). The use of .previous was introduced to .pl
+# files when various constants were placed into .rodata sections (segments).
+# Each .rodata section is terminated by a .previous directive which
+# restores the section that preceded .rodata:
+#
+# .text
+# 	; this is the text section/segment
+# .rodata
+#	; constant definitions go here
+# .previous
+#	; the .text section which precedes .rodata got restored here
+#
+# The equivalent form for masm reads as follows:
+#
+# .text$	SEGMENT ALIGN(256) 'CODE'
+# 	; this is the text section/segment
+# .text$	ENDS
+# .rdata	SEGMENT READONLY ALIGN(64)
+#	; constant definitions go here
+# .rdata	ENDS
+# .text$	SEGMENT ALIGN(256) 'CODE'
+#	; text section follows
+# .text$	ENDS
+#
+# The .previous directive typically terminates .rodata segments/sections which
+# hold definitions of constants. In order to place constants into .rdata
+# segments when using masm we need to introduce a segment_stack array so we can
+# emit the proper ENDS directive whenever we see .previous.
+#
+# The code is tailored to work with the current set of .pl/asm files. There are
+# some inconsistencies. For example, the .text section is the first section in
+# all those files except ecp_nistz256. So we need to take that into account.
+#
+#	; stack is empty
+# .text
+#	; push '.text' section twice, the stack looks as
+#	; follows:
+#	;	('.text', '.text')
+# .rodata
+#	; pop() so we can generate proper 'ENDS' for masm.
+#	; stack looks like:
+#	; 	('.text')
+#	; push '.rodata', so we can create corresponding ENDS for masm.
+#	; stack looks like:
+#	;	('.rodata', '.text')
+# .previous
+#	; pop() '.rodata' from stack, so we create '.rodata ENDS'
+#	; in masm flavour. For nasm flavour we just pop() because
+#	; nasm does not use .rodata ENDS to close the current section
+#	; the stack content is like this:
+#	;	('.text')
+#	; pop() again to find the previous section we need to restore.
+#	; Depending on flavour we either generate .section .text
+#	; or .text SEGMENT, then push it back. The stack looks like:
+#	;	('.text')
+#
+my @segment_stack = ();
 my $current_function;
 my %globals;
 
@@ -844,7 +903,21 @@ my %globals;
 		} elsif (!$elf && $dir =~ /\.align/) {
 		    $self->{value} = ".p2align\t" . (log($$line)/log(2));
 		} elsif ($dir eq ".section") {
-		    $current_segment=$$line;
+		    #
+		    # get rid of the align option, it's not supported/tolerated
+		    # by gcc. The openssl project introduced the option as an aid
+		    # to deal with nasm/masm assembly.
+		    #
+		    $self->{value} =~ s/(.+)\s+align\s*=.*$/$1/;
+		    #
+		    # $$line may still contain the align= option. We only care
+		    # about the section type here.
+		    #
+		    $current_segment = $$line;
+		    $current_segment =~ s/([^\s]+).*$/$1/;
+		    if (!$elf && $current_segment eq ".rodata") {
+			if	($flavour eq "macosx") { $self->{value} = ".section\t__DATA,__const"; }
+		    }
 		    if (!$elf && $current_segment eq ".init") {
 			if	($flavour eq "macosx")	{ $self->{value} = ".mod_init_func"; }
 			elsif	($flavour eq "mingw64")	{ $self->{value} = ".section\t.ctors"; }
@@ -857,6 +930,8 @@ my %globals;
 		} elsif ($dir =~ /\.comm/) {
 		    $self->{value} = "$dir\t$prefix$$line";
 		    $self->{value} =~ s|,([0-9]+),([0-9]+)$|",$1,".log($2)/log(2)|e if ($flavour eq "macosx");
+		} elsif ($dir =~ /\.previous/) {
+		    $self->{value} = "" if ($flavour eq "mingw64");
 		}
 		$$line = "";
 		return $self;
@@ -866,10 +941,21 @@ my %globals;
 	    SWITCH: for ($dir) {
 		/\.text/    && do { my $v=undef;
 				    if ($nasm) {
+					$current_segment = pop(@segment_stack);
+					if (not $current_segment) {
+					    push(@segment_stack, ".text");
+				        }
 					$v="section	.text code align=64\n";
+					$current_segment = ".text";
+					push(@segment_stack, $current_segment);
 				    } else {
+					$current_segment = pop(@segment_stack);
+					if (not $current_segment) {
+					    push(@segment_stack, ".text\$");
+				        }
 					$v="$current_segment\tENDS\n" if ($current_segment);
 					$current_segment = ".text\$";
+					push(@segment_stack, $current_segment);
 					$v.="$current_segment\tSEGMENT ";
 					$v.=$masm>=$masmref ? "ALIGN(256)" : "PAGE";
 					$v.=" 'CODE'";
@@ -881,36 +967,75 @@ my %globals;
 				    if ($nasm) {
 					$v="section	.data data align=8\n";
 				    } else {
+					$current_segment = pop(@segment_stack);
 					$v="$current_segment\tENDS\n" if ($current_segment);
 					$current_segment = "_DATA";
+					push(@segment_stack, $current_segment);
 					$v.="$current_segment\tSEGMENT";
 				    }
 				    $self->{value} = $v;
 				    last;
 				  };
 		/\.section/ && do { my $v=undef;
-				    $$line =~ s/([^,]*).*/$1/;
+				    my $align=undef;
+				    #
+				    # $$line may currently contain something like this
+				    #	.rodata align = 64
+				    # align part is optional
+				    #
+				    $align = $$line;
+				    $align =~ s/(.*)(align\s*=\s*\d+$)/$2/;
+				    $$line =~ s/(.*)(\s+align\s*=\s*\d+$)/$1/;
 				    $$line = ".CRT\$XCU" if ($$line eq ".init");
+				    $$line = ".rdata" if ($$line eq ".rodata");
 				    if ($nasm) {
+					$current_segment = pop(@segment_stack);
+					if (not $current_segment) {
+					    #
+					    # This is a hack which deals with ecp_nistz256-x86_64.pl.
+					    # The precomputed curve is stored in the first section
+					    # in the .asm file. Pushing an extra .text section here
+					    # allows our poor man's solution to stick to the assumption
+					    # that the .text section is always the first.
+					    #
+					    push(@segment_stack, ".text");
+					}
 					$v="section	$$line";
-					if ($$line=~/\.([px])data/) {
-					    $v.=" rdata align=";
-					    $v.=$1 eq "p"? 4 : 8;
+					if ($$line=~/\.([prx])data/) {
+					    if ($align =~ /align\s*=\s*(\d+)/) {
+						$v.= " rdata align=$1" ;
+					    } else {
+						$v.=" rdata align=";
+						$v.=$1 eq "p"? 4 : 8;
+					    }
 					} elsif ($$line=~/\.CRT\$/i) {
 					    $v.=" rdata align=8";
 					}
 				    } else {
+					$current_segment = pop(@segment_stack);
+					if (not $current_segment) {
+					    #
+					    # same hack for masm to keep ecp_nistz256-x86_64.pl
+					    # happy.
+					    #
+					    push(@segment_stack, ".text\$");
+				        }
 					$v="$current_segment\tENDS\n" if ($current_segment);
 					$v.="$$line\tSEGMENT";
-					if ($$line=~/\.([px])data/) {
+					if ($$line=~/\.([prx])data/) {
 					    $v.=" READONLY";
-					    $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref);
+					    if ($align =~ /align\s*=\s*(\d+)$/) {
+						$v.=" ALIGN($1)" if ($masm>=$masmref);
+					    } else {
+						$v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref);
+					    }
 					} elsif ($$line=~/\.CRT\$/i) {
 					    $v.=" READONLY ";
 					    $v.=$masm>=$masmref ? "ALIGN(8)" : "DWORD";
 					}
 				    }
 				    $current_segment = $$line;
+				    push(@segment_stack, $$line);
 				    $self->{value} = $v;
 				    last;
 				  };
@@ -973,14 +1098,44 @@ my %globals;
 				    if ($nasm) {
 					$v.="common	$prefix@str[0] @str[1]";
 				    } else {
+					$current_segment = pop(@segment_stack);;
 					$v="$current_segment\tENDS\n" if ($current_segment);
 					$current_segment = "_DATA";
+					push(@segment_stack, $current_segment);
 					$v.="$current_segment\tSEGMENT\n";
 					$v.="COMM	@str[0]:DWORD:".@str[1]/4;
 				    }
 				    $self->{value} = $v;
 				    last;
 				  };
+		/^.previous/ && do {
+				    my $v=undef;
+				    if ($nasm) {
+					pop(@segment_stack); # pop ourselves, we don't need to emit END directive
+					# pop section so we can emit proper .section name.
+					$current_segment = pop(@segment_stack);
+					$v="section $current_segment";
+					# Hack again:
+					# push section/segment to stack. The .previous is currently paired
+					# with .rodata only. We have to keep extra '.text' on stack for
+					# situation where there is for example .pdata section 'terminated'
+					# by new '.text' section.
+					#
+					push(@segment_stack, $current_segment);
+				    } else {
+					$current_segment = pop(@segment_stack);
+					$v="$current_segment\tENDS\n" if ($current_segment);
+					$current_segment = pop(@segment_stack);
+					if ($current_segment =~ /\.text\$/) {
+					    $v.="$current_segment\tSEGMENT ";
+					    $v.=$masm>=$masmref ? "ALIGN(256)" : "PAGE";
+					    $v.=" 'CODE'";
+					    push(@segment_stack, $current_segment);
+					}
+				    }
+				    $self->{value} = $v;
+				    last;
+				    };
 	    }
 	    $$line = "";
 	}
author	Alexandr Nedvedicky <sashan@openssl.org>	2024-04-11 09:06:47 +0200
committer	Tomas Mraz <tomas@openssl.org>	2024-04-17 09:33:57 +0200
commit	fc807a0349bbddb00273465097177025d9b4e25e (patch)
tree	15888f716d5602b6571a90775c7ea9b151f886c4
parent	8d8a0144303374f69f73fc944dd55c68600d15e5 (diff)