aboutsummaryrefslogtreecommitdiff
path: root/openssl/crypto/perlasm
diff options
context:
space:
mode:
Diffstat (limited to 'openssl/crypto/perlasm')
-rw-r--r--openssl/crypto/perlasm/cbc.pl351
-rw-r--r--openssl/crypto/perlasm/readme124
-rw-r--r--openssl/crypto/perlasm/x86_64-xlate.pl554
-rw-r--r--openssl/crypto/perlasm/x86asm.pl130
-rw-r--r--openssl/crypto/perlasm/x86ms.pl472
-rw-r--r--openssl/crypto/perlasm/x86nasm.pl455
-rw-r--r--openssl/crypto/perlasm/x86unix.pl725
7 files changed, 2811 insertions, 0 deletions
diff --git a/openssl/crypto/perlasm/cbc.pl b/openssl/crypto/perlasm/cbc.pl
new file mode 100644
index 000000000..e43dc9ae1
--- /dev/null
+++ b/openssl/crypto/perlasm/cbc.pl
@@ -0,0 +1,351 @@
+#!/usr/local/bin/perl
+
+# void des_ncbc_encrypt(input, output, length, schedule, ivec, enc)
+# des_cblock (*input);
+# des_cblock (*output);
+# long length;
+# des_key_schedule schedule;
+# des_cblock (*ivec);
+# int enc;
+#
+# calls
+# des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT);
+#
+
+#&cbc("des_ncbc_encrypt","des_encrypt",0);
+#&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt",
+# 1,4,5,3,5,-1);
+#&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",
+# 0,4,5,3,5,-1);
+#&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",
+# 0,6,7,3,4,5);
+#
+# When doing a cipher that needs bigendian order,
+# for encrypt, the iv is kept in bigendian form,
+# while for decrypt, it is kept in little endian.
+sub cbc
+ {
+ local($name,$enc_func,$dec_func,$swap,$iv_off,$enc_off,$p1,$p2,$p3)=@_;
+ # name is the function name
+ # enc_func and dec_func and the functions to call for encrypt/decrypt
+ # swap is true if byte order needs to be reversed
+ # iv_off is parameter number for the iv
+ # enc_off is parameter number for the encrypt/decrypt flag
+ # p1,p2,p3 are the offsets for parameters to be passed to the
+ # underlying calls.
+
+ &function_begin_B($name,"");
+ &comment("");
+
+ $in="esi";
+ $out="edi";
+ $count="ebp";
+
+ &push("ebp");
+ &push("ebx");
+ &push("esi");
+ &push("edi");
+
+ $data_off=4;
+ $data_off+=4 if ($p1 > 0);
+ $data_off+=4 if ($p2 > 0);
+ $data_off+=4 if ($p3 > 0);
+
+ &mov($count, &wparam(2)); # length
+
+ &comment("getting iv ptr from parameter $iv_off");
+ &mov("ebx", &wparam($iv_off)); # Get iv ptr
+
+ &mov($in, &DWP(0,"ebx","",0));# iv[0]
+ &mov($out, &DWP(4,"ebx","",0));# iv[1]
+
+ &push($out);
+ &push($in);
+ &push($out); # used in decrypt for iv[1]
+ &push($in); # used in decrypt for iv[0]
+
+ &mov("ebx", "esp"); # This is the address of tin[2]
+
+ &mov($in, &wparam(0)); # in
+ &mov($out, &wparam(1)); # out
+
+ # We have loaded them all, how lets push things
+ &comment("getting encrypt flag from parameter $enc_off");
+ &mov("ecx", &wparam($enc_off)); # Get enc flag
+ if ($p3 > 0)
+ {
+ &comment("get and push parameter $p3");
+ if ($enc_off != $p3)
+ { &mov("eax", &wparam($p3)); &push("eax"); }
+ else { &push("ecx"); }
+ }
+ if ($p2 > 0)
+ {
+ &comment("get and push parameter $p2");
+ if ($enc_off != $p2)
+ { &mov("eax", &wparam($p2)); &push("eax"); }
+ else { &push("ecx"); }
+ }
+ if ($p1 > 0)
+ {
+ &comment("get and push parameter $p1");
+ if ($enc_off != $p1)
+ { &mov("eax", &wparam($p1)); &push("eax"); }
+ else { &push("ecx"); }
+ }
+ &push("ebx"); # push data/iv
+
+ &cmp("ecx",0);
+ &jz(&label("decrypt"));
+
+ &and($count,0xfffffff8);
+ &mov("eax", &DWP($data_off,"esp","",0)); # load iv[0]
+ &mov("ebx", &DWP($data_off+4,"esp","",0)); # load iv[1]
+
+ &jz(&label("encrypt_finish"));
+
+ #############################################################
+
+ &set_label("encrypt_loop");
+ # encrypt start
+ # "eax" and "ebx" hold iv (or the last cipher text)
+
+ &mov("ecx", &DWP(0,$in,"",0)); # load first 4 bytes
+ &mov("edx", &DWP(4,$in,"",0)); # second 4 bytes
+
+ &xor("eax", "ecx");
+ &xor("ebx", "edx");
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($enc_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0));
+ &mov("ebx", &DWP($data_off+4,"esp","",0));
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP(0,$out,"",0),"eax");
+ &mov(&DWP(4,$out,"",0),"ebx");
+
+ # eax and ebx are the next iv.
+
+ &add($in, 8);
+ &add($out, 8);
+
+ &sub($count, 8);
+ &jnz(&label("encrypt_loop"));
+
+###################################################################3
+ &set_label("encrypt_finish");
+ &mov($count, &wparam(2)); # length
+ &and($count, 7);
+ &jz(&label("finish"));
+ &call(&label("PIC_point"));
+&set_label("PIC_point");
+ &blindpop("edx");
+ &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx"));
+ &mov($count,&DWP(0,"ecx",$count,4))
+ &add($count,"edx");
+ &xor("ecx","ecx");
+ &xor("edx","edx");
+ #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4));
+ &jmp_ptr($count);
+
+&set_label("ej7");
+ &xor("edx", "edx") if $ppro; # ppro friendly
+ &movb(&HB("edx"), &BP(6,$in,"",0));
+ &shl("edx",8);
+&set_label("ej6");
+ &movb(&HB("edx"), &BP(5,$in,"",0));
+&set_label("ej5");
+ &movb(&LB("edx"), &BP(4,$in,"",0));
+&set_label("ej4");
+ &mov("ecx", &DWP(0,$in,"",0));
+ &jmp(&label("ejend"));
+&set_label("ej3");
+ &movb(&HB("ecx"), &BP(2,$in,"",0));
+ &xor("ecx", "ecx") if $ppro; # ppro friendly
+ &shl("ecx",8);
+&set_label("ej2");
+ &movb(&HB("ecx"), &BP(1,$in,"",0));
+&set_label("ej1");
+ &movb(&LB("ecx"), &BP(0,$in,"",0));
+&set_label("ejend");
+
+ &xor("eax", "ecx");
+ &xor("ebx", "edx");
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($enc_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0));
+ &mov("ebx", &DWP($data_off+4,"esp","",0));
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP(0,$out,"",0),"eax");
+ &mov(&DWP(4,$out,"",0),"ebx");
+
+ &jmp(&label("finish"));
+
+ #############################################################
+ #############################################################
+ &set_label("decrypt",1);
+ # decrypt start
+ &and($count,0xfffffff8);
+ # The next 2 instructions are only for if the jz is taken
+ &mov("eax", &DWP($data_off+8,"esp","",0)); # get iv[0]
+ &mov("ebx", &DWP($data_off+12,"esp","",0)); # get iv[1]
+ &jz(&label("decrypt_finish"));
+
+ &set_label("decrypt_loop");
+ &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes
+ &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put back
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($dec_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0)); # get return
+ &mov("ebx", &DWP($data_off+4,"esp","",0)); #
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0]
+ &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1]
+
+ &xor("ecx", "eax");
+ &xor("edx", "ebx");
+
+ &mov("eax", &DWP(0,$in,"",0)); # get old cipher text,
+ &mov("ebx", &DWP(4,$in,"",0)); # next iv actually
+
+ &mov(&DWP(0,$out,"",0),"ecx");
+ &mov(&DWP(4,$out,"",0),"edx");
+
+ &mov(&DWP($data_off+8,"esp","",0), "eax"); # save iv
+ &mov(&DWP($data_off+12,"esp","",0), "ebx"); #
+
+ &add($in, 8);
+ &add($out, 8);
+
+ &sub($count, 8);
+ &jnz(&label("decrypt_loop"));
+############################ ENDIT #######################3
+ &set_label("decrypt_finish");
+ &mov($count, &wparam(2)); # length
+ &and($count, 7);
+ &jz(&label("finish"));
+
+ &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes
+ &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov(&DWP($data_off,"esp","",0), "eax"); # put back
+ &mov(&DWP($data_off+4,"esp","",0), "ebx"); #
+
+ &call($dec_func);
+
+ &mov("eax", &DWP($data_off,"esp","",0)); # get return
+ &mov("ebx", &DWP($data_off+4,"esp","",0)); #
+
+ &bswap("eax") if $swap;
+ &bswap("ebx") if $swap;
+
+ &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0]
+ &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1]
+
+ &xor("ecx", "eax");
+ &xor("edx", "ebx");
+
+ # this is for when we exit
+ &mov("eax", &DWP(0,$in,"",0)); # get old cipher text,
+ &mov("ebx", &DWP(4,$in,"",0)); # next iv actually
+
+&set_label("dj7");
+ &rotr("edx", 16);
+ &movb(&BP(6,$out,"",0), &LB("edx"));
+ &shr("edx",16);
+&set_label("dj6");
+ &movb(&BP(5,$out,"",0), &HB("edx"));
+&set_label("dj5");
+ &movb(&BP(4,$out,"",0), &LB("edx"));
+&set_label("dj4");
+ &mov(&DWP(0,$out,"",0), "ecx");
+ &jmp(&label("djend"));
+&set_label("dj3");
+ &rotr("ecx", 16);
+ &movb(&BP(2,$out,"",0), &LB("ecx"));
+ &shl("ecx",16);
+&set_label("dj2");
+ &movb(&BP(1,$in,"",0), &HB("ecx"));
+&set_label("dj1");
+ &movb(&BP(0,$in,"",0), &LB("ecx"));
+&set_label("djend");
+
+ # final iv is still in eax:ebx
+ &jmp(&label("finish"));
+
+
+############################ FINISH #######################3
+ &set_label("finish",1);
+ &mov("ecx", &wparam($iv_off)); # Get iv ptr
+
+ #################################################
+ $total=16+4;
+ $total+=4 if ($p1 > 0);
+ $total+=4 if ($p2 > 0);
+ $total+=4 if ($p3 > 0);
+ &add("esp",$total);
+
+ &mov(&DWP(0,"ecx","",0), "eax"); # save iv
+ &mov(&DWP(4,"ecx","",0), "ebx"); # save iv
+
+ &function_end_A($name);
+
+ &align(64);
+ &set_label("cbc_enc_jmp_table");
+ &data_word("0");
+ &data_word(&label("ej1")."-".&label("PIC_point"));
+ &data_word(&label("ej2")."-".&label("PIC_point"));
+ &data_word(&label("ej3")."-".&label("PIC_point"));
+ &data_word(&label("ej4")."-".&label("PIC_point"));
+ &data_word(&label("ej5")."-".&label("PIC_point"));
+ &data_word(&label("ej6")."-".&label("PIC_point"));
+ &data_word(&label("ej7")."-".&label("PIC_point"));
+ # not used
+ #&set_label("cbc_dec_jmp_table",1);
+ #&data_word("0");
+ #&data_word(&label("dj1")."-".&label("PIC_point"));
+ #&data_word(&label("dj2")."-".&label("PIC_point"));
+ #&data_word(&label("dj3")."-".&label("PIC_point"));
+ #&data_word(&label("dj4")."-".&label("PIC_point"));
+ #&data_word(&label("dj5")."-".&label("PIC_point"));
+ #&data_word(&label("dj6")."-".&label("PIC_point"));
+ #&data_word(&label("dj7")."-".&label("PIC_point"));
+ &align(64);
+
+ &function_end_B($name);
+
+ }
+
+1;
diff --git a/openssl/crypto/perlasm/readme b/openssl/crypto/perlasm/readme
new file mode 100644
index 000000000..f02bbee75
--- /dev/null
+++ b/openssl/crypto/perlasm/readme
@@ -0,0 +1,124 @@
+The perl scripts in this directory are my 'hack' to generate
+multiple different assembler formats via the one origional script.
+
+The way to use this library is to start with adding the path to this directory
+and then include it.
+
+push(@INC,"perlasm","../../perlasm");
+require "x86asm.pl";
+
+The first thing we do is setup the file and type of assember
+
+&asm_init($ARGV[0],$0);
+
+The first argument is the 'type'. Currently
+'cpp', 'sol', 'a.out', 'elf' or 'win32'.
+Argument 2 is the file name.
+
+The reciprocal function is
+&asm_finish() which should be called at the end.
+
+There are 2 main 'packages'. x86ms.pl, which is the microsoft assembler,
+and x86unix.pl which is the unix (gas) version.
+
+Functions of interest are:
+&external_label("des_SPtrans"); declare and external variable
+&LB(reg); Low byte for a register
+&HB(reg); High byte for a register
+&BP(off,base,index,scale) Byte pointer addressing
+&DWP(off,base,index,scale) Word pointer addressing
+&stack_push(num) Basically a 'sub esp, num*4' with extra
+&stack_pop(num) inverse of stack_push
+&function_begin(name,extra) Start a function with pushing of
+ edi, esi, ebx and ebp. extra is extra win32
+ external info that may be required.
+&function_begin_B(name,extra) Same as norma function_begin but no pushing.
+&function_end(name) Call at end of function.
+&function_end_A(name) Standard pop and ret, for use inside functions
+&function_end_B(name) Call at end but with poping or 'ret'.
+&swtmp(num) Address on stack temp word.
+&wparam(num) Parameter number num, that was push
+ in C convention. This all works over pushes
+ and pops.
+&comment("hello there") Put in a comment.
+&label("loop") Refer to a label, normally a jmp target.
+&set_label("loop") Set a label at this point.
+&data_word(word) Put in a word of data.
+
+So how does this all hold together? Given
+
+int calc(int len, int *data)
+ {
+ int i,j=0;
+
+ for (i=0; i<len; i++)
+ {
+ j+=other(data[i]);
+ }
+ }
+
+So a very simple version of this function could be coded as
+
+ push(@INC,"perlasm","../../perlasm");
+ require "x86asm.pl";
+
+ &asm_init($ARGV[0],"cacl.pl");
+
+ &external_label("other");
+
+ $tmp1= "eax";
+ $j= "edi";
+ $data= "esi";
+ $i= "ebp";
+
+ &comment("a simple function");
+ &function_begin("calc");
+ &mov( $data, &wparam(1)); # data
+ &xor( $j, $j);
+ &xor( $i, $i);
+
+ &set_label("loop");
+ &cmp( $i, &wparam(0));
+ &jge( &label("end"));
+
+ &mov( $tmp1, &DWP(0,$data,$i,4));
+ &push( $tmp1);
+ &call( "other");
+ &add( $j, "eax");
+ &pop( $tmp1);
+ &inc( $i);
+ &jmp( &label("loop"));
+
+ &set_label("end");
+ &mov( "eax", $j);
+
+ &function_end("calc");
+
+ &asm_finish();
+
+The above example is very very unoptimised but gives an idea of how
+things work.
+
+There is also a cbc mode function generator in cbc.pl
+
+&cbc( $name,
+ $encrypt_function_name,
+ $decrypt_function_name,
+ $true_if_byte_swap_needed,
+ $parameter_number_for_iv,
+ $parameter_number_for_encrypt_flag,
+ $first_parameter_to_pass,
+ $second_parameter_to_pass,
+ $third_parameter_to_pass);
+
+So for example, given
+void BF_encrypt(BF_LONG *data,BF_KEY *key);
+void BF_decrypt(BF_LONG *data,BF_KEY *key);
+void BF_cbc_encrypt(unsigned char *in, unsigned char *out, long length,
+ BF_KEY *ks, unsigned char *iv, int enc);
+
+&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt",1,4,5,3,-1,-1);
+
+&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",0,4,5,3,5,-1);
+&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5);
+
diff --git a/openssl/crypto/perlasm/x86_64-xlate.pl b/openssl/crypto/perlasm/x86_64-xlate.pl
new file mode 100644
index 000000000..a4af769b4
--- /dev/null
+++ b/openssl/crypto/perlasm/x86_64-xlate.pl
@@ -0,0 +1,554 @@
+#!/usr/bin/env perl
+
+# Ascetic x86_64 AT&T to MASM assembler translator by <appro>.
+#
+# Why AT&T to MASM and not vice versa? Several reasons. Because AT&T
+# format is way easier to parse. Because it's simpler to "gear" from
+# Unix ABI to Windows one [see cross-reference "card" at the end of
+# file]. Because Linux targets were available first...
+#
+# In addition the script also "distills" code suitable for GNU
+# assembler, so that it can be compiled with more rigid assemblers,
+# such as Solaris /usr/ccs/bin/as.
+#
+# This translator is not designed to convert *arbitrary* assembler
+# code from AT&T format to MASM one. It's designed to convert just
+# enough to provide for dual-ABI OpenSSL modules development...
+# There *are* limitations and you might have to modify your assembler
+# code or this script to achieve the desired result...
+#
+# Currently recognized limitations:
+#
+# - can't use multiple ops per line;
+# - indirect calls and jumps are not supported;
+#
+# Dual-ABI styling rules.
+#
+# 1. Adhere to Unix register and stack layout [see the end for
+# explanation].
+# 2. Forget about "red zone," stick to more traditional blended
+# stack frame allocation. If volatile storage is actually required
+# that is. If not, just leave the stack as is.
+# 3. Functions tagged with ".type name,@function" get crafted with
+# unified Win64 prologue and epilogue automatically. If you want
+# to take care of ABI differences yourself, tag functions as
+# ".type name,@abi-omnipotent" instead.
+# 4. To optimize the Win64 prologue you can specify number of input
+# arguments as ".type name,@function,N." Keep in mind that if N is
+# larger than 6, then you *have to* write "abi-omnipotent" code,
+# because >6 cases can't be addressed with unified prologue.
+# 5. Name local labels as .L*, do *not* use dynamic labels such as 1:
+# (sorry about latter).
+# 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is
+# required to identify the spots, where to inject Win64 epilogue!
+# But on the pros, it's then prefixed with rep automatically:-)
+# 7. Due to MASM limitations [and certain general counter-intuitivity
+# of ip-relative addressing] generation of position-independent
+# code is assisted by synthetic directive, .picmeup, which puts
+# address of the *next* instruction into target register.
+#
+# Example 1:
+# .picmeup %rax
+# lea .Label-.(%rax),%rax
+# Example 2:
+# .picmeup %rcx
+# .Lpic_point:
+# ...
+# lea .Label-.Lpic_point(%rcx),%rbp
+
+my $output = shift;
+
+{ my ($stddev,$stdino,@junk)=stat(STDOUT);
+ my ($outdev,$outino,@junk)=stat($output);
+
+ open STDOUT,">$output" || die "can't open $output: $!"
+ if ($stddev!=$outdev || $stdino!=$outino);
+}
+
+my $masmref=8 + 50727*2**-32; # 8.00.50727 shipped with VS2005
+my $masm=$masmref if ($output =~ /\.asm/);
+if ($masm && `ml64 2>&1` =~ m/Version ([0-9]+)\.([0-9]+)(\.([0-9]+))?/)
+{ $masm=$1 + $2*2**-16 + $4*2**-32; }
+
+my $current_segment;
+my $current_function;
+
+{ package opcode; # pick up opcodes
+ sub re {
+ my $self = shift; # single instance in enough...
+ local *line = shift;
+ undef $ret;
+
+ if ($line =~ /^([a-z][a-z0-9]*)/i) {
+ $self->{op} = $1;
+ $ret = $self;
+ $line = substr($line,@+[0]); $line =~ s/^\s+//;
+
+ undef $self->{sz};
+ if ($self->{op} =~ /^(movz)b.*/) { # movz is pain...
+ $self->{op} = $1;
+ $self->{sz} = "b";
+ } elsif ($self->{op} =~ /call/) {
+ $self->{sz} = ""
+ } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) {
+ $self->{op} = $1;
+ $self->{sz} = $2;
+ }
+ }
+ $ret;
+ }
+ sub size {
+ my $self = shift;
+ my $sz = shift;
+ $self->{sz} = $sz if (defined($sz) && !defined($self->{sz}));
+ $self->{sz};
+ }
+ sub out {
+ my $self = shift;
+ if (!$masm) {
+ if ($self->{op} eq "movz") { # movz is pain...
+ sprintf "%s%s%s",$self->{op},$self->{sz},shift;
+ } elsif ($self->{op} =~ /^set/) {
+ "$self->{op}";
+ } elsif ($self->{op} eq "ret") {
+ ".byte 0xf3,0xc3";
+ } else {
+ "$self->{op}$self->{sz}";
+ }
+ } else {
+ $self->{op} =~ s/^movz/movzx/;
+ if ($self->{op} eq "ret") {
+ $self->{op} = "";
+ if ($current_function->{abi} eq "svr4") {
+ $self->{op} = "mov rdi,QWORD PTR 8[rsp]\t;WIN64 epilogue\n\t".
+ "mov rsi,QWORD PTR 16[rsp]\n\t";
+ }
+ $self->{op} .= "DB\t0F3h,0C3h\t\t;repret";
+ }
+ $self->{op};
+ }
+ }
+}
+{ package const; # pick up constants, which start with $
+ sub re {
+ my $self = shift; # single instance in enough...
+ local *line = shift;
+ undef $ret;
+
+ if ($line =~ /^\$([^,]+)/) {
+ $self->{value} = $1;
+ $ret = $self;
+ $line = substr($line,@+[0]); $line =~ s/^\s+//;
+ }
+ $ret;
+ }
+ sub out {
+ my $self = shift;
+
+ if (!$masm) {
+ # Solaris /usr/ccs/bin/as can't handle multiplications
+ # in $self->{value}
+ $self->{value} =~ s/(?<![0-9a-f])(0[x0-9a-f]+)/oct($1)/egi;
+ $self->{value} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg;
+ sprintf "\$%s",$self->{value};
+ } else {
+ $self->{value} =~ s/0x([0-9a-f]+)/0$1h/ig;
+ sprintf "%s",$self->{value};
+ }
+ }
+}
+{ package ea; # pick up effective addresses: expr(%reg,%reg,scale)
+ sub re {
+ my $self = shift; # single instance in enough...
+ local *line = shift;
+ undef $ret;
+
+ if ($line =~ /^([^\(,]*)\(([%\w,]+)\)/) {
+ $self->{label} = $1;
+ ($self->{base},$self->{index},$self->{scale})=split(/,/,$2);
+ $self->{scale} = 1 if (!defined($self->{scale}));
+ $ret = $self;
+ $line = substr($line,@+[0]); $line =~ s/^\s+//;
+
+ $self->{base} =~ s/^%//;
+ $self->{index} =~ s/^%// if (defined($self->{index}));
+ }
+ $ret;
+ }
+ sub size {}
+ sub out {
+ my $self = shift;
+ my $sz = shift;
+
+ # Silently convert all EAs to 64-bit. This is required for
+ # elder GNU assembler and results in more compact code,
+ # *but* most importantly AES module depends on this feature!
+ $self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/;
+ $self->{base} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/;
+
+ if (!$masm) {
+ # Solaris /usr/ccs/bin/as can't handle multiplications
+ # in $self->{label}
+ $self->{label} =~ s/(?<![0-9a-f])(0[x0-9a-f]+)/oct($1)/egi;
+ $self->{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg;
+
+ if (defined($self->{index})) {
+ sprintf "%s(%%%s,%%%s,%d)",
+ $self->{label},$self->{base},
+ $self->{index},$self->{scale};
+ } else {
+ sprintf "%s(%%%s)", $self->{label},$self->{base};
+ }
+ } else {
+ %szmap = ( b=>"BYTE", w=>"WORD", l=>"DWORD", q=>"QWORD" );
+
+ $self->{label} =~ s/\./\$/g;
+ $self->{label} =~ s/0x([0-9a-f]+)/0$1h/ig;
+ $self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/);
+
+ if (defined($self->{index})) {
+ sprintf "%s PTR %s[%s*%d+%s]",$szmap{$sz},
+ $self->{label},
+ $self->{index},$self->{scale},
+ $self->{base};
+ } elsif ($self->{base} eq "rip") {
+ sprintf "%s PTR %s",$szmap{$sz},$self->{label};
+ } else {
+ sprintf "%s PTR %s[%s]",$szmap{$sz},
+ $self->{label},$self->{base};
+ }
+ }
+ }
+}
+{ package register; # pick up registers, which start with %.
+ sub re {
+ my $class = shift; # muliple instances...
+ my $self = {};
+ local *line = shift;
+ undef $ret;
+
+ if ($line =~ /^%(\w+)/) {
+ bless $self,$class;
+ $self->{value} = $1;
+ $ret = $self;
+ $line = substr($line,@+[0]); $line =~ s/^\s+//;
+ }
+ $ret;
+ }
+ sub size {
+ my $self = shift;
+ undef $ret;
+
+ if ($self->{value} =~ /^r[\d]+b$/i) { $ret="b"; }
+ elsif ($self->{value} =~ /^r[\d]+w$/i) { $ret="w"; }
+ elsif ($self->{value} =~ /^r[\d]+d$/i) { $ret="l"; }
+ elsif ($self->{value} =~ /^r[\w]+$/i) { $ret="q"; }
+ elsif ($self->{value} =~ /^[a-d][hl]$/i){ $ret="b"; }
+ elsif ($self->{value} =~ /^[\w]{2}l$/i) { $ret="b"; }
+ elsif ($self->{value} =~ /^[\w]{2}$/i) { $ret="w"; }
+ elsif ($self->{value} =~ /^e[a-z]{2}$/i){ $ret="l"; }
+
+ $ret;
+ }
+ sub out {
+ my $self = shift;
+ sprintf $masm?"%s":"%%%s",$self->{value};
+ }
+}
+{ package label; # pick up labels, which end with :
+ sub re {
+ my $self = shift; # single instance is enough...
+ local *line = shift;
+ undef $ret;
+
+ if ($line =~ /(^[\.\w]+\:)/) {
+ $self->{value} = $1;
+ $ret = $self;
+ $line = substr($line,@+[0]); $line =~ s/^\s+//;
+
+ $self->{value} =~ s/\.L/\$L/ if ($masm);
+ }
+ $ret;
+ }
+ sub out {
+ my $self = shift;
+
+ if (!$masm) {
+ $self->{value};
+ } elsif ($self->{value} ne "$current_function->{name}:") {
+ $self->{value};
+ } elsif ($current_function->{abi} eq "svr4") {
+ my $func = "$current_function->{name} PROC\n".
+ " mov QWORD PTR 8[rsp],rdi\t;WIN64 prologue\n".
+ " mov QWORD PTR 16[rsp],rsi\n";
+ my $narg = $current_function->{narg};
+ $narg=6 if (!defined($narg));
+ $func .= " mov rdi,rcx\n" if ($narg>0);
+ $func .= " mov rsi,rdx\n" if ($narg>1);
+ $func .= " mov rdx,r8\n" if ($narg>2);
+ $func .= " mov rcx,r9\n" if ($narg>3);
+ $func .= " mov r8,QWORD PTR 40[rsp]\n" if ($narg>4);
+ $func .= " mov r9,QWORD PTR 48[rsp]\n" if ($narg>5);
+ $func .= "\n";
+ } else {
+ "$current_function->{name} PROC";
+ }
+ }
+}
+{ package expr; # pick up expressioins
+ sub re {
+ my $self = shift; # single instance is enough...
+ local *line = shift;
+ undef $ret;
+
+ if ($line =~ /(^[^,]+)/) {
+ $self->{value} = $1;
+ $ret = $self;
+ $line = substr($line,@+[0]); $line =~ s/^\s+//;
+
+ $self->{value} =~ s/\.L/\$L/g if ($masm);
+ }
+ $ret;
+ }
+ sub out {
+ my $self = shift;
+ $self->{value};
+ }
+}
+{ package directive; # pick up directives, which start with .
+ sub re {
+ my $self = shift; # single instance is enough...
+ local *line = shift;
+ undef $ret;
+ my $dir;
+ my %opcode = # lea 2f-1f(%rip),%dst; 1: nop; 2:
+ ( "%rax"=>0x01058d48, "%rcx"=>0x010d8d48,
+ "%rdx"=>0x01158d48, "%rbx"=>0x011d8d48,
+ "%rsp"=>0x01258d48, "%rbp"=>0x012d8d48,
+ "%rsi"=>0x01358d48, "%rdi"=>0x013d8d48,
+ "%r8" =>0x01058d4c, "%r9" =>0x010d8d4c,
+ "%r10"=>0x01158d4c, "%r11"=>0x011d8d4c,
+ "%r12"=>0x01258d4c, "%r13"=>0x012d8d4c,
+ "%r14"=>0x01358d4c, "%r15"=>0x013d8d4c );
+
+ if ($line =~ /^\s*(\.\w+)/) {
+ if (!$masm) {
+ $self->{value} = $1;
+ $line =~ s/\@abi\-omnipotent/\@function/;
+ $line =~ s/\@function.*/\@function/;
+ if ($line =~ /\.picmeup\s+(%r[\w]+)/i) {
+ $self->{value} = sprintf "\t.long\t0x%x,0x90000000",$opcode{$1};
+ } elsif ($line =~ /\.asciz\s+"(.*)"$/) {
+ $self->{value} = ".byte\t".join(",",unpack("C*",$1),0);
+ } elsif ($line =~ /\.extern/) {
+ $self->{value} = ""; # swallow extern
+ } else {
+ $self->{value} = $line;
+ }
+ $line = "";
+ return $self;
+ }
+
+ $dir = $1;
+ $ret = $self;
+ undef $self->{value};
+ $line = substr($line,@+[0]); $line =~ s/^\s+//;
+ SWITCH: for ($dir) {
+ /\.(text)/
+ && do { my $v=undef;
+ $v="$current_segment\tENDS\n" if ($current_segment);
+ $current_segment = "_$1\$";
+ $current_segment =~ tr/[a-z]/[A-Z]/;
+ $v.="$current_segment\tSEGMENT ";
+ $v.=$masm>=$masmref ? "ALIGN(64)" : "PAGE";
+ $v.=" 'CODE'";
+ $self->{value} = $v;
+ last;
+ };
+ /\.extern/ && do { $self->{value} = "EXTRN\t".$line.":BYTE"; last; };
+ /\.globl/ && do { $self->{value} = "PUBLIC\t".$line; last; };
+ /\.type/ && do { ($sym,$type,$narg) = split(',',$line);
+ if ($type eq "\@function") {
+ undef $current_function;
+ $current_function->{name} = $sym;
+ $current_function->{abi} = "svr4";
+ $current_function->{narg} = $narg;
+ } elsif ($type eq "\@abi-omnipotent") {
+ undef $current_function;
+ $current_function->{name} = $sym;
+ }
+ last;
+ };
+ /\.size/ && do { if (defined($current_function)) {
+ $self->{value}="$current_function->{name}\tENDP";
+ undef $current_function;
+ }
+ last;
+ };
+ /\.align/ && do { $self->{value} = "ALIGN\t".$line; last; };
+ /\.(byte|value|long|quad)/
+ && do { my @arr = split(',',$line);
+ my $sz = substr($1,0,1);
+ my $last = pop(@arr);
+ my $conv = sub { my $var=shift;
+ if ($var=~s/0x([0-9a-f]+)/0$1h/i) { $var; }
+ else { sprintf"0%Xh",$var; }
+ };
+
+ $sz =~ tr/bvlq/BWDQ/;
+ $self->{value} = "\tD$sz\t";
+ for (@arr) { $self->{value} .= &$conv($_).","; }
+ $self->{value} .= &$conv($last);
+ last;
+ };
+ /\.picmeup/ && do { $self->{value} = sprintf"\tDD\t 0%Xh,090000000h",$opcode{$line};
+ last;
+ };
+ /\.asciz/ && do { if ($line =~ /^"(.*)"$/) {
+ my @str=unpack("C*",$1);
+ push @str,0;
+ while ($#str>15) {
+ $self->{value}.="DB\t"
+ .join(",",@str[0..15])."\n";
+ foreach (0..15) { shift @str; }
+ }
+ $self->{value}.="DB\t"
+ .join(",",@str) if (@str);
+ }
+ last;
+ };
+ }
+ $line = "";
+ }
+
+ $ret;
+ }
+ sub out {
+ my $self = shift;
+ $self->{value};
+ }
+}
+
+while($line=<>) {
+
+ chomp($line);
+
+ $line =~ s|[#!].*$||; # get rid of asm-style comments...
+ $line =~ s|/\*.*\*/||; # ... and C-style comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in beginning
+
+ undef $label;
+ undef $opcode;
+ undef $dst;
+ undef $src;
+ undef $sz;
+
+ if ($label=label->re(\$line)) { print $label->out(); }
+
+ if (directive->re(\$line)) {
+ printf "%s",directive->out();
+ } elsif ($opcode=opcode->re(\$line)) { ARGUMENT: {
+
+ if ($src=register->re(\$line)) { opcode->size($src->size()); }
+ elsif ($src=const->re(\$line)) { }
+ elsif ($src=ea->re(\$line)) { }
+ elsif ($src=expr->re(\$line)) { }
+
+ last ARGUMENT if ($line !~ /^,/);
+
+ $line = substr($line,1); $line =~ s/^\s+//;
+
+ if ($dst=register->re(\$line)) { opcode->size($dst->size()); }
+ elsif ($dst=const->re(\$line)) { }
+ elsif ($dst=ea->re(\$line)) { }
+
+ } # ARGUMENT:
+
+ $sz=opcode->size();
+
+ if (defined($dst)) {
+ if (!$masm) {
+ printf "\t%s\t%s,%s", $opcode->out($dst->size()),
+ $src->out($sz),$dst->out($sz);
+ } else {
+ printf "\t%s\t%s,%s", $opcode->out(),
+ $dst->out($sz),$src->out($sz);
+ }
+ } elsif (defined($src)) {
+ printf "\t%s\t%s",$opcode->out(),$src->out($sz);
+ } else {
+ printf "\t%s",$opcode->out();
+ }
+ }
+
+ print $line,"\n";
+}
+
+print "\n$current_segment\tENDS\nEND\n" if ($masm);
+
+close STDOUT;
+
+#################################################
+# Cross-reference x86_64 ABI "card"
+#
+# Unix Win64
+# %rax * *
+# %rbx - -
+# %rcx #4 #1
+# %rdx #3 #2
+# %rsi #2 -
+# %rdi #1 -
+# %rbp - -
+# %rsp - -
+# %r8 #5 #3
+# %r9 #6 #4
+# %r10 * *
+# %r11 * *
+# %r12 - -
+# %r13 - -
+# %r14 - -
+# %r15 - -
+#
+# (*) volatile register
+# (-) preserved by callee
+# (#) Nth argument, volatile
+#
+# In Unix terms top of stack is argument transfer area for arguments
+# which could not be accomodated in registers. Or in other words 7th
+# [integer] argument resides at 8(%rsp) upon function entry point.
+# 128 bytes above %rsp constitute a "red zone" which is not touched
+# by signal handlers and can be used as temporal storage without
+# allocating a frame.
+#
+# In Win64 terms N*8 bytes on top of stack is argument transfer area,
+# which belongs to/can be overwritten by callee. N is the number of
+# arguments passed to callee, *but* not less than 4! This means that
+# upon function entry point 5th argument resides at 40(%rsp), as well
+# as that 32 bytes from 8(%rsp) can always be used as temporal
+# storage [without allocating a frame]. One can actually argue that
+# one can assume a "red zone" above stack pointer under Win64 as well.
+# Point is that at apparently no occasion Windows kernel would alter
+# the area above user stack pointer in true asynchronous manner...
+#
+# All the above means that if assembler programmer adheres to Unix
+# register and stack layout, but disregards the "red zone" existense,
+# it's possible to use following prologue and epilogue to "gear" from
+# Unix to Win64 ABI in leaf functions with not more than 6 arguments.
+#
+# omnipotent_function:
+# ifdef WIN64
+# movq %rdi,8(%rsp)
+# movq %rsi,16(%rsp)
+# movq %rcx,%rdi ; if 1st argument is actually present
+# movq %rdx,%rsi ; if 2nd argument is actually ...
+# movq %r8,%rdx ; if 3rd argument is ...
+# movq %r9,%rcx ; if 4th argument ...
+# movq 40(%rsp),%r8 ; if 5th ...
+# movq 48(%rsp),%r9 ; if 6th ...
+# endif
+# ...
+# ifdef WIN64
+# movq 8(%rsp),%rdi
+# movq 16(%rsp),%rsi
+# endif
+# ret
diff --git a/openssl/crypto/perlasm/x86asm.pl b/openssl/crypto/perlasm/x86asm.pl
new file mode 100644
index 000000000..597912215
--- /dev/null
+++ b/openssl/crypto/perlasm/x86asm.pl
@@ -0,0 +1,130 @@
+#!/usr/local/bin/perl
+
+# require 'x86asm.pl';
+# &asm_init("cpp","des-586.pl");
+# XXX
+# XXX
+# main'asm_finish
+
+sub main'asm_finish
+ {
+ &file_end();
+ &asm_finish_cpp() if $cpp;
+ print &asm_get_output();
+ }
+
+sub main'asm_init
+ {
+ ($type,$fn,$i386)=@_;
+ $filename=$fn;
+
+ $elf=$cpp=$coff=$aout=$win32=$netware=$mwerks=0;
+ if ( ($type eq "elf"))
+ { $elf=1; require "x86unix.pl"; }
+ elsif ( ($type eq "a.out"))
+ { $aout=1; require "x86unix.pl"; }
+ elsif ( ($type eq "coff" or $type eq "gaswin"))
+ { $coff=1; require "x86unix.pl"; }
+ elsif ( ($type eq "cpp"))
+ { $cpp=1; require "x86unix.pl"; }
+ elsif ( ($type eq "win32"))
+ { $win32=1; require "x86ms.pl"; }
+ elsif ( ($type eq "win32n"))
+ { $win32=1; require "x86nasm.pl"; }
+ elsif ( ($type eq "nw-nasm"))
+ { $netware=1; require "x86nasm.pl"; }
+ elsif ( ($type eq "nw-mwasm"))
+ { $netware=1; $mwerks=1; require "x86nasm.pl"; }
+ else
+ {
+ print STDERR <<"EOF";
+Pick one target type from
+ elf - Linux, FreeBSD, Solaris x86, etc.
+ a.out - OpenBSD, DJGPP, etc.
+ coff - GAS/COFF such as Win32 targets
+ win32 - Windows 95/Windows NT
+ win32n - Windows 95/Windows NT NASM format
+ nw-nasm - NetWare NASM format
+ nw-mwasm- NetWare Metrowerks Assembler
+EOF
+ exit(1);
+ }
+
+ $pic=0;
+ for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); }
+
+ &asm_init_output();
+
+&comment("Don't even think of reading this code");
+&comment("It was automatically generated by $filename");
+&comment("Which is a perl program used to generate the x86 assember for");
+&comment("any of ELF, a.out, COFF, Win32, ...");
+&comment("eric <eay\@cryptsoft.com>");
+&comment("");
+
+ $filename =~ s/\.pl$//;
+ &file($filename);
+ }
+
+sub asm_finish_cpp
+ {
+ return unless $cpp;
+
+ local($tmp,$i);
+ foreach $i (&get_labels())
+ {
+ $tmp.="#define $i _$i\n";
+ }
+ print <<"EOF";
+/* Run the C pre-processor over this file with one of the following defined
+ * ELF - elf object files,
+ * OUT - a.out object files,
+ * BSDI - BSDI style a.out object files
+ * SOL - Solaris style elf
+ */
+
+#define TYPE(a,b) .type a,b
+#define SIZE(a,b) .size a,b
+
+#if defined(OUT) || (defined(BSDI) && !defined(ELF))
+$tmp
+#endif
+
+#ifdef OUT
+#define OK 1
+#define ALIGN 4
+#if defined(__CYGWIN__) || defined(__DJGPP__) || (__MINGW32__)
+#undef SIZE
+#undef TYPE
+#define SIZE(a,b)
+#define TYPE(a,b) .def a; .scl 2; .type 32; .endef
+#endif /* __CYGWIN || __DJGPP */
+#endif
+
+#if defined(BSDI) && !defined(ELF)
+#define OK 1
+#define ALIGN 4
+#undef SIZE
+#undef TYPE
+#define SIZE(a,b)
+#define TYPE(a,b)
+#endif
+
+#if defined(ELF) || defined(SOL)
+#define OK 1
+#define ALIGN 16
+#endif
+
+#ifndef OK
+You need to define one of
+ELF - elf systems - linux-elf, NetBSD and DG-UX
+OUT - a.out systems - linux-a.out and FreeBSD
+SOL - solaris systems, which are elf with strange comment lines
+BSDI - a.out with a very primative version of as.
+#endif
+
+/* Let the Assembler begin :-) */
+EOF
+ }
+
+1;
diff --git a/openssl/crypto/perlasm/x86ms.pl b/openssl/crypto/perlasm/x86ms.pl
new file mode 100644
index 000000000..a0be2934c
--- /dev/null
+++ b/openssl/crypto/perlasm/x86ms.pl
@@ -0,0 +1,472 @@
+#!/usr/local/bin/perl
+
+package x86ms;
+
+$label="L000";
+
+%lb=( 'eax', 'al',
+ 'ebx', 'bl',
+ 'ecx', 'cl',
+ 'edx', 'dl',
+ 'ax', 'al',
+ 'bx', 'bl',
+ 'cx', 'cl',
+ 'dx', 'dl',
+ );
+
+%hb=( 'eax', 'ah',
+ 'ebx', 'bh',
+ 'ecx', 'ch',
+ 'edx', 'dh',
+ 'ax', 'ah',
+ 'bx', 'bh',
+ 'cx', 'ch',
+ 'dx', 'dh',
+ );
+
+sub main'asm_init_output { @out=(); }
+sub main'asm_get_output { return(@out); }
+sub main'get_labels { return(@labels); }
+sub main'external_label
+{
+ push(@labels,@_);
+ foreach (@_) {
+ push(@out, "EXTRN\t_$_:DWORD\n");
+ }
+}
+
+sub main'LB
+ {
+ (defined($lb{$_[0]})) || die "$_[0] does not have a 'low byte'\n";
+ return($lb{$_[0]});
+ }
+
+sub main'HB
+ {
+ (defined($hb{$_[0]})) || die "$_[0] does not have a 'high byte'\n";
+ return($hb{$_[0]});
+ }
+
+sub main'BP
+ {
+ &get_mem("BYTE",@_);
+ }
+
+sub main'DWP
+ {
+ &get_mem("DWORD",@_);
+ }
+
+sub main'QWP
+ {
+ &get_mem("QWORD",@_);
+ }
+
+sub main'BC
+ {
+ return @_;
+ }
+
+sub main'DWC
+ {
+ return @_;
+ }
+
+sub main'stack_push
+ {
+ local($num)=@_;
+ $stack+=$num*4;
+ &main'sub("esp",$num*4);
+ }
+
+sub main'stack_pop
+ {
+ local($num)=@_;
+ $stack-=$num*4;
+ &main'add("esp",$num*4);
+ }
+
+sub get_mem
+ {
+ local($size,$addr,$reg1,$reg2,$idx)=@_;
+ local($t,$post);
+ local($ret)="$size PTR ";
+
+ $addr =~ s/^\s+//;
+ if ($addr =~ /^(.+)\+(.+)$/)
+ {
+ $reg2=&conv($1);
+ $addr="_$2";
+ }
+ elsif ($addr =~ /^[_a-z][_a-z0-9]*$/i)
+ {
+ $addr="_$addr";
+ }
+
+ if ($addr =~ /^.+\-.+$/) { $addr="($addr)"; }
+
+ $reg1="$regs{$reg1}" if defined($regs{$reg1});
+ $reg2="$regs{$reg2}" if defined($regs{$reg2});
+ if (($addr ne "") && ($addr ne 0))
+ {
+ if ($addr !~ /^-/)
+ { $ret.=$addr; }
+ else { $post=$addr; }
+ }
+ if ($reg2 ne "")
+ {
+ $t="";
+ $t="*$idx" if ($idx != 0);
+ $reg1="+".$reg1 if ("$reg1$post" ne "");
+ $ret.="[$reg2$t$reg1$post]";
+ }
+ else
+ {
+ $ret.="[$reg1$post]"
+ }
+ $ret =~ s/\[\]//; # in case $addr was the only argument
+ return($ret);
+ }
+
+sub main'mov { &out2("mov",@_); }
+sub main'movb { &out2("mov",@_); }
+sub main'and { &out2("and",@_); }
+sub main'or { &out2("or",@_); }
+sub main'shl { &out2("shl",@_); }
+sub main'shr { &out2("shr",@_); }
+sub main'xor { &out2("xor",@_); }
+sub main'xorb { &out2("xor",@_); }
+sub main'add { &out2("add",@_); }
+sub main'adc { &out2("adc",@_); }
+sub main'sub { &out2("sub",@_); }
+sub main'sbb { &out2("sbb",@_); }
+sub main'rotl { &out2("rol",@_); }
+sub main'rotr { &out2("ror",@_); }
+sub main'exch { &out2("xchg",@_); }
+sub main'cmp { &out2("cmp",@_); }
+sub main'lea { &out2("lea",@_); }
+sub main'mul { &out1("mul",@_); }
+sub main'imul { &out2("imul",@_); }
+sub main'div { &out1("div",@_); }
+sub main'dec { &out1("dec",@_); }
+sub main'inc { &out1("inc",@_); }
+sub main'jmp { &out1("jmp",@_); }
+sub main'jmp_ptr { &out1p("jmp",@_); }
+sub main'je { &out1("je",@_); }
+sub main'jle { &out1("jle",@_); }
+sub main'jz { &out1("jz",@_); }
+sub main'jge { &out1("jge",@_); }
+sub main'jl { &out1("jl",@_); }
+sub main'ja { &out1("ja",@_); }
+sub main'jae { &out1("jae",@_); }
+sub main'jb { &out1("jb",@_); }
+sub main'jbe { &out1("jbe",@_); }
+sub main'jc { &out1("jc",@_); }
+sub main'jnc { &out1("jnc",@_); }
+sub main'jnz { &out1("jnz",@_); }
+sub main'jne { &out1("jne",@_); }
+sub main'jno { &out1("jno",@_); }
+sub main'push { &out1("push",@_); $stack+=4; }
+sub main'pop { &out1("pop",@_); $stack-=4; }
+sub main'pushf { &out0("pushfd"); $stack+=4; }
+sub main'popf { &out0("popfd"); $stack-=4; }
+sub main'bswap { &out1("bswap",@_); &using486(); }
+sub main'not { &out1("not",@_); }
+sub main'call { &out1("call",($_[0]=~/^\$L/?'':'_').$_[0]); }
+sub main'call_ptr { &out1p("call",@_); }
+sub main'ret { &out0("ret"); }
+sub main'nop { &out0("nop"); }
+sub main'test { &out2("test",@_); }
+sub main'bt { &out2("bt",@_); }
+sub main'leave { &out0("leave"); }
+sub main'cpuid { &out0("DW\t0A20Fh"); }
+sub main'rdtsc { &out0("DW\t0310Fh"); }
+sub main'halt { &out0("hlt"); }
+sub main'movz { &out2("movzx",@_); }
+sub main'neg { &out1("neg",@_); }
+sub main'cld { &out0("cld"); }
+
+# SSE2
+sub main'emms { &out0("emms"); }
+sub main'movd { &out2("movd",@_); }
+sub main'movq { &out2("movq",@_); }
+sub main'movdqu { &out2("movdqu",@_); }
+sub main'movdqa { &out2("movdqa",@_); }
+sub main'movdq2q{ &out2("movdq2q",@_); }
+sub main'movq2dq{ &out2("movq2dq",@_); }
+sub main'paddq { &out2("paddq",@_); }
+sub main'pmuludq{ &out2("pmuludq",@_); }
+sub main'psrlq { &out2("psrlq",@_); }
+sub main'psllq { &out2("psllq",@_); }
+sub main'pxor { &out2("pxor",@_); }
+sub main'por { &out2("por",@_); }
+sub main'pand { &out2("pand",@_); }
+
+sub out2
+ {
+ local($name,$p1,$p2)=@_;
+ local($l,$t,$line);
+
+ $line="\t$name\t";
+ $t=&conv($p1).",";
+ $l=length($t);
+ $line.="$t";
+ $l=4-($l+9)/8;
+ $line.="\t" x $l;
+ $line.=&conv($p2);
+ if ($line=~/\bxmm[0-7]\b/i) { $line=~s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i; }
+ push(@out,$line."\n");
+ }
+
+sub out0
+ {
+ local($name)=@_;
+
+ push(@out,"\t$name\n");
+ }
+
+sub out1
+ {
+ local($name,$p1)=@_;
+ local($l,$t);
+
+ push(@out,"\t$name\t".&conv($p1)."\n");
+ }
+
+sub conv
+ {
+ local($p)=@_;
+
+ $p =~ s/0x([0-9A-Fa-f]+)/0$1h/;
+ return $p;
+ }
+
+sub using486
+ {
+ return if $using486;
+ $using486++;
+ grep(s/\.386/\.486/,@out);
+ }
+
+sub main'file
+ {
+ local($file)=@_;
+
+ local($tmp)=<<"EOF";
+ TITLE $file.asm
+ .386
+.model FLAT
+_TEXT\$ SEGMENT PAGE 'CODE'
+
+EOF
+ push(@out,$tmp);
+ }
+
+sub main'function_begin
+ {
+ local($func,$extra)=@_;
+
+ push(@labels,$func);
+
+ local($tmp)=<<"EOF";
+PUBLIC _$func
+$extra
+_$func PROC NEAR
+ push ebp
+ push ebx
+ push esi
+ push edi
+EOF
+ push(@out,$tmp);
+ $stack=20;
+ }
+
+sub main'function_begin_B
+ {
+ local($func,$extra)=@_;
+
+ local($tmp)=<<"EOF";
+PUBLIC _$func
+$extra
+_$func PROC NEAR
+EOF
+ push(@out,$tmp);
+ $stack=4;
+ }
+
+sub main'function_end
+ {
+ local($func)=@_;
+
+ local($tmp)=<<"EOF";
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+_$func ENDP
+EOF
+ push(@out,$tmp);
+ $stack=0;
+ %label=();
+ }
+
+sub main'function_end_B
+ {
+ local($func)=@_;
+
+ local($tmp)=<<"EOF";
+_$func ENDP
+EOF
+ push(@out,$tmp);
+ $stack=0;
+ %label=();
+ }
+
+sub main'function_end_A
+ {
+ local($func)=@_;
+
+ local($tmp)=<<"EOF";
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+EOF
+ push(@out,$tmp);
+ }
+
+sub main'file_end
+ {
+ # try to detect if SSE2 or MMX extensions were used...
+ my $xmmheader=<<___;
+.686
+.XMM
+IF \@Version LT 800
+XMMWORD STRUCT 16
+ DQ 2 dup (?)
+XMMWORD ENDS
+ENDIF
+___
+ if (grep {/\b[x]?mm[0-7]\b/i} @out) {
+ grep {s/\.[3-7]86/$xmmheader/} @out;
+ }
+ push(@out,"_TEXT\$ ENDS\n");
+ push(@out,"END\n");
+ }
+
+sub main'wparam
+ {
+ local($num)=@_;
+
+ return(&main'DWP($stack+$num*4,"esp","",0));
+ }
+
+sub main'swtmp
+ {
+ return(&main'DWP($_[0]*4,"esp","",0));
+ }
+
+# Should use swtmp, which is above esp. Linix can trash the stack above esp
+#sub main'wtmp
+# {
+# local($num)=@_;
+#
+# return(&main'DWP(-(($num+1)*4),"esp","",0));
+# }
+
+sub main'comment
+ {
+ foreach (@_)
+ {
+ push(@out,"\t; $_\n");
+ }
+ }
+
+sub main'public_label
+ {
+ $label{$_[0]}="_$_[0]" if (!defined($label{$_[0]}));
+ push(@out,"PUBLIC\t$label{$_[0]}\n");
+ }
+
+sub main'label
+ {
+ if (!defined($label{$_[0]}))
+ {
+ $label{$_[0]}="\$${label}${_[0]}";
+ $label++;
+ }
+ return($label{$_[0]});
+ }
+
+sub main'set_label
+ {
+ if (!defined($label{$_[0]}))
+ {
+ $label{$_[0]}="\$${label}${_[0]}";
+ $label++;
+ }
+ if ($_[1]!=0 && $_[1]>1)
+ {
+ main'align($_[1]);
+ }
+ if((defined $_[2]) && ($_[2] == 1))
+ {
+ push(@out,"$label{$_[0]}::\n");
+ }
+ elsif ($label{$_[0]} !~ /^\$/)
+ {
+ push(@out,"$label{$_[0]}\tLABEL PTR\n");
+ }
+ else
+ {
+ push(@out,"$label{$_[0]}:\n");
+ }
+ }
+
+sub main'data_byte
+ {
+ push(@out,"\tDB\t".join(',',@_)."\n");
+ }
+
+sub main'data_word
+ {
+ push(@out,"\tDD\t".join(',',@_)."\n");
+ }
+
+sub main'align
+ {
+ push(@out,"\tALIGN\t$_[0]\n");
+ }
+
+sub out1p
+ {
+ local($name,$p1)=@_;
+ local($l,$t);
+
+ push(@out,"\t$name\t".&conv($p1)."\n");
+ }
+
+sub main'picmeup
+ {
+ local($dst,$sym)=@_;
+ &main'lea($dst,&main'DWP($sym));
+ }
+
+sub main'blindpop { &out1("pop",@_); }
+
+sub main'initseg
+ {
+ local($f)=@_;
+ local($tmp)=<<___;
+OPTION DOTNAME
+.CRT\$XCU SEGMENT DWORD PUBLIC 'DATA'
+EXTRN _$f:NEAR
+DD _$f
+.CRT\$XCU ENDS
+___
+ push(@out,$tmp);
+ }
+
+1;
diff --git a/openssl/crypto/perlasm/x86nasm.pl b/openssl/crypto/perlasm/x86nasm.pl
new file mode 100644
index 000000000..fa38f89c0
--- /dev/null
+++ b/openssl/crypto/perlasm/x86nasm.pl
@@ -0,0 +1,455 @@
+#!/usr/local/bin/perl
+
+package x86nasm;
+
+$label="L000";
+$under=($main'netware)?'':'_';
+
+%lb=( 'eax', 'al',
+ 'ebx', 'bl',
+ 'ecx', 'cl',
+ 'edx', 'dl',
+ 'ax', 'al',
+ 'bx', 'bl',
+ 'cx', 'cl',
+ 'dx', 'dl',
+ );
+
+%hb=( 'eax', 'ah',
+ 'ebx', 'bh',
+ 'ecx', 'ch',
+ 'edx', 'dh',
+ 'ax', 'ah',
+ 'bx', 'bh',
+ 'cx', 'ch',
+ 'dx', 'dh',
+ );
+
+sub main'asm_init_output { @out=(); }
+sub main'asm_get_output { return(@out); }
+sub main'get_labels { return(@labels); }
+
+sub main'external_label
+{
+ push(@labels,@_);
+ foreach (@_) {
+ push(@out,".") if ($main'mwerks);
+ push(@out, "extern\t${under}$_\n");
+ }
+}
+
+sub main'LB
+ {
+ (defined($lb{$_[0]})) || die "$_[0] does not have a 'low byte'\n";
+ return($lb{$_[0]});
+ }
+
+sub main'HB
+ {
+ (defined($hb{$_[0]})) || die "$_[0] does not have a 'high byte'\n";
+ return($hb{$_[0]});
+ }
+
+sub main'BP
+ {
+ &get_mem("BYTE",@_);
+ }
+
+sub main'DWP
+ {
+ &get_mem("DWORD",@_);
+ }
+
+sub main'QWP
+ {
+ &get_mem("",@_);
+ }
+
+sub main'BC
+ {
+ return (($main'mwerks)?"":"BYTE ")."@_";
+ }
+
+sub main'DWC
+ {
+ return (($main'mwerks)?"":"DWORD ")."@_";
+ }
+
+sub main'stack_push
+ {
+ my($num)=@_;
+ $stack+=$num*4;
+ &main'sub("esp",$num*4);
+ }
+
+sub main'stack_pop
+ {
+ my($num)=@_;
+ $stack-=$num*4;
+ &main'add("esp",$num*4);
+ }
+
+sub get_mem
+ {
+ my($size,$addr,$reg1,$reg2,$idx)=@_;
+ my($t,$post);
+ my($ret)=$size;
+ if ($ret ne "")
+ {
+ $ret .= " PTR" if ($main'mwerks);
+ $ret .= " ";
+ }
+ $ret .= "[";
+ $addr =~ s/^\s+//;
+ if ($addr =~ /^(.+)\+(.+)$/)
+ {
+ $reg2=&conv($1);
+ $addr="$under$2";
+ }
+ elsif ($addr =~ /^[_a-z][_a-z0-9]*$/i)
+ {
+ $addr="$under$addr";
+ }
+
+ if ($addr =~ /^.+\-.+$/) { $addr="($addr)"; }
+
+ $reg1="$regs{$reg1}" if defined($regs{$reg1});
+ $reg2="$regs{$reg2}" if defined($regs{$reg2});
+ if (($addr ne "") && ($addr ne 0))
+ {
+ if ($addr !~ /^-/)
+ { $ret.="${addr}+"; }
+ else { $post=$addr; }
+ }
+ if ($reg2 ne "")
+ {
+ $t="";
+ $t="*$idx" if ($idx != 0);
+ $reg1="+".$reg1 if ("$reg1$post" ne "");
+ $ret.="$reg2$t$reg1$post]";
+ }
+ else
+ {
+ $ret.="$reg1$post]"
+ }
+ $ret =~ s/\+\]/]/; # in case $addr was the only argument
+ return($ret);
+ }
+
+sub main'mov { &out2("mov",@_); }
+sub main'movb { &out2("mov",@_); }
+sub main'and { &out2("and",@_); }
+sub main'or { &out2("or",@_); }
+sub main'shl { &out2("shl",@_); }
+sub main'shr { &out2("shr",@_); }
+sub main'xor { &out2("xor",@_); }
+sub main'xorb { &out2("xor",@_); }
+sub main'add { &out2("add",@_); }
+sub main'adc { &out2("adc",@_); }
+sub main'sub { &out2("sub",@_); }
+sub main'sbb { &out2("sbb",@_); }
+sub main'rotl { &out2("rol",@_); }
+sub main'rotr { &out2("ror",@_); }
+sub main'exch { &out2("xchg",@_); }
+sub main'cmp { &out2("cmp",@_); }
+sub main'lea { &out2("lea",@_); }
+sub main'mul { &out1("mul",@_); }
+sub main'imul { &out2("imul",@_); }
+sub main'div { &out1("div",@_); }
+sub main'dec { &out1("dec",@_); }
+sub main'inc { &out1("inc",@_); }
+sub main'jmp { &out1("jmp",@_); }
+sub main'jmp_ptr { &out1p("jmp",@_); }
+
+# This is a bit of a kludge: declare all branches as NEAR.
+$near=($main'mwerks)?'':'NEAR';
+sub main'je { &out1("je $near",@_); }
+sub main'jle { &out1("jle $near",@_); }
+sub main'jz { &out1("jz $near",@_); }
+sub main'jge { &out1("jge $near",@_); }
+sub main'jl { &out1("jl $near",@_); }
+sub main'ja { &out1("ja $near",@_); }
+sub main'jae { &out1("jae $near",@_); }
+sub main'jb { &out1("jb $near",@_); }
+sub main'jbe { &out1("jbe $near",@_); }
+sub main'jc { &out1("jc $near",@_); }
+sub main'jnc { &out1("jnc $near",@_); }
+sub main'jnz { &out1("jnz $near",@_); }
+sub main'jne { &out1("jne $near",@_); }
+sub main'jno { &out1("jno $near",@_); }
+
+sub main'push { &out1("push",@_); $stack+=4; }
+sub main'pop { &out1("pop",@_); $stack-=4; }
+sub main'pushf { &out0("pushfd"); $stack+=4; }
+sub main'popf { &out0("popfd"); $stack-=4; }
+sub main'bswap { &out1("bswap",@_); &using486(); }
+sub main'not { &out1("not",@_); }
+sub main'call { &out1("call",($_[0]=~/^\@L/?'':$under).$_[0]); }
+sub main'call_ptr { &out1p("call",@_); }
+sub main'ret { &out0("ret"); }
+sub main'nop { &out0("nop"); }
+sub main'test { &out2("test",@_); }
+sub main'bt { &out2("bt",@_); }
+sub main'leave { &out0("leave"); }
+sub main'cpuid { &out0("cpuid"); }
+sub main'rdtsc { &out0("rdtsc"); }
+sub main'halt { &out0("hlt"); }
+sub main'movz { &out2("movzx",@_); }
+sub main'neg { &out1("neg",@_); }
+sub main'cld { &out0("cld"); }
+
+# SSE2
+sub main'emms { &out0("emms"); }
+sub main'movd { &out2("movd",@_); }
+sub main'movq { &out2("movq",@_); }
+sub main'movdqu { &out2("movdqu",@_); }
+sub main'movdqa { &out2("movdqa",@_); }
+sub main'movdq2q{ &out2("movdq2q",@_); }
+sub main'movq2dq{ &out2("movq2dq",@_); }
+sub main'paddq { &out2("paddq",@_); }
+sub main'pmuludq{ &out2("pmuludq",@_); }
+sub main'psrlq { &out2("psrlq",@_); }
+sub main'psllq { &out2("psllq",@_); }
+sub main'pxor { &out2("pxor",@_); }
+sub main'por { &out2("por",@_); }
+sub main'pand { &out2("pand",@_); }
+
+sub out2
+ {
+ my($name,$p1,$p2)=@_;
+ my($l,$t);
+
+ push(@out,"\t$name\t");
+ if (!$main'mwerks and $name eq "lea")
+ {
+ $p1 =~ s/^[^\[]*\[/\[/;
+ $p2 =~ s/^[^\[]*\[/\[/;
+ }
+ $t=&conv($p1).",";
+ $l=length($t);
+ push(@out,$t);
+ $l=4-($l+9)/8;
+ push(@out,"\t" x $l);
+ push(@out,&conv($p2));
+ push(@out,"\n");
+ }
+
+sub out0
+ {
+ my($name)=@_;
+
+ push(@out,"\t$name\n");
+ }
+
+sub out1
+ {
+ my($name,$p1)=@_;
+ my($l,$t);
+ push(@out,"\t$name\t".&conv($p1)."\n");
+ }
+
+sub conv
+ {
+ my($p)=@_;
+ $p =~ s/0x([0-9A-Fa-f]+)/0$1h/;
+ return $p;
+ }
+
+sub using486
+ {
+ return if $using486;
+ $using486++;
+ grep(s/\.386/\.486/,@out);
+ }
+
+sub main'file
+ {
+ if ($main'mwerks) { push(@out,".section\t.text\n"); }
+ else {
+ local $tmp=<<___;
+%ifdef __omf__
+section code use32 class=code
+%else
+section .text
+%endif
+___
+ push(@out,$tmp);
+ }
+ }
+
+sub main'function_begin
+ {
+ my($func,$extra)=@_;
+
+ push(@labels,$func);
+ push(@out,".") if ($main'mwerks);
+ my($tmp)=<<"EOF";
+global $under$func
+$under$func:
+ push ebp
+ push ebx
+ push esi
+ push edi
+EOF
+ push(@out,$tmp);
+ $stack=20;
+ }
+
+sub main'function_begin_B
+ {
+ my($func,$extra)=@_;
+ push(@out,".") if ($main'mwerks);
+ my($tmp)=<<"EOF";
+global $under$func
+$under$func:
+EOF
+ push(@out,$tmp);
+ $stack=4;
+ }
+
+sub main'function_end
+ {
+ my($func)=@_;
+
+ my($tmp)=<<"EOF";
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+EOF
+ push(@out,$tmp);
+ $stack=0;
+ %label=();
+ }
+
+sub main'function_end_B
+ {
+ $stack=0;
+ %label=();
+ }
+
+sub main'function_end_A
+ {
+ my($func)=@_;
+
+ my($tmp)=<<"EOF";
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+EOF
+ push(@out,$tmp);
+ }
+
+sub main'file_end
+ {
+ }
+
+sub main'wparam
+ {
+ my($num)=@_;
+
+ return(&main'DWP($stack+$num*4,"esp","",0));
+ }
+
+sub main'swtmp
+ {
+ return(&main'DWP($_[0]*4,"esp","",0));
+ }
+
+# Should use swtmp, which is above esp. Linix can trash the stack above esp
+#sub main'wtmp
+# {
+# my($num)=@_;
+#
+# return(&main'DWP(-(($num+1)*4),"esp","",0));
+# }
+
+sub main'comment
+ {
+ foreach (@_)
+ {
+ push(@out,"\t; $_\n");
+ }
+ }
+
+sub main'public_label
+ {
+ $label{$_[0]}="${under}${_[0]}" if (!defined($label{$_[0]}));
+ push(@out,".") if ($main'mwerks);
+ push(@out,"global\t$label{$_[0]}\n");
+ }
+
+sub main'label
+ {
+ if (!defined($label{$_[0]}))
+ {
+ $label{$_[0]}="\@${label}${_[0]}";
+ $label++;
+ }
+ return($label{$_[0]});
+ }
+
+sub main'set_label
+ {
+ if (!defined($label{$_[0]}))
+ {
+ $label{$_[0]}="\@${label}${_[0]}";
+ $label++;
+ }
+ if ($_[1]!=0 && $_[1]>1)
+ {
+ main'align($_[1]);
+ }
+ push(@out,"$label{$_[0]}:\n");
+ }
+
+sub main'data_byte
+ {
+ push(@out,(($main'mwerks)?".byte\t":"DB\t").join(',',@_)."\n");
+ }
+
+sub main'data_word
+ {
+ push(@out,(($main'mwerks)?".long\t":"DD\t").join(',',@_)."\n");
+ }
+
+sub main'align
+ {
+ push(@out,".") if ($main'mwerks);
+ push(@out,"align\t$_[0]\n");
+ }
+
+sub out1p
+ {
+ my($name,$p1)=@_;
+ my($l,$t);
+
+ push(@out,"\t$name\t".&conv($p1)."\n");
+ }
+
+sub main'picmeup
+ {
+ local($dst,$sym)=@_;
+ &main'lea($dst,&main'DWP($sym));
+ }
+
+sub main'blindpop { &out1("pop",@_); }
+
+sub main'initseg
+ {
+ local($f)=@_;
+ if ($main'win32)
+ {
+ local($tmp)=<<___;
+segment .CRT\$XCU data
+extern $under$f
+DD $under$f
+___
+ push(@out,$tmp);
+ }
+ }
+
+1;
diff --git a/openssl/crypto/perlasm/x86unix.pl b/openssl/crypto/perlasm/x86unix.pl
new file mode 100644
index 000000000..a4c947165
--- /dev/null
+++ b/openssl/crypto/perlasm/x86unix.pl
@@ -0,0 +1,725 @@
+#!/usr/local/bin/perl
+
+package x86unix; # GAS actually...
+
+$label="L000";
+$const="";
+$constl=0;
+
+$align=($main'aout)?"4":"16";
+$under=($main'aout or $main'coff)?"_":"";
+$dot=($main'aout)?"":".";
+$com_start="#" if ($main'aout or $main'coff);
+
+sub main'asm_init_output { @out=(); }
+sub main'asm_get_output { return(@out); }
+sub main'get_labels { return(@labels); }
+sub main'external_label { push(@labels,@_); }
+
+if ($main'cpp)
+ {
+ $align="ALIGN";
+ $under="";
+ $com_start='/*';
+ $com_end='*/';
+ }
+
+%lb=( 'eax', '%al',
+ 'ebx', '%bl',
+ 'ecx', '%cl',
+ 'edx', '%dl',
+ 'ax', '%al',
+ 'bx', '%bl',
+ 'cx', '%cl',
+ 'dx', '%dl',
+ );
+
+%hb=( 'eax', '%ah',
+ 'ebx', '%bh',
+ 'ecx', '%ch',
+ 'edx', '%dh',
+ 'ax', '%ah',
+ 'bx', '%bh',
+ 'cx', '%ch',
+ 'dx', '%dh',
+ );
+
+%regs=( 'eax', '%eax',
+ 'ebx', '%ebx',
+ 'ecx', '%ecx',
+ 'edx', '%edx',
+ 'esi', '%esi',
+ 'edi', '%edi',
+ 'ebp', '%ebp',
+ 'esp', '%esp',
+
+ 'mm0', '%mm0',
+ 'mm1', '%mm1',
+ 'mm2', '%mm2',
+ 'mm3', '%mm3',
+ 'mm4', '%mm4',
+ 'mm5', '%mm5',
+ 'mm6', '%mm6',
+ 'mm7', '%mm7',
+
+ 'xmm0', '%xmm0',
+ 'xmm1', '%xmm1',
+ 'xmm2', '%xmm2',
+ 'xmm3', '%xmm3',
+ 'xmm4', '%xmm4',
+ 'xmm5', '%xmm5',
+ 'xmm6', '%xmm6',
+ 'xmm7', '%xmm7',
+ );
+
+%reg_val=(
+ 'eax', 0x00,
+ 'ebx', 0x03,
+ 'ecx', 0x01,
+ 'edx', 0x02,
+ 'esi', 0x06,
+ 'edi', 0x07,
+ 'ebp', 0x05,
+ 'esp', 0x04,
+ );
+
+sub main'LB
+ {
+ (defined($lb{$_[0]})) || die "$_[0] does not have a 'low byte'\n";
+ return($lb{$_[0]});
+ }
+
+sub main'HB
+ {
+ (defined($hb{$_[0]})) || die "$_[0] does not have a 'high byte'\n";
+ return($hb{$_[0]});
+ }
+
+sub main'DWP
+ {
+ local($addr,$reg1,$reg2,$idx)=@_;
+
+ $ret="";
+ $addr =~ s/(^|[+ \t])([A-Za-z_]+[A-Za-z0-9_]+)($|[+ \t])/$1$under$2$3/;
+ $reg1="$regs{$reg1}" if defined($regs{$reg1});
+ $reg2="$regs{$reg2}" if defined($regs{$reg2});
+ $ret.=$addr if ($addr ne "") && ($addr ne 0);
+ if ($reg2 ne "")
+ {
+ if($idx ne "" && $idx != 0)
+ { $ret.="($reg1,$reg2,$idx)"; }
+ else
+ { $ret.="($reg1,$reg2)"; }
+ }
+ elsif ($reg1 ne "")
+ { $ret.="($reg1)" }
+ return($ret);
+ }
+
+sub main'QWP
+ {
+ return(&main'DWP(@_));
+ }
+
+sub main'BP
+ {
+ return(&main'DWP(@_));
+ }
+
+sub main'BC
+ {
+ return @_;
+ }
+
+sub main'DWC
+ {
+ return @_;
+ }
+
+#sub main'BP
+# {
+# local($addr,$reg1,$reg2,$idx)=@_;
+#
+# $ret="";
+#
+# $addr =~ s/(^|[+ \t])([A-Za-z_]+)($|[+ \t])/$1$under$2$3/;
+# $reg1="$regs{$reg1}" if defined($regs{$reg1});
+# $reg2="$regs{$reg2}" if defined($regs{$reg2});
+# $ret.=$addr if ($addr ne "") && ($addr ne 0);
+# if ($reg2 ne "")
+# { $ret.="($reg1,$reg2,$idx)"; }
+# else
+# { $ret.="($reg1)" }
+# return($ret);
+# }
+
+sub main'mov { &out2("movl",@_); }
+sub main'movb { &out2("movb",@_); }
+sub main'and { &out2("andl",@_); }
+sub main'or { &out2("orl",@_); }
+sub main'shl { &out2("sall",@_); }
+sub main'shr { &out2("shrl",@_); }
+sub main'xor { &out2("xorl",@_); }
+sub main'xorb { &out2("xorb",@_); }
+sub main'add { &out2($_[0]=~/%[a-d][lh]/?"addb":"addl",@_); }
+sub main'adc { &out2("adcl",@_); }
+sub main'sub { &out2("subl",@_); }
+sub main'sbb { &out2("sbbl",@_); }
+sub main'rotl { &out2("roll",@_); }
+sub main'rotr { &out2("rorl",@_); }
+sub main'exch { &out2($_[0]=~/%[a-d][lh]/?"xchgb":"xchgl",@_); }
+sub main'cmp { &out2("cmpl",@_); }
+sub main'lea { &out2("leal",@_); }
+sub main'mul { &out1("mull",@_); }
+sub main'imul { &out2("imull",@_); }
+sub main'div { &out1("divl",@_); }
+sub main'jmp { &out1("jmp",@_); }
+sub main'jmp_ptr { &out1p("jmp",@_); }
+sub main'je { &out1("je",@_); }
+sub main'jle { &out1("jle",@_); }
+sub main'jne { &out1("jne",@_); }
+sub main'jnz { &out1("jnz",@_); }
+sub main'jz { &out1("jz",@_); }
+sub main'jge { &out1("jge",@_); }
+sub main'jl { &out1("jl",@_); }
+sub main'ja { &out1("ja",@_); }
+sub main'jae { &out1("jae",@_); }
+sub main'jb { &out1("jb",@_); }
+sub main'jbe { &out1("jbe",@_); }
+sub main'jc { &out1("jc",@_); }
+sub main'jnc { &out1("jnc",@_); }
+sub main'jno { &out1("jno",@_); }
+sub main'dec { &out1("decl",@_); }
+sub main'inc { &out1($_[0]=~/%[a-d][hl]/?"incb":"incl",@_); }
+sub main'push { &out1("pushl",@_); $stack+=4; }
+sub main'pop { &out1("popl",@_); $stack-=4; }
+sub main'pushf { &out0("pushfl"); $stack+=4; }
+sub main'popf { &out0("popfl"); $stack-=4; }
+sub main'not { &out1("notl",@_); }
+sub main'call { my $pre=$under;
+ foreach $i (%label)
+ { if ($label{$i} eq $_[0]) { $pre=''; last; } }
+ &out1("call",$pre.$_[0]);
+ }
+sub main'call_ptr { &out1p("call",@_); }
+sub main'ret { &out0("ret"); }
+sub main'nop { &out0("nop"); }
+sub main'test { &out2("testl",@_); }
+sub main'bt { &out2("btl",@_); }
+sub main'leave { &out0("leave"); }
+sub main'cpuid { &out0(".byte\t0x0f,0xa2"); }
+sub main'rdtsc { &out0(".byte\t0x0f,0x31"); }
+sub main'halt { &out0("hlt"); }
+sub main'movz { &out2("movzbl",@_); }
+sub main'neg { &out1("negl",@_); }
+sub main'cld { &out0("cld"); }
+
+# SSE2
+sub main'emms { &out0("emms"); }
+sub main'movd { &out2("movd",@_); }
+sub main'movdqu { &out2("movdqu",@_); }
+sub main'movdqa { &out2("movdqa",@_); }
+sub main'movdq2q{ &out2("movdq2q",@_); }
+sub main'movq2dq{ &out2("movq2dq",@_); }
+sub main'paddq { &out2("paddq",@_); }
+sub main'pmuludq{ &out2("pmuludq",@_); }
+sub main'psrlq { &out2("psrlq",@_); }
+sub main'psllq { &out2("psllq",@_); }
+sub main'pxor { &out2("pxor",@_); }
+sub main'por { &out2("por",@_); }
+sub main'pand { &out2("pand",@_); }
+sub main'movq {
+ local($p1,$p2,$optimize)=@_;
+ if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/)
+ # movq between mmx registers can sink Intel CPUs
+ { push(@out,"\tpshufw\t\$0xe4,%$p2,%$p1\n"); }
+ else { &out2("movq",@_); }
+ }
+
+# The bswapl instruction is new for the 486. Emulate if i386.
+sub main'bswap
+ {
+ if ($main'i386)
+ {
+ &main'comment("bswapl @_");
+ &main'exch(main'HB(@_),main'LB(@_));
+ &main'rotr(@_,16);
+ &main'exch(main'HB(@_),main'LB(@_));
+ }
+ else
+ {
+ &out1("bswapl",@_);
+ }
+ }
+
+sub out2
+ {
+ local($name,$p1,$p2)=@_;
+ local($l,$ll,$t);
+ local(%special)=( "roll",0xD1C0,"rorl",0xD1C8,
+ "rcll",0xD1D0,"rcrl",0xD1D8,
+ "shll",0xD1E0,"shrl",0xD1E8,
+ "sarl",0xD1F8);
+
+ if ((defined($special{$name})) && defined($regs{$p1}) && ($p2 == 1))
+ {
+ $op=$special{$name}|$reg_val{$p1};
+ $tmp1=sprintf(".byte %d\n",($op>>8)&0xff);
+ $tmp2=sprintf(".byte %d\t",$op &0xff);
+ push(@out,$tmp1);
+ push(@out,$tmp2);
+
+ $p2=&conv($p2);
+ $p1=&conv($p1);
+ &main'comment("$name $p2 $p1");
+ return;
+ }
+
+ push(@out,"\t$name\t");
+ $t=&conv($p2).",";
+ $l=length($t);
+ push(@out,$t);
+ $ll=4-($l+9)/8;
+ $tmp1=sprintf("\t" x $ll);
+ push(@out,$tmp1);
+ push(@out,&conv($p1)."\n");
+ }
+
+sub out1
+ {
+ local($name,$p1)=@_;
+ local($l,$t);
+ local(%special)=("bswapl",0x0FC8);
+
+ if ((defined($special{$name})) && defined($regs{$p1}))
+ {
+ $op=$special{$name}|$reg_val{$p1};
+ $tmp1=sprintf(".byte %d\n",($op>>8)&0xff);
+ $tmp2=sprintf(".byte %d\t",$op &0xff);
+ push(@out,$tmp1);
+ push(@out,$tmp2);
+
+ $p2=&conv($p2);
+ $p1=&conv($p1);
+ &main'comment("$name $p2 $p1");
+ return;
+ }
+
+ push(@out,"\t$name\t".&conv($p1)."\n");
+ }
+
+sub out1p
+ {
+ local($name,$p1)=@_;
+ local($l,$t);
+
+ push(@out,"\t$name\t*".&conv($p1)."\n");
+ }
+
+sub out0
+ {
+ push(@out,"\t$_[0]\n");
+ }
+
+sub conv
+ {
+ local($p)=@_;
+
+# $p =~ s/0x([0-9A-Fa-f]+)/0$1h/;
+
+ $p=$regs{$p} if (defined($regs{$p}));
+
+ $p =~ s/^(-{0,1}[0-9A-Fa-f]+)$/\$$1/;
+ $p =~ s/^(0x[0-9A-Fa-f]+)$/\$$1/;
+ return $p;
+ }
+
+sub main'file
+ {
+ local($file)=@_;
+
+ local($tmp)=<<"EOF";
+ .file "$file.s"
+EOF
+ push(@out,$tmp);
+ }
+
+sub main'function_begin
+ {
+ local($func)=@_;
+
+ &main'external_label($func);
+ $func=$under.$func;
+
+ local($tmp)=<<"EOF";
+.text
+.globl $func
+EOF
+ push(@out,$tmp);
+ if ($main'cpp)
+ { $tmp=push(@out,"TYPE($func,\@function)\n"); }
+ elsif ($main'coff)
+ { $tmp=push(@out,".def\t$func;\t.scl\t2;\t.type\t32;\t.endef\n"); }
+ elsif ($main'aout and !$main'pic)
+ { }
+ else { $tmp=push(@out,".type\t$func,\@function\n"); }
+ push(@out,".align\t$align\n");
+ push(@out,"$func:\n");
+ $tmp=<<"EOF";
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+
+EOF
+ push(@out,$tmp);
+ $stack=20;
+ }
+
+sub main'function_begin_B
+ {
+ local($func,$extra)=@_;
+
+ &main'external_label($func);
+ $func=$under.$func;
+
+ local($tmp)=<<"EOF";
+.text
+.globl $func
+EOF
+ push(@out,$tmp);
+ if ($main'cpp)
+ { push(@out,"TYPE($func,\@function)\n"); }
+ elsif ($main'coff)
+ { $tmp=push(@out,".def\t$func;\t.scl\t2;\t.type\t32;\t.endef\n"); }
+ elsif ($main'aout and !$main'pic)
+ { }
+ else { push(@out,".type $func,\@function\n"); }
+ push(@out,".align\t$align\n");
+ push(@out,"$func:\n");
+ $stack=4;
+ }
+
+sub main'function_end
+ {
+ local($func)=@_;
+
+ $func=$under.$func;
+
+ local($tmp)=<<"EOF";
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+${dot}L_${func}_end:
+EOF
+ push(@out,$tmp);
+
+ if ($main'cpp)
+ { push(@out,"SIZE($func,${dot}L_${func}_end-$func)\n"); }
+ elsif ($main'coff or $main'aout)
+ { }
+ else { push(@out,".size\t$func,${dot}L_${func}_end-$func\n"); }
+ push(@out,".ident \"$func\"\n");
+ $stack=0;
+ %label=();
+ }
+
+sub main'function_end_A
+ {
+ local($func)=@_;
+
+ local($tmp)=<<"EOF";
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+EOF
+ push(@out,$tmp);
+ }
+
+sub main'function_end_B
+ {
+ local($func)=@_;
+
+ $func=$under.$func;
+
+ push(@out,"${dot}L_${func}_end:\n");
+ if ($main'cpp)
+ { push(@out,"SIZE($func,${dot}L_${func}_end-$func)\n"); }
+ elsif ($main'coff or $main'aout)
+ { }
+ else { push(@out,".size\t$func,${dot}L_${func}_end-$func\n"); }
+ push(@out,".ident \"$func\"\n");
+ $stack=0;
+ %label=();
+ }
+
+sub main'wparam
+ {
+ local($num)=@_;
+
+ return(&main'DWP($stack+$num*4,"esp","",0));
+ }
+
+sub main'stack_push
+ {
+ local($num)=@_;
+ $stack+=$num*4;
+ &main'sub("esp",$num*4);
+ }
+
+sub main'stack_pop
+ {
+ local($num)=@_;
+ $stack-=$num*4;
+ &main'add("esp",$num*4);
+ }
+
+sub main'swtmp
+ {
+ return(&main'DWP($_[0]*4,"esp","",0));
+ }
+
+# Should use swtmp, which is above esp. Linix can trash the stack above esp
+#sub main'wtmp
+# {
+# local($num)=@_;
+#
+# return(&main'DWP(-($num+1)*4,"esp","",0));
+# }
+
+sub main'comment
+ {
+ if (!defined($com_start) or $main'elf)
+ { # Regarding $main'elf above...
+ # GNU and SVR4 as'es use different comment delimiters,
+ push(@out,"\n"); # so we just skip ELF comments...
+ return;
+ }
+ foreach (@_)
+ {
+ if (/^\s*$/)
+ { push(@out,"\n"); }
+ else
+ { push(@out,"\t$com_start $_ $com_end\n"); }
+ }
+ }
+
+sub main'public_label
+ {
+ $label{$_[0]}="${under}${_[0]}" if (!defined($label{$_[0]}));
+ push(@out,".globl\t$label{$_[0]}\n");
+ }
+
+sub main'label
+ {
+ if (!defined($label{$_[0]}))
+ {
+ $label{$_[0]}="${dot}${label}${_[0]}";
+ $label++;
+ }
+ return($label{$_[0]});
+ }
+
+sub main'set_label
+ {
+ if (!defined($label{$_[0]}))
+ {
+ $label{$_[0]}="${dot}${label}${_[0]}";
+ $label++;
+ }
+ if ($_[1]!=0)
+ {
+ if ($_[1]>1) { main'align($_[1]); }
+ else { push(@out,".align $align\n"); }
+ }
+ push(@out,"$label{$_[0]}:\n");
+ }
+
+sub main'file_end
+ {
+ # try to detect if SSE2 or MMX extensions were used on ELF platform...
+ if ($main'elf && grep {/\b%[x]*mm[0-7]\b|OPENSSL_ia32cap_P\b/i} @out) {
+ local($tmp);
+
+ push (@out,"\n.section\t.bss\n");
+ push (@out,".comm\t${under}OPENSSL_ia32cap_P,4,4\n");
+
+ return;
+ }
+
+ if ($const ne "")
+ {
+ push(@out,".section .rodata\n");
+ push(@out,$const);
+ $const="";
+ }
+ }
+
+sub main'data_byte
+ {
+ push(@out,"\t.byte\t".join(',',@_)."\n");
+ }
+
+sub main'data_word
+ {
+ push(@out,"\t.long\t".join(',',@_)."\n");
+ }
+
+sub main'align
+ {
+ my $val=$_[0],$p2,$i;
+ if ($main'aout) {
+ for ($p2=0;$val!=0;$val>>=1) { $p2++; }
+ $val=$p2-1;
+ $val.=",0x90";
+ }
+ push(@out,".align\t$val\n");
+ }
+
+# debug output functions: puts, putx, printf
+
+sub main'puts
+ {
+ &pushvars();
+ &main'push('$Lstring' . ++$constl);
+ &main'call('puts');
+ $stack-=4;
+ &main'add("esp",4);
+ &popvars();
+
+ $const .= "Lstring$constl:\n\t.string \"@_[0]\"\n";
+ }
+
+sub main'putx
+ {
+ &pushvars();
+ &main'push($_[0]);
+ &main'push('$Lstring' . ++$constl);
+ &main'call('printf');
+ &main'add("esp",8);
+ $stack-=8;
+ &popvars();
+
+ $const .= "Lstring$constl:\n\t.string \"\%X\"\n";
+ }
+
+sub main'printf
+ {
+ $ostack = $stack;
+ &pushvars();
+ for ($i = @_ - 1; $i >= 0; $i--)
+ {
+ if ($i == 0) # change this to support %s format strings
+ {
+ &main'push('$Lstring' . ++$constl);
+ $const .= "Lstring$constl:\n\t.string \"@_[$i]\"\n";
+ }
+ else
+ {
+ if ($_[$i] =~ /([0-9]*)\(%esp\)/)
+ {
+ &main'push(($1 + $stack - $ostack) . '(%esp)');
+ }
+ else
+ {
+ &main'push($_[$i]);
+ }
+ }
+ }
+ &main'call('printf');
+ $stack-=4*@_;
+ &main'add("esp",4*@_);
+ &popvars();
+ }
+
+sub pushvars
+ {
+ &main'pushf();
+ &main'push("edx");
+ &main'push("ecx");
+ &main'push("eax");
+ }
+
+sub popvars
+ {
+ &main'pop("eax");
+ &main'pop("ecx");
+ &main'pop("edx");
+ &main'popf();
+ }
+
+sub main'picmeup
+ {
+ local($dst,$sym)=@_;
+ if ($main'cpp)
+ {
+ local($tmp)=<<___;
+#if (defined(ELF) || defined(SOL)) && defined(PIC)
+ call 1f
+1: popl $regs{$dst}
+ addl \$_GLOBAL_OFFSET_TABLE_+[.-1b],$regs{$dst}
+ movl $sym\@GOT($regs{$dst}),$regs{$dst}
+#else
+ leal $sym,$regs{$dst}
+#endif
+___
+ push(@out,$tmp);
+ }
+ elsif ($main'pic && ($main'elf || $main'aout))
+ {
+ &main'call(&main'label("PIC_me_up"));
+ &main'set_label("PIC_me_up");
+ &main'blindpop($dst);
+ &main'add($dst,"\$${under}_GLOBAL_OFFSET_TABLE_+[.-".
+ &main'label("PIC_me_up") . "]");
+ &main'mov($dst,&main'DWP($under.$sym."\@GOT",$dst));
+ }
+ else
+ {
+ &main'lea($dst,&main'DWP($sym));
+ }
+ }
+
+sub main'blindpop { &out1("popl",@_); }
+
+sub main'initseg
+ {
+ local($f)=@_;
+ local($tmp);
+ if ($main'elf)
+ {
+ $tmp=<<___;
+.section .init
+ call $under$f
+ jmp .Linitalign
+.align $align
+.Linitalign:
+___
+ }
+ elsif ($main'coff)
+ {
+ $tmp=<<___; # applies to both Cygwin and Mingw
+.section .ctors
+.long $under$f
+___
+ }
+ elsif ($main'aout)
+ {
+ local($ctor)="${under}_GLOBAL_\$I\$$f";
+ $tmp=".text\n";
+ $tmp.=".type $ctor,\@function\n" if ($main'pic);
+ $tmp.=<<___; # OpenBSD way...
+.globl $ctor
+.align 2
+$ctor:
+ jmp $under$f
+___
+ }
+ push(@out,$tmp) if ($tmp);
+ }
+
+1;