diff options
Diffstat (limited to 'openssl/crypto/perlasm')
-rw-r--r-- | openssl/crypto/perlasm/cbc.pl | 351 | ||||
-rw-r--r-- | openssl/crypto/perlasm/readme | 124 | ||||
-rw-r--r-- | openssl/crypto/perlasm/x86_64-xlate.pl | 554 | ||||
-rw-r--r-- | openssl/crypto/perlasm/x86asm.pl | 130 | ||||
-rw-r--r-- | openssl/crypto/perlasm/x86ms.pl | 472 | ||||
-rw-r--r-- | openssl/crypto/perlasm/x86nasm.pl | 455 | ||||
-rw-r--r-- | openssl/crypto/perlasm/x86unix.pl | 725 |
7 files changed, 2811 insertions, 0 deletions
diff --git a/openssl/crypto/perlasm/cbc.pl b/openssl/crypto/perlasm/cbc.pl new file mode 100644 index 000000000..e43dc9ae1 --- /dev/null +++ b/openssl/crypto/perlasm/cbc.pl @@ -0,0 +1,351 @@ +#!/usr/local/bin/perl + +# void des_ncbc_encrypt(input, output, length, schedule, ivec, enc) +# des_cblock (*input); +# des_cblock (*output); +# long length; +# des_key_schedule schedule; +# des_cblock (*ivec); +# int enc; +# +# calls +# des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT); +# + +#&cbc("des_ncbc_encrypt","des_encrypt",0); +#&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt", +# 1,4,5,3,5,-1); +#&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt", +# 0,4,5,3,5,-1); +#&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3", +# 0,6,7,3,4,5); +# +# When doing a cipher that needs bigendian order, +# for encrypt, the iv is kept in bigendian form, +# while for decrypt, it is kept in little endian. +sub cbc + { + local($name,$enc_func,$dec_func,$swap,$iv_off,$enc_off,$p1,$p2,$p3)=@_; + # name is the function name + # enc_func and dec_func and the functions to call for encrypt/decrypt + # swap is true if byte order needs to be reversed + # iv_off is parameter number for the iv + # enc_off is parameter number for the encrypt/decrypt flag + # p1,p2,p3 are the offsets for parameters to be passed to the + # underlying calls. + + &function_begin_B($name,""); + &comment(""); + + $in="esi"; + $out="edi"; + $count="ebp"; + + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); + + $data_off=4; + $data_off+=4 if ($p1 > 0); + $data_off+=4 if ($p2 > 0); + $data_off+=4 if ($p3 > 0); + + &mov($count, &wparam(2)); # length + + &comment("getting iv ptr from parameter $iv_off"); + &mov("ebx", &wparam($iv_off)); # Get iv ptr + + &mov($in, &DWP(0,"ebx","",0));# iv[0] + &mov($out, &DWP(4,"ebx","",0));# iv[1] + + &push($out); + &push($in); + &push($out); # used in decrypt for iv[1] + &push($in); # used in decrypt for iv[0] + + &mov("ebx", "esp"); # This is the address of tin[2] + + &mov($in, &wparam(0)); # in + &mov($out, &wparam(1)); # out + + # We have loaded them all, how lets push things + &comment("getting encrypt flag from parameter $enc_off"); + &mov("ecx", &wparam($enc_off)); # Get enc flag + if ($p3 > 0) + { + &comment("get and push parameter $p3"); + if ($enc_off != $p3) + { &mov("eax", &wparam($p3)); &push("eax"); } + else { &push("ecx"); } + } + if ($p2 > 0) + { + &comment("get and push parameter $p2"); + if ($enc_off != $p2) + { &mov("eax", &wparam($p2)); &push("eax"); } + else { &push("ecx"); } + } + if ($p1 > 0) + { + &comment("get and push parameter $p1"); + if ($enc_off != $p1) + { &mov("eax", &wparam($p1)); &push("eax"); } + else { &push("ecx"); } + } + &push("ebx"); # push data/iv + + &cmp("ecx",0); + &jz(&label("decrypt")); + + &and($count,0xfffffff8); + &mov("eax", &DWP($data_off,"esp","",0)); # load iv[0] + &mov("ebx", &DWP($data_off+4,"esp","",0)); # load iv[1] + + &jz(&label("encrypt_finish")); + + ############################################################# + + &set_label("encrypt_loop"); + # encrypt start + # "eax" and "ebx" hold iv (or the last cipher text) + + &mov("ecx", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("edx", &DWP(4,$in,"",0)); # second 4 bytes + + &xor("eax", "ecx"); + &xor("ebx", "edx"); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($enc_func); + + &mov("eax", &DWP($data_off,"esp","",0)); + &mov("ebx", &DWP($data_off+4,"esp","",0)); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP(0,$out,"",0),"eax"); + &mov(&DWP(4,$out,"",0),"ebx"); + + # eax and ebx are the next iv. + + &add($in, 8); + &add($out, 8); + + &sub($count, 8); + &jnz(&label("encrypt_loop")); + +###################################################################3 + &set_label("encrypt_finish"); + &mov($count, &wparam(2)); # length + &and($count, 7); + &jz(&label("finish")); + &call(&label("PIC_point")); +&set_label("PIC_point"); + &blindpop("edx"); + &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx")); + &mov($count,&DWP(0,"ecx",$count,4)) + &add($count,"edx"); + &xor("ecx","ecx"); + &xor("edx","edx"); + #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4)); + &jmp_ptr($count); + +&set_label("ej7"); + &xor("edx", "edx") if $ppro; # ppro friendly + &movb(&HB("edx"), &BP(6,$in,"",0)); + &shl("edx",8); +&set_label("ej6"); + &movb(&HB("edx"), &BP(5,$in,"",0)); +&set_label("ej5"); + &movb(&LB("edx"), &BP(4,$in,"",0)); +&set_label("ej4"); + &mov("ecx", &DWP(0,$in,"",0)); + &jmp(&label("ejend")); +&set_label("ej3"); + &movb(&HB("ecx"), &BP(2,$in,"",0)); + &xor("ecx", "ecx") if $ppro; # ppro friendly + &shl("ecx",8); +&set_label("ej2"); + &movb(&HB("ecx"), &BP(1,$in,"",0)); +&set_label("ej1"); + &movb(&LB("ecx"), &BP(0,$in,"",0)); +&set_label("ejend"); + + &xor("eax", "ecx"); + &xor("ebx", "edx"); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($enc_func); + + &mov("eax", &DWP($data_off,"esp","",0)); + &mov("ebx", &DWP($data_off+4,"esp","",0)); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP(0,$out,"",0),"eax"); + &mov(&DWP(4,$out,"",0),"ebx"); + + &jmp(&label("finish")); + + ############################################################# + ############################################################# + &set_label("decrypt",1); + # decrypt start + &and($count,0xfffffff8); + # The next 2 instructions are only for if the jz is taken + &mov("eax", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("ebx", &DWP($data_off+12,"esp","",0)); # get iv[1] + &jz(&label("decrypt_finish")); + + &set_label("decrypt_loop"); + &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put back + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($dec_func); + + &mov("eax", &DWP($data_off,"esp","",0)); # get return + &mov("ebx", &DWP($data_off+4,"esp","",0)); # + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1] + + &xor("ecx", "eax"); + &xor("edx", "ebx"); + + &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, + &mov("ebx", &DWP(4,$in,"",0)); # next iv actually + + &mov(&DWP(0,$out,"",0),"ecx"); + &mov(&DWP(4,$out,"",0),"edx"); + + &mov(&DWP($data_off+8,"esp","",0), "eax"); # save iv + &mov(&DWP($data_off+12,"esp","",0), "ebx"); # + + &add($in, 8); + &add($out, 8); + + &sub($count, 8); + &jnz(&label("decrypt_loop")); +############################ ENDIT #######################3 + &set_label("decrypt_finish"); + &mov($count, &wparam(2)); # length + &and($count, 7); + &jz(&label("finish")); + + &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put back + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($dec_func); + + &mov("eax", &DWP($data_off,"esp","",0)); # get return + &mov("ebx", &DWP($data_off+4,"esp","",0)); # + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1] + + &xor("ecx", "eax"); + &xor("edx", "ebx"); + + # this is for when we exit + &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, + &mov("ebx", &DWP(4,$in,"",0)); # next iv actually + +&set_label("dj7"); + &rotr("edx", 16); + &movb(&BP(6,$out,"",0), &LB("edx")); + &shr("edx",16); +&set_label("dj6"); + &movb(&BP(5,$out,"",0), &HB("edx")); +&set_label("dj5"); + &movb(&BP(4,$out,"",0), &LB("edx")); +&set_label("dj4"); + &mov(&DWP(0,$out,"",0), "ecx"); + &jmp(&label("djend")); +&set_label("dj3"); + &rotr("ecx", 16); + &movb(&BP(2,$out,"",0), &LB("ecx")); + &shl("ecx",16); +&set_label("dj2"); + &movb(&BP(1,$in,"",0), &HB("ecx")); +&set_label("dj1"); + &movb(&BP(0,$in,"",0), &LB("ecx")); +&set_label("djend"); + + # final iv is still in eax:ebx + &jmp(&label("finish")); + + +############################ FINISH #######################3 + &set_label("finish",1); + &mov("ecx", &wparam($iv_off)); # Get iv ptr + + ################################################# + $total=16+4; + $total+=4 if ($p1 > 0); + $total+=4 if ($p2 > 0); + $total+=4 if ($p3 > 0); + &add("esp",$total); + + &mov(&DWP(0,"ecx","",0), "eax"); # save iv + &mov(&DWP(4,"ecx","",0), "ebx"); # save iv + + &function_end_A($name); + + &align(64); + &set_label("cbc_enc_jmp_table"); + &data_word("0"); + &data_word(&label("ej1")."-".&label("PIC_point")); + &data_word(&label("ej2")."-".&label("PIC_point")); + &data_word(&label("ej3")."-".&label("PIC_point")); + &data_word(&label("ej4")."-".&label("PIC_point")); + &data_word(&label("ej5")."-".&label("PIC_point")); + &data_word(&label("ej6")."-".&label("PIC_point")); + &data_word(&label("ej7")."-".&label("PIC_point")); + # not used + #&set_label("cbc_dec_jmp_table",1); + #&data_word("0"); + #&data_word(&label("dj1")."-".&label("PIC_point")); + #&data_word(&label("dj2")."-".&label("PIC_point")); + #&data_word(&label("dj3")."-".&label("PIC_point")); + #&data_word(&label("dj4")."-".&label("PIC_point")); + #&data_word(&label("dj5")."-".&label("PIC_point")); + #&data_word(&label("dj6")."-".&label("PIC_point")); + #&data_word(&label("dj7")."-".&label("PIC_point")); + &align(64); + + &function_end_B($name); + + } + +1; diff --git a/openssl/crypto/perlasm/readme b/openssl/crypto/perlasm/readme new file mode 100644 index 000000000..f02bbee75 --- /dev/null +++ b/openssl/crypto/perlasm/readme @@ -0,0 +1,124 @@ +The perl scripts in this directory are my 'hack' to generate +multiple different assembler formats via the one origional script. + +The way to use this library is to start with adding the path to this directory +and then include it. + +push(@INC,"perlasm","../../perlasm"); +require "x86asm.pl"; + +The first thing we do is setup the file and type of assember + +&asm_init($ARGV[0],$0); + +The first argument is the 'type'. Currently +'cpp', 'sol', 'a.out', 'elf' or 'win32'. +Argument 2 is the file name. + +The reciprocal function is +&asm_finish() which should be called at the end. + +There are 2 main 'packages'. x86ms.pl, which is the microsoft assembler, +and x86unix.pl which is the unix (gas) version. + +Functions of interest are: +&external_label("des_SPtrans"); declare and external variable +&LB(reg); Low byte for a register +&HB(reg); High byte for a register +&BP(off,base,index,scale) Byte pointer addressing +&DWP(off,base,index,scale) Word pointer addressing +&stack_push(num) Basically a 'sub esp, num*4' with extra +&stack_pop(num) inverse of stack_push +&function_begin(name,extra) Start a function with pushing of + edi, esi, ebx and ebp. extra is extra win32 + external info that may be required. +&function_begin_B(name,extra) Same as norma function_begin but no pushing. +&function_end(name) Call at end of function. +&function_end_A(name) Standard pop and ret, for use inside functions +&function_end_B(name) Call at end but with poping or 'ret'. +&swtmp(num) Address on stack temp word. +&wparam(num) Parameter number num, that was push + in C convention. This all works over pushes + and pops. +&comment("hello there") Put in a comment. +&label("loop") Refer to a label, normally a jmp target. +&set_label("loop") Set a label at this point. +&data_word(word) Put in a word of data. + +So how does this all hold together? Given + +int calc(int len, int *data) + { + int i,j=0; + + for (i=0; i<len; i++) + { + j+=other(data[i]); + } + } + +So a very simple version of this function could be coded as + + push(@INC,"perlasm","../../perlasm"); + require "x86asm.pl"; + + &asm_init($ARGV[0],"cacl.pl"); + + &external_label("other"); + + $tmp1= "eax"; + $j= "edi"; + $data= "esi"; + $i= "ebp"; + + &comment("a simple function"); + &function_begin("calc"); + &mov( $data, &wparam(1)); # data + &xor( $j, $j); + &xor( $i, $i); + + &set_label("loop"); + &cmp( $i, &wparam(0)); + &jge( &label("end")); + + &mov( $tmp1, &DWP(0,$data,$i,4)); + &push( $tmp1); + &call( "other"); + &add( $j, "eax"); + &pop( $tmp1); + &inc( $i); + &jmp( &label("loop")); + + &set_label("end"); + &mov( "eax", $j); + + &function_end("calc"); + + &asm_finish(); + +The above example is very very unoptimised but gives an idea of how +things work. + +There is also a cbc mode function generator in cbc.pl + +&cbc( $name, + $encrypt_function_name, + $decrypt_function_name, + $true_if_byte_swap_needed, + $parameter_number_for_iv, + $parameter_number_for_encrypt_flag, + $first_parameter_to_pass, + $second_parameter_to_pass, + $third_parameter_to_pass); + +So for example, given +void BF_encrypt(BF_LONG *data,BF_KEY *key); +void BF_decrypt(BF_LONG *data,BF_KEY *key); +void BF_cbc_encrypt(unsigned char *in, unsigned char *out, long length, + BF_KEY *ks, unsigned char *iv, int enc); + +&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt",1,4,5,3,-1,-1); + +&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",0,4,5,3,5,-1); +&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5); + diff --git a/openssl/crypto/perlasm/x86_64-xlate.pl b/openssl/crypto/perlasm/x86_64-xlate.pl new file mode 100644 index 000000000..a4af769b4 --- /dev/null +++ b/openssl/crypto/perlasm/x86_64-xlate.pl @@ -0,0 +1,554 @@ +#!/usr/bin/env perl + +# Ascetic x86_64 AT&T to MASM assembler translator by <appro>. +# +# Why AT&T to MASM and not vice versa? Several reasons. Because AT&T +# format is way easier to parse. Because it's simpler to "gear" from +# Unix ABI to Windows one [see cross-reference "card" at the end of +# file]. Because Linux targets were available first... +# +# In addition the script also "distills" code suitable for GNU +# assembler, so that it can be compiled with more rigid assemblers, +# such as Solaris /usr/ccs/bin/as. +# +# This translator is not designed to convert *arbitrary* assembler +# code from AT&T format to MASM one. It's designed to convert just +# enough to provide for dual-ABI OpenSSL modules development... +# There *are* limitations and you might have to modify your assembler +# code or this script to achieve the desired result... +# +# Currently recognized limitations: +# +# - can't use multiple ops per line; +# - indirect calls and jumps are not supported; +# +# Dual-ABI styling rules. +# +# 1. Adhere to Unix register and stack layout [see the end for +# explanation]. +# 2. Forget about "red zone," stick to more traditional blended +# stack frame allocation. If volatile storage is actually required +# that is. If not, just leave the stack as is. +# 3. Functions tagged with ".type name,@function" get crafted with +# unified Win64 prologue and epilogue automatically. If you want +# to take care of ABI differences yourself, tag functions as +# ".type name,@abi-omnipotent" instead. +# 4. To optimize the Win64 prologue you can specify number of input +# arguments as ".type name,@function,N." Keep in mind that if N is +# larger than 6, then you *have to* write "abi-omnipotent" code, +# because >6 cases can't be addressed with unified prologue. +# 5. Name local labels as .L*, do *not* use dynamic labels such as 1: +# (sorry about latter). +# 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is +# required to identify the spots, where to inject Win64 epilogue! +# But on the pros, it's then prefixed with rep automatically:-) +# 7. Due to MASM limitations [and certain general counter-intuitivity +# of ip-relative addressing] generation of position-independent +# code is assisted by synthetic directive, .picmeup, which puts +# address of the *next* instruction into target register. +# +# Example 1: +# .picmeup %rax +# lea .Label-.(%rax),%rax +# Example 2: +# .picmeup %rcx +# .Lpic_point: +# ... +# lea .Label-.Lpic_point(%rcx),%rbp + +my $output = shift; + +{ my ($stddev,$stdino,@junk)=stat(STDOUT); + my ($outdev,$outino,@junk)=stat($output); + + open STDOUT,">$output" || die "can't open $output: $!" + if ($stddev!=$outdev || $stdino!=$outino); +} + +my $masmref=8 + 50727*2**-32; # 8.00.50727 shipped with VS2005 +my $masm=$masmref if ($output =~ /\.asm/); +if ($masm && `ml64 2>&1` =~ m/Version ([0-9]+)\.([0-9]+)(\.([0-9]+))?/) +{ $masm=$1 + $2*2**-16 + $4*2**-32; } + +my $current_segment; +my $current_function; + +{ package opcode; # pick up opcodes + sub re { + my $self = shift; # single instance in enough... + local *line = shift; + undef $ret; + + if ($line =~ /^([a-z][a-z0-9]*)/i) { + $self->{op} = $1; + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + + undef $self->{sz}; + if ($self->{op} =~ /^(movz)b.*/) { # movz is pain... + $self->{op} = $1; + $self->{sz} = "b"; + } elsif ($self->{op} =~ /call/) { + $self->{sz} = "" + } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { + $self->{op} = $1; + $self->{sz} = $2; + } + } + $ret; + } + sub size { + my $self = shift; + my $sz = shift; + $self->{sz} = $sz if (defined($sz) && !defined($self->{sz})); + $self->{sz}; + } + sub out { + my $self = shift; + if (!$masm) { + if ($self->{op} eq "movz") { # movz is pain... + sprintf "%s%s%s",$self->{op},$self->{sz},shift; + } elsif ($self->{op} =~ /^set/) { + "$self->{op}"; + } elsif ($self->{op} eq "ret") { + ".byte 0xf3,0xc3"; + } else { + "$self->{op}$self->{sz}"; + } + } else { + $self->{op} =~ s/^movz/movzx/; + if ($self->{op} eq "ret") { + $self->{op} = ""; + if ($current_function->{abi} eq "svr4") { + $self->{op} = "mov rdi,QWORD PTR 8[rsp]\t;WIN64 epilogue\n\t". + "mov rsi,QWORD PTR 16[rsp]\n\t"; + } + $self->{op} .= "DB\t0F3h,0C3h\t\t;repret"; + } + $self->{op}; + } + } +} +{ package const; # pick up constants, which start with $ + sub re { + my $self = shift; # single instance in enough... + local *line = shift; + undef $ret; + + if ($line =~ /^\$([^,]+)/) { + $self->{value} = $1; + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + } + $ret; + } + sub out { + my $self = shift; + + if (!$masm) { + # Solaris /usr/ccs/bin/as can't handle multiplications + # in $self->{value} + $self->{value} =~ s/(?<![0-9a-f])(0[x0-9a-f]+)/oct($1)/egi; + $self->{value} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; + sprintf "\$%s",$self->{value}; + } else { + $self->{value} =~ s/0x([0-9a-f]+)/0$1h/ig; + sprintf "%s",$self->{value}; + } + } +} +{ package ea; # pick up effective addresses: expr(%reg,%reg,scale) + sub re { + my $self = shift; # single instance in enough... + local *line = shift; + undef $ret; + + if ($line =~ /^([^\(,]*)\(([%\w,]+)\)/) { + $self->{label} = $1; + ($self->{base},$self->{index},$self->{scale})=split(/,/,$2); + $self->{scale} = 1 if (!defined($self->{scale})); + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + + $self->{base} =~ s/^%//; + $self->{index} =~ s/^%// if (defined($self->{index})); + } + $ret; + } + sub size {} + sub out { + my $self = shift; + my $sz = shift; + + # Silently convert all EAs to 64-bit. This is required for + # elder GNU assembler and results in more compact code, + # *but* most importantly AES module depends on this feature! + $self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; + $self->{base} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; + + if (!$masm) { + # Solaris /usr/ccs/bin/as can't handle multiplications + # in $self->{label} + $self->{label} =~ s/(?<![0-9a-f])(0[x0-9a-f]+)/oct($1)/egi; + $self->{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; + + if (defined($self->{index})) { + sprintf "%s(%%%s,%%%s,%d)", + $self->{label},$self->{base}, + $self->{index},$self->{scale}; + } else { + sprintf "%s(%%%s)", $self->{label},$self->{base}; + } + } else { + %szmap = ( b=>"BYTE", w=>"WORD", l=>"DWORD", q=>"QWORD" ); + + $self->{label} =~ s/\./\$/g; + $self->{label} =~ s/0x([0-9a-f]+)/0$1h/ig; + $self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/); + + if (defined($self->{index})) { + sprintf "%s PTR %s[%s*%d+%s]",$szmap{$sz}, + $self->{label}, + $self->{index},$self->{scale}, + $self->{base}; + } elsif ($self->{base} eq "rip") { + sprintf "%s PTR %s",$szmap{$sz},$self->{label}; + } else { + sprintf "%s PTR %s[%s]",$szmap{$sz}, + $self->{label},$self->{base}; + } + } + } +} +{ package register; # pick up registers, which start with %. + sub re { + my $class = shift; # muliple instances... + my $self = {}; + local *line = shift; + undef $ret; + + if ($line =~ /^%(\w+)/) { + bless $self,$class; + $self->{value} = $1; + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + } + $ret; + } + sub size { + my $self = shift; + undef $ret; + + if ($self->{value} =~ /^r[\d]+b$/i) { $ret="b"; } + elsif ($self->{value} =~ /^r[\d]+w$/i) { $ret="w"; } + elsif ($self->{value} =~ /^r[\d]+d$/i) { $ret="l"; } + elsif ($self->{value} =~ /^r[\w]+$/i) { $ret="q"; } + elsif ($self->{value} =~ /^[a-d][hl]$/i){ $ret="b"; } + elsif ($self->{value} =~ /^[\w]{2}l$/i) { $ret="b"; } + elsif ($self->{value} =~ /^[\w]{2}$/i) { $ret="w"; } + elsif ($self->{value} =~ /^e[a-z]{2}$/i){ $ret="l"; } + + $ret; + } + sub out { + my $self = shift; + sprintf $masm?"%s":"%%%s",$self->{value}; + } +} +{ package label; # pick up labels, which end with : + sub re { + my $self = shift; # single instance is enough... + local *line = shift; + undef $ret; + + if ($line =~ /(^[\.\w]+\:)/) { + $self->{value} = $1; + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + + $self->{value} =~ s/\.L/\$L/ if ($masm); + } + $ret; + } + sub out { + my $self = shift; + + if (!$masm) { + $self->{value}; + } elsif ($self->{value} ne "$current_function->{name}:") { + $self->{value}; + } elsif ($current_function->{abi} eq "svr4") { + my $func = "$current_function->{name} PROC\n". + " mov QWORD PTR 8[rsp],rdi\t;WIN64 prologue\n". + " mov QWORD PTR 16[rsp],rsi\n"; + my $narg = $current_function->{narg}; + $narg=6 if (!defined($narg)); + $func .= " mov rdi,rcx\n" if ($narg>0); + $func .= " mov rsi,rdx\n" if ($narg>1); + $func .= " mov rdx,r8\n" if ($narg>2); + $func .= " mov rcx,r9\n" if ($narg>3); + $func .= " mov r8,QWORD PTR 40[rsp]\n" if ($narg>4); + $func .= " mov r9,QWORD PTR 48[rsp]\n" if ($narg>5); + $func .= "\n"; + } else { + "$current_function->{name} PROC"; + } + } +} +{ package expr; # pick up expressioins + sub re { + my $self = shift; # single instance is enough... + local *line = shift; + undef $ret; + + if ($line =~ /(^[^,]+)/) { + $self->{value} = $1; + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + + $self->{value} =~ s/\.L/\$L/g if ($masm); + } + $ret; + } + sub out { + my $self = shift; + $self->{value}; + } +} +{ package directive; # pick up directives, which start with . + sub re { + my $self = shift; # single instance is enough... + local *line = shift; + undef $ret; + my $dir; + my %opcode = # lea 2f-1f(%rip),%dst; 1: nop; 2: + ( "%rax"=>0x01058d48, "%rcx"=>0x010d8d48, + "%rdx"=>0x01158d48, "%rbx"=>0x011d8d48, + "%rsp"=>0x01258d48, "%rbp"=>0x012d8d48, + "%rsi"=>0x01358d48, "%rdi"=>0x013d8d48, + "%r8" =>0x01058d4c, "%r9" =>0x010d8d4c, + "%r10"=>0x01158d4c, "%r11"=>0x011d8d4c, + "%r12"=>0x01258d4c, "%r13"=>0x012d8d4c, + "%r14"=>0x01358d4c, "%r15"=>0x013d8d4c ); + + if ($line =~ /^\s*(\.\w+)/) { + if (!$masm) { + $self->{value} = $1; + $line =~ s/\@abi\-omnipotent/\@function/; + $line =~ s/\@function.*/\@function/; + if ($line =~ /\.picmeup\s+(%r[\w]+)/i) { + $self->{value} = sprintf "\t.long\t0x%x,0x90000000",$opcode{$1}; + } elsif ($line =~ /\.asciz\s+"(.*)"$/) { + $self->{value} = ".byte\t".join(",",unpack("C*",$1),0); + } elsif ($line =~ /\.extern/) { + $self->{value} = ""; # swallow extern + } else { + $self->{value} = $line; + } + $line = ""; + return $self; + } + + $dir = $1; + $ret = $self; + undef $self->{value}; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + SWITCH: for ($dir) { + /\.(text)/ + && do { my $v=undef; + $v="$current_segment\tENDS\n" if ($current_segment); + $current_segment = "_$1\$"; + $current_segment =~ tr/[a-z]/[A-Z]/; + $v.="$current_segment\tSEGMENT "; + $v.=$masm>=$masmref ? "ALIGN(64)" : "PAGE"; + $v.=" 'CODE'"; + $self->{value} = $v; + last; + }; + /\.extern/ && do { $self->{value} = "EXTRN\t".$line.":BYTE"; last; }; + /\.globl/ && do { $self->{value} = "PUBLIC\t".$line; last; }; + /\.type/ && do { ($sym,$type,$narg) = split(',',$line); + if ($type eq "\@function") { + undef $current_function; + $current_function->{name} = $sym; + $current_function->{abi} = "svr4"; + $current_function->{narg} = $narg; + } elsif ($type eq "\@abi-omnipotent") { + undef $current_function; + $current_function->{name} = $sym; + } + last; + }; + /\.size/ && do { if (defined($current_function)) { + $self->{value}="$current_function->{name}\tENDP"; + undef $current_function; + } + last; + }; + /\.align/ && do { $self->{value} = "ALIGN\t".$line; last; }; + /\.(byte|value|long|quad)/ + && do { my @arr = split(',',$line); + my $sz = substr($1,0,1); + my $last = pop(@arr); + my $conv = sub { my $var=shift; + if ($var=~s/0x([0-9a-f]+)/0$1h/i) { $var; } + else { sprintf"0%Xh",$var; } + }; + + $sz =~ tr/bvlq/BWDQ/; + $self->{value} = "\tD$sz\t"; + for (@arr) { $self->{value} .= &$conv($_).","; } + $self->{value} .= &$conv($last); + last; + }; + /\.picmeup/ && do { $self->{value} = sprintf"\tDD\t 0%Xh,090000000h",$opcode{$line}; + last; + }; + /\.asciz/ && do { if ($line =~ /^"(.*)"$/) { + my @str=unpack("C*",$1); + push @str,0; + while ($#str>15) { + $self->{value}.="DB\t" + .join(",",@str[0..15])."\n"; + foreach (0..15) { shift @str; } + } + $self->{value}.="DB\t" + .join(",",@str) if (@str); + } + last; + }; + } + $line = ""; + } + + $ret; + } + sub out { + my $self = shift; + $self->{value}; + } +} + +while($line=<>) { + + chomp($line); + + $line =~ s|[#!].*$||; # get rid of asm-style comments... + $line =~ s|/\*.*\*/||; # ... and C-style comments... + $line =~ s|^\s+||; # ... and skip white spaces in beginning + + undef $label; + undef $opcode; + undef $dst; + undef $src; + undef $sz; + + if ($label=label->re(\$line)) { print $label->out(); } + + if (directive->re(\$line)) { + printf "%s",directive->out(); + } elsif ($opcode=opcode->re(\$line)) { ARGUMENT: { + + if ($src=register->re(\$line)) { opcode->size($src->size()); } + elsif ($src=const->re(\$line)) { } + elsif ($src=ea->re(\$line)) { } + elsif ($src=expr->re(\$line)) { } + + last ARGUMENT if ($line !~ /^,/); + + $line = substr($line,1); $line =~ s/^\s+//; + + if ($dst=register->re(\$line)) { opcode->size($dst->size()); } + elsif ($dst=const->re(\$line)) { } + elsif ($dst=ea->re(\$line)) { } + + } # ARGUMENT: + + $sz=opcode->size(); + + if (defined($dst)) { + if (!$masm) { + printf "\t%s\t%s,%s", $opcode->out($dst->size()), + $src->out($sz),$dst->out($sz); + } else { + printf "\t%s\t%s,%s", $opcode->out(), + $dst->out($sz),$src->out($sz); + } + } elsif (defined($src)) { + printf "\t%s\t%s",$opcode->out(),$src->out($sz); + } else { + printf "\t%s",$opcode->out(); + } + } + + print $line,"\n"; +} + +print "\n$current_segment\tENDS\nEND\n" if ($masm); + +close STDOUT; + +################################################# +# Cross-reference x86_64 ABI "card" +# +# Unix Win64 +# %rax * * +# %rbx - - +# %rcx #4 #1 +# %rdx #3 #2 +# %rsi #2 - +# %rdi #1 - +# %rbp - - +# %rsp - - +# %r8 #5 #3 +# %r9 #6 #4 +# %r10 * * +# %r11 * * +# %r12 - - +# %r13 - - +# %r14 - - +# %r15 - - +# +# (*) volatile register +# (-) preserved by callee +# (#) Nth argument, volatile +# +# In Unix terms top of stack is argument transfer area for arguments +# which could not be accomodated in registers. Or in other words 7th +# [integer] argument resides at 8(%rsp) upon function entry point. +# 128 bytes above %rsp constitute a "red zone" which is not touched +# by signal handlers and can be used as temporal storage without +# allocating a frame. +# +# In Win64 terms N*8 bytes on top of stack is argument transfer area, +# which belongs to/can be overwritten by callee. N is the number of +# arguments passed to callee, *but* not less than 4! This means that +# upon function entry point 5th argument resides at 40(%rsp), as well +# as that 32 bytes from 8(%rsp) can always be used as temporal +# storage [without allocating a frame]. One can actually argue that +# one can assume a "red zone" above stack pointer under Win64 as well. +# Point is that at apparently no occasion Windows kernel would alter +# the area above user stack pointer in true asynchronous manner... +# +# All the above means that if assembler programmer adheres to Unix +# register and stack layout, but disregards the "red zone" existense, +# it's possible to use following prologue and epilogue to "gear" from +# Unix to Win64 ABI in leaf functions with not more than 6 arguments. +# +# omnipotent_function: +# ifdef WIN64 +# movq %rdi,8(%rsp) +# movq %rsi,16(%rsp) +# movq %rcx,%rdi ; if 1st argument is actually present +# movq %rdx,%rsi ; if 2nd argument is actually ... +# movq %r8,%rdx ; if 3rd argument is ... +# movq %r9,%rcx ; if 4th argument ... +# movq 40(%rsp),%r8 ; if 5th ... +# movq 48(%rsp),%r9 ; if 6th ... +# endif +# ... +# ifdef WIN64 +# movq 8(%rsp),%rdi +# movq 16(%rsp),%rsi +# endif +# ret diff --git a/openssl/crypto/perlasm/x86asm.pl b/openssl/crypto/perlasm/x86asm.pl new file mode 100644 index 000000000..597912215 --- /dev/null +++ b/openssl/crypto/perlasm/x86asm.pl @@ -0,0 +1,130 @@ +#!/usr/local/bin/perl + +# require 'x86asm.pl'; +# &asm_init("cpp","des-586.pl"); +# XXX +# XXX +# main'asm_finish + +sub main'asm_finish + { + &file_end(); + &asm_finish_cpp() if $cpp; + print &asm_get_output(); + } + +sub main'asm_init + { + ($type,$fn,$i386)=@_; + $filename=$fn; + + $elf=$cpp=$coff=$aout=$win32=$netware=$mwerks=0; + if ( ($type eq "elf")) + { $elf=1; require "x86unix.pl"; } + elsif ( ($type eq "a.out")) + { $aout=1; require "x86unix.pl"; } + elsif ( ($type eq "coff" or $type eq "gaswin")) + { $coff=1; require "x86unix.pl"; } + elsif ( ($type eq "cpp")) + { $cpp=1; require "x86unix.pl"; } + elsif ( ($type eq "win32")) + { $win32=1; require "x86ms.pl"; } + elsif ( ($type eq "win32n")) + { $win32=1; require "x86nasm.pl"; } + elsif ( ($type eq "nw-nasm")) + { $netware=1; require "x86nasm.pl"; } + elsif ( ($type eq "nw-mwasm")) + { $netware=1; $mwerks=1; require "x86nasm.pl"; } + else + { + print STDERR <<"EOF"; +Pick one target type from + elf - Linux, FreeBSD, Solaris x86, etc. + a.out - OpenBSD, DJGPP, etc. + coff - GAS/COFF such as Win32 targets + win32 - Windows 95/Windows NT + win32n - Windows 95/Windows NT NASM format + nw-nasm - NetWare NASM format + nw-mwasm- NetWare Metrowerks Assembler +EOF + exit(1); + } + + $pic=0; + for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); } + + &asm_init_output(); + +&comment("Don't even think of reading this code"); +&comment("It was automatically generated by $filename"); +&comment("Which is a perl program used to generate the x86 assember for"); +&comment("any of ELF, a.out, COFF, Win32, ..."); +&comment("eric <eay\@cryptsoft.com>"); +&comment(""); + + $filename =~ s/\.pl$//; + &file($filename); + } + +sub asm_finish_cpp + { + return unless $cpp; + + local($tmp,$i); + foreach $i (&get_labels()) + { + $tmp.="#define $i _$i\n"; + } + print <<"EOF"; +/* Run the C pre-processor over this file with one of the following defined + * ELF - elf object files, + * OUT - a.out object files, + * BSDI - BSDI style a.out object files + * SOL - Solaris style elf + */ + +#define TYPE(a,b) .type a,b +#define SIZE(a,b) .size a,b + +#if defined(OUT) || (defined(BSDI) && !defined(ELF)) +$tmp +#endif + +#ifdef OUT +#define OK 1 +#define ALIGN 4 +#if defined(__CYGWIN__) || defined(__DJGPP__) || (__MINGW32__) +#undef SIZE +#undef TYPE +#define SIZE(a,b) +#define TYPE(a,b) .def a; .scl 2; .type 32; .endef +#endif /* __CYGWIN || __DJGPP */ +#endif + +#if defined(BSDI) && !defined(ELF) +#define OK 1 +#define ALIGN 4 +#undef SIZE +#undef TYPE +#define SIZE(a,b) +#define TYPE(a,b) +#endif + +#if defined(ELF) || defined(SOL) +#define OK 1 +#define ALIGN 16 +#endif + +#ifndef OK +You need to define one of +ELF - elf systems - linux-elf, NetBSD and DG-UX +OUT - a.out systems - linux-a.out and FreeBSD +SOL - solaris systems, which are elf with strange comment lines +BSDI - a.out with a very primative version of as. +#endif + +/* Let the Assembler begin :-) */ +EOF + } + +1; diff --git a/openssl/crypto/perlasm/x86ms.pl b/openssl/crypto/perlasm/x86ms.pl new file mode 100644 index 000000000..a0be2934c --- /dev/null +++ b/openssl/crypto/perlasm/x86ms.pl @@ -0,0 +1,472 @@ +#!/usr/local/bin/perl + +package x86ms; + +$label="L000"; + +%lb=( 'eax', 'al', + 'ebx', 'bl', + 'ecx', 'cl', + 'edx', 'dl', + 'ax', 'al', + 'bx', 'bl', + 'cx', 'cl', + 'dx', 'dl', + ); + +%hb=( 'eax', 'ah', + 'ebx', 'bh', + 'ecx', 'ch', + 'edx', 'dh', + 'ax', 'ah', + 'bx', 'bh', + 'cx', 'ch', + 'dx', 'dh', + ); + +sub main'asm_init_output { @out=(); } +sub main'asm_get_output { return(@out); } +sub main'get_labels { return(@labels); } +sub main'external_label +{ + push(@labels,@_); + foreach (@_) { + push(@out, "EXTRN\t_$_:DWORD\n"); + } +} + +sub main'LB + { + (defined($lb{$_[0]})) || die "$_[0] does not have a 'low byte'\n"; + return($lb{$_[0]}); + } + +sub main'HB + { + (defined($hb{$_[0]})) || die "$_[0] does not have a 'high byte'\n"; + return($hb{$_[0]}); + } + +sub main'BP + { + &get_mem("BYTE",@_); + } + +sub main'DWP + { + &get_mem("DWORD",@_); + } + +sub main'QWP + { + &get_mem("QWORD",@_); + } + +sub main'BC + { + return @_; + } + +sub main'DWC + { + return @_; + } + +sub main'stack_push + { + local($num)=@_; + $stack+=$num*4; + &main'sub("esp",$num*4); + } + +sub main'stack_pop + { + local($num)=@_; + $stack-=$num*4; + &main'add("esp",$num*4); + } + +sub get_mem + { + local($size,$addr,$reg1,$reg2,$idx)=@_; + local($t,$post); + local($ret)="$size PTR "; + + $addr =~ s/^\s+//; + if ($addr =~ /^(.+)\+(.+)$/) + { + $reg2=&conv($1); + $addr="_$2"; + } + elsif ($addr =~ /^[_a-z][_a-z0-9]*$/i) + { + $addr="_$addr"; + } + + if ($addr =~ /^.+\-.+$/) { $addr="($addr)"; } + + $reg1="$regs{$reg1}" if defined($regs{$reg1}); + $reg2="$regs{$reg2}" if defined($regs{$reg2}); + if (($addr ne "") && ($addr ne 0)) + { + if ($addr !~ /^-/) + { $ret.=$addr; } + else { $post=$addr; } + } + if ($reg2 ne "") + { + $t=""; + $t="*$idx" if ($idx != 0); + $reg1="+".$reg1 if ("$reg1$post" ne ""); + $ret.="[$reg2$t$reg1$post]"; + } + else + { + $ret.="[$reg1$post]" + } + $ret =~ s/\[\]//; # in case $addr was the only argument + return($ret); + } + +sub main'mov { &out2("mov",@_); } +sub main'movb { &out2("mov",@_); } +sub main'and { &out2("and",@_); } +sub main'or { &out2("or",@_); } +sub main'shl { &out2("shl",@_); } +sub main'shr { &out2("shr",@_); } +sub main'xor { &out2("xor",@_); } +sub main'xorb { &out2("xor",@_); } +sub main'add { &out2("add",@_); } +sub main'adc { &out2("adc",@_); } +sub main'sub { &out2("sub",@_); } +sub main'sbb { &out2("sbb",@_); } +sub main'rotl { &out2("rol",@_); } +sub main'rotr { &out2("ror",@_); } +sub main'exch { &out2("xchg",@_); } +sub main'cmp { &out2("cmp",@_); } +sub main'lea { &out2("lea",@_); } +sub main'mul { &out1("mul",@_); } +sub main'imul { &out2("imul",@_); } +sub main'div { &out1("div",@_); } +sub main'dec { &out1("dec",@_); } +sub main'inc { &out1("inc",@_); } +sub main'jmp { &out1("jmp",@_); } +sub main'jmp_ptr { &out1p("jmp",@_); } +sub main'je { &out1("je",@_); } +sub main'jle { &out1("jle",@_); } +sub main'jz { &out1("jz",@_); } +sub main'jge { &out1("jge",@_); } +sub main'jl { &out1("jl",@_); } +sub main'ja { &out1("ja",@_); } +sub main'jae { &out1("jae",@_); } +sub main'jb { &out1("jb",@_); } +sub main'jbe { &out1("jbe",@_); } +sub main'jc { &out1("jc",@_); } +sub main'jnc { &out1("jnc",@_); } +sub main'jnz { &out1("jnz",@_); } +sub main'jne { &out1("jne",@_); } +sub main'jno { &out1("jno",@_); } +sub main'push { &out1("push",@_); $stack+=4; } +sub main'pop { &out1("pop",@_); $stack-=4; } +sub main'pushf { &out0("pushfd"); $stack+=4; } +sub main'popf { &out0("popfd"); $stack-=4; } +sub main'bswap { &out1("bswap",@_); &using486(); } +sub main'not { &out1("not",@_); } +sub main'call { &out1("call",($_[0]=~/^\$L/?'':'_').$_[0]); } +sub main'call_ptr { &out1p("call",@_); } +sub main'ret { &out0("ret"); } +sub main'nop { &out0("nop"); } +sub main'test { &out2("test",@_); } +sub main'bt { &out2("bt",@_); } +sub main'leave { &out0("leave"); } +sub main'cpuid { &out0("DW\t0A20Fh"); } +sub main'rdtsc { &out0("DW\t0310Fh"); } +sub main'halt { &out0("hlt"); } +sub main'movz { &out2("movzx",@_); } +sub main'neg { &out1("neg",@_); } +sub main'cld { &out0("cld"); } + +# SSE2 +sub main'emms { &out0("emms"); } +sub main'movd { &out2("movd",@_); } +sub main'movq { &out2("movq",@_); } +sub main'movdqu { &out2("movdqu",@_); } +sub main'movdqa { &out2("movdqa",@_); } +sub main'movdq2q{ &out2("movdq2q",@_); } +sub main'movq2dq{ &out2("movq2dq",@_); } +sub main'paddq { &out2("paddq",@_); } +sub main'pmuludq{ &out2("pmuludq",@_); } +sub main'psrlq { &out2("psrlq",@_); } +sub main'psllq { &out2("psllq",@_); } +sub main'pxor { &out2("pxor",@_); } +sub main'por { &out2("por",@_); } +sub main'pand { &out2("pand",@_); } + +sub out2 + { + local($name,$p1,$p2)=@_; + local($l,$t,$line); + + $line="\t$name\t"; + $t=&conv($p1).","; + $l=length($t); + $line.="$t"; + $l=4-($l+9)/8; + $line.="\t" x $l; + $line.=&conv($p2); + if ($line=~/\bxmm[0-7]\b/i) { $line=~s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i; } + push(@out,$line."\n"); + } + +sub out0 + { + local($name)=@_; + + push(@out,"\t$name\n"); + } + +sub out1 + { + local($name,$p1)=@_; + local($l,$t); + + push(@out,"\t$name\t".&conv($p1)."\n"); + } + +sub conv + { + local($p)=@_; + + $p =~ s/0x([0-9A-Fa-f]+)/0$1h/; + return $p; + } + +sub using486 + { + return if $using486; + $using486++; + grep(s/\.386/\.486/,@out); + } + +sub main'file + { + local($file)=@_; + + local($tmp)=<<"EOF"; + TITLE $file.asm + .386 +.model FLAT +_TEXT\$ SEGMENT PAGE 'CODE' + +EOF + push(@out,$tmp); + } + +sub main'function_begin + { + local($func,$extra)=@_; + + push(@labels,$func); + + local($tmp)=<<"EOF"; +PUBLIC _$func +$extra +_$func PROC NEAR + push ebp + push ebx + push esi + push edi +EOF + push(@out,$tmp); + $stack=20; + } + +sub main'function_begin_B + { + local($func,$extra)=@_; + + local($tmp)=<<"EOF"; +PUBLIC _$func +$extra +_$func PROC NEAR +EOF + push(@out,$tmp); + $stack=4; + } + +sub main'function_end + { + local($func)=@_; + + local($tmp)=<<"EOF"; + pop edi + pop esi + pop ebx + pop ebp + ret +_$func ENDP +EOF + push(@out,$tmp); + $stack=0; + %label=(); + } + +sub main'function_end_B + { + local($func)=@_; + + local($tmp)=<<"EOF"; +_$func ENDP +EOF + push(@out,$tmp); + $stack=0; + %label=(); + } + +sub main'function_end_A + { + local($func)=@_; + + local($tmp)=<<"EOF"; + pop edi + pop esi + pop ebx + pop ebp + ret +EOF + push(@out,$tmp); + } + +sub main'file_end + { + # try to detect if SSE2 or MMX extensions were used... + my $xmmheader=<<___; +.686 +.XMM +IF \@Version LT 800 +XMMWORD STRUCT 16 + DQ 2 dup (?) +XMMWORD ENDS +ENDIF +___ + if (grep {/\b[x]?mm[0-7]\b/i} @out) { + grep {s/\.[3-7]86/$xmmheader/} @out; + } + push(@out,"_TEXT\$ ENDS\n"); + push(@out,"END\n"); + } + +sub main'wparam + { + local($num)=@_; + + return(&main'DWP($stack+$num*4,"esp","",0)); + } + +sub main'swtmp + { + return(&main'DWP($_[0]*4,"esp","",0)); + } + +# Should use swtmp, which is above esp. Linix can trash the stack above esp +#sub main'wtmp +# { +# local($num)=@_; +# +# return(&main'DWP(-(($num+1)*4),"esp","",0)); +# } + +sub main'comment + { + foreach (@_) + { + push(@out,"\t; $_\n"); + } + } + +sub main'public_label + { + $label{$_[0]}="_$_[0]" if (!defined($label{$_[0]})); + push(@out,"PUBLIC\t$label{$_[0]}\n"); + } + +sub main'label + { + if (!defined($label{$_[0]})) + { + $label{$_[0]}="\$${label}${_[0]}"; + $label++; + } + return($label{$_[0]}); + } + +sub main'set_label + { + if (!defined($label{$_[0]})) + { + $label{$_[0]}="\$${label}${_[0]}"; + $label++; + } + if ($_[1]!=0 && $_[1]>1) + { + main'align($_[1]); + } + if((defined $_[2]) && ($_[2] == 1)) + { + push(@out,"$label{$_[0]}::\n"); + } + elsif ($label{$_[0]} !~ /^\$/) + { + push(@out,"$label{$_[0]}\tLABEL PTR\n"); + } + else + { + push(@out,"$label{$_[0]}:\n"); + } + } + +sub main'data_byte + { + push(@out,"\tDB\t".join(',',@_)."\n"); + } + +sub main'data_word + { + push(@out,"\tDD\t".join(',',@_)."\n"); + } + +sub main'align + { + push(@out,"\tALIGN\t$_[0]\n"); + } + +sub out1p + { + local($name,$p1)=@_; + local($l,$t); + + push(@out,"\t$name\t".&conv($p1)."\n"); + } + +sub main'picmeup + { + local($dst,$sym)=@_; + &main'lea($dst,&main'DWP($sym)); + } + +sub main'blindpop { &out1("pop",@_); } + +sub main'initseg + { + local($f)=@_; + local($tmp)=<<___; +OPTION DOTNAME +.CRT\$XCU SEGMENT DWORD PUBLIC 'DATA' +EXTRN _$f:NEAR +DD _$f +.CRT\$XCU ENDS +___ + push(@out,$tmp); + } + +1; diff --git a/openssl/crypto/perlasm/x86nasm.pl b/openssl/crypto/perlasm/x86nasm.pl new file mode 100644 index 000000000..fa38f89c0 --- /dev/null +++ b/openssl/crypto/perlasm/x86nasm.pl @@ -0,0 +1,455 @@ +#!/usr/local/bin/perl + +package x86nasm; + +$label="L000"; +$under=($main'netware)?'':'_'; + +%lb=( 'eax', 'al', + 'ebx', 'bl', + 'ecx', 'cl', + 'edx', 'dl', + 'ax', 'al', + 'bx', 'bl', + 'cx', 'cl', + 'dx', 'dl', + ); + +%hb=( 'eax', 'ah', + 'ebx', 'bh', + 'ecx', 'ch', + 'edx', 'dh', + 'ax', 'ah', + 'bx', 'bh', + 'cx', 'ch', + 'dx', 'dh', + ); + +sub main'asm_init_output { @out=(); } +sub main'asm_get_output { return(@out); } +sub main'get_labels { return(@labels); } + +sub main'external_label +{ + push(@labels,@_); + foreach (@_) { + push(@out,".") if ($main'mwerks); + push(@out, "extern\t${under}$_\n"); + } +} + +sub main'LB + { + (defined($lb{$_[0]})) || die "$_[0] does not have a 'low byte'\n"; + return($lb{$_[0]}); + } + +sub main'HB + { + (defined($hb{$_[0]})) || die "$_[0] does not have a 'high byte'\n"; + return($hb{$_[0]}); + } + +sub main'BP + { + &get_mem("BYTE",@_); + } + +sub main'DWP + { + &get_mem("DWORD",@_); + } + +sub main'QWP + { + &get_mem("",@_); + } + +sub main'BC + { + return (($main'mwerks)?"":"BYTE ")."@_"; + } + +sub main'DWC + { + return (($main'mwerks)?"":"DWORD ")."@_"; + } + +sub main'stack_push + { + my($num)=@_; + $stack+=$num*4; + &main'sub("esp",$num*4); + } + +sub main'stack_pop + { + my($num)=@_; + $stack-=$num*4; + &main'add("esp",$num*4); + } + +sub get_mem + { + my($size,$addr,$reg1,$reg2,$idx)=@_; + my($t,$post); + my($ret)=$size; + if ($ret ne "") + { + $ret .= " PTR" if ($main'mwerks); + $ret .= " "; + } + $ret .= "["; + $addr =~ s/^\s+//; + if ($addr =~ /^(.+)\+(.+)$/) + { + $reg2=&conv($1); + $addr="$under$2"; + } + elsif ($addr =~ /^[_a-z][_a-z0-9]*$/i) + { + $addr="$under$addr"; + } + + if ($addr =~ /^.+\-.+$/) { $addr="($addr)"; } + + $reg1="$regs{$reg1}" if defined($regs{$reg1}); + $reg2="$regs{$reg2}" if defined($regs{$reg2}); + if (($addr ne "") && ($addr ne 0)) + { + if ($addr !~ /^-/) + { $ret.="${addr}+"; } + else { $post=$addr; } + } + if ($reg2 ne "") + { + $t=""; + $t="*$idx" if ($idx != 0); + $reg1="+".$reg1 if ("$reg1$post" ne ""); + $ret.="$reg2$t$reg1$post]"; + } + else + { + $ret.="$reg1$post]" + } + $ret =~ s/\+\]/]/; # in case $addr was the only argument + return($ret); + } + +sub main'mov { &out2("mov",@_); } +sub main'movb { &out2("mov",@_); } +sub main'and { &out2("and",@_); } +sub main'or { &out2("or",@_); } +sub main'shl { &out2("shl",@_); } +sub main'shr { &out2("shr",@_); } +sub main'xor { &out2("xor",@_); } +sub main'xorb { &out2("xor",@_); } +sub main'add { &out2("add",@_); } +sub main'adc { &out2("adc",@_); } +sub main'sub { &out2("sub",@_); } +sub main'sbb { &out2("sbb",@_); } +sub main'rotl { &out2("rol",@_); } +sub main'rotr { &out2("ror",@_); } +sub main'exch { &out2("xchg",@_); } +sub main'cmp { &out2("cmp",@_); } +sub main'lea { &out2("lea",@_); } +sub main'mul { &out1("mul",@_); } +sub main'imul { &out2("imul",@_); } +sub main'div { &out1("div",@_); } +sub main'dec { &out1("dec",@_); } +sub main'inc { &out1("inc",@_); } +sub main'jmp { &out1("jmp",@_); } +sub main'jmp_ptr { &out1p("jmp",@_); } + +# This is a bit of a kludge: declare all branches as NEAR. +$near=($main'mwerks)?'':'NEAR'; +sub main'je { &out1("je $near",@_); } +sub main'jle { &out1("jle $near",@_); } +sub main'jz { &out1("jz $near",@_); } +sub main'jge { &out1("jge $near",@_); } +sub main'jl { &out1("jl $near",@_); } +sub main'ja { &out1("ja $near",@_); } +sub main'jae { &out1("jae $near",@_); } +sub main'jb { &out1("jb $near",@_); } +sub main'jbe { &out1("jbe $near",@_); } +sub main'jc { &out1("jc $near",@_); } +sub main'jnc { &out1("jnc $near",@_); } +sub main'jnz { &out1("jnz $near",@_); } +sub main'jne { &out1("jne $near",@_); } +sub main'jno { &out1("jno $near",@_); } + +sub main'push { &out1("push",@_); $stack+=4; } +sub main'pop { &out1("pop",@_); $stack-=4; } +sub main'pushf { &out0("pushfd"); $stack+=4; } +sub main'popf { &out0("popfd"); $stack-=4; } +sub main'bswap { &out1("bswap",@_); &using486(); } +sub main'not { &out1("not",@_); } +sub main'call { &out1("call",($_[0]=~/^\@L/?'':$under).$_[0]); } +sub main'call_ptr { &out1p("call",@_); } +sub main'ret { &out0("ret"); } +sub main'nop { &out0("nop"); } +sub main'test { &out2("test",@_); } +sub main'bt { &out2("bt",@_); } +sub main'leave { &out0("leave"); } +sub main'cpuid { &out0("cpuid"); } +sub main'rdtsc { &out0("rdtsc"); } +sub main'halt { &out0("hlt"); } +sub main'movz { &out2("movzx",@_); } +sub main'neg { &out1("neg",@_); } +sub main'cld { &out0("cld"); } + +# SSE2 +sub main'emms { &out0("emms"); } +sub main'movd { &out2("movd",@_); } +sub main'movq { &out2("movq",@_); } +sub main'movdqu { &out2("movdqu",@_); } +sub main'movdqa { &out2("movdqa",@_); } +sub main'movdq2q{ &out2("movdq2q",@_); } +sub main'movq2dq{ &out2("movq2dq",@_); } +sub main'paddq { &out2("paddq",@_); } +sub main'pmuludq{ &out2("pmuludq",@_); } +sub main'psrlq { &out2("psrlq",@_); } +sub main'psllq { &out2("psllq",@_); } +sub main'pxor { &out2("pxor",@_); } +sub main'por { &out2("por",@_); } +sub main'pand { &out2("pand",@_); } + +sub out2 + { + my($name,$p1,$p2)=@_; + my($l,$t); + + push(@out,"\t$name\t"); + if (!$main'mwerks and $name eq "lea") + { + $p1 =~ s/^[^\[]*\[/\[/; + $p2 =~ s/^[^\[]*\[/\[/; + } + $t=&conv($p1).","; + $l=length($t); + push(@out,$t); + $l=4-($l+9)/8; + push(@out,"\t" x $l); + push(@out,&conv($p2)); + push(@out,"\n"); + } + +sub out0 + { + my($name)=@_; + + push(@out,"\t$name\n"); + } + +sub out1 + { + my($name,$p1)=@_; + my($l,$t); + push(@out,"\t$name\t".&conv($p1)."\n"); + } + +sub conv + { + my($p)=@_; + $p =~ s/0x([0-9A-Fa-f]+)/0$1h/; + return $p; + } + +sub using486 + { + return if $using486; + $using486++; + grep(s/\.386/\.486/,@out); + } + +sub main'file + { + if ($main'mwerks) { push(@out,".section\t.text\n"); } + else { + local $tmp=<<___; +%ifdef __omf__ +section code use32 class=code +%else +section .text +%endif +___ + push(@out,$tmp); + } + } + +sub main'function_begin + { + my($func,$extra)=@_; + + push(@labels,$func); + push(@out,".") if ($main'mwerks); + my($tmp)=<<"EOF"; +global $under$func +$under$func: + push ebp + push ebx + push esi + push edi +EOF + push(@out,$tmp); + $stack=20; + } + +sub main'function_begin_B + { + my($func,$extra)=@_; + push(@out,".") if ($main'mwerks); + my($tmp)=<<"EOF"; +global $under$func +$under$func: +EOF + push(@out,$tmp); + $stack=4; + } + +sub main'function_end + { + my($func)=@_; + + my($tmp)=<<"EOF"; + pop edi + pop esi + pop ebx + pop ebp + ret +EOF + push(@out,$tmp); + $stack=0; + %label=(); + } + +sub main'function_end_B + { + $stack=0; + %label=(); + } + +sub main'function_end_A + { + my($func)=@_; + + my($tmp)=<<"EOF"; + pop edi + pop esi + pop ebx + pop ebp + ret +EOF + push(@out,$tmp); + } + +sub main'file_end + { + } + +sub main'wparam + { + my($num)=@_; + + return(&main'DWP($stack+$num*4,"esp","",0)); + } + +sub main'swtmp + { + return(&main'DWP($_[0]*4,"esp","",0)); + } + +# Should use swtmp, which is above esp. Linix can trash the stack above esp +#sub main'wtmp +# { +# my($num)=@_; +# +# return(&main'DWP(-(($num+1)*4),"esp","",0)); +# } + +sub main'comment + { + foreach (@_) + { + push(@out,"\t; $_\n"); + } + } + +sub main'public_label + { + $label{$_[0]}="${under}${_[0]}" if (!defined($label{$_[0]})); + push(@out,".") if ($main'mwerks); + push(@out,"global\t$label{$_[0]}\n"); + } + +sub main'label + { + if (!defined($label{$_[0]})) + { + $label{$_[0]}="\@${label}${_[0]}"; + $label++; + } + return($label{$_[0]}); + } + +sub main'set_label + { + if (!defined($label{$_[0]})) + { + $label{$_[0]}="\@${label}${_[0]}"; + $label++; + } + if ($_[1]!=0 && $_[1]>1) + { + main'align($_[1]); + } + push(@out,"$label{$_[0]}:\n"); + } + +sub main'data_byte + { + push(@out,(($main'mwerks)?".byte\t":"DB\t").join(',',@_)."\n"); + } + +sub main'data_word + { + push(@out,(($main'mwerks)?".long\t":"DD\t").join(',',@_)."\n"); + } + +sub main'align + { + push(@out,".") if ($main'mwerks); + push(@out,"align\t$_[0]\n"); + } + +sub out1p + { + my($name,$p1)=@_; + my($l,$t); + + push(@out,"\t$name\t".&conv($p1)."\n"); + } + +sub main'picmeup + { + local($dst,$sym)=@_; + &main'lea($dst,&main'DWP($sym)); + } + +sub main'blindpop { &out1("pop",@_); } + +sub main'initseg + { + local($f)=@_; + if ($main'win32) + { + local($tmp)=<<___; +segment .CRT\$XCU data +extern $under$f +DD $under$f +___ + push(@out,$tmp); + } + } + +1; diff --git a/openssl/crypto/perlasm/x86unix.pl b/openssl/crypto/perlasm/x86unix.pl new file mode 100644 index 000000000..a4c947165 --- /dev/null +++ b/openssl/crypto/perlasm/x86unix.pl @@ -0,0 +1,725 @@ +#!/usr/local/bin/perl + +package x86unix; # GAS actually... + +$label="L000"; +$const=""; +$constl=0; + +$align=($main'aout)?"4":"16"; +$under=($main'aout or $main'coff)?"_":""; +$dot=($main'aout)?"":"."; +$com_start="#" if ($main'aout or $main'coff); + +sub main'asm_init_output { @out=(); } +sub main'asm_get_output { return(@out); } +sub main'get_labels { return(@labels); } +sub main'external_label { push(@labels,@_); } + +if ($main'cpp) + { + $align="ALIGN"; + $under=""; + $com_start='/*'; + $com_end='*/'; + } + +%lb=( 'eax', '%al', + 'ebx', '%bl', + 'ecx', '%cl', + 'edx', '%dl', + 'ax', '%al', + 'bx', '%bl', + 'cx', '%cl', + 'dx', '%dl', + ); + +%hb=( 'eax', '%ah', + 'ebx', '%bh', + 'ecx', '%ch', + 'edx', '%dh', + 'ax', '%ah', + 'bx', '%bh', + 'cx', '%ch', + 'dx', '%dh', + ); + +%regs=( 'eax', '%eax', + 'ebx', '%ebx', + 'ecx', '%ecx', + 'edx', '%edx', + 'esi', '%esi', + 'edi', '%edi', + 'ebp', '%ebp', + 'esp', '%esp', + + 'mm0', '%mm0', + 'mm1', '%mm1', + 'mm2', '%mm2', + 'mm3', '%mm3', + 'mm4', '%mm4', + 'mm5', '%mm5', + 'mm6', '%mm6', + 'mm7', '%mm7', + + 'xmm0', '%xmm0', + 'xmm1', '%xmm1', + 'xmm2', '%xmm2', + 'xmm3', '%xmm3', + 'xmm4', '%xmm4', + 'xmm5', '%xmm5', + 'xmm6', '%xmm6', + 'xmm7', '%xmm7', + ); + +%reg_val=( + 'eax', 0x00, + 'ebx', 0x03, + 'ecx', 0x01, + 'edx', 0x02, + 'esi', 0x06, + 'edi', 0x07, + 'ebp', 0x05, + 'esp', 0x04, + ); + +sub main'LB + { + (defined($lb{$_[0]})) || die "$_[0] does not have a 'low byte'\n"; + return($lb{$_[0]}); + } + +sub main'HB + { + (defined($hb{$_[0]})) || die "$_[0] does not have a 'high byte'\n"; + return($hb{$_[0]}); + } + +sub main'DWP + { + local($addr,$reg1,$reg2,$idx)=@_; + + $ret=""; + $addr =~ s/(^|[+ \t])([A-Za-z_]+[A-Za-z0-9_]+)($|[+ \t])/$1$under$2$3/; + $reg1="$regs{$reg1}" if defined($regs{$reg1}); + $reg2="$regs{$reg2}" if defined($regs{$reg2}); + $ret.=$addr if ($addr ne "") && ($addr ne 0); + if ($reg2 ne "") + { + if($idx ne "" && $idx != 0) + { $ret.="($reg1,$reg2,$idx)"; } + else + { $ret.="($reg1,$reg2)"; } + } + elsif ($reg1 ne "") + { $ret.="($reg1)" } + return($ret); + } + +sub main'QWP + { + return(&main'DWP(@_)); + } + +sub main'BP + { + return(&main'DWP(@_)); + } + +sub main'BC + { + return @_; + } + +sub main'DWC + { + return @_; + } + +#sub main'BP +# { +# local($addr,$reg1,$reg2,$idx)=@_; +# +# $ret=""; +# +# $addr =~ s/(^|[+ \t])([A-Za-z_]+)($|[+ \t])/$1$under$2$3/; +# $reg1="$regs{$reg1}" if defined($regs{$reg1}); +# $reg2="$regs{$reg2}" if defined($regs{$reg2}); +# $ret.=$addr if ($addr ne "") && ($addr ne 0); +# if ($reg2 ne "") +# { $ret.="($reg1,$reg2,$idx)"; } +# else +# { $ret.="($reg1)" } +# return($ret); +# } + +sub main'mov { &out2("movl",@_); } +sub main'movb { &out2("movb",@_); } +sub main'and { &out2("andl",@_); } +sub main'or { &out2("orl",@_); } +sub main'shl { &out2("sall",@_); } +sub main'shr { &out2("shrl",@_); } +sub main'xor { &out2("xorl",@_); } +sub main'xorb { &out2("xorb",@_); } +sub main'add { &out2($_[0]=~/%[a-d][lh]/?"addb":"addl",@_); } +sub main'adc { &out2("adcl",@_); } +sub main'sub { &out2("subl",@_); } +sub main'sbb { &out2("sbbl",@_); } +sub main'rotl { &out2("roll",@_); } +sub main'rotr { &out2("rorl",@_); } +sub main'exch { &out2($_[0]=~/%[a-d][lh]/?"xchgb":"xchgl",@_); } +sub main'cmp { &out2("cmpl",@_); } +sub main'lea { &out2("leal",@_); } +sub main'mul { &out1("mull",@_); } +sub main'imul { &out2("imull",@_); } +sub main'div { &out1("divl",@_); } +sub main'jmp { &out1("jmp",@_); } +sub main'jmp_ptr { &out1p("jmp",@_); } +sub main'je { &out1("je",@_); } +sub main'jle { &out1("jle",@_); } +sub main'jne { &out1("jne",@_); } +sub main'jnz { &out1("jnz",@_); } +sub main'jz { &out1("jz",@_); } +sub main'jge { &out1("jge",@_); } +sub main'jl { &out1("jl",@_); } +sub main'ja { &out1("ja",@_); } +sub main'jae { &out1("jae",@_); } +sub main'jb { &out1("jb",@_); } +sub main'jbe { &out1("jbe",@_); } +sub main'jc { &out1("jc",@_); } +sub main'jnc { &out1("jnc",@_); } +sub main'jno { &out1("jno",@_); } +sub main'dec { &out1("decl",@_); } +sub main'inc { &out1($_[0]=~/%[a-d][hl]/?"incb":"incl",@_); } +sub main'push { &out1("pushl",@_); $stack+=4; } +sub main'pop { &out1("popl",@_); $stack-=4; } +sub main'pushf { &out0("pushfl"); $stack+=4; } +sub main'popf { &out0("popfl"); $stack-=4; } +sub main'not { &out1("notl",@_); } +sub main'call { my $pre=$under; + foreach $i (%label) + { if ($label{$i} eq $_[0]) { $pre=''; last; } } + &out1("call",$pre.$_[0]); + } +sub main'call_ptr { &out1p("call",@_); } +sub main'ret { &out0("ret"); } +sub main'nop { &out0("nop"); } +sub main'test { &out2("testl",@_); } +sub main'bt { &out2("btl",@_); } +sub main'leave { &out0("leave"); } +sub main'cpuid { &out0(".byte\t0x0f,0xa2"); } +sub main'rdtsc { &out0(".byte\t0x0f,0x31"); } +sub main'halt { &out0("hlt"); } +sub main'movz { &out2("movzbl",@_); } +sub main'neg { &out1("negl",@_); } +sub main'cld { &out0("cld"); } + +# SSE2 +sub main'emms { &out0("emms"); } +sub main'movd { &out2("movd",@_); } +sub main'movdqu { &out2("movdqu",@_); } +sub main'movdqa { &out2("movdqa",@_); } +sub main'movdq2q{ &out2("movdq2q",@_); } +sub main'movq2dq{ &out2("movq2dq",@_); } +sub main'paddq { &out2("paddq",@_); } +sub main'pmuludq{ &out2("pmuludq",@_); } +sub main'psrlq { &out2("psrlq",@_); } +sub main'psllq { &out2("psllq",@_); } +sub main'pxor { &out2("pxor",@_); } +sub main'por { &out2("por",@_); } +sub main'pand { &out2("pand",@_); } +sub main'movq { + local($p1,$p2,$optimize)=@_; + if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/) + # movq between mmx registers can sink Intel CPUs + { push(@out,"\tpshufw\t\$0xe4,%$p2,%$p1\n"); } + else { &out2("movq",@_); } + } + +# The bswapl instruction is new for the 486. Emulate if i386. +sub main'bswap + { + if ($main'i386) + { + &main'comment("bswapl @_"); + &main'exch(main'HB(@_),main'LB(@_)); + &main'rotr(@_,16); + &main'exch(main'HB(@_),main'LB(@_)); + } + else + { + &out1("bswapl",@_); + } + } + +sub out2 + { + local($name,$p1,$p2)=@_; + local($l,$ll,$t); + local(%special)=( "roll",0xD1C0,"rorl",0xD1C8, + "rcll",0xD1D0,"rcrl",0xD1D8, + "shll",0xD1E0,"shrl",0xD1E8, + "sarl",0xD1F8); + + if ((defined($special{$name})) && defined($regs{$p1}) && ($p2 == 1)) + { + $op=$special{$name}|$reg_val{$p1}; + $tmp1=sprintf(".byte %d\n",($op>>8)&0xff); + $tmp2=sprintf(".byte %d\t",$op &0xff); + push(@out,$tmp1); + push(@out,$tmp2); + + $p2=&conv($p2); + $p1=&conv($p1); + &main'comment("$name $p2 $p1"); + return; + } + + push(@out,"\t$name\t"); + $t=&conv($p2).","; + $l=length($t); + push(@out,$t); + $ll=4-($l+9)/8; + $tmp1=sprintf("\t" x $ll); + push(@out,$tmp1); + push(@out,&conv($p1)."\n"); + } + +sub out1 + { + local($name,$p1)=@_; + local($l,$t); + local(%special)=("bswapl",0x0FC8); + + if ((defined($special{$name})) && defined($regs{$p1})) + { + $op=$special{$name}|$reg_val{$p1}; + $tmp1=sprintf(".byte %d\n",($op>>8)&0xff); + $tmp2=sprintf(".byte %d\t",$op &0xff); + push(@out,$tmp1); + push(@out,$tmp2); + + $p2=&conv($p2); + $p1=&conv($p1); + &main'comment("$name $p2 $p1"); + return; + } + + push(@out,"\t$name\t".&conv($p1)."\n"); + } + +sub out1p + { + local($name,$p1)=@_; + local($l,$t); + + push(@out,"\t$name\t*".&conv($p1)."\n"); + } + +sub out0 + { + push(@out,"\t$_[0]\n"); + } + +sub conv + { + local($p)=@_; + +# $p =~ s/0x([0-9A-Fa-f]+)/0$1h/; + + $p=$regs{$p} if (defined($regs{$p})); + + $p =~ s/^(-{0,1}[0-9A-Fa-f]+)$/\$$1/; + $p =~ s/^(0x[0-9A-Fa-f]+)$/\$$1/; + return $p; + } + +sub main'file + { + local($file)=@_; + + local($tmp)=<<"EOF"; + .file "$file.s" +EOF + push(@out,$tmp); + } + +sub main'function_begin + { + local($func)=@_; + + &main'external_label($func); + $func=$under.$func; + + local($tmp)=<<"EOF"; +.text +.globl $func +EOF + push(@out,$tmp); + if ($main'cpp) + { $tmp=push(@out,"TYPE($func,\@function)\n"); } + elsif ($main'coff) + { $tmp=push(@out,".def\t$func;\t.scl\t2;\t.type\t32;\t.endef\n"); } + elsif ($main'aout and !$main'pic) + { } + else { $tmp=push(@out,".type\t$func,\@function\n"); } + push(@out,".align\t$align\n"); + push(@out,"$func:\n"); + $tmp=<<"EOF"; + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + +EOF + push(@out,$tmp); + $stack=20; + } + +sub main'function_begin_B + { + local($func,$extra)=@_; + + &main'external_label($func); + $func=$under.$func; + + local($tmp)=<<"EOF"; +.text +.globl $func +EOF + push(@out,$tmp); + if ($main'cpp) + { push(@out,"TYPE($func,\@function)\n"); } + elsif ($main'coff) + { $tmp=push(@out,".def\t$func;\t.scl\t2;\t.type\t32;\t.endef\n"); } + elsif ($main'aout and !$main'pic) + { } + else { push(@out,".type $func,\@function\n"); } + push(@out,".align\t$align\n"); + push(@out,"$func:\n"); + $stack=4; + } + +sub main'function_end + { + local($func)=@_; + + $func=$under.$func; + + local($tmp)=<<"EOF"; + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +${dot}L_${func}_end: +EOF + push(@out,$tmp); + + if ($main'cpp) + { push(@out,"SIZE($func,${dot}L_${func}_end-$func)\n"); } + elsif ($main'coff or $main'aout) + { } + else { push(@out,".size\t$func,${dot}L_${func}_end-$func\n"); } + push(@out,".ident \"$func\"\n"); + $stack=0; + %label=(); + } + +sub main'function_end_A + { + local($func)=@_; + + local($tmp)=<<"EOF"; + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +EOF + push(@out,$tmp); + } + +sub main'function_end_B + { + local($func)=@_; + + $func=$under.$func; + + push(@out,"${dot}L_${func}_end:\n"); + if ($main'cpp) + { push(@out,"SIZE($func,${dot}L_${func}_end-$func)\n"); } + elsif ($main'coff or $main'aout) + { } + else { push(@out,".size\t$func,${dot}L_${func}_end-$func\n"); } + push(@out,".ident \"$func\"\n"); + $stack=0; + %label=(); + } + +sub main'wparam + { + local($num)=@_; + + return(&main'DWP($stack+$num*4,"esp","",0)); + } + +sub main'stack_push + { + local($num)=@_; + $stack+=$num*4; + &main'sub("esp",$num*4); + } + +sub main'stack_pop + { + local($num)=@_; + $stack-=$num*4; + &main'add("esp",$num*4); + } + +sub main'swtmp + { + return(&main'DWP($_[0]*4,"esp","",0)); + } + +# Should use swtmp, which is above esp. Linix can trash the stack above esp +#sub main'wtmp +# { +# local($num)=@_; +# +# return(&main'DWP(-($num+1)*4,"esp","",0)); +# } + +sub main'comment + { + if (!defined($com_start) or $main'elf) + { # Regarding $main'elf above... + # GNU and SVR4 as'es use different comment delimiters, + push(@out,"\n"); # so we just skip ELF comments... + return; + } + foreach (@_) + { + if (/^\s*$/) + { push(@out,"\n"); } + else + { push(@out,"\t$com_start $_ $com_end\n"); } + } + } + +sub main'public_label + { + $label{$_[0]}="${under}${_[0]}" if (!defined($label{$_[0]})); + push(@out,".globl\t$label{$_[0]}\n"); + } + +sub main'label + { + if (!defined($label{$_[0]})) + { + $label{$_[0]}="${dot}${label}${_[0]}"; + $label++; + } + return($label{$_[0]}); + } + +sub main'set_label + { + if (!defined($label{$_[0]})) + { + $label{$_[0]}="${dot}${label}${_[0]}"; + $label++; + } + if ($_[1]!=0) + { + if ($_[1]>1) { main'align($_[1]); } + else { push(@out,".align $align\n"); } + } + push(@out,"$label{$_[0]}:\n"); + } + +sub main'file_end + { + # try to detect if SSE2 or MMX extensions were used on ELF platform... + if ($main'elf && grep {/\b%[x]*mm[0-7]\b|OPENSSL_ia32cap_P\b/i} @out) { + local($tmp); + + push (@out,"\n.section\t.bss\n"); + push (@out,".comm\t${under}OPENSSL_ia32cap_P,4,4\n"); + + return; + } + + if ($const ne "") + { + push(@out,".section .rodata\n"); + push(@out,$const); + $const=""; + } + } + +sub main'data_byte + { + push(@out,"\t.byte\t".join(',',@_)."\n"); + } + +sub main'data_word + { + push(@out,"\t.long\t".join(',',@_)."\n"); + } + +sub main'align + { + my $val=$_[0],$p2,$i; + if ($main'aout) { + for ($p2=0;$val!=0;$val>>=1) { $p2++; } + $val=$p2-1; + $val.=",0x90"; + } + push(@out,".align\t$val\n"); + } + +# debug output functions: puts, putx, printf + +sub main'puts + { + &pushvars(); + &main'push('$Lstring' . ++$constl); + &main'call('puts'); + $stack-=4; + &main'add("esp",4); + &popvars(); + + $const .= "Lstring$constl:\n\t.string \"@_[0]\"\n"; + } + +sub main'putx + { + &pushvars(); + &main'push($_[0]); + &main'push('$Lstring' . ++$constl); + &main'call('printf'); + &main'add("esp",8); + $stack-=8; + &popvars(); + + $const .= "Lstring$constl:\n\t.string \"\%X\"\n"; + } + +sub main'printf + { + $ostack = $stack; + &pushvars(); + for ($i = @_ - 1; $i >= 0; $i--) + { + if ($i == 0) # change this to support %s format strings + { + &main'push('$Lstring' . ++$constl); + $const .= "Lstring$constl:\n\t.string \"@_[$i]\"\n"; + } + else + { + if ($_[$i] =~ /([0-9]*)\(%esp\)/) + { + &main'push(($1 + $stack - $ostack) . '(%esp)'); + } + else + { + &main'push($_[$i]); + } + } + } + &main'call('printf'); + $stack-=4*@_; + &main'add("esp",4*@_); + &popvars(); + } + +sub pushvars + { + &main'pushf(); + &main'push("edx"); + &main'push("ecx"); + &main'push("eax"); + } + +sub popvars + { + &main'pop("eax"); + &main'pop("ecx"); + &main'pop("edx"); + &main'popf(); + } + +sub main'picmeup + { + local($dst,$sym)=@_; + if ($main'cpp) + { + local($tmp)=<<___; +#if (defined(ELF) || defined(SOL)) && defined(PIC) + call 1f +1: popl $regs{$dst} + addl \$_GLOBAL_OFFSET_TABLE_+[.-1b],$regs{$dst} + movl $sym\@GOT($regs{$dst}),$regs{$dst} +#else + leal $sym,$regs{$dst} +#endif +___ + push(@out,$tmp); + } + elsif ($main'pic && ($main'elf || $main'aout)) + { + &main'call(&main'label("PIC_me_up")); + &main'set_label("PIC_me_up"); + &main'blindpop($dst); + &main'add($dst,"\$${under}_GLOBAL_OFFSET_TABLE_+[.-". + &main'label("PIC_me_up") . "]"); + &main'mov($dst,&main'DWP($under.$sym."\@GOT",$dst)); + } + else + { + &main'lea($dst,&main'DWP($sym)); + } + } + +sub main'blindpop { &out1("popl",@_); } + +sub main'initseg + { + local($f)=@_; + local($tmp); + if ($main'elf) + { + $tmp=<<___; +.section .init + call $under$f + jmp .Linitalign +.align $align +.Linitalign: +___ + } + elsif ($main'coff) + { + $tmp=<<___; # applies to both Cygwin and Mingw +.section .ctors +.long $under$f +___ + } + elsif ($main'aout) + { + local($ctor)="${under}_GLOBAL_\$I\$$f"; + $tmp=".text\n"; + $tmp.=".type $ctor,\@function\n" if ($main'pic); + $tmp.=<<___; # OpenBSD way... +.globl $ctor +.align 2 +$ctor: + jmp $under$f +___ + } + push(@out,$tmp) if ($tmp); + } + +1; |