diff options
Diffstat (limited to 'openssl/crypto/cast/asm')
-rw-r--r-- | openssl/crypto/cast/asm/cast-586.pl | 176 | ||||
-rw-r--r-- | openssl/crypto/cast/asm/readme | 7 |
2 files changed, 183 insertions, 0 deletions
diff --git a/openssl/crypto/cast/asm/cast-586.pl b/openssl/crypto/cast/asm/cast-586.pl new file mode 100644 index 000000000..6be0bfe57 --- /dev/null +++ b/openssl/crypto/cast/asm/cast-586.pl @@ -0,0 +1,176 @@ +#!/usr/local/bin/perl + +# define for pentium pro friendly version +$ppro=1; + +push(@INC,"perlasm","../../perlasm"); +require "x86asm.pl"; +require "cbc.pl"; + +&asm_init($ARGV[0],"cast-586.pl",$ARGV[$#ARGV] eq "386"); + +$CAST_ROUNDS=16; +$L="edi"; +$R="esi"; +$K="ebp"; +$tmp1="ecx"; +$tmp2="ebx"; +$tmp3="eax"; +$tmp4="edx"; +$S1="CAST_S_table0"; +$S2="CAST_S_table1"; +$S3="CAST_S_table2"; +$S4="CAST_S_table3"; + +@F1=("add","xor","sub"); +@F2=("xor","sub","add"); +@F3=("sub","add","xor"); + +&CAST_encrypt("CAST_encrypt",1); +&CAST_encrypt("CAST_decrypt",0); +&cbc("CAST_cbc_encrypt","CAST_encrypt","CAST_decrypt",1,4,5,3,-1,-1); + +&asm_finish(); + +sub CAST_encrypt { + local($name,$enc)=@_; + + local($win_ex)=<<"EOF"; +EXTERN _CAST_S_table0:DWORD +EXTERN _CAST_S_table1:DWORD +EXTERN _CAST_S_table2:DWORD +EXTERN _CAST_S_table3:DWORD +EOF + &main::external_label( + "CAST_S_table0", + "CAST_S_table1", + "CAST_S_table2", + "CAST_S_table3", + ); + + &function_begin_B($name,$win_ex); + + &comment(""); + + &push("ebp"); + &push("ebx"); + &mov($tmp2,&wparam(0)); + &mov($K,&wparam(1)); + &push("esi"); + &push("edi"); + + &comment("Load the 2 words"); + &mov($L,&DWP(0,$tmp2,"",0)); + &mov($R,&DWP(4,$tmp2,"",0)); + + &comment('Get short key flag'); + &mov($tmp3,&DWP(128,$K,"",0)); + if($enc) { + &push($tmp3); + } else { + &or($tmp3,$tmp3); + &jnz(&label('cast_dec_skip')); + } + + &xor($tmp3, $tmp3); + + # encrypting part + + if ($enc) { + &E_CAST( 0,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 1,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 2,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 3,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 4,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 5,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 6,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 7,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 8,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 9,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST(10,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST(11,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); + &comment('test short key flag'); + &pop($tmp4); + &or($tmp4,$tmp4); + &jnz(&label('cast_enc_done')); + &E_CAST(12,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST(13,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST(14,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST(15,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + } else { + &E_CAST(15,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST(14,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST(13,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST(12,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + &set_label('cast_dec_skip'); + &E_CAST(11,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST(10,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 9,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 8,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 7,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 6,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 5,$S,$L,$R,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 4,$S,$R,$L,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 3,$S,$L,$R,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 2,$S,$R,$L,$K,@F3,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 1,$S,$L,$R,$K,@F2,$tmp1,$tmp2,$tmp3,$tmp4); + &E_CAST( 0,$S,$R,$L,$K,@F1,$tmp1,$tmp2,$tmp3,$tmp4); + } + + &set_label('cast_enc_done') if $enc; +# Why the nop? - Ben 17/1/99 + &nop(); + &mov($tmp3,&wparam(0)); + &mov(&DWP(4,$tmp3,"",0),$L); + &mov(&DWP(0,$tmp3,"",0),$R); + &function_end($name); +} + +sub E_CAST { + local($i,$S,$L,$R,$K,$OP1,$OP2,$OP3,$tmp1,$tmp2,$tmp3,$tmp4)=@_; + # Ri needs to have 16 pre added. + + &comment("round $i"); + &mov( $tmp4, &DWP($i*8,$K,"",1)); + + &mov( $tmp1, &DWP($i*8+4,$K,"",1)); + &$OP1( $tmp4, $R); + + &rotl( $tmp4, &LB($tmp1)); + + if ($ppro) { + &mov( $tmp2, $tmp4); # B + &xor( $tmp1, $tmp1); + + &movb( &LB($tmp1), &HB($tmp4)); # A + &and( $tmp2, 0xff); + + &shr( $tmp4, 16); # + &xor( $tmp3, $tmp3); + } else { + &mov( $tmp2, $tmp4); # B + &movb( &LB($tmp1), &HB($tmp4)); # A # BAD BAD BAD + + &shr( $tmp4, 16); # + &and( $tmp2, 0xff); + } + + &movb( &LB($tmp3), &HB($tmp4)); # C # BAD BAD BAD + &and( $tmp4, 0xff); # D + + &mov( $tmp1, &DWP($S1,"",$tmp1,4)); + &mov( $tmp2, &DWP($S2,"",$tmp2,4)); + + &$OP2( $tmp1, $tmp2); + &mov( $tmp2, &DWP($S3,"",$tmp3,4)); + + &$OP3( $tmp1, $tmp2); + &mov( $tmp2, &DWP($S4,"",$tmp4,4)); + + &$OP1( $tmp1, $tmp2); + # XXX + + &xor( $L, $tmp1); + # XXX +} + diff --git a/openssl/crypto/cast/asm/readme b/openssl/crypto/cast/asm/readme new file mode 100644 index 000000000..fbcd76289 --- /dev/null +++ b/openssl/crypto/cast/asm/readme @@ -0,0 +1,7 @@ +There is a ppro flag in cast-586 which turns on/off +generation of pentium pro/II friendly code + +This flag makes the inner loop one cycle longer, but generates +code that runs %30 faster on the pentium pro/II, while only %7 slower +on the pentium. By default, this flag is on. + |