diff options
Diffstat (limited to 'openssl/crypto/sha/asm/sha1-parisc.pl')
-rw-r--r-- | openssl/crypto/sha/asm/sha1-parisc.pl | 259 |
1 files changed, 259 insertions, 0 deletions
diff --git a/openssl/crypto/sha/asm/sha1-parisc.pl b/openssl/crypto/sha/asm/sha1-parisc.pl new file mode 100644 index 000000000..6d7bf495b --- /dev/null +++ b/openssl/crypto/sha/asm/sha1-parisc.pl @@ -0,0 +1,259 @@ +#!/usr/bin/env perl + +# ==================================================================== +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# SHA1 block procedure for PA-RISC. + +# June 2009. +# +# On PA-7100LC performance is >30% better than gcc 3.2 generated code +# for aligned input and >50% better for unaligned. Compared to vendor +# compiler on PA-8600 it's almost 60% faster in 64-bit build and just +# few percent faster in 32-bit one (this for aligned input, data for +# unaligned input is not available). +# +# Special thanks to polarhome.com for providing HP-UX account. + +$flavour = shift; +$output = shift; +open STDOUT,">$output"; + +if ($flavour =~ /64/) { + $LEVEL ="2.0W"; + $SIZE_T =8; + $FRAME_MARKER =80; + $SAVED_RP =16; + $PUSH ="std"; + $PUSHMA ="std,ma"; + $POP ="ldd"; + $POPMB ="ldd,mb"; +} else { + $LEVEL ="1.0"; + $SIZE_T =4; + $FRAME_MARKER =48; + $SAVED_RP =20; + $PUSH ="stw"; + $PUSHMA ="stwm"; + $POP ="ldw"; + $POPMB ="ldwm"; +} + +$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker + # [+ argument transfer] +$ctx="%r26"; # arg0 +$inp="%r25"; # arg1 +$num="%r24"; # arg2 + +$t0="%r28"; +$t1="%r29"; +$K="%r31"; + +@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8", + "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0); + +@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23"); + +sub BODY_00_19 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___ if ($i<15); + addl $K,$e,$e ; $i + shd $a,$a,27,$t1 + addl @X[$i],$e,$e + and $c,$b,$t0 + addl $t1,$e,$e + andcm $d,$b,$t1 + shd $b,$b,2,$b + or $t1,$t0,$t0 + addl $t0,$e,$e +___ +$code.=<<___ if ($i>=15); # with forward Xupdate + addl $K,$e,$e ; $i + shd $a,$a,27,$t1 + xor @X[($j+2)%16],@X[$j%16],@X[$j%16] + addl @X[$i%16],$e,$e + and $c,$b,$t0 + xor @X[($j+8)%16],@X[$j%16],@X[$j%16] + addl $t1,$e,$e + andcm $d,$b,$t1 + shd $b,$b,2,$b + or $t1,$t0,$t0 + xor @X[($j+13)%16],@X[$j%16],@X[$j%16] + add $t0,$e,$e + shd @X[$j%16],@X[$j%16],31,@X[$j%16] +___ +} + +sub BODY_20_39 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___ if ($i<79); + xor @X[($j+2)%16],@X[$j%16],@X[$j%16] ; $i + addl $K,$e,$e + shd $a,$a,27,$t1 + xor @X[($j+8)%16],@X[$j%16],@X[$j%16] + addl @X[$i%16],$e,$e + xor $b,$c,$t0 + xor @X[($j+13)%16],@X[$j%16],@X[$j%16] + addl $t1,$e,$e + shd $b,$b,2,$b + xor $d,$t0,$t0 + shd @X[$j%16],@X[$j%16],31,@X[$j%16] + addl $t0,$e,$e +___ +$code.=<<___ if ($i==79); # with context load + ldw 0($ctx),@X[0] ; $i + addl $K,$e,$e + shd $a,$a,27,$t1 + ldw 4($ctx),@X[1] + addl @X[$i%16],$e,$e + xor $b,$c,$t0 + ldw 8($ctx),@X[2] + addl $t1,$e,$e + shd $b,$b,2,$b + xor $d,$t0,$t0 + ldw 12($ctx),@X[3] + addl $t0,$e,$e + ldw 16($ctx),@X[4] +___ +} + +sub BODY_40_59 { +my ($i,$a,$b,$c,$d,$e)=@_; +my $j=$i+1; +$code.=<<___; + shd $a,$a,27,$t1 ; $i + addl $K,$e,$e + xor @X[($j+2)%16],@X[$j%16],@X[$j%16] + xor $d,$c,$t0 + addl @X[$i%16],$e,$e + xor @X[($j+8)%16],@X[$j%16],@X[$j%16] + and $b,$t0,$t0 + addl $t1,$e,$e + shd $b,$b,2,$b + xor @X[($j+13)%16],@X[$j%16],@X[$j%16] + addl $t0,$e,$e + and $d,$c,$t1 + shd @X[$j%16],@X[$j%16],31,@X[$j%16] + addl $t1,$e,$e +___ +} + +$code=<<___; + .LEVEL $LEVEL + .SPACE \$TEXT\$ + .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY + + .EXPORT sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR +sha1_block_data_order + .PROC + .CALLINFO FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16 + .ENTRY + $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue + $PUSHMA %r3,$FRAME(%sp) + $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp) + $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp) + $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp) + $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp) + $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp) + $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp) + $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp) + $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp) + $PUSH %r12,`-$FRAME+9*$SIZE_T`(%sp) + $PUSH %r13,`-$FRAME+10*$SIZE_T`(%sp) + $PUSH %r14,`-$FRAME+11*$SIZE_T`(%sp) + $PUSH %r15,`-$FRAME+12*$SIZE_T`(%sp) + $PUSH %r16,`-$FRAME+13*$SIZE_T`(%sp) + + ldw 0($ctx),$A + ldw 4($ctx),$B + ldw 8($ctx),$C + ldw 12($ctx),$D + ldw 16($ctx),$E + + extru $inp,31,2,$t0 ; t0=inp&3; + sh3addl $t0,%r0,$t0 ; t0*=8; + subi 32,$t0,$t0 ; t0=32-t0; + mtctl $t0,%cr11 ; %sar=t0; + +L\$oop + ldi 3,$t0 + andcm $inp,$t0,$t0 ; 64-bit neutral +___ + for ($i=0;$i<15;$i++) { # load input block + $code.="\tldw `4*$i`($t0),@X[$i]\n"; } +$code.=<<___; + cmpb,*= $inp,$t0,L\$aligned + ldw 60($t0),@X[15] + ldw 64($t0),@X[16] +___ + for ($i=0;$i<16;$i++) { # align input + $code.="\tvshd @X[$i],@X[$i+1],@X[$i]\n"; } +$code.=<<___; +L\$aligned + ldil L'0x5a827000,$K ; K_00_19 + ldo 0x999($K),$K +___ +for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + ldil L'0x6ed9e000,$K ; K_20_39 + ldo 0xba1($K),$K +___ + +for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + ldil L'0x8f1bb000,$K ; K_40_59 + ldo 0xcdc($K),$K +___ + +for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } +$code.=<<___; + ldil L'0xca62c000,$K ; K_60_79 + ldo 0x1d6($K),$K +___ +for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } + +$code.=<<___; + addl @X[0],$A,$A + addl @X[1],$B,$B + addl @X[2],$C,$C + addl @X[3],$D,$D + addl @X[4],$E,$E + stw $A,0($ctx) + stw $B,4($ctx) + stw $C,8($ctx) + stw $D,12($ctx) + stw $E,16($ctx) + addib,*<> -1,$num,L\$oop + ldo 64($inp),$inp + + $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue + $POP `-$FRAME+1*$SIZE_T`(%sp),%r4 + $POP `-$FRAME+2*$SIZE_T`(%sp),%r5 + $POP `-$FRAME+3*$SIZE_T`(%sp),%r6 + $POP `-$FRAME+4*$SIZE_T`(%sp),%r7 + $POP `-$FRAME+5*$SIZE_T`(%sp),%r8 + $POP `-$FRAME+6*$SIZE_T`(%sp),%r9 + $POP `-$FRAME+7*$SIZE_T`(%sp),%r10 + $POP `-$FRAME+8*$SIZE_T`(%sp),%r11 + $POP `-$FRAME+9*$SIZE_T`(%sp),%r12 + $POP `-$FRAME+10*$SIZE_T`(%sp),%r13 + $POP `-$FRAME+11*$SIZE_T`(%sp),%r14 + $POP `-$FRAME+12*$SIZE_T`(%sp),%r15 + $POP `-$FRAME+13*$SIZE_T`(%sp),%r16 + bv (%r2) + .EXIT + $POPMB -$FRAME(%sp),%r3 + .PROCEND + .STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>" +___ + +$code =~ s/\`([^\`]*)\`/eval $1/gem; +$code =~ s/,\*/,/gm if ($SIZE_T==4); +print $code; +close STDOUT; |