diff options
Diffstat (limited to 'openssl/crypto/bn/asm')
-rw-r--r-- | openssl/crypto/bn/asm/mips-mont.pl | 2 | ||||
-rw-r--r-- | openssl/crypto/bn/asm/mips.pl | 44 | ||||
-rw-r--r-- | openssl/crypto/bn/asm/parisc-mont.pl | 4 | ||||
-rw-r--r-- | openssl/crypto/bn/asm/x86_64-gf2m.pl | 3 | ||||
-rw-r--r-- | openssl/crypto/bn/asm/x86_64-mont5.pl | 4 |
5 files changed, 29 insertions, 28 deletions
diff --git a/openssl/crypto/bn/asm/mips-mont.pl b/openssl/crypto/bn/asm/mips-mont.pl index b944a12b8..caae04ed3 100644 --- a/openssl/crypto/bn/asm/mips-mont.pl +++ b/openssl/crypto/bn/asm/mips-mont.pl @@ -133,7 +133,7 @@ $code.=<<___; bnez $at,1f li $t0,0 slt $at,$num,17 # on in-order CPU - bnezl $at,bn_mul_mont_internal + bnez $at,bn_mul_mont_internal nop 1: jr $ra li $a0,0 diff --git a/openssl/crypto/bn/asm/mips.pl b/openssl/crypto/bn/asm/mips.pl index 38b51645f..d2f3ef7bb 100644 --- a/openssl/crypto/bn/asm/mips.pl +++ b/openssl/crypto/bn/asm/mips.pl @@ -140,10 +140,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $ta0,$a2,$minus4 - $LD $t0,0($a1) beqz $ta0,.L_bn_mul_add_words_tail .L_bn_mul_add_words_loop: + $LD $t0,0($a1) $MULTU $t0,$a3 $LD $t1,0($a0) $LD $t2,$BNSZ($a1) @@ -200,10 +200,9 @@ $code.=<<___; $ADDU $v0,$ta2 sltu $at,$ta3,$at $ST $ta3,-$BNSZ($a0) - $ADDU $v0,$at .set noreorder - bgtzl $ta0,.L_bn_mul_add_words_loop - $LD $t0,0($a1) + bgtz $ta0,.L_bn_mul_add_words_loop + $ADDU $v0,$at beqz $a2,.L_bn_mul_add_words_return nop @@ -300,10 +299,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $ta0,$a2,$minus4 - $LD $t0,0($a1) beqz $ta0,.L_bn_mul_words_tail .L_bn_mul_words_loop: + $LD $t0,0($a1) $MULTU $t0,$a3 $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) @@ -341,10 +340,9 @@ $code.=<<___; $ADDU $v0,$at sltu $ta3,$v0,$at $ST $v0,-$BNSZ($a0) - $ADDU $v0,$ta3,$ta2 .set noreorder - bgtzl $ta0,.L_bn_mul_words_loop - $LD $t0,0($a1) + bgtz $ta0,.L_bn_mul_words_loop + $ADDU $v0,$ta3,$ta2 beqz $a2,.L_bn_mul_words_return nop @@ -429,10 +427,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $ta0,$a2,$minus4 - $LD $t0,0($a1) beqz $ta0,.L_bn_sqr_words_tail .L_bn_sqr_words_loop: + $LD $t0,0($a1) $MULTU $t0,$t0 $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) @@ -463,11 +461,10 @@ $code.=<<___; mflo $ta3 mfhi $ta2 $ST $ta3,-2*$BNSZ($a0) - $ST $ta2,-$BNSZ($a0) .set noreorder - bgtzl $ta0,.L_bn_sqr_words_loop - $LD $t0,0($a1) + bgtz $ta0,.L_bn_sqr_words_loop + $ST $ta2,-$BNSZ($a0) beqz $a2,.L_bn_sqr_words_return nop @@ -547,10 +544,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $at,$a3,$minus4 - $LD $t0,0($a1) beqz $at,.L_bn_add_words_tail .L_bn_add_words_loop: + $LD $t0,0($a1) $LD $ta0,0($a2) subu $a3,4 $LD $t1,$BNSZ($a1) @@ -589,11 +586,10 @@ $code.=<<___; $ADDU $t3,$ta3,$v0 sltu $v0,$t3,$ta3 $ST $t3,-$BNSZ($a0) - $ADDU $v0,$t9 .set noreorder - bgtzl $at,.L_bn_add_words_loop - $LD $t0,0($a1) + bgtz $at,.L_bn_add_words_loop + $ADDU $v0,$t9 beqz $a3,.L_bn_add_words_return nop @@ -679,10 +675,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $at,$a3,$minus4 - $LD $t0,0($a1) beqz $at,.L_bn_sub_words_tail .L_bn_sub_words_loop: + $LD $t0,0($a1) $LD $ta0,0($a2) subu $a3,4 $LD $t1,$BNSZ($a1) @@ -722,11 +718,10 @@ $code.=<<___; $SUBU $t3,$ta3,$v0 sgtu $v0,$t3,$ta3 $ST $t3,-$BNSZ($a0) - $ADDU $v0,$t9 .set noreorder - bgtzl $at,.L_bn_sub_words_loop - $LD $t0,0($a1) + bgtz $at,.L_bn_sub_words_loop + $ADDU $v0,$t9 beqz $a3,.L_bn_sub_words_return nop @@ -840,8 +835,9 @@ $code.=<<___; sltu $ta0,$a1,$a2 or $t8,$ta0 .set noreorder - beqzl $at,.L_bn_div_3_words_inner_loop + beqz $at,.L_bn_div_3_words_inner_loop $SUBU $v0,1 + $ADDU $v0,1 .set reorder .L_bn_div_3_words_inner_loop_done: .set noreorder @@ -902,7 +898,8 @@ $code.=<<___; and $t2,$a0 $SRL $at,$a1,$t1 .set noreorder - bnezl $t2,.+8 + beqz $t2,.+12 + nop break 6 # signal overflow .set reorder $SLL $a0,$t9 @@ -917,7 +914,8 @@ $code.=<<___; $SRL $DH,$a2,4*$BNSZ # bits sgeu $at,$a0,$a2 .set noreorder - bnezl $at,.+8 + beqz $at,.+12 + nop $SUBU $a0,$a2 .set reorder diff --git a/openssl/crypto/bn/asm/parisc-mont.pl b/openssl/crypto/bn/asm/parisc-mont.pl index 4a766a87f..c02ef6f01 100644 --- a/openssl/crypto/bn/asm/parisc-mont.pl +++ b/openssl/crypto/bn/asm/parisc-mont.pl @@ -40,7 +40,7 @@ # of arithmetic operations, most notably multiplications. It requires # more memory references, most notably to tp[num], but this doesn't # seem to exhaust memory port capacity. And indeed, dedicated PA-RISC -# 2.0 code path, provides virtually same performance as pa-risc2[W].s: +# 2.0 code path provides virtually same performance as pa-risc2[W].s: # it's ~10% better for shortest key length and ~10% worse for longest # one. # @@ -988,6 +988,8 @@ foreach (split("\n",$code)) { # assemble 2.0 instructions in 32-bit mode... s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4); + s/\bbv\b/bve/gm if ($SIZE_T==8); + print $_,"\n"; } close STDOUT; diff --git a/openssl/crypto/bn/asm/x86_64-gf2m.pl b/openssl/crypto/bn/asm/x86_64-gf2m.pl index a30d4ef02..226c66c35 100644 --- a/openssl/crypto/bn/asm/x86_64-gf2m.pl +++ b/openssl/crypto/bn/asm/x86_64-gf2m.pl @@ -31,7 +31,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open STDOUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; ($lo,$hi)=("%rax","%rdx"); $a=$lo; ($i0,$i1)=("%rsi","%rdi"); diff --git a/openssl/crypto/bn/asm/x86_64-mont5.pl b/openssl/crypto/bn/asm/x86_64-mont5.pl index 8f8dc5a59..dae0fe245 100644 --- a/openssl/crypto/bn/asm/x86_64-mont5.pl +++ b/openssl/crypto/bn/asm/x86_64-mont5.pl @@ -901,8 +901,8 @@ $code.=<<___; jnz .Lgather ___ $code.=<<___ if ($win64); - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) + movaps (%rsp),%xmm6 + movaps 0x10(%rsp),%xmm7 lea 0x28(%rsp),%rsp ___ $code.=<<___; |