diff options
Diffstat (limited to 'openssl/crypto/bn')
-rw-r--r-- | openssl/crypto/bn/Makefile | 4 | ||||
-rw-r--r-- | openssl/crypto/bn/asm/mips-mont.pl | 2 | ||||
-rw-r--r-- | openssl/crypto/bn/asm/mips.pl | 44 | ||||
-rw-r--r-- | openssl/crypto/bn/asm/parisc-mont.pl | 4 | ||||
-rw-r--r-- | openssl/crypto/bn/asm/x86_64-gf2m.pl | 3 | ||||
-rw-r--r-- | openssl/crypto/bn/asm/x86_64-mont5.pl | 4 | ||||
-rw-r--r-- | openssl/crypto/bn/bn_nist.c | 55 |
7 files changed, 63 insertions, 53 deletions
diff --git a/openssl/crypto/bn/Makefile b/openssl/crypto/bn/Makefile index 672773454..6dd136be5 100644 --- a/openssl/crypto/bn/Makefile +++ b/openssl/crypto/bn/Makefile @@ -125,7 +125,9 @@ ppc-mont.s: asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@ ppc64-mont.s: asm/ppc64-mont.pl;$(PERL) asm/ppc64-mont.pl $(PERLASM_SCHEME) $@ alpha-mont.s: asm/alpha-mont.pl - $(PERL) $< | $(CC) -E - | tee $@ > /dev/null + (preproc=/tmp/$$$$.$@; trap "rm $$preproc" INT; \ + $(PERL) asm/alpha-mont.pl > $$preproc && \ + $(CC) -E $$preproc > $@ && rm $$preproc) # GNU make "catch all" %-mont.s: asm/%-mont.pl; $(PERL) $< $(PERLASM_SCHEME) $@ diff --git a/openssl/crypto/bn/asm/mips-mont.pl b/openssl/crypto/bn/asm/mips-mont.pl index b944a12b8..caae04ed3 100644 --- a/openssl/crypto/bn/asm/mips-mont.pl +++ b/openssl/crypto/bn/asm/mips-mont.pl @@ -133,7 +133,7 @@ $code.=<<___; bnez $at,1f li $t0,0 slt $at,$num,17 # on in-order CPU - bnezl $at,bn_mul_mont_internal + bnez $at,bn_mul_mont_internal nop 1: jr $ra li $a0,0 diff --git a/openssl/crypto/bn/asm/mips.pl b/openssl/crypto/bn/asm/mips.pl index 38b51645f..d2f3ef7bb 100644 --- a/openssl/crypto/bn/asm/mips.pl +++ b/openssl/crypto/bn/asm/mips.pl @@ -140,10 +140,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $ta0,$a2,$minus4 - $LD $t0,0($a1) beqz $ta0,.L_bn_mul_add_words_tail .L_bn_mul_add_words_loop: + $LD $t0,0($a1) $MULTU $t0,$a3 $LD $t1,0($a0) $LD $t2,$BNSZ($a1) @@ -200,10 +200,9 @@ $code.=<<___; $ADDU $v0,$ta2 sltu $at,$ta3,$at $ST $ta3,-$BNSZ($a0) - $ADDU $v0,$at .set noreorder - bgtzl $ta0,.L_bn_mul_add_words_loop - $LD $t0,0($a1) + bgtz $ta0,.L_bn_mul_add_words_loop + $ADDU $v0,$at beqz $a2,.L_bn_mul_add_words_return nop @@ -300,10 +299,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $ta0,$a2,$minus4 - $LD $t0,0($a1) beqz $ta0,.L_bn_mul_words_tail .L_bn_mul_words_loop: + $LD $t0,0($a1) $MULTU $t0,$a3 $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) @@ -341,10 +340,9 @@ $code.=<<___; $ADDU $v0,$at sltu $ta3,$v0,$at $ST $v0,-$BNSZ($a0) - $ADDU $v0,$ta3,$ta2 .set noreorder - bgtzl $ta0,.L_bn_mul_words_loop - $LD $t0,0($a1) + bgtz $ta0,.L_bn_mul_words_loop + $ADDU $v0,$ta3,$ta2 beqz $a2,.L_bn_mul_words_return nop @@ -429,10 +427,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $ta0,$a2,$minus4 - $LD $t0,0($a1) beqz $ta0,.L_bn_sqr_words_tail .L_bn_sqr_words_loop: + $LD $t0,0($a1) $MULTU $t0,$t0 $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) @@ -463,11 +461,10 @@ $code.=<<___; mflo $ta3 mfhi $ta2 $ST $ta3,-2*$BNSZ($a0) - $ST $ta2,-$BNSZ($a0) .set noreorder - bgtzl $ta0,.L_bn_sqr_words_loop - $LD $t0,0($a1) + bgtz $ta0,.L_bn_sqr_words_loop + $ST $ta2,-$BNSZ($a0) beqz $a2,.L_bn_sqr_words_return nop @@ -547,10 +544,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $at,$a3,$minus4 - $LD $t0,0($a1) beqz $at,.L_bn_add_words_tail .L_bn_add_words_loop: + $LD $t0,0($a1) $LD $ta0,0($a2) subu $a3,4 $LD $t1,$BNSZ($a1) @@ -589,11 +586,10 @@ $code.=<<___; $ADDU $t3,$ta3,$v0 sltu $v0,$t3,$ta3 $ST $t3,-$BNSZ($a0) - $ADDU $v0,$t9 .set noreorder - bgtzl $at,.L_bn_add_words_loop - $LD $t0,0($a1) + bgtz $at,.L_bn_add_words_loop + $ADDU $v0,$t9 beqz $a3,.L_bn_add_words_return nop @@ -679,10 +675,10 @@ $code.=<<___; .set reorder li $minus4,-4 and $at,$a3,$minus4 - $LD $t0,0($a1) beqz $at,.L_bn_sub_words_tail .L_bn_sub_words_loop: + $LD $t0,0($a1) $LD $ta0,0($a2) subu $a3,4 $LD $t1,$BNSZ($a1) @@ -722,11 +718,10 @@ $code.=<<___; $SUBU $t3,$ta3,$v0 sgtu $v0,$t3,$ta3 $ST $t3,-$BNSZ($a0) - $ADDU $v0,$t9 .set noreorder - bgtzl $at,.L_bn_sub_words_loop - $LD $t0,0($a1) + bgtz $at,.L_bn_sub_words_loop + $ADDU $v0,$t9 beqz $a3,.L_bn_sub_words_return nop @@ -840,8 +835,9 @@ $code.=<<___; sltu $ta0,$a1,$a2 or $t8,$ta0 .set noreorder - beqzl $at,.L_bn_div_3_words_inner_loop + beqz $at,.L_bn_div_3_words_inner_loop $SUBU $v0,1 + $ADDU $v0,1 .set reorder .L_bn_div_3_words_inner_loop_done: .set noreorder @@ -902,7 +898,8 @@ $code.=<<___; and $t2,$a0 $SRL $at,$a1,$t1 .set noreorder - bnezl $t2,.+8 + beqz $t2,.+12 + nop break 6 # signal overflow .set reorder $SLL $a0,$t9 @@ -917,7 +914,8 @@ $code.=<<___; $SRL $DH,$a2,4*$BNSZ # bits sgeu $at,$a0,$a2 .set noreorder - bnezl $at,.+8 + beqz $at,.+12 + nop $SUBU $a0,$a2 .set reorder diff --git a/openssl/crypto/bn/asm/parisc-mont.pl b/openssl/crypto/bn/asm/parisc-mont.pl index 4a766a87f..c02ef6f01 100644 --- a/openssl/crypto/bn/asm/parisc-mont.pl +++ b/openssl/crypto/bn/asm/parisc-mont.pl @@ -40,7 +40,7 @@ # of arithmetic operations, most notably multiplications. It requires # more memory references, most notably to tp[num], but this doesn't # seem to exhaust memory port capacity. And indeed, dedicated PA-RISC -# 2.0 code path, provides virtually same performance as pa-risc2[W].s: +# 2.0 code path provides virtually same performance as pa-risc2[W].s: # it's ~10% better for shortest key length and ~10% worse for longest # one. # @@ -988,6 +988,8 @@ foreach (split("\n",$code)) { # assemble 2.0 instructions in 32-bit mode... s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4); + s/\bbv\b/bve/gm if ($SIZE_T==8); + print $_,"\n"; } close STDOUT; diff --git a/openssl/crypto/bn/asm/x86_64-gf2m.pl b/openssl/crypto/bn/asm/x86_64-gf2m.pl index a30d4ef02..226c66c35 100644 --- a/openssl/crypto/bn/asm/x86_64-gf2m.pl +++ b/openssl/crypto/bn/asm/x86_64-gf2m.pl @@ -31,7 +31,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open STDOUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" $xlate $flavour $output"; +*STDOUT=*OUT; ($lo,$hi)=("%rax","%rdx"); $a=$lo; ($i0,$i1)=("%rsi","%rdi"); diff --git a/openssl/crypto/bn/asm/x86_64-mont5.pl b/openssl/crypto/bn/asm/x86_64-mont5.pl index 8f8dc5a59..dae0fe245 100644 --- a/openssl/crypto/bn/asm/x86_64-mont5.pl +++ b/openssl/crypto/bn/asm/x86_64-mont5.pl @@ -901,8 +901,8 @@ $code.=<<___; jnz .Lgather ___ $code.=<<___ if ($win64); - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) + movaps (%rsp),%xmm6 + movaps 0x10(%rsp),%xmm7 lea 0x28(%rsp),%rsp ___ $code.=<<___; diff --git a/openssl/crypto/bn/bn_nist.c b/openssl/crypto/bn/bn_nist.c index 43caee477..e22968d4a 100644 --- a/openssl/crypto/bn/bn_nist.c +++ b/openssl/crypto/bn/bn_nist.c @@ -286,26 +286,25 @@ const BIGNUM *BN_get0_nist_prime_521(void) } -static void nist_cp_bn_0(BN_ULONG *buf, BN_ULONG *a, int top, int max) +static void nist_cp_bn_0(BN_ULONG *dst, const BN_ULONG *src, int top, int max) { int i; - BN_ULONG *_tmp1 = (buf), *_tmp2 = (a); #ifdef BN_DEBUG OPENSSL_assert(top <= max); #endif - for (i = (top); i != 0; i--) - *_tmp1++ = *_tmp2++; - for (i = (max) - (top); i != 0; i--) - *_tmp1++ = (BN_ULONG) 0; + for (i = 0; i < top; i++) + dst[i] = src[i]; + for (; i < max; i++) + dst[i] = 0; } -static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top) +static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top) { int i; - BN_ULONG *_tmp1 = (buf), *_tmp2 = (a); - for (i = (top); i != 0; i--) - *_tmp1++ = *_tmp2++; + + for (i = 0; i < top; i++) + dst[i] = src[i]; } #if BN_BITS2 == 64 @@ -451,8 +450,9 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, */ mask = 0-(PTR_SIZE_INT)bn_sub_words(c_d,r_d,_nist_p_192[0],BN_NIST_192_TOP); mask &= 0-(PTR_SIZE_INT)carry; + res = c_d; res = (BN_ULONG *) - (((PTR_SIZE_INT)c_d&~mask) | ((PTR_SIZE_INT)r_d&mask)); + (((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask)); nist_cp_bn(r_d, res, BN_NIST_192_TOP); r->top = BN_NIST_192_TOP; bn_correct_top(r); @@ -479,8 +479,11 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, int top = a->top, i; int carry; BN_ULONG *r_d, *a_d = a->d; - BN_ULONG buf[BN_NIST_224_TOP], - c_d[BN_NIST_224_TOP], + union { + BN_ULONG bn[BN_NIST_224_TOP]; + unsigned int ui[BN_NIST_224_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)]; + } buf; + BN_ULONG c_d[BN_NIST_224_TOP], *res; PTR_SIZE_INT mask; union { bn_addsub_f f; PTR_SIZE_INT p; } u; @@ -519,18 +522,18 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, /* copy upper 256 bits of 448 bit number ... */ nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP); /* ... and right shift by 32 to obtain upper 224 bits */ - nist_set_224(buf, c_d, 14, 13, 12, 11, 10, 9, 8); + nist_set_224(buf.bn, c_d, 14, 13, 12, 11, 10, 9, 8); /* truncate lower part to 224 bits too */ r_d[BN_NIST_224_TOP-1] &= BN_MASK2l; #else - nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP); + nist_cp_bn_0(buf.bn, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP); #endif #if defined(NIST_INT64) && BN_BITS2!=64 { NIST_INT64 acc; /* accumulator */ unsigned int *rp=(unsigned int *)r_d; - const unsigned int *bp=(const unsigned int *)buf; + const unsigned int *bp=(const unsigned int *)buf.ui; acc = rp[0]; acc -= bp[7-7]; acc -= bp[11-7]; rp[0] = (unsigned int)acc; acc >>= 32; @@ -565,13 +568,13 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, { BN_ULONG t_d[BN_NIST_224_TOP]; - nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0); + nist_set_224(t_d, buf.bn, 10, 9, 8, 7, 0, 0, 0); carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP); - nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0); + nist_set_224(t_d, buf.bn, 0, 13, 12, 11, 0, 0, 0); carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP); - nist_set_224(t_d, buf, 13, 12, 11, 10, 9, 8, 7); + nist_set_224(t_d, buf.bn, 13, 12, 11, 10, 9, 8, 7); carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP); - nist_set_224(t_d, buf, 0, 0, 0, 0, 13, 12, 11); + nist_set_224(t_d, buf.bn, 0, 0, 0, 0, 13, 12, 11); carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP); #if BN_BITS2==64 @@ -606,7 +609,8 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, /* otherwise it's effectively same as in BN_nist_mod_192... */ mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_224[0],BN_NIST_224_TOP); mask &= 0-(PTR_SIZE_INT)carry; - res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) | + res = c_d; + res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask)); nist_cp_bn(r_d, res, BN_NIST_224_TOP); r->top = BN_NIST_224_TOP; @@ -805,7 +809,8 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_256[0],BN_NIST_256_TOP); mask &= 0-(PTR_SIZE_INT)carry; - res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) | + res = c_d; + res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask)); nist_cp_bn(r_d, res, BN_NIST_256_TOP); r->top = BN_NIST_256_TOP; @@ -1026,7 +1031,8 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_384[0],BN_NIST_384_TOP); mask &= 0-(PTR_SIZE_INT)carry; - res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) | + res = c_d; + res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask)); nist_cp_bn(r_d, res, BN_NIST_384_TOP); r->top = BN_NIST_384_TOP; @@ -1092,7 +1098,8 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field, bn_add_words(r_d,r_d,t_d,BN_NIST_521_TOP); mask = 0-(PTR_SIZE_INT)bn_sub_words(t_d,r_d,_nist_p_521,BN_NIST_521_TOP); - res = (BN_ULONG *)(((PTR_SIZE_INT)t_d&~mask) | + res = t_d; + res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask)); nist_cp_bn(r_d,res,BN_NIST_521_TOP); r->top = BN_NIST_521_TOP; |