Diffstat (limited to 'openssl/crypto/bn')
-rw-r--r--  openssl/crypto/bn/Makefile             |  4
-rw-r--r--  openssl/crypto/bn/asm/mips-mont.pl     |  2
-rw-r--r--  openssl/crypto/bn/asm/mips.pl          | 44
-rw-r--r--  openssl/crypto/bn/asm/parisc-mont.pl   |  4
-rw-r--r--  openssl/crypto/bn/asm/x86_64-gf2m.pl   |  3
-rw-r--r--  openssl/crypto/bn/asm/x86_64-mont5.pl  |  4
-rw-r--r--  openssl/crypto/bn/bn_nist.c            | 55
7 files changed, 63 insertions, 53 deletions
diff --git a/openssl/crypto/bn/Makefile b/openssl/crypto/bn/Makefile
index 672773454..6dd136be5 100644
--- a/openssl/crypto/bn/Makefile
+++ b/openssl/crypto/bn/Makefile
@@ -125,7 +125,9 @@ ppc-mont.s: asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@
ppc64-mont.s: asm/ppc64-mont.pl;$(PERL) asm/ppc64-mont.pl $(PERLASM_SCHEME) $@
alpha-mont.s: asm/alpha-mont.pl
- $(PERL) $< | $(CC) -E - | tee $@ > /dev/null
+ (preproc=/tmp/$$$$.$@; trap "rm $$preproc" INT; \
+ $(PERL) asm/alpha-mont.pl > $$preproc && \
+ $(CC) -E $$preproc > $@ && rm $$preproc)
# GNU make "catch all"
%-mont.s: asm/%-mont.pl; $(PERL) $< $(PERLASM_SCHEME) $@
diff --git a/openssl/crypto/bn/asm/mips-mont.pl b/openssl/crypto/bn/asm/mips-mont.pl
index b944a12b8..caae04ed3 100644
--- a/openssl/crypto/bn/asm/mips-mont.pl
+++ b/openssl/crypto/bn/asm/mips-mont.pl
@@ -133,7 +133,7 @@ $code.=<<___;
bnez $at,1f
li $t0,0
slt $at,$num,17 # on in-order CPU
- bnezl $at,bn_mul_mont_internal
+ bnez $at,bn_mul_mont_internal
nop
1: jr $ra
li $a0,0
diff --git a/openssl/crypto/bn/asm/mips.pl b/openssl/crypto/bn/asm/mips.pl
index 38b51645f..d2f3ef7bb 100644
--- a/openssl/crypto/bn/asm/mips.pl
+++ b/openssl/crypto/bn/asm/mips.pl
@@ -140,10 +140,10 @@ $code.=<<___;
.set reorder
li $minus4,-4
and $ta0,$a2,$minus4
- $LD $t0,0($a1)
beqz $ta0,.L_bn_mul_add_words_tail
.L_bn_mul_add_words_loop:
+ $LD $t0,0($a1)
$MULTU $t0,$a3
$LD $t1,0($a0)
$LD $t2,$BNSZ($a1)
@@ -200,10 +200,9 @@ $code.=<<___;
$ADDU $v0,$ta2
sltu $at,$ta3,$at
$ST $ta3,-$BNSZ($a0)
- $ADDU $v0,$at
.set noreorder
- bgtzl $ta0,.L_bn_mul_add_words_loop
- $LD $t0,0($a1)
+ bgtz $ta0,.L_bn_mul_add_words_loop
+ $ADDU $v0,$at
beqz $a2,.L_bn_mul_add_words_return
nop
@@ -300,10 +299,10 @@ $code.=<<___;
.set reorder
li $minus4,-4
and $ta0,$a2,$minus4
- $LD $t0,0($a1)
beqz $ta0,.L_bn_mul_words_tail
.L_bn_mul_words_loop:
+ $LD $t0,0($a1)
$MULTU $t0,$a3
$LD $t2,$BNSZ($a1)
$LD $ta0,2*$BNSZ($a1)
@@ -341,10 +340,9 @@ $code.=<<___;
$ADDU $v0,$at
sltu $ta3,$v0,$at
$ST $v0,-$BNSZ($a0)
- $ADDU $v0,$ta3,$ta2
.set noreorder
- bgtzl $ta0,.L_bn_mul_words_loop
- $LD $t0,0($a1)
+ bgtz $ta0,.L_bn_mul_words_loop
+ $ADDU $v0,$ta3,$ta2
beqz $a2,.L_bn_mul_words_return
nop
@@ -429,10 +427,10 @@ $code.=<<___;
.set reorder
li $minus4,-4
and $ta0,$a2,$minus4
- $LD $t0,0($a1)
beqz $ta0,.L_bn_sqr_words_tail
.L_bn_sqr_words_loop:
+ $LD $t0,0($a1)
$MULTU $t0,$t0
$LD $t2,$BNSZ($a1)
$LD $ta0,2*$BNSZ($a1)
@@ -463,11 +461,10 @@ $code.=<<___;
mflo $ta3
mfhi $ta2
$ST $ta3,-2*$BNSZ($a0)
- $ST $ta2,-$BNSZ($a0)
.set noreorder
- bgtzl $ta0,.L_bn_sqr_words_loop
- $LD $t0,0($a1)
+ bgtz $ta0,.L_bn_sqr_words_loop
+ $ST $ta2,-$BNSZ($a0)
beqz $a2,.L_bn_sqr_words_return
nop
@@ -547,10 +544,10 @@ $code.=<<___;
.set reorder
li $minus4,-4
and $at,$a3,$minus4
- $LD $t0,0($a1)
beqz $at,.L_bn_add_words_tail
.L_bn_add_words_loop:
+ $LD $t0,0($a1)
$LD $ta0,0($a2)
subu $a3,4
$LD $t1,$BNSZ($a1)
@@ -589,11 +586,10 @@ $code.=<<___;
$ADDU $t3,$ta3,$v0
sltu $v0,$t3,$ta3
$ST $t3,-$BNSZ($a0)
- $ADDU $v0,$t9
.set noreorder
- bgtzl $at,.L_bn_add_words_loop
- $LD $t0,0($a1)
+ bgtz $at,.L_bn_add_words_loop
+ $ADDU $v0,$t9
beqz $a3,.L_bn_add_words_return
nop
@@ -679,10 +675,10 @@ $code.=<<___;
.set reorder
li $minus4,-4
and $at,$a3,$minus4
- $LD $t0,0($a1)
beqz $at,.L_bn_sub_words_tail
.L_bn_sub_words_loop:
+ $LD $t0,0($a1)
$LD $ta0,0($a2)
subu $a3,4
$LD $t1,$BNSZ($a1)
@@ -722,11 +718,10 @@ $code.=<<___;
$SUBU $t3,$ta3,$v0
sgtu $v0,$t3,$ta3
$ST $t3,-$BNSZ($a0)
- $ADDU $v0,$t9
.set noreorder
- bgtzl $at,.L_bn_sub_words_loop
- $LD $t0,0($a1)
+ bgtz $at,.L_bn_sub_words_loop
+ $ADDU $v0,$t9
beqz $a3,.L_bn_sub_words_return
nop
@@ -840,8 +835,9 @@ $code.=<<___;
sltu $ta0,$a1,$a2
or $t8,$ta0
.set noreorder
- beqzl $at,.L_bn_div_3_words_inner_loop
+ beqz $at,.L_bn_div_3_words_inner_loop
$SUBU $v0,1
+ $ADDU $v0,1
.set reorder
.L_bn_div_3_words_inner_loop_done:
.set noreorder
@@ -902,7 +898,8 @@ $code.=<<___;
and $t2,$a0
$SRL $at,$a1,$t1
.set noreorder
- bnezl $t2,.+8
+ beqz $t2,.+12
+ nop
break 6 # signal overflow
.set reorder
$SLL $a0,$t9
@@ -917,7 +914,8 @@ $code.=<<___;
$SRL $DH,$a2,4*$BNSZ # bits
sgeu $at,$a0,$a2
.set noreorder
- bnezl $at,.+8
+ beqz $at,.+12
+ nop
$SUBU $a0,$a2
.set reorder
diff --git a/openssl/crypto/bn/asm/parisc-mont.pl b/openssl/crypto/bn/asm/parisc-mont.pl
index 4a766a87f..c02ef6f01 100644
--- a/openssl/crypto/bn/asm/parisc-mont.pl
+++ b/openssl/crypto/bn/asm/parisc-mont.pl
@@ -40,7 +40,7 @@
# of arithmetic operations, most notably multiplications. It requires
# more memory references, most notably to tp[num], but this doesn't
# seem to exhaust memory port capacity. And indeed, dedicated PA-RISC
-# 2.0 code path, provides virtually same performance as pa-risc2[W].s:
+# 2.0 code path provides virtually same performance as pa-risc2[W].s:
# it's ~10% better for shortest key length and ~10% worse for longest
# one.
#
@@ -988,6 +988,8 @@ foreach (split("\n",$code)) {
# assemble 2.0 instructions in 32-bit mode...
s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4);
+ s/\bbv\b/bve/gm if ($SIZE_T==8);
+
print $_,"\n";
}
close STDOUT;
diff --git a/openssl/crypto/bn/asm/x86_64-gf2m.pl b/openssl/crypto/bn/asm/x86_64-gf2m.pl
index a30d4ef02..226c66c35 100644
--- a/openssl/crypto/bn/asm/x86_64-gf2m.pl
+++ b/openssl/crypto/bn/asm/x86_64-gf2m.pl
@@ -31,7 +31,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open STDOUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" $xlate $flavour $output";
+*STDOUT=*OUT;
($lo,$hi)=("%rax","%rdx"); $a=$lo;
($i0,$i1)=("%rsi","%rdi");
diff --git a/openssl/crypto/bn/asm/x86_64-mont5.pl b/openssl/crypto/bn/asm/x86_64-mont5.pl
index 8f8dc5a59..dae0fe245 100644
--- a/openssl/crypto/bn/asm/x86_64-mont5.pl
+++ b/openssl/crypto/bn/asm/x86_64-mont5.pl
@@ -901,8 +901,8 @@ $code.=<<___;
jnz .Lgather
___
$code.=<<___ if ($win64);
- movaps %xmm6,(%rsp)
- movaps %xmm7,0x10(%rsp)
+ movaps (%rsp),%xmm6
+ movaps 0x10(%rsp),%xmm7
lea 0x28(%rsp),%rsp
___
$code.=<<___;
diff --git a/openssl/crypto/bn/bn_nist.c b/openssl/crypto/bn/bn_nist.c
index 43caee477..e22968d4a 100644
--- a/openssl/crypto/bn/bn_nist.c
+++ b/openssl/crypto/bn/bn_nist.c
@@ -286,26 +286,25 @@ const BIGNUM *BN_get0_nist_prime_521(void)
}
-static void nist_cp_bn_0(BN_ULONG *buf, BN_ULONG *a, int top, int max)
+static void nist_cp_bn_0(BN_ULONG *dst, const BN_ULONG *src, int top, int max)
{
int i;
- BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
#ifdef BN_DEBUG
OPENSSL_assert(top <= max);
#endif
- for (i = (top); i != 0; i--)
- *_tmp1++ = *_tmp2++;
- for (i = (max) - (top); i != 0; i--)
- *_tmp1++ = (BN_ULONG) 0;
+ for (i = 0; i < top; i++)
+ dst[i] = src[i];
+ for (; i < max; i++)
+ dst[i] = 0;
}
-static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
+static void nist_cp_bn(BN_ULONG *dst, const BN_ULONG *src, int top)
{
int i;
- BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
- for (i = (top); i != 0; i--)
- *_tmp1++ = *_tmp2++;
+
+ for (i = 0; i < top; i++)
+ dst[i] = src[i];
}
#if BN_BITS2 == 64
@@ -451,8 +450,9 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
*/
mask = 0-(PTR_SIZE_INT)bn_sub_words(c_d,r_d,_nist_p_192[0],BN_NIST_192_TOP);
mask &= 0-(PTR_SIZE_INT)carry;
+ res = c_d;
res = (BN_ULONG *)
- (((PTR_SIZE_INT)c_d&~mask) | ((PTR_SIZE_INT)r_d&mask));
+ (((PTR_SIZE_INT)res&~mask) | ((PTR_SIZE_INT)r_d&mask));
nist_cp_bn(r_d, res, BN_NIST_192_TOP);
r->top = BN_NIST_192_TOP;
bn_correct_top(r);
@@ -479,8 +479,11 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
int top = a->top, i;
int carry;
BN_ULONG *r_d, *a_d = a->d;
- BN_ULONG buf[BN_NIST_224_TOP],
- c_d[BN_NIST_224_TOP],
+ union {
+ BN_ULONG bn[BN_NIST_224_TOP];
+ unsigned int ui[BN_NIST_224_TOP*sizeof(BN_ULONG)/sizeof(unsigned int)];
+ } buf;
+ BN_ULONG c_d[BN_NIST_224_TOP],
*res;
PTR_SIZE_INT mask;
union { bn_addsub_f f; PTR_SIZE_INT p; } u;
@@ -519,18 +522,18 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
/* copy upper 256 bits of 448 bit number ... */
nist_cp_bn_0(c_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP);
/* ... and right shift by 32 to obtain upper 224 bits */
- nist_set_224(buf, c_d, 14, 13, 12, 11, 10, 9, 8);
+ nist_set_224(buf.bn, c_d, 14, 13, 12, 11, 10, 9, 8);
/* truncate lower part to 224 bits too */
r_d[BN_NIST_224_TOP-1] &= BN_MASK2l;
#else
- nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP);
+ nist_cp_bn_0(buf.bn, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP);
#endif
#if defined(NIST_INT64) && BN_BITS2!=64
{
NIST_INT64 acc; /* accumulator */
unsigned int *rp=(unsigned int *)r_d;
- const unsigned int *bp=(const unsigned int *)buf;
+ const unsigned int *bp=(const unsigned int *)buf.ui;
acc = rp[0]; acc -= bp[7-7];
acc -= bp[11-7]; rp[0] = (unsigned int)acc; acc >>= 32;
@@ -565,13 +568,13 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
{
BN_ULONG t_d[BN_NIST_224_TOP];
- nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0);
+ nist_set_224(t_d, buf.bn, 10, 9, 8, 7, 0, 0, 0);
carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
- nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0);
+ nist_set_224(t_d, buf.bn, 0, 13, 12, 11, 0, 0, 0);
carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
- nist_set_224(t_d, buf, 13, 12, 11, 10, 9, 8, 7);
+ nist_set_224(t_d, buf.bn, 13, 12, 11, 10, 9, 8, 7);
carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
- nist_set_224(t_d, buf, 0, 0, 0, 0, 13, 12, 11);
+ nist_set_224(t_d, buf.bn, 0, 0, 0, 0, 13, 12, 11);
carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
#if BN_BITS2==64
@@ -606,7 +609,8 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
/* otherwise it's effectively same as in BN_nist_mod_192... */
mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_224[0],BN_NIST_224_TOP);
mask &= 0-(PTR_SIZE_INT)carry;
- res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) |
+ res = c_d;
+ res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) |
((PTR_SIZE_INT)r_d&mask));
nist_cp_bn(r_d, res, BN_NIST_224_TOP);
r->top = BN_NIST_224_TOP;
@@ -805,7 +809,8 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_256[0],BN_NIST_256_TOP);
mask &= 0-(PTR_SIZE_INT)carry;
- res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) |
+ res = c_d;
+ res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) |
((PTR_SIZE_INT)r_d&mask));
nist_cp_bn(r_d, res, BN_NIST_256_TOP);
r->top = BN_NIST_256_TOP;
@@ -1026,7 +1031,8 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
mask = 0-(PTR_SIZE_INT)(*u.f)(c_d,r_d,_nist_p_384[0],BN_NIST_384_TOP);
mask &= 0-(PTR_SIZE_INT)carry;
- res = (BN_ULONG *)(((PTR_SIZE_INT)c_d&~mask) |
+ res = c_d;
+ res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) |
((PTR_SIZE_INT)r_d&mask));
nist_cp_bn(r_d, res, BN_NIST_384_TOP);
r->top = BN_NIST_384_TOP;
@@ -1092,7 +1098,8 @@ int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
bn_add_words(r_d,r_d,t_d,BN_NIST_521_TOP);
mask = 0-(PTR_SIZE_INT)bn_sub_words(t_d,r_d,_nist_p_521,BN_NIST_521_TOP);
- res = (BN_ULONG *)(((PTR_SIZE_INT)t_d&~mask) |
+ res = t_d;
+ res = (BN_ULONG *)(((PTR_SIZE_INT)res&~mask) |
((PTR_SIZE_INT)r_d&mask));
nist_cp_bn(r_d,res,BN_NIST_521_TOP);
r->top = BN_NIST_521_TOP;