aboutsummaryrefslogtreecommitdiff
path: root/openssl/crypto/bn
diff options
context:
space:
mode:
authormarha <marha@users.sourceforge.net>2010-03-30 12:36:28 +0000
committermarha <marha@users.sourceforge.net>2010-03-30 12:36:28 +0000
commitff48c0d9098080b51ea12710029135916d117806 (patch)
tree96e6af9caf170ba21a1027b24e306a07e27d7b75 /openssl/crypto/bn
parentbb731f5ac92655c4860a41fa818a7a63005f8369 (diff)
downloadvcxsrv-ff48c0d9098080b51ea12710029135916d117806.tar.gz
vcxsrv-ff48c0d9098080b51ea12710029135916d117806.tar.bz2
vcxsrv-ff48c0d9098080b51ea12710029135916d117806.zip
svn merge -r514:HEAD ^/branches/released .
Diffstat (limited to 'openssl/crypto/bn')
-rw-r--r--openssl/crypto/bn/Makefile79
-rw-r--r--openssl/crypto/bn/asm/armv4-mont.pl1
-rw-r--r--openssl/crypto/bn/asm/bn-586.pl203
-rw-r--r--openssl/crypto/bn/asm/co-586.pl3
-rw-r--r--openssl/crypto/bn/asm/mo-586.pl603
-rw-r--r--openssl/crypto/bn/asm/ppc.pl233
-rw-r--r--openssl/crypto/bn/asm/sparcv8plus.S15
-rw-r--r--openssl/crypto/bn/asm/x86_64-gcc.c29
-rw-r--r--openssl/crypto/bn/asm/x86_64-mont.pl136
-rw-r--r--openssl/crypto/bn/bn.h181
-rw-r--r--openssl/crypto/bn/bn_asm.c322
-rw-r--r--openssl/crypto/bn/bn_blind.c17
-rw-r--r--openssl/crypto/bn/bn_ctx.c6
-rw-r--r--openssl/crypto/bn/bn_div.c15
-rw-r--r--openssl/crypto/bn/bn_exp.c3
-rw-r--r--openssl/crypto/bn/bn_gf2m.c145
-rw-r--r--openssl/crypto/bn/bn_lcl.h3
-rw-r--r--openssl/crypto/bn/bn_lib.c29
-rw-r--r--openssl/crypto/bn/bn_mont.c269
-rw-r--r--openssl/crypto/bn/bn_mul.c10
-rw-r--r--openssl/crypto/bn/bn_opt.c87
-rw-r--r--openssl/crypto/bn/bn_print.c21
-rw-r--r--openssl/crypto/bn/bn_x931p.c272
-rw-r--r--openssl/crypto/bn/bntest.c38
-rw-r--r--openssl/crypto/bn/exptest.c4
25 files changed, 949 insertions, 1775 deletions
diff --git a/openssl/crypto/bn/Makefile b/openssl/crypto/bn/Makefile
index f5e8f65a4..aabc4f56b 100644
--- a/openssl/crypto/bn/Makefile
+++ b/openssl/crypto/bn/Makefile
@@ -12,8 +12,6 @@ MAKEFILE= Makefile
AR= ar r
BN_ASM= bn_asm.o
-# or use
-#BN_ASM= bn86-elf.o
CFLAGS= $(INCLUDES) $(CFLAG)
ASFLAGS= $(INCLUDES) $(ASFLAG)
@@ -28,13 +26,13 @@ LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \
bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \
bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
- bn_depr.c bn_x931p.c bn_const.c bn_opt.c
+ bn_depr.c bn_const.c
LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \
bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \
bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \
bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o bn_gf2m.o bn_nist.o \
- bn_depr.o bn_x931p.o bn_const.o bn_opt.o
+ bn_depr.o bn_const.o
SRC= $(LIBSRC)
@@ -58,36 +56,25 @@ bnbug: bnbug.c ../../libcrypto.a top
cc -g -I../../include bnbug.c -o bnbug ../../libcrypto.a
lib: $(LIBOBJ)
- $(ARX) $(LIB) $(LIBOBJ)
+ $(AR) $(LIB) $(LIBOBJ)
$(RANLIB) $(LIB) || echo Never mind.
@touch lib
-# ELF
-bn86-elf.s: asm/bn-586.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) bn-586.pl elf $(CFLAGS) > ../$@)
-co86-elf.s: asm/co-586.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) co-586.pl elf $(CFLAGS) > ../$@)
-mo86-elf.s: asm/mo-586.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) mo-586.pl elf $(CFLAGS) > ../$@)
-# COFF
-bn86-cof.s: asm/bn-586.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) bn-586.pl coff $(CFLAGS) > ../$@)
-co86-cof.s: asm/co-586.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) co-586.pl coff $(CFLAGS) > ../$@)
-mo86-cof.s: asm/mo-586.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) mo-586.pl coff $(CFLAGS) > ../$@)
-# a.out
-bn86-out.s: asm/bn-586.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) bn-586.pl a.out $(CFLAGS) > ../$@)
-co86-out.s: asm/co-586.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) co-586.pl a.out $(CFLAGS) > ../$@)
-mo86-out.s: asm/mo-586.pl ../perlasm/x86asm.pl
- (cd asm; $(PERL) mo-586.pl a.out $(CFLAGS) > ../$@)
+bn-586.s: asm/bn-586.pl ../perlasm/x86asm.pl
+ $(PERL) asm/bn-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
+co-586.s: asm/co-586.pl ../perlasm/x86asm.pl
+ $(PERL) asm/co-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
+x86-mont.s: asm/x86-mont.pl ../perlasm/x86asm.pl
+ $(PERL) asm/x86-mont.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
sparcv8.o: asm/sparcv8.S
$(CC) $(CFLAGS) -c asm/sparcv8.S
-sparcv8plus.o: asm/sparcv8plus.S
- $(CC) $(CFLAGS) -c asm/sparcv8plus.S
+bn-sparcv9.o: asm/sparcv8plus.S
+ $(CC) $(CFLAGS) -c -o $@ asm/sparcv8plus.S
+sparcv9a-mont.s: asm/sparcv9a-mont.pl
+ $(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@
+sparcv9-mont.s: asm/sparcv9-mont.pl
+ $(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@
bn-mips3.o: asm/mips3.s
@if [ "$(CC)" = "gcc" ]; then \
@@ -95,10 +82,13 @@ bn-mips3.o: asm/mips3.s
as -$$ABI -O -o $@ asm/mips3.s; \
else $(CC) -c $(CFLAGS) -o $@ asm/mips3.s; fi
+bn-s390x.o: asm/s390x.S
+ $(CC) $(CFLAGS) -c -o $@ asm/s390x.S
+
x86_64-gcc.o: asm/x86_64-gcc.c
$(CC) $(CFLAGS) -c -o $@ asm/x86_64-gcc.c
x86_64-mont.s: asm/x86_64-mont.pl
- $(PERL) asm/x86_64-mont.pl $@
+ $(PERL) asm/x86_64-mont.pl $(PERLASM_SCHEME) > $@
bn-ia64.s: asm/ia64.S
$(CC) $(CFLAGS) -E asm/ia64.S > $@
@@ -111,12 +101,14 @@ pa-risc2.o: asm/pa-risc2.s
/usr/ccs/bin/as -o pa-risc2.o asm/pa-risc2.s
# ppc - AIX, Linux, MacOS X...
-linux_ppc32.s: asm/ppc.pl; $(PERL) $< $@
-linux_ppc64.s: asm/ppc.pl; $(PERL) $< $@
-aix_ppc32.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@
-aix_ppc64.s: asm/ppc.pl; $(PERL) asm/ppc.pl $@
-osx_ppc32.s: asm/ppc.pl; $(PERL) $< $@
-osx_ppc64.s: asm/ppc.pl; $(PERL) $< $@
+bn-ppc.s: asm/ppc.pl; $(PERL) asm/ppc.pl $(PERLASM_SCHEME) $@
+ppc-mont.s: asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@
+
+alpha-mont.s: asm/alpha-mont.pl
+ $(PERL) $< | $(CC) -E - | tee $@ > /dev/null
+
+# GNU make "catch all"
+%-mont.s: asm/%-mont.pl; $(PERL) $< $(CFLAGS) > $@
files:
$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
@@ -184,8 +176,11 @@ bn_blind.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
bn_blind.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
bn_blind.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_blind.c bn_lcl.h
-bn_const.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h
-bn_const.o: ../../include/openssl/ossl_typ.h bn.h bn_const.c
+bn_const.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
+bn_const.o: ../../include/openssl/opensslconf.h
+bn_const.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+bn_const.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
+bn_const.o: ../../include/openssl/symhacks.h bn.h bn_const.c
bn_ctx.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
bn_ctx.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
@@ -292,13 +287,6 @@ bn_nist.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
bn_nist.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
bn_nist.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
bn_nist.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_nist.c
-bn_opt.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
-bn_opt.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
-bn_opt.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
-bn_opt.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
-bn_opt.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-bn_opt.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-bn_opt.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_opt.c
bn_prime.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
bn_prime.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
@@ -357,6 +345,3 @@ bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_word.c
-bn_x931p.o: ../../include/openssl/bn.h ../../include/openssl/e_os2.h
-bn_x931p.o: ../../include/openssl/opensslconf.h
-bn_x931p.o: ../../include/openssl/ossl_typ.h bn_x931p.c
diff --git a/openssl/crypto/bn/asm/armv4-mont.pl b/openssl/crypto/bn/asm/armv4-mont.pl
index 05d5dc1a4..14e0d2d1d 100644
--- a/openssl/crypto/bn/asm/armv4-mont.pl
+++ b/openssl/crypto/bn/asm/armv4-mont.pl
@@ -193,6 +193,7 @@ bn_mul_mont:
bx lr @ interoperable with Thumb ISA:-)
.size bn_mul_mont,.-bn_mul_mont
.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
___
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
diff --git a/openssl/crypto/bn/asm/bn-586.pl b/openssl/crypto/bn/asm/bn-586.pl
index 26c2685a7..332ef3e91 100644
--- a/openssl/crypto/bn/asm/bn-586.pl
+++ b/openssl/crypto/bn/asm/bn-586.pl
@@ -1,6 +1,7 @@
#!/usr/local/bin/perl
-push(@INC,"perlasm","../../perlasm");
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
&asm_init($ARGV[0],$0);
@@ -24,38 +25,25 @@ sub bn_mul_add_words
{
local($name)=@_;
- &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+ &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
- &comment("");
- $Low="eax";
- $High="edx";
- $a="ebx";
- $w="ebp";
- $r="edi";
- $c="esi";
-
- &xor($c,$c); # clear carry
- &mov($r,&wparam(0)); #
-
- &mov("ecx",&wparam(2)); #
- &mov($a,&wparam(1)); #
-
- &and("ecx",0xfffffff8); # num / 8
- &mov($w,&wparam(3)); #
-
- &push("ecx"); # Up the stack for a tmp variable
-
- &jz(&label("maw_finish"));
+ $r="eax";
+ $a="edx";
+ $c="ecx";
if ($sse2) {
&picmeup("eax","OPENSSL_ia32cap_P");
&bt(&DWP(0,"eax"),26);
- &jnc(&label("maw_loop"));
+ &jnc(&label("maw_non_sse2"));
- &movd("mm0",$w); # mm0 = w
+ &mov($r,&wparam(0));
+ &mov($a,&wparam(1));
+ &mov($c,&wparam(2));
+ &movd("mm0",&wparam(3)); # mm0 = w
&pxor("mm1","mm1"); # mm1 = carry_in
-
- &set_label("maw_sse2_loop",0);
+ &jmp(&label("maw_sse2_entry"));
+
+ &set_label("maw_sse2_unrolled",16);
&movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
&paddq("mm1","mm3"); # mm1 = carry_in + r[0]
&movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0]
@@ -112,42 +100,82 @@ sub bn_mul_add_words
&psrlq("mm1",32); # mm1 = carry6
&paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7]
&movd(&DWP(28,$r,"",0),"mm1");
- &add($r,32);
+ &lea($r,&DWP(32,$r));
&psrlq("mm1",32); # mm1 = carry_out
- &sub("ecx",8);
+ &sub($c,8);
+ &jz(&label("maw_sse2_exit"));
+ &set_label("maw_sse2_entry");
+ &test($c,0xfffffff8);
+ &jnz(&label("maw_sse2_unrolled"));
+
+ &set_label("maw_sse2_loop",4);
+ &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
+ &movd("mm3",&DWP(0,$r)); # mm3 = r[i]
+ &pmuludq("mm2","mm0"); # a[i] *= w
+ &lea($a,&DWP(4,$a));
+ &paddq("mm1","mm3"); # carry += r[i]
+ &paddq("mm1","mm2"); # carry += a[i]*w
+ &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
+ &sub($c,1);
+ &psrlq("mm1",32); # carry = carry_high
+ &lea($r,&DWP(4,$r));
&jnz(&label("maw_sse2_loop"));
-
- &movd($c,"mm1"); # c = carry_out
+ &set_label("maw_sse2_exit");
+ &movd("eax","mm1"); # c = carry_out
&emms();
+ &ret();
- &jmp(&label("maw_finish"));
+ &set_label("maw_non_sse2",16);
}
- &set_label("maw_loop",0);
+ # function_begin prologue
+ &push("ebp");
+ &push("ebx");
+ &push("esi");
+ &push("edi");
+
+ &comment("");
+ $Low="eax";
+ $High="edx";
+ $a="ebx";
+ $w="ebp";
+ $r="edi";
+ $c="esi";
+
+ &xor($c,$c); # clear carry
+ &mov($r,&wparam(0)); #
+
+ &mov("ecx",&wparam(2)); #
+ &mov($a,&wparam(1)); #
+
+ &and("ecx",0xfffffff8); # num / 8
+ &mov($w,&wparam(3)); #
- &mov(&swtmp(0),"ecx"); #
+ &push("ecx"); # Up the stack for a tmp variable
+
+ &jz(&label("maw_finish"));
+
+ &set_label("maw_loop",16);
for ($i=0; $i<32; $i+=4)
{
&comment("Round $i");
- &mov("eax",&DWP($i,$a,"",0)); # *a
+ &mov("eax",&DWP($i,$a)); # *a
&mul($w); # *a * w
- &add("eax",$c); # L(t)+= *r
- &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r
+ &add("eax",$c); # L(t)+= c
&adc("edx",0); # H(t)+=carry
- &add("eax",$c); # L(t)+=c
+ &add("eax",&DWP($i,$r)); # L(t)+= *r
&adc("edx",0); # H(t)+=carry
- &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
+ &mov(&DWP($i,$r),"eax"); # *r= L(t);
&mov($c,"edx"); # c= H(t);
}
&comment("");
- &mov("ecx",&swtmp(0)); #
- &add($a,32);
- &add($r,32);
&sub("ecx",8);
+ &lea($a,&DWP(32,$a));
+ &lea($r,&DWP(32,$r));
&jnz(&label("maw_loop"));
&set_label("maw_finish",0);
@@ -160,16 +188,15 @@ sub bn_mul_add_words
for ($i=0; $i<7; $i++)
{
&comment("Tail Round $i");
- &mov("eax",&DWP($i*4,$a,"",0));# *a
+ &mov("eax",&DWP($i*4,$a)); # *a
&mul($w); # *a * w
&add("eax",$c); # L(t)+=c
- &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
&adc("edx",0); # H(t)+=carry
- &add("eax",$c);
+ &add("eax",&DWP($i*4,$r)); # L(t)+= *r
&adc("edx",0); # H(t)+=carry
&dec("ecx") if ($i != 7-1);
- &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
- &mov($c,"edx"); # c= H(t);
+ &mov(&DWP($i*4,$r),"eax"); # *r= L(t);
+ &mov($c,"edx"); # c= H(t);
&jz(&label("maw_end")) if ($i != 7-1);
}
&set_label("maw_end",0);
@@ -184,7 +211,45 @@ sub bn_mul_words
{
local($name)=@_;
- &function_begin($name,"");
+ &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+
+ $r="eax";
+ $a="edx";
+ $c="ecx";
+
+ if ($sse2) {
+ &picmeup("eax","OPENSSL_ia32cap_P");
+ &bt(&DWP(0,"eax"),26);
+ &jnc(&label("mw_non_sse2"));
+
+ &mov($r,&wparam(0));
+ &mov($a,&wparam(1));
+ &mov($c,&wparam(2));
+ &movd("mm0",&wparam(3)); # mm0 = w
+ &pxor("mm1","mm1"); # mm1 = carry = 0
+
+ &set_label("mw_sse2_loop",16);
+ &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
+ &pmuludq("mm2","mm0"); # a[i] *= w
+ &lea($a,&DWP(4,$a));
+ &paddq("mm1","mm2"); # carry += a[i]*w
+ &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
+ &sub($c,1);
+ &psrlq("mm1",32); # carry = carry_high
+ &lea($r,&DWP(4,$r));
+ &jnz(&label("mw_sse2_loop"));
+
+ &movd("eax","mm1"); # return carry
+ &emms();
+ &ret();
+ &set_label("mw_non_sse2",16);
+ }
+
+ # function_begin prologue
+ &push("ebp");
+ &push("ebx");
+ &push("esi");
+ &push("edi");
&comment("");
$Low="eax";
@@ -257,7 +322,40 @@ sub bn_sqr_words
{
local($name)=@_;
- &function_begin($name,"");
+ &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+
+ $r="eax";
+ $a="edx";
+ $c="ecx";
+
+ if ($sse2) {
+ &picmeup("eax","OPENSSL_ia32cap_P");
+ &bt(&DWP(0,"eax"),26);
+ &jnc(&label("sqr_non_sse2"));
+
+ &mov($r,&wparam(0));
+ &mov($a,&wparam(1));
+ &mov($c,&wparam(2));
+
+ &set_label("sqr_sse2_loop",16);
+ &movd("mm0",&DWP(0,$a)); # mm0 = a[i]
+ &pmuludq("mm0","mm0"); # a[i] *= a[i]
+ &lea($a,&DWP(4,$a)); # a++
+ &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i]
+ &sub($c,1);
+ &lea($r,&DWP(8,$r)); # r += 2
+ &jnz(&label("sqr_sse2_loop"));
+
+ &emms();
+ &ret();
+ &set_label("sqr_non_sse2",16);
+ }
+
+ # function_begin prologue
+ &push("ebp");
+ &push("ebx");
+ &push("esi");
+ &push("edi");
&comment("");
$r="esi";
@@ -313,12 +411,13 @@ sub bn_div_words
{
local($name)=@_;
- &function_begin($name,"");
+ &function_begin_B($name,"");
&mov("edx",&wparam(0)); #
&mov("eax",&wparam(1)); #
- &mov("ebx",&wparam(2)); #
- &div("ebx");
- &function_end($name);
+ &mov("ecx",&wparam(2)); #
+ &div("ecx");
+ &ret();
+ &function_end_B($name);
}
sub bn_add_words
diff --git a/openssl/crypto/bn/asm/co-586.pl b/openssl/crypto/bn/asm/co-586.pl
index 5d962cb95..57101a6bd 100644
--- a/openssl/crypto/bn/asm/co-586.pl
+++ b/openssl/crypto/bn/asm/co-586.pl
@@ -1,6 +1,7 @@
#!/usr/local/bin/perl
-push(@INC,"perlasm","../../perlasm");
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
&asm_init($ARGV[0],$0);
diff --git a/openssl/crypto/bn/asm/mo-586.pl b/openssl/crypto/bn/asm/mo-586.pl
deleted file mode 100644
index 098229309..000000000
--- a/openssl/crypto/bn/asm/mo-586.pl
+++ /dev/null
@@ -1,603 +0,0 @@
-#!/usr/bin/env perl
-
-# This is crypto/bn/asm/x86-mont.pl (with asciz from crypto/perlasm/x86asm.pl)
-# from OpenSSL 0.9.9-dev
-
-sub ::asciz
-{ my @str=unpack("C*",shift);
- push @str,0;
- while ($#str>15) {
- &data_byte(@str[0..15]);
- foreach (0..15) { shift @str; }
- }
- &data_byte(@str) if (@str);
-}
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# October 2005
-#
-# This is a "teaser" code, as it can be improved in several ways...
-# First of all non-SSE2 path should be implemented (yes, for now it
-# performs Montgomery multiplication/convolution only on SSE2-capable
-# CPUs such as P4, others fall down to original code). Then inner loop
-# can be unrolled and modulo-scheduled to improve ILP and possibly
-# moved to 128-bit XMM register bank (though it would require input
-# rearrangement and/or increase bus bandwidth utilization). Dedicated
-# squaring procedure should give further performance improvement...
-# Yet, for being draft, the code improves rsa512 *sign* benchmark by
-# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-)
-
-# December 2006
-#
-# Modulo-scheduling SSE2 loops results in further 15-20% improvement.
-# Integer-only code [being equipped with dedicated squaring procedure]
-# gives ~40% on rsa512 sign benchmark...
-
-push(@INC,"perlasm","../../perlasm");
-require "x86asm.pl";
-
-&asm_init($ARGV[0],$0);
-
-$sse2=0;
-for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
-
-&external_label("OPENSSL_ia32cap_P") if ($sse2);
-
-&function_begin("bn_mul_mont");
-
-$i="edx";
-$j="ecx";
-$ap="esi"; $tp="esi"; # overlapping variables!!!
-$rp="edi"; $bp="edi"; # overlapping variables!!!
-$np="ebp";
-$num="ebx";
-
-$_num=&DWP(4*0,"esp"); # stack top layout
-$_rp=&DWP(4*1,"esp");
-$_ap=&DWP(4*2,"esp");
-$_bp=&DWP(4*3,"esp");
-$_np=&DWP(4*4,"esp");
-$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp");
-$_sp=&DWP(4*6,"esp");
-$_bpend=&DWP(4*7,"esp");
-$frame=32; # size of above frame rounded up to 16n
-
- &xor ("eax","eax");
- &mov ("edi",&wparam(5)); # int num
- &cmp ("edi",4);
- &jl (&label("just_leave"));
-
- &lea ("esi",&wparam(0)); # put aside pointer to argument block
- &lea ("edx",&wparam(1)); # load ap
- &mov ("ebp","esp"); # saved stack pointer!
- &add ("edi",2); # extra two words on top of tp
- &neg ("edi");
- &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
- &neg ("edi");
-
- # minimize cache contention by arraning 2K window between stack
- # pointer and ap argument [np is also position sensitive vector,
- # but it's assumed to be near ap, as it's allocated at ~same
- # time].
- &mov ("eax","esp");
- &sub ("eax","edx");
- &and ("eax",2047);
- &sub ("esp","eax"); # this aligns sp and ap modulo 2048
-
- &xor ("edx","esp");
- &and ("edx",2048);
- &xor ("edx",2048);
- &sub ("esp","edx"); # this splits them apart modulo 4096
-
- &and ("esp",-64); # align to cache line
-
- ################################# load argument block...
- &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
- &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
- &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
- &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np
- &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
- #&mov ("edi",&DWP(5*4,"esi"));# int num
-
- &mov ("esi",&DWP(0,"esi")); # pull n0[0]
- &mov ($_rp,"eax"); # ... save a copy of argument block
- &mov ($_ap,"ebx");
- &mov ($_bp,"ecx");
- &mov ($_np,"edx");
- &mov ($_n0,"esi");
- &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling
- #&mov ($_num,$num); # redundant as $num is not reused
- &mov ($_sp,"ebp"); # saved stack pointer!
-
-if($sse2) {
-$acc0="mm0"; # mmx register bank layout
-$acc1="mm1";
-$car0="mm2";
-$car1="mm3";
-$mul0="mm4";
-$mul1="mm5";
-$temp="mm6";
-$mask="mm7";
-
- &picmeup("eax","OPENSSL_ia32cap_P");
- &bt (&DWP(0,"eax"),26);
- &jnc (&label("non_sse2"));
-
- &mov ("eax",-1);
- &movd ($mask,"eax"); # mask 32 lower bits
-
- &mov ($ap,$_ap); # load input pointers
- &mov ($bp,$_bp);
- &mov ($np,$_np);
-
- &xor ($i,$i); # i=0
- &xor ($j,$j); # j=0
-
- &movd ($mul0,&DWP(0,$bp)); # bp[0]
- &movd ($mul1,&DWP(0,$ap)); # ap[0]
- &movd ($car1,&DWP(0,$np)); # np[0]
-
- &pmuludq($mul1,$mul0); # ap[0]*bp[0]
- &movq ($car0,$mul1);
- &movq ($acc0,$mul1); # I wish movd worked for
- &pand ($acc0,$mask); # inter-register transfers
-
- &pmuludq($mul1,$_n0q); # *=n0
-
- &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0
- &paddq ($car1,$acc0);
-
- &movd ($acc1,&DWP(4,$np)); # np[1]
- &movd ($acc0,&DWP(4,$ap)); # ap[1]
-
- &psrlq ($car0,32);
- &psrlq ($car1,32);
-
- &inc ($j); # j++
-&set_label("1st",16);
- &pmuludq($acc0,$mul0); # ap[j]*bp[0]
- &pmuludq($acc1,$mul1); # np[j]*m1
- &paddq ($car0,$acc0); # +=c0
- &paddq ($car1,$acc1); # +=c1
-
- &movq ($acc0,$car0);
- &pand ($acc0,$mask);
- &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
- &paddq ($car1,$acc0); # +=ap[j]*bp[0];
- &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
- &psrlq ($car0,32);
- &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]=
- &psrlq ($car1,32);
-
- &lea ($j,&DWP(1,$j));
- &cmp ($j,$num);
- &jl (&label("1st"));
-
- &pmuludq($acc0,$mul0); # ap[num-1]*bp[0]
- &pmuludq($acc1,$mul1); # np[num-1]*m1
- &paddq ($car0,$acc0); # +=c0
- &paddq ($car1,$acc1); # +=c1
-
- &movq ($acc0,$car0);
- &pand ($acc0,$mask);
- &paddq ($car1,$acc0); # +=ap[num-1]*bp[0];
- &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
-
- &psrlq ($car0,32);
- &psrlq ($car1,32);
-
- &paddq ($car1,$car0);
- &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
-
- &inc ($i); # i++
-&set_label("outer");
- &xor ($j,$j); # j=0
-
- &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i]
- &movd ($mul1,&DWP(0,$ap)); # ap[0]
- &movd ($temp,&DWP($frame,"esp")); # tp[0]
- &movd ($car1,&DWP(0,$np)); # np[0]
- &pmuludq($mul1,$mul0); # ap[0]*bp[i]
-
- &paddq ($mul1,$temp); # +=tp[0]
- &movq ($acc0,$mul1);
- &movq ($car0,$mul1);
- &pand ($acc0,$mask);
-
- &pmuludq($mul1,$_n0q); # *=n0
-
- &pmuludq($car1,$mul1);
- &paddq ($car1,$acc0);
-
- &movd ($temp,&DWP($frame+4,"esp")); # tp[1]
- &movd ($acc1,&DWP(4,$np)); # np[1]
- &movd ($acc0,&DWP(4,$ap)); # ap[1]
-
- &psrlq ($car0,32);
- &psrlq ($car1,32);
- &paddq ($car0,$temp); # +=tp[1]
-
- &inc ($j); # j++
- &dec ($num);
-&set_label("inner");
- &pmuludq($acc0,$mul0); # ap[j]*bp[i]
- &pmuludq($acc1,$mul1); # np[j]*m1
- &paddq ($car0,$acc0); # +=c0
- &paddq ($car1,$acc1); # +=c1
-
- &movq ($acc0,$car0);
- &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1]
- &pand ($acc0,$mask);
- &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
- &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j]
- &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
- &psrlq ($car0,32);
- &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]=
- &psrlq ($car1,32);
- &paddq ($car0,$temp); # +=tp[j+1]
-
- &dec ($num);
- &lea ($j,&DWP(1,$j)); # j++
- &jnz (&label("inner"));
-
- &mov ($num,$j);
- &pmuludq($acc0,$mul0); # ap[num-1]*bp[i]
- &pmuludq($acc1,$mul1); # np[num-1]*m1
- &paddq ($car0,$acc0); # +=c0
- &paddq ($car1,$acc1); # +=c1
-
- &movq ($acc0,$car0);
- &pand ($acc0,$mask);
- &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1]
- &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
- &psrlq ($car0,32);
- &psrlq ($car1,32);
-
- &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num]
- &paddq ($car1,$car0);
- &paddq ($car1,$temp);
- &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
-
- &lea ($i,&DWP(1,$i)); # i++
- &cmp ($i,$num);
- &jle (&label("outer"));
-
- &emms (); # done with mmx bank
- &jmp (&label("common_tail"));
-
-&set_label("non_sse2",16);
-}
-
-if (0) {
- &mov ("esp",$_sp);
- &xor ("eax","eax"); # signal "not fast enough [yet]"
- &jmp (&label("just_leave"));
- # While the below code provides competitive performance for
- # all key lengthes on modern Intel cores, it's still more
- # than 10% slower for 4096-bit key elsewhere:-( "Competitive"
- # means compared to the original integer-only assembler.
- # 512-bit RSA sign is better by ~40%, but that's about all
- # one can say about all CPUs...
-} else {
-$inp="esi"; # integer path uses these registers differently
-$word="edi";
-$carry="ebp";
-
- &mov ($inp,$_ap);
- &lea ($carry,&DWP(1,$num));
- &mov ($word,$_bp);
- &xor ($j,$j); # j=0
- &mov ("edx",$inp);
- &and ($carry,1); # see if num is even
- &sub ("edx",$word); # see if ap==bp
- &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num]
- &or ($carry,"edx");
- &mov ($word,&DWP(0,$word)); # bp[0]
- &jz (&label("bn_sqr_mont"));
- &mov ($_bpend,"eax");
- &mov ("eax",&DWP(0,$inp));
- &xor ("edx","edx");
-
-&set_label("mull",16);
- &mov ($carry,"edx");
- &mul ($word); # ap[j]*bp[0]
- &add ($carry,"eax");
- &lea ($j,&DWP(1,$j));
- &adc ("edx",0);
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
- &cmp ($j,$num);
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
- &jl (&label("mull"));
-
- &mov ($carry,"edx");
- &mul ($word); # ap[num-1]*bp[0]
- &mov ($word,$_n0);
- &add ("eax",$carry);
- &mov ($inp,$_np);
- &adc ("edx",0);
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
-
- &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]=
- &xor ($j,$j);
- &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
- &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
-
- &mov ("eax",&DWP(0,$inp)); # np[0]
- &mul ($word); # np[0]*m
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
- &mov ("eax",&DWP(4,$inp)); # np[1]
- &adc ("edx",0);
- &inc ($j);
-
- &jmp (&label("2ndmadd"));
-
-&set_label("1stmadd",16);
- &mov ($carry,"edx");
- &mul ($word); # ap[j]*bp[i]
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
- &lea ($j,&DWP(1,$j));
- &adc ("edx",0);
- &add ($carry,"eax");
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
- &adc ("edx",0);
- &cmp ($j,$num);
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
- &jl (&label("1stmadd"));
-
- &mov ($carry,"edx");
- &mul ($word); # ap[num-1]*bp[i]
- &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1]
- &mov ($word,$_n0);
- &adc ("edx",0);
- &mov ($inp,$_np);
- &add ($carry,"eax");
- &adc ("edx",0);
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
-
- &xor ($j,$j);
- &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
- &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]=
- &adc ($j,0);
- &mov ("eax",&DWP(0,$inp)); # np[0]
- &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
- &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
-
- &mul ($word); # np[0]*m
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
- &mov ("eax",&DWP(4,$inp)); # np[1]
- &adc ("edx",0);
- &mov ($j,1);
-
-&set_label("2ndmadd",16);
- &mov ($carry,"edx");
- &mul ($word); # np[j]*m
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
- &lea ($j,&DWP(1,$j));
- &adc ("edx",0);
- &add ($carry,"eax");
- &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1]
- &adc ("edx",0);
- &cmp ($j,$num);
- &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]=
- &jl (&label("2ndmadd"));
-
- &mov ($carry,"edx");
- &mul ($word); # np[j]*m
- &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
- &adc ("edx",0);
- &add ($carry,"eax");
- &adc ("edx",0);
- &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
-
- &xor ("eax","eax");
- &mov ($j,$_bp); # &bp[i]
- &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
- &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
- &lea ($j,&DWP(4,$j));
- &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
- &cmp ($j,$_bpend);
- &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
- &je (&label("common_tail"));
-
- &mov ($word,&DWP(0,$j)); # bp[i+1]
- &mov ($inp,$_ap);
- &mov ($_bp,$j); # &bp[++i]
- &xor ($j,$j);
- &xor ("edx","edx");
- &mov ("eax",&DWP(0,$inp));
- &jmp (&label("1stmadd"));
-
-&set_label("bn_sqr_mont",16);
-$sbit=$num;
- &mov ($_num,$num);
- &mov ($_bp,$j); # i=0
-
- &mov ("eax",$word); # ap[0]
- &mul ($word); # ap[0]*ap[0]
- &mov (&DWP($frame,"esp"),"eax"); # tp[0]=
- &mov ($sbit,"edx");
- &shr ("edx",1);
- &and ($sbit,1);
- &inc ($j);
-&set_label("sqr",16);
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
- &mov ($carry,"edx");
- &mul ($word); # ap[j]*ap[0]
- &add ("eax",$carry);
- &lea ($j,&DWP(1,$j));
- &adc ("edx",0);
- &lea ($carry,&DWP(0,$sbit,"eax",2));
- &shr ("eax",31);
- &cmp ($j,$_num);
- &mov ($sbit,"eax");
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
- &jl (&label("sqr"));
-
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1]
- &mov ($carry,"edx");
- &mul ($word); # ap[num-1]*ap[0]
- &add ("eax",$carry);
- &mov ($word,$_n0);
- &adc ("edx",0);
- &mov ($inp,$_np);
- &lea ($carry,&DWP(0,$sbit,"eax",2));
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
- &shr ("eax",31);
- &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]=
-
- &lea ($carry,&DWP(0,"eax","edx",2));
- &mov ("eax",&DWP(0,$inp)); # np[0]
- &shr ("edx",31);
- &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]=
- &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]=
-
- &mul ($word); # np[0]*m
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
- &mov ($num,$j);
- &adc ("edx",0);
- &mov ("eax",&DWP(4,$inp)); # np[1]
- &mov ($j,1);
-
-&set_label("3rdmadd",16);
- &mov ($carry,"edx");
- &mul ($word); # np[j]*m
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
- &adc ("edx",0);
- &add ($carry,"eax");
- &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1]
- &adc ("edx",0);
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]=
-
- &mov ($carry,"edx");
- &mul ($word); # np[j+1]*m
- &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1]
- &lea ($j,&DWP(2,$j));
- &adc ("edx",0);
- &add ($carry,"eax");
- &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2]
- &adc ("edx",0);
- &cmp ($j,$num);
- &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]=
- &jl (&label("3rdmadd"));
-
- &mov ($carry,"edx");
- &mul ($word); # np[j]*m
- &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
- &adc ("edx",0);
- &add ($carry,"eax");
- &adc ("edx",0);
- &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
-
- &mov ($j,$_bp); # i
- &xor ("eax","eax");
- &mov ($inp,$_ap);
- &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
- &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
- &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
- &cmp ($j,$num);
- &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
- &je (&label("common_tail"));
-
- &mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
- &lea ($j,&DWP(1,$j));
- &mov ("eax",$word);
- &mov ($_bp,$j); # ++i
- &mul ($word); # ap[i]*ap[i]
- &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i]
- &adc ("edx",0);
- &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]=
- &xor ($carry,$carry);
- &cmp ($j,$num);
- &lea ($j,&DWP(1,$j));
- &je (&label("sqrlast"));
-
- &mov ($sbit,"edx"); # zaps $num
- &shr ("edx",1);
- &and ($sbit,1);
-&set_label("sqradd",16);
- &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
- &mov ($carry,"edx");
- &mul ($word); # ap[j]*ap[i]
- &add ("eax",$carry);
- &lea ($carry,&DWP(0,"eax","eax"));
- &adc ("edx",0);
- &shr ("eax",31);
- &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
- &lea ($j,&DWP(1,$j));
- &adc ("eax",0);
- &add ($carry,$sbit);
- &adc ("eax",0);
- &cmp ($j,$_num);
- &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
- &mov ($sbit,"eax");
- &jle (&label("sqradd"));
-
- &mov ($carry,"edx");
- &lea ("edx",&DWP(0,$sbit,"edx",2));
- &shr ($carry,31);
-&set_label("sqrlast");
- &mov ($word,$_n0);
- &mov ($inp,$_np);
- &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
-
- &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num]
- &mov ("eax",&DWP(0,$inp)); # np[0]
- &adc ($carry,0);
- &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]=
- &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]=
-
- &mul ($word); # np[0]*m
- &add ("eax",&DWP($frame,"esp")); # +=tp[0]
- &lea ($num,&DWP(-1,$j));
- &adc ("edx",0);
- &mov ($j,1);
- &mov ("eax",&DWP(4,$inp)); # np[1]
-
- &jmp (&label("3rdmadd"));
-}
-
-&set_label("common_tail",16);
- &mov ($np,$_np); # load modulus pointer
- &mov ($rp,$_rp); # load result pointer
- &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped]
-
- &mov ("eax",&DWP(0,$tp)); # tp[0]
- &mov ($j,$num); # j=num-1
- &xor ($i,$i); # i=0 and clear CF!
-
-&set_label("sub",16);
- &sbb ("eax",&DWP(0,$np,$i,4));
- &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i]
- &dec ($j); # doesn't affect CF!
- &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1]
- &lea ($i,&DWP(1,$i)); # i++
- &jge (&label("sub"));
-
- &sbb ("eax",0); # handle upmost overflow bit
- &and ($tp,"eax");
- &not ("eax");
- &mov ($np,$rp);
- &and ($np,"eax");
- &or ($tp,$np); # tp=carry?tp:rp
-
-&set_label("copy",16); # copy or in-place refresh
- &mov ("eax",&DWP(0,$tp,$num,4));
- &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i]
- &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector
- &dec ($num);
- &jge (&label("copy"));
-
- &mov ("esp",$_sp); # pull saved stack pointer
- &mov ("eax",1);
-&set_label("just_leave");
-&function_end("bn_mul_mont");
-
-&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
-&asm_finish();
diff --git a/openssl/crypto/bn/asm/ppc.pl b/openssl/crypto/bn/asm/ppc.pl
index 08e005347..37c65d351 100644
--- a/openssl/crypto/bn/asm/ppc.pl
+++ b/openssl/crypto/bn/asm/ppc.pl
@@ -100,9 +100,9 @@
# me a note at schari@us.ibm.com
#
-$opf = shift;
+$flavour = shift;
-if ($opf =~ /32\.s/) {
+if ($flavour =~ /32/) {
$BITS= 32;
$BNSZ= $BITS/8;
$ISA= "\"ppc\"";
@@ -125,7 +125,7 @@ if ($opf =~ /32\.s/) {
$INSR= "insrwi"; # insert right
$ROTL= "rotlwi"; # rotate left by immediate
$TR= "tw"; # conditional trap
-} elsif ($opf =~ /64\.s/) {
+} elsif ($flavour =~ /64/) {
$BITS= 64;
$BNSZ= $BITS/8;
$ISA= "\"ppc64\"";
@@ -149,93 +149,16 @@ if ($opf =~ /32\.s/) {
$INSR= "insrdi"; # insert right
$ROTL= "rotldi"; # rotate left by immediate
$TR= "td"; # conditional trap
-} else { die "nonsense $opf"; }
+} else { die "nonsense $flavour"; }
-( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!";
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
-# function entry points from the AIX code
-#
-# There are other, more elegant, ways to handle this. We (IBM) chose
-# this approach as it plays well with scripts we run to 'namespace'
-# OpenSSL .i.e. we add a prefix to all the public symbols so we can
-# co-exist in the same process with other implementations of OpenSSL.
-# 'cleverer' ways of doing these substitutions tend to hide data we
-# need to be obvious.
-#
-my @items = ("bn_sqr_comba4",
- "bn_sqr_comba8",
- "bn_mul_comba4",
- "bn_mul_comba8",
- "bn_sub_words",
- "bn_add_words",
- "bn_div_words",
- "bn_sqr_words",
- "bn_mul_words",
- "bn_mul_add_words");
+open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
-if ($opf =~ /linux/) { do_linux(); }
-elsif ($opf =~ /aix/) { do_aix(); }
-elsif ($opf =~ /osx/) { do_osx(); }
-else { do_bsd(); }
-
-sub do_linux {
- $d=&data();
-
- if ($BITS==64) {
- foreach $t (@items) {
- $d =~ s/\.$t:/\
-\t.section\t".opd","aw"\
-\t.align\t3\
-\t.globl\t$t\
-$t:\
-\t.quad\t.$t,.TOC.\@tocbase,0\
-\t.size\t$t,24\
-\t.previous\n\
-\t.type\t.$t,\@function\
-\t.globl\t.$t\
-.$t:/g;
- }
- }
- else {
- foreach $t (@items) {
- $d=~s/\.$t/$t/g;
- }
- }
- # hide internal labels to avoid pollution of name table...
- $d=~s/Lppcasm_/.Lppcasm_/gm;
- print $d;
-}
-
-sub do_aix {
- # AIX assembler is smart enough to please the linker without
- # making us do something special...
- print &data();
-}
-
-# MacOSX 32 bit
-sub do_osx {
- $d=&data();
- # Change the bn symbol prefix from '.' to '_'
- foreach $t (@items) {
- $d=~s/\.$t/_$t/g;
- }
- # Change .machine to something OS X asm will accept
- $d=~s/\.machine.*/.text/g;
- $d=~s/\#/;/g; # change comment from '#' to ';'
- print $d;
-}
-
-# BSD (Untested)
-sub do_bsd {
- $d=&data();
- foreach $t (@items) {
- $d=~s/\.$t/_$t/g;
- }
- print $d;
-}
-
-sub data {
- local($data)=<<EOF;
+$data=<<EOF;
#--------------------------------------------------------------------
#
#
@@ -297,33 +220,20 @@ sub data {
#
# Defines to be used in the assembly code.
#
-.set r0,0 # we use it as storage for value of 0
-.set SP,1 # preserved
-.set RTOC,2 # preserved
-.set r3,3 # 1st argument/return value
-.set r4,4 # 2nd argument/volatile register
-.set r5,5 # 3rd argument/volatile register
-.set r6,6 # ...
-.set r7,7
-.set r8,8
-.set r9,9
-.set r10,10
-.set r11,11
-.set r12,12
-.set r13,13 # not used, nor any other "below" it...
-
-.set BO_IF_NOT,4
-.set BO_IF,12
-.set BO_dCTR_NZERO,16
-.set BO_dCTR_ZERO,18
-.set BO_ALWAYS,20
-.set CR0_LT,0;
-.set CR0_GT,1;
-.set CR0_EQ,2
-.set CR1_FX,4;
-.set CR1_FEX,5;
-.set CR1_VX,6
-.set LR,8
+#.set r0,0 # we use it as storage for value of 0
+#.set SP,1 # preserved
+#.set RTOC,2 # preserved
+#.set r3,3 # 1st argument/return value
+#.set r4,4 # 2nd argument/volatile register
+#.set r5,5 # 3rd argument/volatile register
+#.set r6,6 # ...
+#.set r7,7
+#.set r8,8
+#.set r9,9
+#.set r10,10
+#.set r11,11
+#.set r12,12
+#.set r13,13 # not used, nor any other "below" it...
# Declare function names to be global
# NOTE: For gcc these names MUST be changed to remove
@@ -344,7 +254,7 @@ sub data {
# .text section
- .machine $ISA
+ .machine "any"
#
# NOTE: The following label name should be changed to
@@ -478,7 +388,7 @@ sub data {
$ST r9,`6*$BNSZ`(r3) #r[6]=c1
$ST r10,`7*$BNSZ`(r3) #r[7]=c2
- bclr BO_ALWAYS,CR0_LT
+ blr
.long 0x00000000
#
@@ -903,7 +813,7 @@ sub data {
$ST r9, `15*$BNSZ`(r3) #r[15]=c1;
- bclr BO_ALWAYS,CR0_LT
+ blr
.long 0x00000000
@@ -1055,7 +965,7 @@ sub data {
$ST r10,`6*$BNSZ`(r3) #r[6]=c1
$ST r11,`7*$BNSZ`(r3) #r[7]=c2
- bclr BO_ALWAYS,CR0_LT
+ blr
.long 0x00000000
#
@@ -1591,7 +1501,7 @@ sub data {
adde r10,r10,r9
$ST r12,`14*$BNSZ`(r3) #r[14]=c3;
$ST r10,`15*$BNSZ`(r3) #r[15]=c1;
- bclr BO_ALWAYS,CR0_LT
+ blr
.long 0x00000000
#
@@ -1623,7 +1533,7 @@ sub data {
subfc. r7,r0,r6 # If r6 is 0 then result is 0.
# if r6 > 0 then result !=0
# In either case carry bit is set.
- bc BO_IF,CR0_EQ,Lppcasm_sub_adios
+ beq Lppcasm_sub_adios
addi r4,r4,-$BNSZ
addi r3,r3,-$BNSZ
addi r5,r5,-$BNSZ
@@ -1635,11 +1545,11 @@ Lppcasm_sub_mainloop:
# if carry = 1 this is r7-r8. Else it
# is r7-r8 -1 as we need.
$STU r6,$BNSZ(r3)
- bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop
+ bdnz- Lppcasm_sub_mainloop
Lppcasm_sub_adios:
subfze r3,r0 # if carry bit is set then r3 = 0 else -1
andi. r3,r3,1 # keep only last bit.
- bclr BO_ALWAYS,CR0_LT
+ blr
.long 0x00000000
@@ -1670,7 +1580,7 @@ Lppcasm_sub_adios:
# check for r6 = 0. Is this needed?
#
addic. r6,r6,0 #test r6 and clear carry bit.
- bc BO_IF,CR0_EQ,Lppcasm_add_adios
+ beq Lppcasm_add_adios
addi r4,r4,-$BNSZ
addi r3,r3,-$BNSZ
addi r5,r5,-$BNSZ
@@ -1680,10 +1590,10 @@ Lppcasm_add_mainloop:
$LDU r8,$BNSZ(r5)
adde r8,r7,r8
$STU r8,$BNSZ(r3)
- bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop
+ bdnz- Lppcasm_add_mainloop
Lppcasm_add_adios:
addze r3,r0 #return carry bit.
- bclr BO_ALWAYS,CR0_LT
+ blr
.long 0x00000000
#
@@ -1707,24 +1617,24 @@ Lppcasm_add_adios:
# r5 = d
$UCMPI 0,r5,0 # compare r5 and 0
- bc BO_IF_NOT,CR0_EQ,Lppcasm_div1 # proceed if d!=0
+ bne Lppcasm_div1 # proceed if d!=0
li r3,-1 # d=0 return -1
- bclr BO_ALWAYS,CR0_LT
+ blr
Lppcasm_div1:
xor r0,r0,r0 #r0=0
li r8,$BITS
$CNTLZ. r7,r5 #r7 = num leading 0s in d.
- bc BO_IF,CR0_EQ,Lppcasm_div2 #proceed if no leading zeros
+ beq Lppcasm_div2 #proceed if no leading zeros
subf r8,r7,r8 #r8 = BN_num_bits_word(d)
$SHR. r9,r3,r8 #are there any bits above r8'th?
$TR 16,r9,r0 #if there're, signal to dump core...
Lppcasm_div2:
$UCMP 0,r3,r5 #h>=d?
- bc BO_IF,CR0_LT,Lppcasm_div3 #goto Lppcasm_div3 if not
+ blt Lppcasm_div3 #goto Lppcasm_div3 if not
subf r3,r5,r3 #h-=d ;
Lppcasm_div3: #r7 = BN_BITS2-i. so r7=i
cmpi 0,0,r7,0 # is (i == 0)?
- bc BO_IF,CR0_EQ,Lppcasm_div4
+ beq Lppcasm_div4
$SHL r3,r3,r7 # h = (h<< i)
$SHR r8,r4,r8 # r8 = (l >> BN_BITS2 -i)
$SHL r5,r5,r7 # d<<=i
@@ -1741,7 +1651,7 @@ Lppcasm_divouterloop:
$SHRI r11,r4,`$BITS/2` #r11= (l&BN_MASK2h)>>BN_BITS4
# compute here for innerloop.
$UCMP 0,r8,r9 # is (h>>BN_BITS4)==dh
- bc BO_IF_NOT,CR0_EQ,Lppcasm_div5 # goto Lppcasm_div5 if not
+ bne Lppcasm_div5 # goto Lppcasm_div5 if not
li r8,-1
$CLRU r8,r8,`$BITS/2` #q = BN_MASK2l
@@ -1762,9 +1672,9 @@ Lppcasm_divinnerloop:
# the following 2 instructions do that
$SHLI r7,r10,`$BITS/2` # r7 = (t<<BN_BITS4)
or r7,r7,r11 # r7|=((l&BN_MASK2h)>>BN_BITS4)
- $UCMP 1,r6,r7 # compare (tl <= r7)
- bc BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit
- bc BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit
+ $UCMP cr1,r6,r7 # compare (tl <= r7)
+ bne Lppcasm_divinnerexit
+ ble cr1,Lppcasm_divinnerexit
addi r8,r8,-1 #q--
subf r12,r9,r12 #th -=dh
$CLRU r10,r5,`$BITS/2` #r10=dl. t is no longer needed in loop.
@@ -1773,14 +1683,14 @@ Lppcasm_divinnerloop:
Lppcasm_divinnerexit:
$SHRI r10,r6,`$BITS/2` #t=(tl>>BN_BITS4)
$SHLI r11,r6,`$BITS/2` #tl=(tl<<BN_BITS4)&BN_MASK2h;
- $UCMP 1,r4,r11 # compare l and tl
+ $UCMP cr1,r4,r11 # compare l and tl
add r12,r12,r10 # th+=t
- bc BO_IF_NOT,CR1_FX,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7
+ bge cr1,Lppcasm_div7 # if (l>=tl) goto Lppcasm_div7
addi r12,r12,1 # th++
Lppcasm_div7:
subf r11,r11,r4 #r11=l-tl
- $UCMP 1,r3,r12 #compare h and th
- bc BO_IF_NOT,CR1_FX,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8
+ $UCMP cr1,r3,r12 #compare h and th
+ bge cr1,Lppcasm_div8 #if (h>=th) goto Lppcasm_div8
addi r8,r8,-1 # q--
add r3,r5,r3 # h+=d
Lppcasm_div8:
@@ -1791,12 +1701,12 @@ Lppcasm_div8:
# the following 2 instructions will do this.
$INSR r11,r12,`$BITS/2`,`$BITS/2` # r11 is the value we want rotated $BITS/2.
$ROTL r3,r11,`$BITS/2` # rotate by $BITS/2 and store in r3
- bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ;
+ bdz Lppcasm_div9 #if (count==0) break ;
$SHLI r0,r8,`$BITS/2` #ret =q<<BN_BITS4
b Lppcasm_divouterloop
Lppcasm_div9:
or r3,r8,r0
- bclr BO_ALWAYS,CR0_LT
+ blr
.long 0x00000000
#
@@ -1822,7 +1732,7 @@ Lppcasm_div9:
# No unrolling done here. Not performance critical.
addic. r5,r5,0 #test r5.
- bc BO_IF,CR0_EQ,Lppcasm_sqr_adios
+ beq Lppcasm_sqr_adios
addi r4,r4,-$BNSZ
addi r3,r3,-$BNSZ
mtctr r5
@@ -1833,9 +1743,9 @@ Lppcasm_sqr_mainloop:
$UMULH r8,r6,r6
$STU r7,$BNSZ(r3)
$STU r8,$BNSZ(r3)
- bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop
+ bdnz- Lppcasm_sqr_mainloop
Lppcasm_sqr_adios:
- bclr BO_ALWAYS,CR0_LT
+ blr
.long 0x00000000
@@ -1858,7 +1768,7 @@ Lppcasm_sqr_adios:
xor r0,r0,r0
xor r12,r12,r12 # used for carry
rlwinm. r7,r5,30,2,31 # num >> 2
- bc BO_IF,CR0_EQ,Lppcasm_mw_REM
+ beq Lppcasm_mw_REM
mtctr r7
Lppcasm_mw_LOOP:
#mul(rp[0],ap[0],w,c1);
@@ -1896,11 +1806,11 @@ Lppcasm_mw_LOOP:
addi r3,r3,`4*$BNSZ`
addi r4,r4,`4*$BNSZ`
- bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP
+ bdnz- Lppcasm_mw_LOOP
Lppcasm_mw_REM:
andi. r5,r5,0x3
- bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
+ beq Lppcasm_mw_OVER
#mul(rp[0],ap[0],w,c1);
$LD r8,`0*$BNSZ`(r4)
$UMULL r9,r6,r8
@@ -1912,7 +1822,7 @@ Lppcasm_mw_REM:
addi r5,r5,-1
cmpli 0,0,r5,0
- bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
+ beq Lppcasm_mw_OVER
#mul(rp[1],ap[1],w,c1);
@@ -1926,7 +1836,7 @@ Lppcasm_mw_REM:
addi r5,r5,-1
cmpli 0,0,r5,0
- bc BO_IF,CR0_EQ,Lppcasm_mw_OVER
+ beq Lppcasm_mw_OVER
#mul_add(rp[2],ap[2],w,c1);
$LD r8,`2*$BNSZ`(r4)
@@ -1939,7 +1849,7 @@ Lppcasm_mw_REM:
Lppcasm_mw_OVER:
addi r3,r12,0
- bclr BO_ALWAYS,CR0_LT
+ blr
.long 0x00000000
#
@@ -1964,7 +1874,7 @@ Lppcasm_mw_OVER:
xor r0,r0,r0 #r0 = 0
xor r12,r12,r12 #r12 = 0 . used for carry
rlwinm. r7,r5,30,2,31 # num >> 2
- bc BO_IF,CR0_EQ,Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover
+ beq Lppcasm_maw_leftover # if (num < 4) go LPPCASM_maw_leftover
mtctr r7
Lppcasm_maw_mainloop:
#mul_add(rp[0],ap[0],w,c1);
@@ -2017,11 +1927,11 @@ Lppcasm_maw_mainloop:
$ST r11,`3*$BNSZ`(r3)
addi r3,r3,`4*$BNSZ`
addi r4,r4,`4*$BNSZ`
- bc BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop
+ bdnz- Lppcasm_maw_mainloop
Lppcasm_maw_leftover:
andi. r5,r5,0x3
- bc BO_IF,CR0_EQ,Lppcasm_maw_adios
+ beq Lppcasm_maw_adios
addi r3,r3,-$BNSZ
addi r4,r4,-$BNSZ
#mul_add(rp[0],ap[0],w,c1);
@@ -2036,7 +1946,7 @@ Lppcasm_maw_leftover:
addze r12,r10
$ST r9,0(r3)
- bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
+ bdz Lppcasm_maw_adios
#mul_add(rp[1],ap[1],w,c1);
$LDU r8,$BNSZ(r4)
$UMULL r9,r6,r8
@@ -2048,7 +1958,7 @@ Lppcasm_maw_leftover:
addze r12,r10
$ST r9,0(r3)
- bc BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios
+ bdz Lppcasm_maw_adios
#mul_add(rp[2],ap[2],w,c1);
$LDU r8,$BNSZ(r4)
$UMULL r9,r6,r8
@@ -2062,17 +1972,10 @@ Lppcasm_maw_leftover:
Lppcasm_maw_adios:
addi r3,r12,0
- bclr BO_ALWAYS,CR0_LT
+ blr
.long 0x00000000
.align 4
EOF
- $data =~ s/\`([^\`]*)\`/eval $1/gem;
-
- # if some assembler chokes on some simplified mnemonic,
- # this is the spot to fix it up, e.g.:
- # GNU as doesn't seem to accept cmplw, 32-bit unsigned compare
- $data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm;
- # assembler X doesn't accept li, load immediate value
- #$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm;
- return($data);
-}
+$data =~ s/\`([^\`]*)\`/eval $1/gem;
+print $data;
+close STDOUT;
diff --git a/openssl/crypto/bn/asm/sparcv8plus.S b/openssl/crypto/bn/asm/sparcv8plus.S
index 8c56e2e7e..63de1860f 100644
--- a/openssl/crypto/bn/asm/sparcv8plus.S
+++ b/openssl/crypto/bn/asm/sparcv8plus.S
@@ -144,6 +144,19 @@
* }
*/
+#if defined(__SUNPRO_C) && defined(__sparcv9)
+ /* They've said -xarch=v9 at command line */
+ .register %g2,#scratch
+ .register %g3,#scratch
+# define FRAME_SIZE -192
+#elif defined(__GNUC__) && defined(__arch64__)
+ /* They've said -m64 at command line */
+ .register %g2,#scratch
+ .register %g3,#scratch
+# define FRAME_SIZE -192
+#else
+# define FRAME_SIZE -96
+#endif
/*
* GNU assembler can't stand stuw:-(
*/
@@ -619,8 +632,6 @@ bn_sub_words:
* Andy.
*/
-#define FRAME_SIZE -96
-
/*
* Here is register usage map for *all* routines below.
*/
diff --git a/openssl/crypto/bn/asm/x86_64-gcc.c b/openssl/crypto/bn/asm/x86_64-gcc.c
index f13f52dd8..acb0b4011 100644
--- a/openssl/crypto/bn/asm/x86_64-gcc.c
+++ b/openssl/crypto/bn/asm/x86_64-gcc.c
@@ -1,4 +1,5 @@
-#ifdef __SUNPRO_C
+#include "../bn_lcl.h"
+#if !(defined(__GNUC__) && __GNUC__>=2)
# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */
#else
/*
@@ -54,7 +55,15 @@
* machine.
*/
+#ifdef _WIN64
+#define BN_ULONG unsigned long long
+#else
#define BN_ULONG unsigned long
+#endif
+
+#undef mul
+#undef mul_add
+#undef sqr
/*
* "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
@@ -97,7 +106,7 @@
: "a"(a) \
: "cc");
-BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
+BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
BN_ULONG c1=0;
@@ -121,7 +130,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
return(c1);
}
-BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
+BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
{
BN_ULONG c1=0;
@@ -144,7 +153,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)
return(c1);
}
-void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n)
+void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
{
if (n <= 0) return;
@@ -175,14 +184,14 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
return ret;
}
-BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
+BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
{ BN_ULONG ret=0,i=0;
if (n <= 0) return 0;
asm (
" subq %2,%2 \n"
- ".align 16 \n"
+ ".p2align 4 \n"
"1: movq (%4,%2,8),%0 \n"
" adcq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
@@ -198,14 +207,14 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
}
#ifndef SIMICS
-BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n)
+BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
{ BN_ULONG ret=0,i=0;
if (n <= 0) return 0;
asm (
" subq %2,%2 \n"
- ".align 16 \n"
+ ".p2align 4 \n"
"1: movq (%4,%2,8),%0 \n"
" sbbq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
@@ -485,7 +494,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
r[7]=c2;
}
-void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
+void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;
@@ -561,7 +570,7 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
r[15]=c1;
}
-void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
+void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
BN_ULONG t1,t2;
BN_ULONG c1,c2,c3;
diff --git a/openssl/crypto/bn/asm/x86_64-mont.pl b/openssl/crypto/bn/asm/x86_64-mont.pl
index c43b69592..3b7a6f243 100644
--- a/openssl/crypto/bn/asm/x86_64-mont.pl
+++ b/openssl/crypto/bn/asm/x86_64-mont.pl
@@ -15,14 +15,18 @@
# respectful 50%. It remains to be seen if loop unrolling and
# dedicated squaring routine can provide further improvement...
-$output=shift;
+$flavour = shift;
+$output = shift;
+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
+
+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open STDOUT,"| $^X $xlate $output";
+open STDOUT,"| $^X $xlate $flavour $output";
# int bn_mul_mont(
$rp="%rdi"; # BN_ULONG *rp,
@@ -55,13 +59,14 @@ bn_mul_mont:
push %r15
mov ${num}d,${num}d
- lea 2($num),%rax
- mov %rsp,%rbp
- neg %rax
- lea (%rsp,%rax,8),%rsp # tp=alloca(8*(num+2))
+ lea 2($num),%r10
+ mov %rsp,%r11
+ neg %r10
+ lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+2))
and \$-1024,%rsp # minimize TLB usage
- mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp
+ mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
+.Lprologue:
mov %rdx,$bp # $bp reassigned, remember?
mov ($n0),$n0 # pull n0[0] value
@@ -197,18 +202,129 @@ bn_mul_mont:
dec $j
jge .Lcopy
- mov 8(%rsp,$num,8),%rsp # restore %rsp
+ mov 8(%rsp,$num,8),%rsi # restore %rsp
mov \$1,%rax
+ mov (%rsi),%r15
+ mov 8(%rsi),%r14
+ mov 16(%rsi),%r13
+ mov 24(%rsi),%r12
+ mov 32(%rsi),%rbp
+ mov 40(%rsi),%rbx
+ lea 48(%rsi),%rsp
+.Lepilogue:
+ ret
+.size bn_mul_mont,.-bn_mul_mont
+.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+.align 16
+___
+
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+# CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern __imp_RtlVirtualUnwind
+.type se_handler,\@abi-omnipotent
+.align 16
+se_handler:
+ push %rsi
+ push %rdi
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ pushfq
+ sub \$64,%rsp
+
+ mov 120($context),%rax # pull context->Rax
+ mov 248($context),%rbx # pull context->Rip
+
+ lea .Lprologue(%rip),%r10
+ cmp %r10,%rbx # context->Rip<.Lprologue
+ jb .Lin_prologue
+
+ mov 152($context),%rax # pull context->Rsp
+
+ lea .Lepilogue(%rip),%r10
+ cmp %r10,%rbx # context->Rip>=.Lepilogue
+ jae .Lin_prologue
+
+ mov 192($context),%r10 # pull $num
+ mov 8(%rax,%r10,8),%rax # pull saved stack pointer
+ lea 48(%rax),%rax
+
+ mov -8(%rax),%rbx
+ mov -16(%rax),%rbp
+ mov -24(%rax),%r12
+ mov -32(%rax),%r13
+ mov -40(%rax),%r14
+ mov -48(%rax),%r15
+ mov %rbx,144($context) # restore context->Rbx
+ mov %rbp,160($context) # restore context->Rbp
+ mov %r12,216($context) # restore context->R12
+ mov %r13,224($context) # restore context->R13
+ mov %r14,232($context) # restore context->R14
+ mov %r15,240($context) # restore context->R15
+
+.Lin_prologue:
+ mov 8(%rax),%rdi
+ mov 16(%rax),%rsi
+ mov %rax,152($context) # restore context->Rsp
+ mov %rsi,168($context) # restore context->Rsi
+ mov %rdi,176($context) # restore context->Rdi
+
+ mov 40($disp),%rdi # disp->ContextRecord
+ mov $context,%rsi # context
+ mov \$154,%ecx # sizeof(CONTEXT)
+ .long 0xa548f3fc # cld; rep movsq
+
+ mov $disp,%rsi
+ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
+ mov 8(%rsi),%rdx # arg2, disp->ImageBase
+ mov 0(%rsi),%r8 # arg3, disp->ControlPc
+ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
+ mov 40(%rsi),%r10 # disp->ContextRecord
+ lea 56(%rsi),%r11 # &disp->HandlerData
+ lea 24(%rsi),%r12 # &disp->EstablisherFrame
+ mov %r10,32(%rsp) # arg5
+ mov %r11,40(%rsp) # arg6
+ mov %r12,48(%rsp) # arg7
+ mov %rcx,56(%rsp) # arg8, (NULL)
+ call *__imp_RtlVirtualUnwind(%rip)
+
+ mov \$1,%eax # ExceptionContinueSearch
+ add \$64,%rsp
+ popfq
pop %r15
pop %r14
pop %r13
pop %r12
pop %rbp
pop %rbx
+ pop %rdi
+ pop %rsi
ret
-.size bn_mul_mont,.-bn_mul_mont
-.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
+.size se_handler,.-se_handler
+
+.section .pdata
+.align 4
+ .rva .LSEH_begin_bn_mul_mont
+ .rva .LSEH_end_bn_mul_mont
+ .rva .LSEH_info_bn_mul_mont
+
+.section .xdata
+.align 8
+.LSEH_info_bn_mul_mont:
+ .byte 9,0,0,0
+ .rva se_handler
___
+}
print $code;
close STDOUT;
diff --git a/openssl/crypto/bn/bn.h b/openssl/crypto/bn/bn.h
index f1719a587..e484b7fc1 100644
--- a/openssl/crypto/bn/bn.h
+++ b/openssl/crypto/bn/bn.h
@@ -56,6 +56,59 @@
* [including the GNU Public Licence.]
*/
/* ====================================================================
+ * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com). This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+/* ====================================================================
* Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
*
* Portions of the attached software ("Contribution") are developed by
@@ -77,6 +130,7 @@
#include <stdio.h> /* FILE */
#endif
#include <openssl/ossl_typ.h>
+#include <openssl/crypto.h>
#ifdef __cplusplus
extern "C" {
@@ -94,9 +148,11 @@ extern "C" {
/* #define BN_DEBUG */
/* #define BN_DEBUG_RAND */
+#ifndef OPENSSL_SMALL_FOOTPRINT
#define BN_MUL_COMBA
#define BN_SQR_COMBA
#define BN_RECURSION
+#endif
/* This next option uses the C libraries (2 word)/(1 word) function.
* If it is not defined, I use my C version (which is slower).
@@ -137,6 +193,8 @@ extern "C" {
#define BN_DEC_FMT1 "%lu"
#define BN_DEC_FMT2 "%019lu"
#define BN_DEC_NUM 19
+#define BN_HEX_FMT1 "%lX"
+#define BN_HEX_FMT2 "%016lX"
#endif
/* This is where the long long data type is 64 bits, but long is 32.
@@ -162,83 +220,37 @@ extern "C" {
#define BN_DEC_FMT1 "%llu"
#define BN_DEC_FMT2 "%019llu"
#define BN_DEC_NUM 19
+#define BN_HEX_FMT1 "%llX"
+#define BN_HEX_FMT2 "%016llX"
#endif
#ifdef THIRTY_TWO_BIT
#ifdef BN_LLONG
-# if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__)
+# if defined(_WIN32) && !defined(__GNUC__)
# define BN_ULLONG unsigned __int64
+# define BN_MASK (0xffffffffffffffffI64)
# else
# define BN_ULLONG unsigned long long
+# define BN_MASK (0xffffffffffffffffLL)
# endif
#endif
-#define BN_ULONG unsigned long
-#define BN_LONG long
+#define BN_ULONG unsigned int
+#define BN_LONG int
#define BN_BITS 64
#define BN_BYTES 4
#define BN_BITS2 32
#define BN_BITS4 16
-#ifdef OPENSSL_SYS_WIN32
-/* VC++ doesn't like the LL suffix */
-#define BN_MASK (0xffffffffffffffffL)
-#else
-#define BN_MASK (0xffffffffffffffffLL)
-#endif
#define BN_MASK2 (0xffffffffL)
#define BN_MASK2l (0xffff)
#define BN_MASK2h1 (0xffff8000L)
#define BN_MASK2h (0xffff0000L)
#define BN_TBIT (0x80000000L)
#define BN_DEC_CONV (1000000000L)
-#define BN_DEC_FMT1 "%lu"
-#define BN_DEC_FMT2 "%09lu"
-#define BN_DEC_NUM 9
-#endif
-
-#ifdef SIXTEEN_BIT
-#ifndef BN_DIV2W
-#define BN_DIV2W
-#endif
-#define BN_ULLONG unsigned long
-#define BN_ULONG unsigned short
-#define BN_LONG short
-#define BN_BITS 32
-#define BN_BYTES 2
-#define BN_BITS2 16
-#define BN_BITS4 8
-#define BN_MASK (0xffffffff)
-#define BN_MASK2 (0xffff)
-#define BN_MASK2l (0xff)
-#define BN_MASK2h1 (0xff80)
-#define BN_MASK2h (0xff00)
-#define BN_TBIT (0x8000)
-#define BN_DEC_CONV (100000)
#define BN_DEC_FMT1 "%u"
-#define BN_DEC_FMT2 "%05u"
-#define BN_DEC_NUM 5
-#endif
-
-#ifdef EIGHT_BIT
-#ifndef BN_DIV2W
-#define BN_DIV2W
-#endif
-#define BN_ULLONG unsigned short
-#define BN_ULONG unsigned char
-#define BN_LONG char
-#define BN_BITS 16
-#define BN_BYTES 1
-#define BN_BITS2 8
-#define BN_BITS4 4
-#define BN_MASK (0xffff)
-#define BN_MASK2 (0xff)
-#define BN_MASK2l (0xf)
-#define BN_MASK2h1 (0xf8)
-#define BN_MASK2h (0xf0)
-#define BN_TBIT (0x80)
-#define BN_DEC_CONV (100)
-#define BN_DEC_FMT1 "%u"
-#define BN_DEC_FMT2 "%02u"
-#define BN_DEC_NUM 2
+#define BN_DEC_FMT2 "%09u"
+#define BN_DEC_NUM 9
+#define BN_HEX_FMT1 "%X"
+#define BN_HEX_FMT2 "%08X"
#endif
#define BN_DEFAULT_BITS 1280
@@ -303,12 +315,8 @@ struct bn_mont_ctx_st
BIGNUM N; /* The modulus */
BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1
* (Ni is only stored for bignum algorithm) */
-#if 0
- /* OpenSSL 0.9.9 preview: */
- BN_ULONG n0[2];/* least significant word(s) of Ni */
-#else
- BN_ULONG n0; /* least significant word of Ni */
-#endif
+ BN_ULONG n0[2];/* least significant word(s) of Ni;
+ (type changed with 0.9.9, was "BN_ULONG n0;" before) */
int flags;
};
@@ -504,6 +512,7 @@ char * BN_bn2hex(const BIGNUM *a);
char * BN_bn2dec(const BIGNUM *a);
int BN_hex2bn(BIGNUM **a, const char *str);
int BN_dec2bn(BIGNUM **a, const char *str);
+int BN_asc2bn(BIGNUM **a, const char *str);
int BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx);
int BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */
BIGNUM *BN_mod_inverse(BIGNUM *ret,
@@ -531,17 +540,6 @@ int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb);
int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx,
int do_trial_division, BN_GENCB *cb);
-int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx);
-
-int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
- const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2,
- const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb);
-int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
- BIGNUM *Xp1, BIGNUM *Xp2,
- const BIGNUM *Xp,
- const BIGNUM *e, BN_CTX *ctx,
- BN_GENCB *cb);
-
BN_MONT_CTX *BN_MONT_CTX_new(void );
void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,
@@ -560,19 +558,22 @@ BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
#define BN_BLINDING_NO_UPDATE 0x00000001
#define BN_BLINDING_NO_RECREATE 0x00000002
-BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod);
+BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod);
void BN_BLINDING_free(BN_BLINDING *b);
int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx);
int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *);
int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *);
+#ifndef OPENSSL_NO_DEPRECATED
unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *);
void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long);
+#endif
+CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *);
unsigned long BN_BLINDING_get_flags(const BN_BLINDING *);
void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long);
BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
- const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx,
+ const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),
BN_MONT_CTX *m_ctx);
@@ -625,24 +626,24 @@ int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
* t^p[0] + t^p[1] + ... + t^p[k]
* where m = p[0] > p[1] > ... > p[k] = 0.
*/
-int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]);
+int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]);
/* r = a mod p */
int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
- const unsigned int p[], BN_CTX *ctx); /* r = (a * b) mod p */
-int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[],
+ const int p[], BN_CTX *ctx); /* r = (a * b) mod p */
+int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[],
BN_CTX *ctx); /* r = (a * a) mod p */
-int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const unsigned int p[],
+int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const int p[],
BN_CTX *ctx); /* r = (1 / b) mod p */
int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
- const unsigned int p[], BN_CTX *ctx); /* r = (a / b) mod p */
+ const int p[], BN_CTX *ctx); /* r = (a / b) mod p */
int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
- const unsigned int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */
+ const int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */
int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a,
- const unsigned int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */
+ const int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */
int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a,
- const unsigned int p[], BN_CTX *ctx); /* r^2 + r = a mod p */
-int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max);
-int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a);
+ const int p[], BN_CTX *ctx); /* r^2 + r = a mod p */
+int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max);
+int BN_GF2m_arr2poly(const int p[], BIGNUM *a);
/* faster mod functions for the 'NIST primes'
* 0 <= a < p^2 */
@@ -751,10 +752,12 @@ int RAND_pseudo_bytes(unsigned char *buf,int num);
#define bn_correct_top(a) \
{ \
BN_ULONG *ftl; \
- if ((a)->top > 0) \
+ int tmp_top = (a)->top; \
+ if (tmp_top > 0) \
{ \
- for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
- if (*(ftl--)) break; \
+ for (ftl= &((a)->d[tmp_top-1]); tmp_top > 0; tmp_top--) \
+ if (*(ftl--)) break; \
+ (a)->top = tmp_top; \
} \
bn_pollute(a); \
}
diff --git a/openssl/crypto/bn/bn_asm.c b/openssl/crypto/bn/bn_asm.c
index 99bc2de49..c43c91cc0 100644
--- a/openssl/crypto/bn/bn_asm.c
+++ b/openssl/crypto/bn/bn_asm.c
@@ -75,6 +75,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
assert(num >= 0);
if (num <= 0) return(c1);
+#ifndef OPENSSL_SMALL_FOOTPRINT
while (num&~3)
{
mul_add(rp[0],ap[0],w,c1);
@@ -83,11 +84,11 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
mul_add(rp[3],ap[3],w,c1);
ap+=4; rp+=4; num-=4;
}
- if (num)
+#endif
+ while (num)
{
- mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
- mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
- mul_add(rp[2],ap[2],w,c1); return c1;
+ mul_add(rp[0],ap[0],w,c1);
+ ap++; rp++; num--;
}
return(c1);
@@ -100,6 +101,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
assert(num >= 0);
if (num <= 0) return(c1);
+#ifndef OPENSSL_SMALL_FOOTPRINT
while (num&~3)
{
mul(rp[0],ap[0],w,c1);
@@ -108,11 +110,11 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
mul(rp[3],ap[3],w,c1);
ap+=4; rp+=4; num-=4;
}
- if (num)
+#endif
+ while (num)
{
- mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
- mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
- mul(rp[2],ap[2],w,c1);
+ mul(rp[0],ap[0],w,c1);
+ ap++; rp++; num--;
}
return(c1);
}
@@ -121,6 +123,8 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
{
assert(n >= 0);
if (n <= 0) return;
+
+#ifndef OPENSSL_SMALL_FOOTPRINT
while (n&~3)
{
sqr(r[0],r[1],a[0]);
@@ -129,11 +133,11 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
sqr(r[6],r[7],a[3]);
a+=4; r+=8; n-=4;
}
- if (n)
+#endif
+ while (n)
{
- sqr(r[0],r[1],a[0]); if (--n == 0) return;
- sqr(r[2],r[3],a[1]); if (--n == 0) return;
- sqr(r[4],r[5],a[2]);
+ sqr(r[0],r[1],a[0]);
+ a++; r+=2; n--;
}
}
@@ -150,18 +154,20 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
bl=LBITS(w);
bh=HBITS(w);
- for (;;)
+#ifndef OPENSSL_SMALL_FOOTPRINT
+ while (num&~3)
{
mul_add(rp[0],ap[0],bl,bh,c);
- if (--num == 0) break;
mul_add(rp[1],ap[1],bl,bh,c);
- if (--num == 0) break;
mul_add(rp[2],ap[2],bl,bh,c);
- if (--num == 0) break;
mul_add(rp[3],ap[3],bl,bh,c);
- if (--num == 0) break;
- ap+=4;
- rp+=4;
+ ap+=4; rp+=4; num-=4;
+ }
+#endif
+ while (num)
+ {
+ mul_add(rp[0],ap[0],bl,bh,c);
+ ap++; rp++; num--;
}
return(c);
}
@@ -177,18 +183,20 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
bl=LBITS(w);
bh=HBITS(w);
- for (;;)
+#ifndef OPENSSL_SMALL_FOOTPRINT
+ while (num&~3)
{
mul(rp[0],ap[0],bl,bh,carry);
- if (--num == 0) break;
mul(rp[1],ap[1],bl,bh,carry);
- if (--num == 0) break;
mul(rp[2],ap[2],bl,bh,carry);
- if (--num == 0) break;
mul(rp[3],ap[3],bl,bh,carry);
- if (--num == 0) break;
- ap+=4;
- rp+=4;
+ ap+=4; rp+=4; num-=4;
+ }
+#endif
+ while (num)
+ {
+ mul(rp[0],ap[0],bl,bh,carry);
+ ap++; rp++; num--;
}
return(carry);
}
@@ -197,22 +205,21 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
{
assert(n >= 0);
if (n <= 0) return;
- for (;;)
+
+#ifndef OPENSSL_SMALL_FOOTPRINT
+ while (n&~3)
{
sqr64(r[0],r[1],a[0]);
- if (--n == 0) break;
-
sqr64(r[2],r[3],a[1]);
- if (--n == 0) break;
-
sqr64(r[4],r[5],a[2]);
- if (--n == 0) break;
-
sqr64(r[6],r[7],a[3]);
- if (--n == 0) break;
-
- a+=4;
- r+=8;
+ a+=4; r+=8; n-=4;
+ }
+#endif
+ while (n)
+ {
+ sqr64(r[0],r[1],a[0]);
+ a++; r+=2; n--;
}
}
@@ -303,31 +310,30 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
assert(n >= 0);
if (n <= 0) return((BN_ULONG)0);
- for (;;)
+#ifndef OPENSSL_SMALL_FOOTPRINT
+ while (n&~3)
{
ll+=(BN_ULLONG)a[0]+b[0];
r[0]=(BN_ULONG)ll&BN_MASK2;
ll>>=BN_BITS2;
- if (--n <= 0) break;
-
ll+=(BN_ULLONG)a[1]+b[1];
r[1]=(BN_ULONG)ll&BN_MASK2;
ll>>=BN_BITS2;
- if (--n <= 0) break;
-
ll+=(BN_ULLONG)a[2]+b[2];
r[2]=(BN_ULONG)ll&BN_MASK2;
ll>>=BN_BITS2;
- if (--n <= 0) break;
-
ll+=(BN_ULLONG)a[3]+b[3];
r[3]=(BN_ULONG)ll&BN_MASK2;
ll>>=BN_BITS2;
- if (--n <= 0) break;
-
- a+=4;
- b+=4;
- r+=4;
+ a+=4; b+=4; r+=4; n-=4;
+ }
+#endif
+ while (n)
+ {
+ ll+=(BN_ULLONG)a[0]+b[0];
+ r[0]=(BN_ULONG)ll&BN_MASK2;
+ ll>>=BN_BITS2;
+ a++; b++; r++; n--;
}
return((BN_ULONG)ll);
}
@@ -340,7 +346,8 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
if (n <= 0) return((BN_ULONG)0);
c=0;
- for (;;)
+#ifndef OPENSSL_SMALL_FOOTPRINT
+ while (n&~3)
{
t=a[0];
t=(t+c)&BN_MASK2;
@@ -348,35 +355,36 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
l=(t+b[0])&BN_MASK2;
c+=(l < t);
r[0]=l;
- if (--n <= 0) break;
-
t=a[1];
t=(t+c)&BN_MASK2;
c=(t < c);
l=(t+b[1])&BN_MASK2;
c+=(l < t);
r[1]=l;
- if (--n <= 0) break;
-
t=a[2];
t=(t+c)&BN_MASK2;
c=(t < c);
l=(t+b[2])&BN_MASK2;
c+=(l < t);
r[2]=l;
- if (--n <= 0) break;
-
t=a[3];
t=(t+c)&BN_MASK2;
c=(t < c);
l=(t+b[3])&BN_MASK2;
c+=(l < t);
r[3]=l;
- if (--n <= 0) break;
-
- a+=4;
- b+=4;
- r+=4;
+ a+=4; b+=4; r+=4; n-=4;
+ }
+#endif
+ while(n)
+ {
+ t=a[0];
+ t=(t+c)&BN_MASK2;
+ c=(t < c);
+ l=(t+b[0])&BN_MASK2;
+ c+=(l < t);
+ r[0]=l;
+ a++; b++; r++; n--;
}
return((BN_ULONG)c);
}
@@ -390,36 +398,35 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
assert(n >= 0);
if (n <= 0) return((BN_ULONG)0);
- for (;;)
+#ifndef OPENSSL_SMALL_FOOTPRINT
+ while (n&~3)
{
t1=a[0]; t2=b[0];
r[0]=(t1-t2-c)&BN_MASK2;
if (t1 != t2) c=(t1 < t2);
- if (--n <= 0) break;
-
t1=a[1]; t2=b[1];
r[1]=(t1-t2-c)&BN_MASK2;
if (t1 != t2) c=(t1 < t2);
- if (--n <= 0) break;
-
t1=a[2]; t2=b[2];
r[2]=(t1-t2-c)&BN_MASK2;
if (t1 != t2) c=(t1 < t2);
- if (--n <= 0) break;
-
t1=a[3]; t2=b[3];
r[3]=(t1-t2-c)&BN_MASK2;
if (t1 != t2) c=(t1 < t2);
- if (--n <= 0) break;
-
- a+=4;
- b+=4;
- r+=4;
+ a+=4; b+=4; r+=4; n-=4;
+ }
+#endif
+ while (n)
+ {
+ t1=a[0]; t2=b[0];
+ r[0]=(t1-t2-c)&BN_MASK2;
+ if (t1 != t2) c=(t1 < t2);
+ a++; b++; r++; n--;
}
return(c);
}
-#ifdef BN_MUL_COMBA
+#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
#undef bn_mul_comba8
#undef bn_mul_comba4
@@ -820,18 +827,134 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
r[6]=c1;
r[7]=c2;
}
+
+#ifdef OPENSSL_NO_ASM
+#ifdef OPENSSL_BN_ASM_MONT
+#include <alloca.h>
+/*
+ * This is essentially reference implementation, which may or may not
+ * result in performance improvement. E.g. on IA-32 this routine was
+ * observed to give 40% faster rsa1024 private key operations and 10%
+ * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
+ * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
+ * reference implementation, one to be used as starting point for
+ * platform-specific assembler. Mentioned numbers apply to compiler
+ * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
+ * can vary not only from platform to platform, but even for compiler
+ * versions. Assembler vs. assembler improvement coefficients can
+ * [and are known to] differ and are to be documented elsewhere.
+ */
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)
+ {
+ BN_ULONG c0,c1,ml,*tp,n0;
+#ifdef mul64
+ BN_ULONG mh;
+#endif
+ volatile BN_ULONG *vp;
+ int i=0,j;
+
+#if 0 /* template for platform-specific implementation */
+ if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num);
+#endif
+ vp = tp = alloca((num+2)*sizeof(BN_ULONG));
+
+ n0 = *n0p;
+
+ c0 = 0;
+ ml = bp[0];
+#ifdef mul64
+ mh = HBITS(ml);
+ ml = LBITS(ml);
+ for (j=0;j<num;++j)
+ mul(tp[j],ap[j],ml,mh,c0);
+#else
+ for (j=0;j<num;++j)
+ mul(tp[j],ap[j],ml,c0);
+#endif
+
+ tp[num] = c0;
+ tp[num+1] = 0;
+ goto enter;
+
+ for(i=0;i<num;i++)
+ {
+ c0 = 0;
+ ml = bp[i];
+#ifdef mul64
+ mh = HBITS(ml);
+ ml = LBITS(ml);
+ for (j=0;j<num;++j)
+ mul_add(tp[j],ap[j],ml,mh,c0);
+#else
+ for (j=0;j<num;++j)
+ mul_add(tp[j],ap[j],ml,c0);
+#endif
+ c1 = (tp[num] + c0)&BN_MASK2;
+ tp[num] = c1;
+ tp[num+1] = (c1<c0?1:0);
+ enter:
+ c1 = tp[0];
+ ml = (c1*n0)&BN_MASK2;
+ c0 = 0;
+#ifdef mul64
+ mh = HBITS(ml);
+ ml = LBITS(ml);
+ mul_add(c1,np[0],ml,mh,c0);
+#else
+ mul_add(c1,ml,np[0],c0);
+#endif
+ for(j=1;j<num;j++)
+ {
+ c1 = tp[j];
+#ifdef mul64
+ mul_add(c1,np[j],ml,mh,c0);
+#else
+ mul_add(c1,ml,np[j],c0);
+#endif
+ tp[j-1] = c1&BN_MASK2;
+ }
+ c1 = (tp[num] + c0)&BN_MASK2;
+ tp[num-1] = c1;
+ tp[num] = tp[num+1] + (c1<c0?1:0);
+ }
+
+ if (tp[num]!=0 || tp[num-1]>=np[num-1])
+ {
+ c0 = bn_sub_words(rp,tp,np,num);
+ if (tp[num]!=0 || c0==0)
+ {
+ for(i=0;i<num+2;i++) vp[i] = 0;
+ return 1;
+ }
+ }
+ for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0;
+ vp[num] = 0;
+ vp[num+1] = 0;
+ return 1;
+ }
+#else
+/*
+ * Return value of 0 indicates that multiplication/convolution was not
+ * performed to signal the caller to fall down to alternative/original
+ * code-path.
+ */
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)
+{ return 0; }
+#endif /* OPENSSL_BN_ASM_MONT */
+#endif
+
#else /* !BN_MUL_COMBA */
/* hmm... is it faster just to do a multiply? */
#undef bn_sqr_comba4
-void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a)
+void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
{
BN_ULONG t[8];
bn_sqr_normal(r,a,4,t);
}
#undef bn_sqr_comba8
-void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)
+void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
{
BN_ULONG t[16];
bn_sqr_normal(r,a,8,t);
@@ -857,4 +980,51 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
}
+#ifdef OPENSSL_NO_ASM
+#ifdef OPENSSL_BN_ASM_MONT
+#include <alloca.h>
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)
+ {
+ BN_ULONG c0,c1,*tp,n0=*n0p;
+ volatile BN_ULONG *vp;
+ int i=0,j;
+
+ vp = tp = alloca((num+2)*sizeof(BN_ULONG));
+
+ for(i=0;i<=num;i++) tp[i]=0;
+
+ for(i=0;i<num;i++)
+ {
+ c0 = bn_mul_add_words(tp,ap,num,bp[i]);
+ c1 = (tp[num] + c0)&BN_MASK2;
+ tp[num] = c1;
+ tp[num+1] = (c1<c0?1:0);
+
+ c0 = bn_mul_add_words(tp,np,num,tp[0]*n0);
+ c1 = (tp[num] + c0)&BN_MASK2;
+ tp[num] = c1;
+ tp[num+1] += (c1<c0?1:0);
+ for(j=0;j<=num;j++) tp[j]=tp[j+1];
+ }
+
+ if (tp[num]!=0 || tp[num-1]>=np[num-1])
+ {
+ c0 = bn_sub_words(rp,tp,np,num);
+ if (tp[num]!=0 || c0==0)
+ {
+ for(i=0;i<num+2;i++) vp[i] = 0;
+ return 1;
+ }
+ }
+ for(i=0;i<num;i++) rp[i] = tp[i], vp[i] = 0;
+ vp[num] = 0;
+ vp[num+1] = 0;
+ return 1;
+ }
+#else
+int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num)
+{ return 0; }
+#endif /* OPENSSL_BN_ASM_MONT */
+#endif
+
#endif /* !BN_MUL_COMBA */
diff --git a/openssl/crypto/bn/bn_blind.c b/openssl/crypto/bn/bn_blind.c
index c11fb4ccc..e060592fd 100644
--- a/openssl/crypto/bn/bn_blind.c
+++ b/openssl/crypto/bn/bn_blind.c
@@ -1,6 +1,6 @@
/* crypto/bn/bn_blind.c */
/* ====================================================================
- * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
+ * Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -121,8 +121,11 @@ struct bn_blinding_st
BIGNUM *Ai;
BIGNUM *e;
BIGNUM *mod; /* just a reference */
+#ifndef OPENSSL_NO_DEPRECATED
unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b;
* used only by crypto/rsa/rsa_eay.c, rsa_lib.c */
+#endif
+ CRYPTO_THREADID tid;
unsigned int counter;
unsigned long flags;
BN_MONT_CTX *m_ctx;
@@ -131,7 +134,7 @@ struct bn_blinding_st
BN_MONT_CTX *m_ctx);
};
-BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod)
+BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
{
BN_BLINDING *ret=NULL;
@@ -158,6 +161,7 @@ BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGN
BN_set_flags(ret->mod, BN_FLG_CONSTTIME);
ret->counter = BN_BLINDING_COUNTER;
+ CRYPTO_THREADID_current(&ret->tid);
return(ret);
err:
if (ret != NULL) BN_BLINDING_free(ret);
@@ -263,6 +267,7 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *ct
return(ret);
}
+#ifndef OPENSSL_NO_DEPRECATED
unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b)
{
return b->thread_id;
@@ -272,6 +277,12 @@ void BN_BLINDING_set_thread_id(BN_BLINDING *b, unsigned long n)
{
b->thread_id = n;
}
+#endif
+
+CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *b)
+ {
+ return &b->tid;
+ }
unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b)
{
@@ -284,7 +295,7 @@ void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags)
}
BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
- const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx,
+ const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),
BN_MONT_CTX *m_ctx)
diff --git a/openssl/crypto/bn/bn_ctx.c b/openssl/crypto/bn/bn_ctx.c
index b3452f1a9..3f2256f67 100644
--- a/openssl/crypto/bn/bn_ctx.c
+++ b/openssl/crypto/bn/bn_ctx.c
@@ -161,7 +161,7 @@ static void ctxdbg(BN_CTX *ctx)
fprintf(stderr,"(%08x): ", (unsigned int)ctx);
while(bnidx < ctx->used)
{
- fprintf(stderr,"%02x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax);
+ fprintf(stderr,"%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax);
if(!(bnidx % BN_CTX_POOL_SIZE))
item = item->next;
}
@@ -171,8 +171,8 @@ static void ctxdbg(BN_CTX *ctx)
while(fpidx < stack->depth)
{
while(bnidx++ < stack->indexes[fpidx])
- fprintf(stderr," ");
- fprintf(stderr,"^^ ");
+ fprintf(stderr," ");
+ fprintf(stderr,"^^^ ");
bnidx++;
fpidx++;
}
diff --git a/openssl/crypto/bn/bn_div.c b/openssl/crypto/bn/bn_div.c
index 1e8e57626..802a43d64 100644
--- a/openssl/crypto/bn/bn_div.c
+++ b/openssl/crypto/bn/bn_div.c
@@ -102,7 +102,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
/* The next 2 are needed so we can do a dv->d[0]|=1 later
* since BN_lshift1 will only work once there is a value :-) */
BN_zero(dv);
- bn_wexpand(dv,1);
+ if(bn_wexpand(dv,1) == NULL) goto end;
dv->top=1;
if (!BN_lshift(D,D,nm-nd)) goto end;
@@ -229,7 +229,8 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
if (dv == NULL)
res=BN_CTX_get(ctx);
else res=dv;
- if (sdiv == NULL || res == NULL) goto err;
+ if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL)
+ goto err;
/* First we normalise the numbers */
norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
@@ -336,7 +337,7 @@ X) -> 0x%08X\n",
t2 -= d1;
}
#else /* !BN_LLONG */
- BN_ULONG t2l,t2h,ql,qh;
+ BN_ULONG t2l,t2h;
q=bn_div_words(n0,n1,d0);
#ifdef BN_DEBUG_LEVITTE
@@ -354,9 +355,12 @@ X) -> 0x%08X\n",
t2l = d1 * q;
t2h = BN_UMULT_HIGH(d1,q);
#else
+ {
+ BN_ULONG ql, qh;
t2l=LBITS(d1); t2h=HBITS(d1);
ql =LBITS(q); qh =HBITS(q);
mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
+ }
#endif
for (;;)
@@ -560,7 +564,7 @@ X) -> 0x%08X\n",
t2 -= d1;
}
#else /* !BN_LLONG */
- BN_ULONG t2l,t2h,ql,qh;
+ BN_ULONG t2l,t2h;
q=bn_div_words(n0,n1,d0);
#ifdef BN_DEBUG_LEVITTE
@@ -578,9 +582,12 @@ X) -> 0x%08X\n",
t2l = d1 * q;
t2h = BN_UMULT_HIGH(d1,q);
#else
+ {
+ BN_ULONG ql, qh;
t2l=LBITS(d1); t2h=HBITS(d1);
ql =LBITS(q); qh =HBITS(q);
mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
+ }
#endif
for (;;)
diff --git a/openssl/crypto/bn/bn_exp.c b/openssl/crypto/bn/bn_exp.c
index 70a33f0d9..d9b6c737f 100644
--- a/openssl/crypto/bn/bn_exp.c
+++ b/openssl/crypto/bn/bn_exp.c
@@ -134,7 +134,8 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
rr = BN_CTX_get(ctx);
else
rr = r;
- if ((v = BN_CTX_get(ctx)) == NULL) goto err;
+ v = BN_CTX_get(ctx);
+ if (rr == NULL || v == NULL) goto err;
if (BN_copy(v,a) == NULL) goto err;
bits=BN_num_bits(p);
diff --git a/openssl/crypto/bn/bn_gf2m.c b/openssl/crypto/bn/bn_gf2m.c
index 306f029f2..527b0fa15 100644
--- a/openssl/crypto/bn/bn_gf2m.c
+++ b/openssl/crypto/bn/bn_gf2m.c
@@ -121,74 +121,12 @@ static const BN_ULONG SQR_tb[16] =
SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \
SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF]
#endif
-#ifdef SIXTEEN_BIT
-#define SQR1(w) \
- SQR_tb[(w) >> 12 & 0xF] << 8 | SQR_tb[(w) >> 8 & 0xF]
-#define SQR0(w) \
- SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF]
-#endif
-#ifdef EIGHT_BIT
-#define SQR1(w) \
- SQR_tb[(w) >> 4 & 0xF]
-#define SQR0(w) \
- SQR_tb[(w) & 15]
-#endif
/* Product of two polynomials a, b each with degree < BN_BITS2 - 1,
* result is a polynomial r with degree < 2 * BN_BITS - 1
* The caller MUST ensure that the variables have the right amount
* of space allocated.
*/
-#ifdef EIGHT_BIT
-static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
- {
- register BN_ULONG h, l, s;
- BN_ULONG tab[4], top1b = a >> 7;
- register BN_ULONG a1, a2;
-
- a1 = a & (0x7F); a2 = a1 << 1;
-
- tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
-
- s = tab[b & 0x3]; l = s;
- s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 6;
- s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 4;
- s = tab[b >> 6 ]; l ^= s << 6; h ^= s >> 2;
-
- /* compensate for the top bit of a */
-
- if (top1b & 01) { l ^= b << 7; h ^= b >> 1; }
-
- *r1 = h; *r0 = l;
- }
-#endif
-#ifdef SIXTEEN_BIT
-static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
- {
- register BN_ULONG h, l, s;
- BN_ULONG tab[4], top1b = a >> 15;
- register BN_ULONG a1, a2;
-
- a1 = a & (0x7FFF); a2 = a1 << 1;
-
- tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2;
-
- s = tab[b & 0x3]; l = s;
- s = tab[b >> 2 & 0x3]; l ^= s << 2; h = s >> 14;
- s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 12;
- s = tab[b >> 6 & 0x3]; l ^= s << 6; h ^= s >> 10;
- s = tab[b >> 8 & 0x3]; l ^= s << 8; h ^= s >> 8;
- s = tab[b >>10 & 0x3]; l ^= s << 10; h ^= s >> 6;
- s = tab[b >>12 & 0x3]; l ^= s << 12; h ^= s >> 4;
- s = tab[b >>14 ]; l ^= s << 14; h ^= s >> 2;
-
- /* compensate for the top bit of a */
-
- if (top1b & 01) { l ^= b << 15; h ^= b >> 1; }
-
- *r1 = h; *r0 = l;
- }
-#endif
#ifdef THIRTY_TWO_BIT
static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)
{
@@ -294,7 +232,8 @@ int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
if (a->top < b->top) { at = b; bt = a; }
else { at = a; bt = b; }
- bn_wexpand(r, at->top);
+ if(bn_wexpand(r, at->top) == NULL)
+ return 0;
for (i = 0; i < bt->top; i++)
{
@@ -320,7 +259,7 @@ int BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
/* Performs modular reduction of a and store result in r. r could be a. */
-int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
+int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])
{
int j, k;
int n, dN, d0, d1;
@@ -421,11 +360,11 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])
int BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)
{
int ret = 0;
- const int max = BN_num_bits(p);
- unsigned int *arr=NULL;
+ const int max = BN_num_bits(p) + 1;
+ int *arr=NULL;
bn_check_top(a);
bn_check_top(p);
- if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+ if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
ret = BN_GF2m_poly2arr(p, arr, max);
if (!ret || ret > max)
{
@@ -443,7 +382,7 @@ err:
/* Compute the product of two polynomials a and b, reduce modulo p, and store
* the result in r. r could be a or b; a could be b.
*/
-int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)
{
int zlen, i, j, k, ret = 0;
BIGNUM *s;
@@ -499,12 +438,12 @@ err:
int BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
{
int ret = 0;
- const int max = BN_num_bits(p);
- unsigned int *arr=NULL;
+ const int max = BN_num_bits(p) + 1;
+ int *arr=NULL;
bn_check_top(a);
bn_check_top(b);
bn_check_top(p);
- if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+ if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
ret = BN_GF2m_poly2arr(p, arr, max);
if (!ret || ret > max)
{
@@ -520,7 +459,7 @@ err:
/* Square a, reduce the result mod p, and store it in a. r could be a. */
-int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)
{
int i, ret = 0;
BIGNUM *s;
@@ -555,12 +494,12 @@ err:
int BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
{
int ret = 0;
- const int max = BN_num_bits(p);
- unsigned int *arr=NULL;
+ const int max = BN_num_bits(p) + 1;
+ int *arr=NULL;
bn_check_top(a);
bn_check_top(p);
- if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+ if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
ret = BN_GF2m_poly2arr(p, arr, max);
if (!ret || ret > max)
{
@@ -642,7 +581,7 @@ err:
* function is only provided for convenience; for best performance, use the
* BN_GF2m_mod_inv function.
*/
-int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], BN_CTX *ctx)
{
BIGNUM *field;
int ret = 0;
@@ -768,7 +707,7 @@ err:
* function is only provided for convenience; for best performance, use the
* BN_GF2m_mod_div function.
*/
-int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const int p[], BN_CTX *ctx)
{
BIGNUM *field;
int ret = 0;
@@ -793,7 +732,7 @@ err:
* the result in r. r could be a.
* Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363.
*/
-int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)
{
int ret = 0, i, n;
BIGNUM *u;
@@ -839,12 +778,12 @@ err:
int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)
{
int ret = 0;
- const int max = BN_num_bits(p);
- unsigned int *arr=NULL;
+ const int max = BN_num_bits(p) + 1;
+ int *arr=NULL;
bn_check_top(a);
bn_check_top(b);
bn_check_top(p);
- if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+ if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
ret = BN_GF2m_poly2arr(p, arr, max);
if (!ret || ret > max)
{
@@ -862,7 +801,7 @@ err:
* the result in r. r could be a.
* Uses exponentiation as in algorithm A.4.1 from IEEE P1363.
*/
-int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)
{
int ret = 0;
BIGNUM *u;
@@ -898,11 +837,11 @@ err:
int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
{
int ret = 0;
- const int max = BN_num_bits(p);
- unsigned int *arr=NULL;
+ const int max = BN_num_bits(p) + 1;
+ int *arr=NULL;
bn_check_top(a);
bn_check_top(p);
- if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err;
+ if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;
ret = BN_GF2m_poly2arr(p, arr, max);
if (!ret || ret > max)
{
@@ -919,10 +858,9 @@ err:
/* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0.
* Uses algorithms A.4.7 and A.4.6 from IEEE P1363.
*/
-int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p[], BN_CTX *ctx)
+int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], BN_CTX *ctx)
{
- int ret = 0, count = 0;
- unsigned int j;
+ int ret = 0, count = 0, j;
BIGNUM *a, *z, *rho, *w, *w2, *tmp;
bn_check_top(a_);
@@ -1017,11 +955,11 @@ err:
int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
{
int ret = 0;
- const int max = BN_num_bits(p);
- unsigned int *arr=NULL;
+ const int max = BN_num_bits(p) + 1;
+ int *arr=NULL;
bn_check_top(a);
bn_check_top(p);
- if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) *
+ if ((arr = (int *)OPENSSL_malloc(sizeof(int) *
max)) == NULL) goto err;
ret = BN_GF2m_poly2arr(p, arr, max);
if (!ret || ret > max)
@@ -1037,20 +975,17 @@ err:
}
/* Convert the bit-string representation of a polynomial
- * ( \sum_{i=0}^n a_i * x^i , where a_0 is *not* zero) into an array
- * of integers corresponding to the bits with non-zero coefficient.
+ * ( \sum_{i=0}^n a_i * x^i) into an array of integers corresponding
+ * to the bits with non-zero coefficient. Array is terminated with -1.
* Up to max elements of the array will be filled. Return value is total
- * number of coefficients that would be extracted if array was large enough.
+ * number of array elements that would be filled if array was large enough.
*/
-int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max)
+int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max)
{
int i, j, k = 0;
BN_ULONG mask;
- if (BN_is_zero(a) || !BN_is_bit_set(a, 0))
- /* a_0 == 0 => return error (the unsigned int array
- * must be terminated by 0)
- */
+ if (BN_is_zero(a))
return 0;
for (i = a->top - 1; i >= 0; i--)
@@ -1070,24 +1005,28 @@ int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max)
}
}
+ if (k < max) {
+ p[k] = -1;
+ k++;
+ }
+
return k;
}
/* Convert the coefficient array representation of a polynomial to a
- * bit-string. The array must be terminated by 0.
+ * bit-string. The array must be terminated by -1.
*/
-int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a)
+int BN_GF2m_arr2poly(const int p[], BIGNUM *a)
{
int i;
bn_check_top(a);
BN_zero(a);
- for (i = 0; p[i] != 0; i++)
+ for (i = 0; p[i] != -1; i++)
{
if (BN_set_bit(a, p[i]) == 0)
return 0;
}
- BN_set_bit(a, 0);
bn_check_top(a);
return 1;
diff --git a/openssl/crypto/bn/bn_lcl.h b/openssl/crypto/bn/bn_lcl.h
index 27ac4397a..8e5e98e3f 100644
--- a/openssl/crypto/bn/bn_lcl.h
+++ b/openssl/crypto/bn/bn_lcl.h
@@ -255,7 +255,8 @@ extern "C" {
: "r"(a), "r"(b)); \
ret; })
# endif /* compiler */
-# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
+# elif (defined(__x86_64) || defined(__x86_64__)) && \
+ (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
# if defined(__GNUC__)
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret,discard; \
diff --git a/openssl/crypto/bn/bn_lib.c b/openssl/crypto/bn/bn_lib.c
index 32a8fbaf5..5470fbe6e 100644
--- a/openssl/crypto/bn/bn_lib.c
+++ b/openssl/crypto/bn/bn_lib.c
@@ -133,15 +133,34 @@ int BN_get_params(int which)
const BIGNUM *BN_value_one(void)
{
- static BN_ULONG data_one=1L;
- static BIGNUM const_one={&data_one,1,1,0,BN_FLG_STATIC_DATA};
+ static const BN_ULONG data_one=1L;
+ static const BIGNUM const_one={(BN_ULONG *)&data_one,1,1,0,BN_FLG_STATIC_DATA};
return(&const_one);
}
+char *BN_options(void)
+ {
+ static int init=0;
+ static char data[16];
+
+ if (!init)
+ {
+ init++;
+#ifdef BN_LLONG
+ BIO_snprintf(data,sizeof data,"bn(%d,%d)",
+ (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8);
+#else
+ BIO_snprintf(data,sizeof data,"bn(%d,%d)",
+ (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8);
+#endif
+ }
+ return(data);
+ }
+
int BN_num_bits_word(BN_ULONG l)
{
- static const char bits[256]={
+ static const unsigned char bits[256]={
0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
@@ -216,7 +235,7 @@ int BN_num_bits_word(BN_ULONG l)
else
#endif
{
-#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
+#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
if (l & 0xff00L)
return(bits[(int)(l>>8)]+8);
else
@@ -744,7 +763,7 @@ int BN_is_bit_set(const BIGNUM *a, int n)
i=n/BN_BITS2;
j=n%BN_BITS2;
if (a->top <= i) return 0;
- return(((a->d[i])>>j)&((BN_ULONG)1));
+ return (int)(((a->d[i])>>j)&((BN_ULONG)1));
}
int BN_mask_bits(BIGNUM *a, int n)
diff --git a/openssl/crypto/bn/bn_mont.c b/openssl/crypto/bn/bn_mont.c
index 4799b152d..7224637ab 100644
--- a/openssl/crypto/bn/bn_mont.c
+++ b/openssl/crypto/bn/bn_mont.c
@@ -122,26 +122,10 @@
#define MONT_WORD /* use the faster word-based algorithm */
-#if defined(MONT_WORD) && defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
-/* This condition means we have a specific non-default build:
- * In the 0.9.8 branch, OPENSSL_BN_ASM_MONT is normally not set for any
- * BN_BITS2<=32 platform; an explicit "enable-montasm" is required.
- * I.e., if we are here, the user intentionally deviates from the
- * normal stable build to get better Montgomery performance from
- * the 0.9.9-dev backport.
- *
- * In this case only, we also enable BN_from_montgomery_word()
- * (another non-stable feature from 0.9.9-dev).
- */
-#define MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
-#endif
-
-#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
+#ifdef MONT_WORD
static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
#endif
-
-
int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
BN_MONT_CTX *mont, BN_CTX *ctx)
{
@@ -153,11 +137,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
if (num>1 && a->top==num && b->top==num)
{
if (bn_wexpand(r,num) == NULL) return(0);
-#if 0 /* for OpenSSL 0.9.9 mont->n0 */
if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num))
-#else
- if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,&mont->n0,num))
-#endif
{
r->neg = a->neg^b->neg;
r->top = num;
@@ -181,7 +161,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
if (!BN_mul(tmp,a,b,ctx)) goto err;
}
/* reduce from aRR to aR */
-#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
+#ifdef MONT_WORD
if (!BN_from_montgomery_word(r,tmp,mont)) goto err;
#else
if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
@@ -193,7 +173,7 @@ err:
return(ret);
}
-#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD
+#ifdef MONT_WORD
static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
{
BIGNUM *n;
@@ -217,15 +197,15 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
nrp= &(r->d[nl]);
/* clear the top words of T */
+#if 1
for (i=r->top; i<max; i++) /* memset? XXX */
r->d[i]=0;
+#else
+ memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
+#endif
r->top=max;
-#if 0 /* for OpenSSL 0.9.9 mont->n0 */
n0=mont->n0[0];
-#else
- n0=mont->n0;
-#endif
#ifdef BN_COUNT
fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
@@ -270,6 +250,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
}
al=r->top-ri;
+#define BRANCH_FREE 1
+#if BRANCH_FREE
if (bn_wexpand(ret,ri) == NULL) return(0);
x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */
@@ -317,164 +299,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
rp[i]=nrp[i], ap[i]=0;
bn_correct_top(r);
bn_correct_top(ret);
- bn_check_top(ret);
-
- return(1);
- }
-
-int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
- BN_CTX *ctx)
- {
- int retn=0;
- BIGNUM *t;
-
- BN_CTX_start(ctx);
- if ((t = BN_CTX_get(ctx)) && BN_copy(t,a))
- retn = BN_from_montgomery_word(ret,t,mont);
- BN_CTX_end(ctx);
- return retn;
- }
-
-#else /* !MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
-
-int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
- BN_CTX *ctx)
- {
- int retn=0;
-
-#ifdef MONT_WORD
- BIGNUM *n,*r;
- BN_ULONG *ap,*np,*rp,n0,v,*nrp;
- int al,nl,max,i,x,ri;
-
- BN_CTX_start(ctx);
- if ((r = BN_CTX_get(ctx)) == NULL) goto err;
-
- if (!BN_copy(r,a)) goto err;
- n= &(mont->N);
-
- ap=a->d;
- /* mont->ri is the size of mont->N in bits (rounded up
- to the word size) */
- al=ri=mont->ri/BN_BITS2;
-
- nl=n->top;
- if ((al == 0) || (nl == 0)) { r->top=0; return(1); }
-
- max=(nl+al+1); /* allow for overflow (no?) XXX */
- if (bn_wexpand(r,max) == NULL) goto err;
-
- r->neg=a->neg^n->neg;
- np=n->d;
- rp=r->d;
- nrp= &(r->d[nl]);
-
- /* clear the top words of T */
-#if 1
- for (i=r->top; i<max; i++) /* memset? XXX */
- r->d[i]=0;
#else
- memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
-#endif
-
- r->top=max;
- n0=mont->n0;
-
-#ifdef BN_COUNT
- fprintf(stderr,"word BN_from_montgomery %d * %d\n",nl,nl);
-#endif
- for (i=0; i<nl; i++)
- {
-#ifdef __TANDEM
- {
- long long t1;
- long long t2;
- long long t3;
- t1 = rp[0] * (n0 & 0177777);
- t2 = 037777600000l;
- t2 = n0 & t2;
- t3 = rp[0] & 0177777;
- t2 = (t3 * t2) & BN_MASK2;
- t1 = t1 + t2;
- v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1);
- }
-#else
- v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
-#endif
- nrp++;
- rp++;
- if (((nrp[-1]+=v)&BN_MASK2) >= v)
- continue;
- else
- {
- if (((++nrp[0])&BN_MASK2) != 0) continue;
- if (((++nrp[1])&BN_MASK2) != 0) continue;
- for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
- }
- }
- bn_correct_top(r);
-
- /* mont->ri will be a multiple of the word size and below code
- * is kind of BN_rshift(ret,r,mont->ri) equivalent */
- if (r->top <= ri)
- {
- ret->top=0;
- retn=1;
- goto err;
- }
- al=r->top-ri;
-
-# define BRANCH_FREE 1
-# if BRANCH_FREE
- if (bn_wexpand(ret,ri) == NULL) goto err;
- x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
- ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */
- ret->neg=r->neg;
-
- rp=ret->d;
- ap=&(r->d[ri]);
-
- {
- size_t m1,m2;
-
- v=bn_sub_words(rp,ap,np,ri);
- /* this ----------------^^ works even in al<ri case
- * thanks to zealous zeroing of top of the vector in the
- * beginning. */
-
- /* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
- /* in other words if subtraction result is real, then
- * trick unconditional memcpy below to perform in-place
- * "refresh" instead of actual copy. */
- m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */
- m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */
- m1|=m2; /* (al!=ri) */
- m1|=(0-(size_t)v); /* (al!=ri || v) */
- m1&=~m2; /* (al!=ri || v) && !al>ri */
- nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
- }
-
- /* 'i<ri' is chosen to eliminate dependency on input data, even
- * though it results in redundant copy in al<ri case. */
- for (i=0,ri-=4; i<ri; i+=4)
- {
- BN_ULONG t1,t2,t3,t4;
-
- t1=nrp[i+0];
- t2=nrp[i+1];
- t3=nrp[i+2]; ap[i+0]=0;
- t4=nrp[i+3]; ap[i+1]=0;
- rp[i+0]=t1; ap[i+2]=0;
- rp[i+1]=t2; ap[i+3]=0;
- rp[i+2]=t3;
- rp[i+3]=t4;
- }
- for (ri+=4; i<ri; i++)
- rp[i]=nrp[i], ap[i]=0;
- bn_correct_top(r);
- bn_correct_top(ret);
-# else
- if (bn_wexpand(ret,al) == NULL) goto err;
+ if (bn_wexpand(ret,al) == NULL) return(0);
ret->top=al;
ret->neg=r->neg;
@@ -497,8 +323,30 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
al+=4;
for (; i<al; i++)
rp[i]=ap[i];
-# endif
-#else /* !MONT_WORD */
+
+ if (BN_ucmp(ret, &(mont->N)) >= 0)
+ {
+ if (!BN_usub(ret,ret,&(mont->N))) return(0);
+ }
+#endif
+ bn_check_top(ret);
+
+ return(1);
+ }
+#endif /* MONT_WORD */
+
+int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
+ BN_CTX *ctx)
+ {
+ int retn=0;
+#ifdef MONT_WORD
+ BIGNUM *t;
+
+ BN_CTX_start(ctx);
+ if ((t = BN_CTX_get(ctx)) && BN_copy(t,a))
+ retn = BN_from_montgomery_word(ret,t,mont);
+ BN_CTX_end(ctx);
+#else /* !MONT_WORD */
BIGNUM *t1,*t2;
BN_CTX_start(ctx);
@@ -515,21 +363,18 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
if (!BN_mul(t1,t2,&mont->N,ctx)) goto err;
if (!BN_add(t2,a,t1)) goto err;
if (!BN_rshift(ret,t2,mont->ri)) goto err;
-#endif /* MONT_WORD */
-#if !defined(BRANCH_FREE) || BRANCH_FREE==0
if (BN_ucmp(ret, &(mont->N)) >= 0)
{
if (!BN_usub(ret,ret,&(mont->N))) goto err;
}
-#endif
retn=1;
bn_check_top(ret);
err:
BN_CTX_end(ctx);
+#endif /* MONT_WORD */
return(retn);
}
-#endif /* MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */
BN_MONT_CTX *BN_MONT_CTX_new(void)
{
@@ -549,11 +394,7 @@ void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
BN_init(&(ctx->RR));
BN_init(&(ctx->N));
BN_init(&(ctx->Ni));
-#if 0 /* for OpenSSL 0.9.9 mont->n0 */
ctx->n0[0] = ctx->n0[1] = 0;
-#else
- ctx->n0 = 0;
-#endif
ctx->flags=0;
}
@@ -585,26 +426,22 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
BIGNUM tmod;
BN_ULONG buf[2];
- mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
- BN_zero(R);
-#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)",
- only certain BN_BITS2<=32 platforms actually need this */
- if (!(BN_set_bit(R,2*BN_BITS2))) goto err; /* R */
-#else
- if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
-#endif
-
- buf[0]=mod->d[0]; /* tmod = N mod word size */
- buf[1]=0;
-
BN_init(&tmod);
tmod.d=buf;
- tmod.top = buf[0] != 0 ? 1 : 0;
tmod.dmax=2;
tmod.neg=0;
-#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)";
- only certain BN_BITS2<=32 platforms actually need this */
+ mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
+
+#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
+ /* Only certain BN_BITS2<=32 platforms actually make use of
+ * n0[1], and we could use the #else case (with a shorter R
+ * value) for the others. However, currently only the assembler
+ * files do know which is which. */
+
+ BN_zero(R);
+ if (!(BN_set_bit(R,2*BN_BITS2))) goto err;
+
tmod.top=0;
if ((buf[0] = mod->d[0])) tmod.top=1;
if ((buf[1] = mod->top>1 ? mod->d[1] : 0)) tmod.top=2;
@@ -632,6 +469,12 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
#else
+ BN_zero(R);
+ if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
+
+ buf[0]=mod->d[0]; /* tmod = N mod word size */
+ buf[1]=0;
+ tmod.top = buf[0] != 0 ? 1 : 0;
/* Ri = R^-1 mod N*/
if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
goto err;
@@ -647,12 +490,8 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
/* Ni = (R*Ri-1)/N,
* keep only least significant word: */
-# if 0 /* for OpenSSL 0.9.9 mont->n0 */
mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
mont->n0[1] = 0;
-# else
- mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0;
-# endif
#endif
}
#else /* !MONT_WORD */
@@ -689,12 +528,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
if (!BN_copy(&(to->N),&(from->N))) return NULL;
if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
to->ri=from->ri;
-#if 0 /* for OpenSSL 0.9.9 mont->n0 */
to->n0[0]=from->n0[0];
to->n0[1]=from->n0[1];
-#else
- to->n0=from->n0;
-#endif
return(to);
}
diff --git a/openssl/crypto/bn/bn_mul.c b/openssl/crypto/bn/bn_mul.c
index b848c8cc6..a0e9ec3b4 100644
--- a/openssl/crypto/bn/bn_mul.c
+++ b/openssl/crypto/bn/bn_mul.c
@@ -1028,17 +1028,19 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
assert(j <= al || j <= bl);
k = j+j;
t = BN_CTX_get(ctx);
+ if (t == NULL)
+ goto err;
if (al > j || bl > j)
{
- bn_wexpand(t,k*4);
- bn_wexpand(rr,k*4);
+ if (bn_wexpand(t,k*4) == NULL) goto err;
+ if (bn_wexpand(rr,k*4) == NULL) goto err;
bn_mul_part_recursive(rr->d,a->d,b->d,
j,al-j,bl-j,t->d);
}
else /* al <= j || bl <= j */
{
- bn_wexpand(t,k*2);
- bn_wexpand(rr,k*2);
+ if (bn_wexpand(t,k*2) == NULL) goto err;
+ if (bn_wexpand(rr,k*2) == NULL) goto err;
bn_mul_recursive(rr->d,a->d,b->d,
j,al-j,bl-j,t->d);
}
diff --git a/openssl/crypto/bn/bn_opt.c b/openssl/crypto/bn/bn_opt.c
deleted file mode 100644
index 21cbb38f6..000000000
--- a/openssl/crypto/bn/bn_opt.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/* crypto/bn/bn_opt.c */
-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
- * All rights reserved.
- *
- * This package is an SSL implementation written
- * by Eric Young (eay@cryptsoft.com).
- * The implementation was written so as to conform with Netscapes SSL.
- *
- * This library is free for commercial and non-commercial use as long as
- * the following conditions are aheared to. The following conditions
- * apply to all code found in this distribution, be it the RC4, RSA,
- * lhash, DES, etc., code; not just the SSL code. The SSL documentation
- * included with this distribution is covered by the same copyright terms
- * except that the holder is Tim Hudson (tjh@cryptsoft.com).
- *
- * Copyright remains Eric Young's, and as such any Copyright notices in
- * the code are not to be removed.
- * If this package is used in a product, Eric Young should be given attribution
- * as the author of the parts of the library used.
- * This can be in the form of a textual message at program startup or
- * in documentation (online or textual) provided with the package.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * "This product includes cryptographic software written by
- * Eric Young (eay@cryptsoft.com)"
- * The word 'cryptographic' can be left out if the rouines from the library
- * being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from
- * the apps directory (application code) you must include an acknowledgement:
- * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- *
- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * The licence and distribution terms for any publically available version or
- * derivative of this code cannot be changed. i.e. this code cannot simply be
- * copied and put under another distribution licence
- * [including the GNU Public Licence.]
- */
-
-#ifndef BN_DEBUG
-# undef NDEBUG /* avoid conflicting definitions */
-# define NDEBUG
-#endif
-
-#include <assert.h>
-#include <limits.h>
-#include <stdio.h>
-#include "cryptlib.h"
-#include "bn_lcl.h"
-
-char *BN_options(void)
- {
- static int init=0;
- static char data[16];
-
- if (!init)
- {
- init++;
-#ifdef BN_LLONG
- BIO_snprintf(data,sizeof data,"bn(%d,%d)",
- (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8);
-#else
- BIO_snprintf(data,sizeof data,"bn(%d,%d)",
- (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8);
-#endif
- }
- return(data);
- }
diff --git a/openssl/crypto/bn/bn_print.c b/openssl/crypto/bn/bn_print.c
index 810dde34e..bebb466d0 100644
--- a/openssl/crypto/bn/bn_print.c
+++ b/openssl/crypto/bn/bn_print.c
@@ -294,6 +294,27 @@ err:
return(0);
}
+int BN_asc2bn(BIGNUM **bn, const char *a)
+ {
+ const char *p = a;
+ if (*p == '-')
+ p++;
+
+ if (p[0] == '0' && (p[1] == 'X' || p[1] == 'x'))
+ {
+ if (!BN_hex2bn(bn, p + 2))
+ return 0;
+ }
+ else
+ {
+ if (!BN_dec2bn(bn, p))
+ return 0;
+ }
+ if (*a == '-')
+ (*bn)->neg = 1;
+ return 1;
+ }
+
#ifndef OPENSSL_NO_BIO
#ifndef OPENSSL_NO_FP_API
int BN_print_fp(FILE *fp, const BIGNUM *a)
diff --git a/openssl/crypto/bn/bn_x931p.c b/openssl/crypto/bn/bn_x931p.c
deleted file mode 100644
index 04c5c874e..000000000
--- a/openssl/crypto/bn/bn_x931p.c
+++ /dev/null
@@ -1,272 +0,0 @@
-/* bn_x931p.c */
-/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
- * project 2005.
- */
-/* ====================================================================
- * Copyright (c) 2005 The OpenSSL Project. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- * software must display the following acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- * endorse or promote products derived from this software without
- * prior written permission. For written permission, please contact
- * licensing@OpenSSL.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- * nor may "OpenSSL" appear in their names without prior written
- * permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- * acknowledgment:
- * "This product includes software developed by the OpenSSL Project
- * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
- *
- * This product includes cryptographic software written by Eric Young
- * (eay@cryptsoft.com). This product includes software written by Tim
- * Hudson (tjh@cryptsoft.com).
- *
- */
-
-#include <stdio.h>
-#include <openssl/bn.h>
-
-/* X9.31 routines for prime derivation */
-
-/* X9.31 prime derivation. This is used to generate the primes pi
- * (p1, p2, q1, q2) from a parameter Xpi by checking successive odd
- * integers.
- */
-
-static int bn_x931_derive_pi(BIGNUM *pi, const BIGNUM *Xpi, BN_CTX *ctx,
- BN_GENCB *cb)
- {
- int i = 0;
- if (!BN_copy(pi, Xpi))
- return 0;
- if (!BN_is_odd(pi) && !BN_add_word(pi, 1))
- return 0;
- for(;;)
- {
- i++;
- BN_GENCB_call(cb, 0, i);
- /* NB 27 MR is specificed in X9.31 */
- if (BN_is_prime_fasttest_ex(pi, 27, ctx, 1, cb))
- break;
- if (!BN_add_word(pi, 2))
- return 0;
- }
- BN_GENCB_call(cb, 2, i);
- return 1;
- }
-
-/* This is the main X9.31 prime derivation function. From parameters
- * Xp1, Xp2 and Xp derive the prime p. If the parameters p1 or p2 are
- * not NULL they will be returned too: this is needed for testing.
- */
-
-int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
- const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2,
- const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb)
- {
- int ret = 0;
-
- BIGNUM *t, *p1p2, *pm1;
-
- /* Only even e supported */
- if (!BN_is_odd(e))
- return 0;
-
- BN_CTX_start(ctx);
- if (!p1)
- p1 = BN_CTX_get(ctx);
-
- if (!p2)
- p2 = BN_CTX_get(ctx);
-
- t = BN_CTX_get(ctx);
-
- p1p2 = BN_CTX_get(ctx);
-
- pm1 = BN_CTX_get(ctx);
-
- if (!bn_x931_derive_pi(p1, Xp1, ctx, cb))
- goto err;
-
- if (!bn_x931_derive_pi(p2, Xp2, ctx, cb))
- goto err;
-
- if (!BN_mul(p1p2, p1, p2, ctx))
- goto err;
-
- /* First set p to value of Rp */
-
- if (!BN_mod_inverse(p, p2, p1, ctx))
- goto err;
-
- if (!BN_mul(p, p, p2, ctx))
- goto err;
-
- if (!BN_mod_inverse(t, p1, p2, ctx))
- goto err;
-
- if (!BN_mul(t, t, p1, ctx))
- goto err;
-
- if (!BN_sub(p, p, t))
- goto err;
-
- if (p->neg && !BN_add(p, p, p1p2))
- goto err;
-
- /* p now equals Rp */
-
- if (!BN_mod_sub(p, p, Xp, p1p2, ctx))
- goto err;
-
- if (!BN_add(p, p, Xp))
- goto err;
-
- /* p now equals Yp0 */
-
- for (;;)
- {
- int i = 1;
- BN_GENCB_call(cb, 0, i++);
- if (!BN_copy(pm1, p))
- goto err;
- if (!BN_sub_word(pm1, 1))
- goto err;
- if (!BN_gcd(t, pm1, e, ctx))
- goto err;
- if (BN_is_one(t)
- /* X9.31 specifies 8 MR and 1 Lucas test or any prime test
- * offering similar or better guarantees 50 MR is considerably
- * better.
- */
- && BN_is_prime_fasttest_ex(p, 50, ctx, 1, cb))
- break;
- if (!BN_add(p, p, p1p2))
- goto err;
- }
-
- BN_GENCB_call(cb, 3, 0);
-
- ret = 1;
-
- err:
-
- BN_CTX_end(ctx);
-
- return ret;
- }
-
-/* Generate pair of paramters Xp, Xq for X9.31 prime generation.
- * Note: nbits paramter is sum of number of bits in both.
- */
-
-int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
- {
- BIGNUM *t;
- int i;
- /* Number of bits for each prime is of the form
- * 512+128s for s = 0, 1, ...
- */
- if ((nbits < 1024) || (nbits & 0xff))
- return 0;
- nbits >>= 1;
- /* The random value Xp must be between sqrt(2) * 2^(nbits-1) and
- * 2^nbits - 1. By setting the top two bits we ensure that the lower
- * bound is exceeded.
- */
- if (!BN_rand(Xp, nbits, 1, 0))
- return 0;
-
- BN_CTX_start(ctx);
- t = BN_CTX_get(ctx);
-
- for (i = 0; i < 1000; i++)
- {
- if (!BN_rand(Xq, nbits, 1, 0))
- return 0;
- /* Check that |Xp - Xq| > 2^(nbits - 100) */
- BN_sub(t, Xp, Xq);
- if (BN_num_bits(t) > (nbits - 100))
- break;
- }
-
- BN_CTX_end(ctx);
-
- if (i < 1000)
- return 1;
-
- return 0;
-
- }
-
-/* Generate primes using X9.31 algorithm. Of the values p, p1, p2, Xp1
- * and Xp2 only 'p' needs to be non-NULL. If any of the others are not NULL
- * the relevant parameter will be stored in it.
- *
- * Due to the fact that |Xp - Xq| > 2^(nbits - 100) must be satisfied Xp and Xq
- * are generated using the previous function and supplied as input.
- */
-
-int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
- BIGNUM *Xp1, BIGNUM *Xp2,
- const BIGNUM *Xp,
- const BIGNUM *e, BN_CTX *ctx,
- BN_GENCB *cb)
- {
- int ret = 0;
-
- BN_CTX_start(ctx);
- if (!Xp1)
- Xp1 = BN_CTX_get(ctx);
- if (!Xp2)
- Xp2 = BN_CTX_get(ctx);
-
- if (!BN_rand(Xp1, 101, 0, 0))
- goto error;
- if (!BN_rand(Xp2, 101, 0, 0))
- goto error;
- if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb))
- goto error;
-
- ret = 1;
-
- error:
- BN_CTX_end(ctx);
-
- return ret;
-
- }
-
diff --git a/openssl/crypto/bn/bntest.c b/openssl/crypto/bn/bntest.c
index cf190380f..0cd99c5b4 100644
--- a/openssl/crypto/bn/bntest.c
+++ b/openssl/crypto/bn/bntest.c
@@ -486,7 +486,7 @@ static void print_word(BIO *bp,BN_ULONG w)
return;
}
#endif
- BIO_printf(bp,"%lX",w);
+ BIO_printf(bp,BN_HEX_FMT1,w);
}
int test_div_word(BIO *bp)
@@ -732,6 +732,8 @@ int test_mont(BIO *bp, BN_CTX *ctx)
BN_init(&n);
mont=BN_MONT_CTX_new();
+ if (mont == NULL)
+ return 0;
BN_bntest_rand(&a,100,0,0); /**/
BN_bntest_rand(&b,100,0,0); /**/
@@ -1027,7 +1029,7 @@ int test_exp(BIO *bp, BN_CTX *ctx)
BN_bntest_rand(a,20+i*5,0,0); /**/
BN_bntest_rand(b,2+i,0,0); /**/
- if (!BN_exp(d,a,b,ctx))
+ if (BN_exp(d,a,b,ctx) <= 0)
return(0);
if (bp != NULL)
@@ -1116,8 +1118,8 @@ int test_gf2m_mod(BIO *bp)
{
BIGNUM *a,*b[2],*c,*d,*e;
int i, j, ret = 0;
- unsigned int p0[] = {163,7,6,3,0};
- unsigned int p1[] = {193,15,0};
+ int p0[] = {163,7,6,3,0,-1};
+ int p1[] = {193,15,0,-1};
a=BN_new();
b[0]=BN_new();
@@ -1174,8 +1176,8 @@ int test_gf2m_mod_mul(BIO *bp,BN_CTX *ctx)
{
BIGNUM *a,*b[2],*c,*d,*e,*f,*g,*h;
int i, j, ret = 0;
- unsigned int p0[] = {163,7,6,3,0};
- unsigned int p1[] = {193,15,0};
+ int p0[] = {163,7,6,3,0,-1};
+ int p1[] = {193,15,0,-1};
a=BN_new();
b[0]=BN_new();
@@ -1245,8 +1247,8 @@ int test_gf2m_mod_sqr(BIO *bp,BN_CTX *ctx)
{
BIGNUM *a,*b[2],*c,*d;
int i, j, ret = 0;
- unsigned int p0[] = {163,7,6,3,0};
- unsigned int p1[] = {193,15,0};
+ int p0[] = {163,7,6,3,0,-1};
+ int p1[] = {193,15,0,-1};
a=BN_new();
b[0]=BN_new();
@@ -1304,8 +1306,8 @@ int test_gf2m_mod_inv(BIO *bp,BN_CTX *ctx)
{
BIGNUM *a,*b[2],*c,*d;
int i, j, ret = 0;
- unsigned int p0[] = {163,7,6,3,0};
- unsigned int p1[] = {193,15,0};
+ int p0[] = {163,7,6,3,0,-1};
+ int p1[] = {193,15,0,-1};
a=BN_new();
b[0]=BN_new();
@@ -1359,8 +1361,8 @@ int test_gf2m_mod_div(BIO *bp,BN_CTX *ctx)
{
BIGNUM *a,*b[2],*c,*d,*e,*f;
int i, j, ret = 0;
- unsigned int p0[] = {163,7,6,3,0};
- unsigned int p1[] = {193,15,0};
+ int p0[] = {163,7,6,3,0,-1};
+ int p1[] = {193,15,0,-1};
a=BN_new();
b[0]=BN_new();
@@ -1422,8 +1424,8 @@ int test_gf2m_mod_exp(BIO *bp,BN_CTX *ctx)
{
BIGNUM *a,*b[2],*c,*d,*e,*f;
int i, j, ret = 0;
- unsigned int p0[] = {163,7,6,3,0};
- unsigned int p1[] = {193,15,0};
+ int p0[] = {163,7,6,3,0,-1};
+ int p1[] = {193,15,0,-1};
a=BN_new();
b[0]=BN_new();
@@ -1493,8 +1495,8 @@ int test_gf2m_mod_sqrt(BIO *bp,BN_CTX *ctx)
{
BIGNUM *a,*b[2],*c,*d,*e,*f;
int i, j, ret = 0;
- unsigned int p0[] = {163,7,6,3,0};
- unsigned int p1[] = {193,15,0};
+ int p0[] = {163,7,6,3,0,-1};
+ int p1[] = {193,15,0,-1};
a=BN_new();
b[0]=BN_new();
@@ -1552,8 +1554,8 @@ int test_gf2m_mod_solve_quad(BIO *bp,BN_CTX *ctx)
{
BIGNUM *a,*b[2],*c,*d,*e;
int i, j, s = 0, t, ret = 0;
- unsigned int p0[] = {163,7,6,3,0};
- unsigned int p1[] = {193,15,0};
+ int p0[] = {163,7,6,3,0,-1};
+ int p1[] = {193,15,0,-1};
a=BN_new();
b[0]=BN_new();
diff --git a/openssl/crypto/bn/exptest.c b/openssl/crypto/bn/exptest.c
index f598a07cf..074a8e882 100644
--- a/openssl/crypto/bn/exptest.c
+++ b/openssl/crypto/bn/exptest.c
@@ -163,7 +163,7 @@ int main(int argc, char *argv[])
{
if (BN_cmp(r_simple,r_mont) != 0)
printf("\nsimple and mont results differ\n");
- if (BN_cmp(r_simple,r_mont) != 0)
+ if (BN_cmp(r_simple,r_mont_const) != 0)
printf("\nsimple and mont const time results differ\n");
if (BN_cmp(r_simple,r_recp) != 0)
printf("\nsimple and recp results differ\n");
@@ -187,7 +187,7 @@ int main(int argc, char *argv[])
BN_free(b);
BN_free(m);
BN_CTX_free(ctx);
- ERR_remove_state(0);
+ ERR_remove_thread_state(NULL);
CRYPTO_mem_leaks(out);
BIO_free(out);
printf(" done\n");