diff options
| author | marha <marha@users.sourceforge.net> | 2010-03-30 12:36:28 +0000 | 
|---|---|---|
| committer | marha <marha@users.sourceforge.net> | 2010-03-30 12:36:28 +0000 | 
| commit | ff48c0d9098080b51ea12710029135916d117806 (patch) | |
| tree | 96e6af9caf170ba21a1027b24e306a07e27d7b75 /openssl/crypto/bn | |
| parent | bb731f5ac92655c4860a41fa818a7a63005f8369 (diff) | |
| download | vcxsrv-ff48c0d9098080b51ea12710029135916d117806.tar.gz vcxsrv-ff48c0d9098080b51ea12710029135916d117806.tar.bz2 vcxsrv-ff48c0d9098080b51ea12710029135916d117806.zip | |
svn merge -r514:HEAD ^/branches/released .
Diffstat (limited to 'openssl/crypto/bn')
25 files changed, 949 insertions, 1775 deletions
| diff --git a/openssl/crypto/bn/Makefile b/openssl/crypto/bn/Makefile index f5e8f65a4..aabc4f56b 100644 --- a/openssl/crypto/bn/Makefile +++ b/openssl/crypto/bn/Makefile @@ -12,8 +12,6 @@ MAKEFILE=	Makefile  AR=		ar r  BN_ASM=		bn_asm.o -# or use -#BN_ASM=	bn86-elf.o  CFLAGS= $(INCLUDES) $(CFLAG)  ASFLAGS= $(INCLUDES) $(ASFLAG) @@ -28,13 +26,13 @@ LIBSRC=	bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \  	bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \  	bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \  	bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \ -	bn_depr.c bn_x931p.c bn_const.c bn_opt.c +	bn_depr.c bn_const.c  LIBOBJ=	bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \  	bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \  	bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \  	bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o bn_gf2m.o bn_nist.o \ -	bn_depr.o bn_x931p.o bn_const.o bn_opt.o +	bn_depr.o bn_const.o  SRC= $(LIBSRC) @@ -58,36 +56,25 @@ bnbug: bnbug.c ../../libcrypto.a top  	cc -g -I../../include bnbug.c -o bnbug ../../libcrypto.a  lib:	$(LIBOBJ) -	$(ARX) $(LIB) $(LIBOBJ) +	$(AR) $(LIB) $(LIBOBJ)  	$(RANLIB) $(LIB) || echo Never mind.  	
@touch lib -# ELF -bn86-elf.s:	asm/bn-586.pl ../perlasm/x86asm.pl -	(cd asm; $(PERL) bn-586.pl elf $(CFLAGS) > ../$@) -co86-elf.s:	asm/co-586.pl ../perlasm/x86asm.pl -	(cd asm; $(PERL) co-586.pl elf $(CFLAGS) > ../$@) -mo86-elf.s:	asm/mo-586.pl ../perlasm/x86asm.pl -	(cd asm; $(PERL) mo-586.pl elf $(CFLAGS) > ../$@) -# COFF -bn86-cof.s: asm/bn-586.pl ../perlasm/x86asm.pl -	(cd asm; $(PERL) bn-586.pl coff $(CFLAGS) > ../$@) -co86-cof.s: asm/co-586.pl ../perlasm/x86asm.pl -	(cd asm; $(PERL) co-586.pl coff $(CFLAGS) > ../$@) -mo86-cof.s: asm/mo-586.pl ../perlasm/x86asm.pl -	(cd asm; $(PERL) mo-586.pl coff $(CFLAGS) > ../$@) -# a.out -bn86-out.s: asm/bn-586.pl ../perlasm/x86asm.pl -	(cd asm; $(PERL) bn-586.pl a.out $(CFLAGS) > ../$@) -co86-out.s: asm/co-586.pl ../perlasm/x86asm.pl -	(cd asm; $(PERL) co-586.pl a.out $(CFLAGS) > ../$@) -mo86-out.s: asm/mo-586.pl ../perlasm/x86asm.pl -	(cd asm; $(PERL) mo-586.pl a.out $(CFLAGS) > ../$@) +bn-586.s:	asm/bn-586.pl ../perlasm/x86asm.pl +	$(PERL) asm/bn-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ +co-586.s:	asm/co-586.pl ../perlasm/x86asm.pl +	$(PERL) asm/co-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ +x86-mont.s:	asm/x86-mont.pl ../perlasm/x86asm.pl +	$(PERL) asm/x86-mont.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@  sparcv8.o:	asm/sparcv8.S  	$(CC) $(CFLAGS) -c asm/sparcv8.S -sparcv8plus.o:	asm/sparcv8plus.S -	$(CC) $(CFLAGS) -c asm/sparcv8plus.S +bn-sparcv9.o:	asm/sparcv8plus.S +	$(CC) $(CFLAGS) -c -o $@ asm/sparcv8plus.S +sparcv9a-mont.s:	asm/sparcv9a-mont.pl +	$(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@ +sparcv9-mont.s:		asm/sparcv9-mont.pl +	$(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@  bn-mips3.o:	asm/mips3.s  	@if [ "$(CC)" = "gcc" ]; then \ @@ -95,10 +82,13 @@ bn-mips3.o:	asm/mips3.s  		as -$$ABI -O -o $@ asm/mips3.s; \  	else	$(CC) -c $(CFLAGS) -o $@ asm/mips3.s; fi +bn-s390x.o:	asm/s390x.S +	$(CC) $(CFLAGS) -c -o $@ asm/s390x.S +  x86_64-gcc.o:	asm/x86_64-gcc.c  	$(CC) $(CFLAGS) -c -o $@ 
asm/x86_64-gcc.c  x86_64-mont.s:	asm/x86_64-mont.pl -	$(PERL) asm/x86_64-mont.pl $@ +	$(PERL) asm/x86_64-mont.pl $(PERLASM_SCHEME) > $@  bn-ia64.s:	asm/ia64.S  	$(CC) $(CFLAGS) -E asm/ia64.S > $@ @@ -111,12 +101,14 @@ pa-risc2.o: asm/pa-risc2.s  	/usr/ccs/bin/as -o pa-risc2.o asm/pa-risc2.s  # ppc - AIX, Linux, MacOS X... -linux_ppc32.s: asm/ppc.pl;	$(PERL) $< $@ -linux_ppc64.s: asm/ppc.pl;	$(PERL) $< $@ -aix_ppc32.s: asm/ppc.pl;	$(PERL) asm/ppc.pl $@ -aix_ppc64.s: asm/ppc.pl;	$(PERL) asm/ppc.pl $@ -osx_ppc32.s: asm/ppc.pl;	$(PERL) $< $@ -osx_ppc64.s: asm/ppc.pl;	$(PERL) $< $@ +bn-ppc.s:	asm/ppc.pl;	$(PERL) asm/ppc.pl $(PERLASM_SCHEME) $@ +ppc-mont.s:	asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@ + +alpha-mont.s:	asm/alpha-mont.pl +	$(PERL) $< | $(CC) -E - | tee $@ > /dev/null + +# GNU make "catch all" +%-mont.s:	asm/%-mont.pl;	$(PERL) $< $(CFLAGS) > $@  files:  	$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO @@ -184,8 +176,11 @@ bn_blind.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h  bn_blind.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h  bn_blind.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h  bn_blind.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_blind.c bn_lcl.h -bn_const.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h -bn_const.o: ../../include/openssl/ossl_typ.h bn.h bn_const.c +bn_const.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h +bn_const.o: ../../include/openssl/opensslconf.h +bn_const.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +bn_const.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h +bn_const.o: ../../include/openssl/symhacks.h bn.h bn_const.c  bn_ctx.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h  bn_ctx.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h  bn_ctx.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h @@ -292,13 
+287,6 @@ bn_nist.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h  bn_nist.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h  bn_nist.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h  bn_nist.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_nist.c -bn_opt.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h -bn_opt.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h -bn_opt.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h -bn_opt.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h -bn_opt.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h -bn_opt.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h -bn_opt.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_opt.c  bn_prime.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h  bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h  bn_prime.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h @@ -357,6 +345,3 @@ bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h  bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h  bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h  bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_word.c -bn_x931p.o: ../../include/openssl/bn.h ../../include/openssl/e_os2.h -bn_x931p.o: ../../include/openssl/opensslconf.h -bn_x931p.o: ../../include/openssl/ossl_typ.h bn_x931p.c diff --git a/openssl/crypto/bn/asm/armv4-mont.pl b/openssl/crypto/bn/asm/armv4-mont.pl index 05d5dc1a4..14e0d2d1d 100644 --- a/openssl/crypto/bn/asm/armv4-mont.pl +++ b/openssl/crypto/bn/asm/armv4-mont.pl @@ -193,6 +193,7 @@ bn_mul_mont:  	bx	lr			@ interoperable with Thumb ISA:-)  .size	bn_mul_mont,.-bn_mul_mont  .asciz	"Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>" +.align	2  ___  $code =~ 
s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4 diff --git a/openssl/crypto/bn/asm/bn-586.pl b/openssl/crypto/bn/asm/bn-586.pl index 26c2685a7..332ef3e91 100644 --- a/openssl/crypto/bn/asm/bn-586.pl +++ b/openssl/crypto/bn/asm/bn-586.pl @@ -1,6 +1,7 @@  #!/usr/local/bin/perl -push(@INC,"perlasm","../../perlasm"); +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm");  require "x86asm.pl";  &asm_init($ARGV[0],$0); @@ -24,38 +25,25 @@ sub bn_mul_add_words  	{  	local($name)=@_; -	&function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); +	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); -	&comment(""); -	$Low="eax"; -	$High="edx"; -	$a="ebx"; -	$w="ebp"; -	$r="edi"; -	$c="esi"; - -	&xor($c,$c);		# clear carry -	&mov($r,&wparam(0));	# - -	&mov("ecx",&wparam(2));	# -	&mov($a,&wparam(1));	# - -	&and("ecx",0xfffffff8);	# num / 8 -	&mov($w,&wparam(3));	# - -	&push("ecx");		# Up the stack for a tmp variable - -	&jz(&label("maw_finish")); +	$r="eax"; +	$a="edx"; +	$c="ecx";  	if ($sse2) {  		&picmeup("eax","OPENSSL_ia32cap_P");  		&bt(&DWP(0,"eax"),26); -		&jnc(&label("maw_loop")); +		&jnc(&label("maw_non_sse2")); -		&movd("mm0",$w);		# mm0 = w +		&mov($r,&wparam(0)); +		&mov($a,&wparam(1)); +		&mov($c,&wparam(2)); +		&movd("mm0",&wparam(3));	# mm0 = w  		&pxor("mm1","mm1");		# mm1 = carry_in - -		&set_label("maw_sse2_loop",0); +		&jmp(&label("maw_sse2_entry")); +		 +	&set_label("maw_sse2_unrolled",16);  		&movd("mm3",&DWP(0,$r,"",0));	# mm3 = r[0]  		&paddq("mm1","mm3");		# mm1 = carry_in + r[0]  		&movd("mm2",&DWP(0,$a,"",0));	# mm2 = a[0] @@ -112,42 +100,82 @@ sub bn_mul_add_words  		&psrlq("mm1",32);		# mm1 = carry6  		&paddq("mm1","mm3");		# mm1 = carry6 + r[7] + w*a[7]  		&movd(&DWP(28,$r,"",0),"mm1"); -		&add($r,32); +		&lea($r,&DWP(32,$r));  		&psrlq("mm1",32);		# mm1 = carry_out -		&sub("ecx",8); +		&sub($c,8); +		&jz(&label("maw_sse2_exit")); +	
&set_label("maw_sse2_entry"); +		&test($c,0xfffffff8); +		&jnz(&label("maw_sse2_unrolled")); + +	&set_label("maw_sse2_loop",4); +		&movd("mm2",&DWP(0,$a));	# mm2 = a[i] +		&movd("mm3",&DWP(0,$r));	# mm3 = r[i] +		&pmuludq("mm2","mm0");		# a[i] *= w +		&lea($a,&DWP(4,$a)); +		&paddq("mm1","mm3");		# carry += r[i] +		&paddq("mm1","mm2");		# carry += a[i]*w +		&movd(&DWP(0,$r),"mm1");	# r[i] = carry_low +		&sub($c,1); +		&psrlq("mm1",32);		# carry = carry_high +		&lea($r,&DWP(4,$r));  		&jnz(&label("maw_sse2_loop")); - -		&movd($c,"mm1");		# c = carry_out +	&set_label("maw_sse2_exit"); +		&movd("eax","mm1");		# c = carry_out  		&emms(); +		&ret(); -		&jmp(&label("maw_finish")); +	&set_label("maw_non_sse2",16);  	} -	&set_label("maw_loop",0); +	# function_begin prologue +	&push("ebp"); +	&push("ebx"); +	&push("esi"); +	&push("edi"); + +	&comment(""); +	$Low="eax"; +	$High="edx"; +	$a="ebx"; +	$w="ebp"; +	$r="edi"; +	$c="esi"; + +	&xor($c,$c);		# clear carry +	&mov($r,&wparam(0));	# + +	&mov("ecx",&wparam(2));	# +	&mov($a,&wparam(1));	# + +	&and("ecx",0xfffffff8);	# num / 8 +	&mov($w,&wparam(3));	# -	&mov(&swtmp(0),"ecx");	# +	&push("ecx");		# Up the stack for a tmp variable + +	&jz(&label("maw_finish")); + +	&set_label("maw_loop",16);  	for ($i=0; $i<32; $i+=4)  		{  		&comment("Round $i"); -		 &mov("eax",&DWP($i,$a,"",0)); 	# *a +		 &mov("eax",&DWP($i,$a)); 	# *a  		&mul($w);			# *a * w -		&add("eax",$c);		# L(t)+= *r -		 &mov($c,&DWP($i,$r,"",0));	# L(t)+= *r +		&add("eax",$c);			# L(t)+= c  		&adc("edx",0);			# H(t)+=carry -		 &add("eax",$c);		# L(t)+=c +		 &add("eax",&DWP($i,$r));	# L(t)+= *r  		&adc("edx",0);			# H(t)+=carry -		 &mov(&DWP($i,$r,"",0),"eax");	# *r= L(t); +		 &mov(&DWP($i,$r),"eax");	# *r= L(t);  		&mov($c,"edx");			# c=  H(t);  		}  	&comment(""); -	&mov("ecx",&swtmp(0));	# -	&add($a,32); -	&add($r,32);  	&sub("ecx",8); +	&lea($a,&DWP(32,$a)); +	&lea($r,&DWP(32,$r));  	&jnz(&label("maw_loop"));  	&set_label("maw_finish",0); @@ -160,16 +188,15 @@ 
sub bn_mul_add_words  	for ($i=0; $i<7; $i++)  		{  		&comment("Tail Round $i"); -		 &mov("eax",&DWP($i*4,$a,"",0));# *a +		 &mov("eax",&DWP($i*4,$a));	# *a  		&mul($w);			# *a * w  		&add("eax",$c);			# L(t)+=c -		 &mov($c,&DWP($i*4,$r,"",0));	# L(t)+= *r  		&adc("edx",0);			# H(t)+=carry -		 &add("eax",$c); +		 &add("eax",&DWP($i*4,$r));	# L(t)+= *r  		&adc("edx",0);			# H(t)+=carry  		 &dec("ecx") if ($i != 7-1); -		&mov(&DWP($i*4,$r,"",0),"eax");	# *r= L(t); -		 &mov($c,"edx");			# c=  H(t); +		&mov(&DWP($i*4,$r),"eax");	# *r= L(t); +		 &mov($c,"edx");		# c=  H(t);  		&jz(&label("maw_end")) if ($i != 7-1);  		}  	&set_label("maw_end",0); @@ -184,7 +211,45 @@ sub bn_mul_words  	{  	local($name)=@_; -	&function_begin($name,""); +	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + +	$r="eax"; +	$a="edx"; +	$c="ecx"; + +	if ($sse2) { +		&picmeup("eax","OPENSSL_ia32cap_P"); +		&bt(&DWP(0,"eax"),26); +		&jnc(&label("mw_non_sse2")); + +		&mov($r,&wparam(0)); +		&mov($a,&wparam(1)); +		&mov($c,&wparam(2)); +		&movd("mm0",&wparam(3));	# mm0 = w +		&pxor("mm1","mm1");		# mm1 = carry = 0 + +	&set_label("mw_sse2_loop",16); +		&movd("mm2",&DWP(0,$a));	# mm2 = a[i] +		&pmuludq("mm2","mm0");		# a[i] *= w +		&lea($a,&DWP(4,$a)); +		&paddq("mm1","mm2");		# carry += a[i]*w +		&movd(&DWP(0,$r),"mm1");	# r[i] = carry_low +		&sub($c,1); +		&psrlq("mm1",32);		# carry = carry_high +		&lea($r,&DWP(4,$r)); +		&jnz(&label("mw_sse2_loop")); + +		&movd("eax","mm1");		# return carry +		&emms(); +		&ret(); +	&set_label("mw_non_sse2",16); +	} + +	# function_begin prologue +	&push("ebp"); +	&push("ebx"); +	&push("esi"); +	&push("edi");  	&comment("");  	$Low="eax"; @@ -257,7 +322,40 @@ sub bn_sqr_words  	{  	local($name)=@_; -	&function_begin($name,""); +	&function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); + +	$r="eax"; +	$a="edx"; +	$c="ecx"; + +	if ($sse2) { +		&picmeup("eax","OPENSSL_ia32cap_P"); +		&bt(&DWP(0,"eax"),26); +		
&jnc(&label("sqr_non_sse2")); + +		&mov($r,&wparam(0)); +		&mov($a,&wparam(1)); +		&mov($c,&wparam(2)); + +	&set_label("sqr_sse2_loop",16); +		&movd("mm0",&DWP(0,$a));	# mm0 = a[i] +		&pmuludq("mm0","mm0");		# a[i] *= a[i] +		&lea($a,&DWP(4,$a));		# a++ +		&movq(&QWP(0,$r),"mm0");	# r[i] = a[i]*a[i] +		&sub($c,1); +		&lea($r,&DWP(8,$r));		# r += 2 +		&jnz(&label("sqr_sse2_loop")); + +		&emms(); +		&ret(); +	&set_label("sqr_non_sse2",16); +	} + +	# function_begin prologue +	&push("ebp"); +	&push("ebx"); +	&push("esi"); +	&push("edi");  	&comment("");  	$r="esi"; @@ -313,12 +411,13 @@ sub bn_div_words  	{  	local($name)=@_; -	&function_begin($name,""); +	&function_begin_B($name,"");  	&mov("edx",&wparam(0));	#  	&mov("eax",&wparam(1));	# -	&mov("ebx",&wparam(2));	# -	&div("ebx"); -	&function_end($name); +	&mov("ecx",&wparam(2));	# +	&div("ecx"); +	&ret(); +	&function_end_B($name);  	}  sub bn_add_words diff --git a/openssl/crypto/bn/asm/co-586.pl b/openssl/crypto/bn/asm/co-586.pl index 5d962cb95..57101a6bd 100644 --- a/openssl/crypto/bn/asm/co-586.pl +++ b/openssl/crypto/bn/asm/co-586.pl @@ -1,6 +1,7 @@  #!/usr/local/bin/perl -push(@INC,"perlasm","../../perlasm"); +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC,"${dir}","${dir}../../perlasm");  require "x86asm.pl";  &asm_init($ARGV[0],$0); diff --git a/openssl/crypto/bn/asm/mo-586.pl b/openssl/crypto/bn/asm/mo-586.pl deleted file mode 100644 index 098229309..000000000 --- a/openssl/crypto/bn/asm/mo-586.pl +++ /dev/null @@ -1,603 +0,0 @@ -#!/usr/bin/env perl - -# This is crypto/bn/asm/x86-mont.pl (with asciz from crypto/perlasm/x86asm.pl) -# from OpenSSL 0.9.9-dev  - -sub ::asciz -{ my @str=unpack("C*",shift); -    push @str,0; -    while ($#str>15) { -	&data_byte(@str[0..15]); -	foreach (0..15) { shift @str; } -    } -    &data_byte(@str) if (@str); -} - -# ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL -# project. 
The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see http://www.openssl.org/~appro/cryptogams/. -# ==================================================================== - -# October 2005 -# -# This is a "teaser" code, as it can be improved in several ways... -# First of all non-SSE2 path should be implemented (yes, for now it -# performs Montgomery multiplication/convolution only on SSE2-capable -# CPUs such as P4, others fall down to original code). Then inner loop -# can be unrolled and modulo-scheduled to improve ILP and possibly -# moved to 128-bit XMM register bank (though it would require input -# rearrangement and/or increase bus bandwidth utilization). Dedicated -# squaring procedure should give further performance improvement... -# Yet, for being draft, the code improves rsa512 *sign* benchmark by -# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-) - -# December 2006 -# -# Modulo-scheduling SSE2 loops results in further 15-20% improvement. -# Integer-only code [being equipped with dedicated squaring procedure] -# gives ~40% on rsa512 sign benchmark... - -push(@INC,"perlasm","../../perlasm"); -require "x86asm.pl"; - -&asm_init($ARGV[0],$0); - -$sse2=0; -for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } - -&external_label("OPENSSL_ia32cap_P") if ($sse2); - -&function_begin("bn_mul_mont"); - -$i="edx"; -$j="ecx"; -$ap="esi";	$tp="esi";		# overlapping variables!!! -$rp="edi";	$bp="edi";		# overlapping variables!!! 
-$np="ebp"; -$num="ebx"; - -$_num=&DWP(4*0,"esp");			# stack top layout -$_rp=&DWP(4*1,"esp"); -$_ap=&DWP(4*2,"esp"); -$_bp=&DWP(4*3,"esp"); -$_np=&DWP(4*4,"esp"); -$_n0=&DWP(4*5,"esp");	$_n0q=&QWP(4*5,"esp"); -$_sp=&DWP(4*6,"esp"); -$_bpend=&DWP(4*7,"esp"); -$frame=32;				# size of above frame rounded up to 16n - -	&xor	("eax","eax"); -	&mov	("edi",&wparam(5));	# int num -	&cmp	("edi",4); -	&jl	(&label("just_leave")); - -	&lea	("esi",&wparam(0));	# put aside pointer to argument block -	&lea	("edx",&wparam(1));	# load ap -	&mov	("ebp","esp");		# saved stack pointer! -	&add	("edi",2);		# extra two words on top of tp -	&neg	("edi"); -	&lea	("esp",&DWP(-$frame,"esp","edi",4));	# alloca($frame+4*(num+2)) -	&neg	("edi"); - -	# minimize cache contention by arraning 2K window between stack -	# pointer and ap argument [np is also position sensitive vector, -	# but it's assumed to be near ap, as it's allocated at ~same -	# time]. -	&mov	("eax","esp"); -	&sub	("eax","edx"); -	&and	("eax",2047); -	&sub	("esp","eax");		# this aligns sp and ap modulo 2048 - -	&xor	("edx","esp"); -	&and	("edx",2048); -	&xor	("edx",2048); -	&sub	("esp","edx");		# this splits them apart modulo 4096 - -	&and	("esp",-64);		# align to cache line - -	################################# load argument block... -	&mov	("eax",&DWP(0*4,"esi"));# BN_ULONG *rp -	&mov	("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap -	&mov	("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp -	&mov	("edx",&DWP(3*4,"esi"));# const BN_ULONG *np -	&mov	("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0 -	#&mov	("edi",&DWP(5*4,"esi"));# int num - -	&mov	("esi",&DWP(0,"esi"));	# pull n0[0] -	&mov	($_rp,"eax");		# ... save a copy of argument block -	&mov	($_ap,"ebx"); -	&mov	($_bp,"ecx"); -	&mov	($_np,"edx"); -	&mov	($_n0,"esi"); -	&lea	($num,&DWP(-3,"edi"));	# num=num-1 to assist modulo-scheduling -	#&mov	($_num,$num);		# redundant as $num is not reused -	&mov	($_sp,"ebp");		# saved stack pointer! 
- -if($sse2) { -$acc0="mm0";	# mmx register bank layout -$acc1="mm1"; -$car0="mm2"; -$car1="mm3"; -$mul0="mm4"; -$mul1="mm5"; -$temp="mm6"; -$mask="mm7"; - -	&picmeup("eax","OPENSSL_ia32cap_P"); -	&bt	(&DWP(0,"eax"),26); -	&jnc	(&label("non_sse2")); - -	&mov	("eax",-1); -	&movd	($mask,"eax");		# mask 32 lower bits - -	&mov	($ap,$_ap);		# load input pointers -	&mov	($bp,$_bp); -	&mov	($np,$_np); - -	&xor	($i,$i);		# i=0 -	&xor	($j,$j);		# j=0 - -	&movd	($mul0,&DWP(0,$bp));		# bp[0] -	&movd	($mul1,&DWP(0,$ap));		# ap[0] -	&movd	($car1,&DWP(0,$np));		# np[0] - -	&pmuludq($mul1,$mul0);			# ap[0]*bp[0] -	&movq	($car0,$mul1); -	&movq	($acc0,$mul1);			# I wish movd worked for -	&pand	($acc0,$mask);			# inter-register transfers - -	&pmuludq($mul1,$_n0q);			# *=n0 - -	&pmuludq($car1,$mul1);			# "t[0]"*np[0]*n0 -	&paddq	($car1,$acc0); - -	&movd	($acc1,&DWP(4,$np));		# np[1] -	&movd	($acc0,&DWP(4,$ap));		# ap[1] - -	&psrlq	($car0,32); -	&psrlq	($car1,32); - -	&inc	($j);				# j++ -&set_label("1st",16); -	&pmuludq($acc0,$mul0);			# ap[j]*bp[0] -	&pmuludq($acc1,$mul1);			# np[j]*m1 -	&paddq	($car0,$acc0);			# +=c0 -	&paddq	($car1,$acc1);			# +=c1 - -	&movq	($acc0,$car0); -	&pand	($acc0,$mask); -	&movd	($acc1,&DWP(4,$np,$j,4));	# np[j+1] -	&paddq	($car1,$acc0);			# +=ap[j]*bp[0]; -	&movd	($acc0,&DWP(4,$ap,$j,4));	# ap[j+1] -	&psrlq	($car0,32); -	&movd	(&DWP($frame-4,"esp",$j,4),$car1);	# tp[j-1]= -	&psrlq	($car1,32); - -	&lea	($j,&DWP(1,$j)); -	&cmp	($j,$num); -	&jl	(&label("1st")); - -	&pmuludq($acc0,$mul0);			# ap[num-1]*bp[0] -	&pmuludq($acc1,$mul1);			# np[num-1]*m1 -	&paddq	($car0,$acc0);			# +=c0 -	&paddq	($car1,$acc1);			# +=c1 - -	&movq	($acc0,$car0); -	&pand	($acc0,$mask); -	&paddq	($car1,$acc0);			# +=ap[num-1]*bp[0]; -	&movd	(&DWP($frame-4,"esp",$j,4),$car1);	# tp[num-2]= - -	&psrlq	($car0,32); -	&psrlq	($car1,32); - -	&paddq	($car1,$car0); -	&movq	(&QWP($frame,"esp",$num,4),$car1);	# tp[num].tp[num-1] - -	&inc	($i);				# i++ -&set_label("outer"); -	&xor	($j,$j);			# 
j=0 - -	&movd	($mul0,&DWP(0,$bp,$i,4));	# bp[i] -	&movd	($mul1,&DWP(0,$ap));		# ap[0] -	&movd	($temp,&DWP($frame,"esp"));	# tp[0] -	&movd	($car1,&DWP(0,$np));		# np[0] -	&pmuludq($mul1,$mul0);			# ap[0]*bp[i] - -	&paddq	($mul1,$temp);			# +=tp[0] -	&movq	($acc0,$mul1); -	&movq	($car0,$mul1); -	&pand	($acc0,$mask); - -	&pmuludq($mul1,$_n0q);			# *=n0 - -	&pmuludq($car1,$mul1); -	&paddq	($car1,$acc0); - -	&movd	($temp,&DWP($frame+4,"esp"));	# tp[1] -	&movd	($acc1,&DWP(4,$np));		# np[1] -	&movd	($acc0,&DWP(4,$ap));		# ap[1] - -	&psrlq	($car0,32); -	&psrlq	($car1,32); -	&paddq	($car0,$temp);			# +=tp[1] - -	&inc	($j);				# j++ -	&dec	($num); -&set_label("inner"); -	&pmuludq($acc0,$mul0);			# ap[j]*bp[i] -	&pmuludq($acc1,$mul1);			# np[j]*m1 -	&paddq	($car0,$acc0);			# +=c0 -	&paddq	($car1,$acc1);			# +=c1 - -	&movq	($acc0,$car0); -	&movd	($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1] -	&pand	($acc0,$mask); -	&movd	($acc1,&DWP(4,$np,$j,4));	# np[j+1] -	&paddq	($car1,$acc0);			# +=ap[j]*bp[i]+tp[j] -	&movd	($acc0,&DWP(4,$ap,$j,4));	# ap[j+1] -	&psrlq	($car0,32); -	&movd	(&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]= -	&psrlq	($car1,32); -	&paddq	($car0,$temp);			# +=tp[j+1] - -	&dec	($num); -	&lea	($j,&DWP(1,$j));		# j++ -	&jnz	(&label("inner")); - -	&mov	($num,$j); -	&pmuludq($acc0,$mul0);			# ap[num-1]*bp[i] -	&pmuludq($acc1,$mul1);			# np[num-1]*m1 -	&paddq	($car0,$acc0);			# +=c0 -	&paddq	($car1,$acc1);			# +=c1 - -	&movq	($acc0,$car0); -	&pand	($acc0,$mask); -	&paddq	($car1,$acc0);			# +=ap[num-1]*bp[i]+tp[num-1] -	&movd	(&DWP($frame-4,"esp",$j,4),$car1);	# tp[num-2]= -	&psrlq	($car0,32); -	&psrlq	($car1,32); - -	&movd	($temp,&DWP($frame+4,"esp",$num,4));	# += tp[num] -	&paddq	($car1,$car0); -	&paddq	($car1,$temp); -	&movq	(&QWP($frame,"esp",$num,4),$car1);	# tp[num].tp[num-1] - -	&lea	($i,&DWP(1,$i));		# i++ -	&cmp	($i,$num); -	&jle	(&label("outer")); - -	&emms	();				# done with mmx bank -	&jmp	(&label("common_tail")); - -&set_label("non_sse2",16); -} - -if (0) { -	
&mov	("esp",$_sp); -	&xor	("eax","eax");	# signal "not fast enough [yet]" -	&jmp	(&label("just_leave")); -	# While the below code provides competitive performance for -	# all key lengthes on modern Intel cores, it's still more -	# than 10% slower for 4096-bit key elsewhere:-( "Competitive" -	# means compared to the original integer-only assembler. -	# 512-bit RSA sign is better by ~40%, but that's about all -	# one can say about all CPUs... -} else { -$inp="esi";	# integer path uses these registers differently -$word="edi"; -$carry="ebp"; - -	&mov	($inp,$_ap); -	&lea	($carry,&DWP(1,$num)); -	&mov	($word,$_bp); -	&xor	($j,$j);				# j=0 -	&mov	("edx",$inp); -	&and	($carry,1);				# see if num is even -	&sub	("edx",$word);				# see if ap==bp -	&lea	("eax",&DWP(4,$word,$num,4));		# &bp[num] -	&or	($carry,"edx"); -	&mov	($word,&DWP(0,$word));			# bp[0] -	&jz	(&label("bn_sqr_mont")); -	&mov	($_bpend,"eax"); -	&mov	("eax",&DWP(0,$inp)); -	&xor	("edx","edx"); - -&set_label("mull",16); -	&mov	($carry,"edx"); -	&mul	($word);				# ap[j]*bp[0] -	&add	($carry,"eax"); -	&lea	($j,&DWP(1,$j)); -	&adc	("edx",0); -	&mov	("eax",&DWP(0,$inp,$j,4));		# ap[j+1] -	&cmp	($j,$num); -	&mov	(&DWP($frame-4,"esp",$j,4),$carry);	# tp[j]= -	&jl	(&label("mull")); - -	&mov	($carry,"edx"); -	&mul	($word);				# ap[num-1]*bp[0] -	 &mov	($word,$_n0); -	&add	("eax",$carry); -	 &mov	($inp,$_np); -	&adc	("edx",0); -	 &imul	($word,&DWP($frame,"esp"));		# n0*tp[0] - -	&mov	(&DWP($frame,"esp",$num,4),"eax");	# tp[num-1]= -	&xor	($j,$j); -	&mov	(&DWP($frame+4,"esp",$num,4),"edx");	# tp[num]= -	&mov	(&DWP($frame+8,"esp",$num,4),$j);	# tp[num+1]= - -	&mov	("eax",&DWP(0,$inp));			# np[0] -	&mul	($word);				# np[0]*m -	&add	("eax",&DWP($frame,"esp"));		# +=tp[0] -	&mov	("eax",&DWP(4,$inp));			# np[1] -	&adc	("edx",0); -	&inc	($j); - -	&jmp	(&label("2ndmadd")); - -&set_label("1stmadd",16); -	&mov	($carry,"edx"); -	&mul	($word);				# ap[j]*bp[i] -	&add	($carry,&DWP($frame,"esp",$j,4));	# +=tp[j] -	&lea	
($j,&DWP(1,$j)); -	&adc	("edx",0); -	&add	($carry,"eax"); -	&mov	("eax",&DWP(0,$inp,$j,4));		# ap[j+1] -	&adc	("edx",0); -	&cmp	($j,$num); -	&mov	(&DWP($frame-4,"esp",$j,4),$carry);	# tp[j]= -	&jl	(&label("1stmadd")); - -	&mov	($carry,"edx"); -	&mul	($word);				# ap[num-1]*bp[i] -	&add	("eax",&DWP($frame,"esp",$num,4));	# +=tp[num-1] -	 &mov	($word,$_n0); -	&adc	("edx",0); -	 &mov	($inp,$_np); -	&add	($carry,"eax"); -	&adc	("edx",0); -	 &imul	($word,&DWP($frame,"esp"));		# n0*tp[0] - -	&xor	($j,$j); -	&add	("edx",&DWP($frame+4,"esp",$num,4));	# carry+=tp[num] -	&mov	(&DWP($frame,"esp",$num,4),$carry);	# tp[num-1]= -	&adc	($j,0); -	 &mov	("eax",&DWP(0,$inp));			# np[0] -	&mov	(&DWP($frame+4,"esp",$num,4),"edx");	# tp[num]= -	&mov	(&DWP($frame+8,"esp",$num,4),$j);	# tp[num+1]= - -	&mul	($word);				# np[0]*m -	&add	("eax",&DWP($frame,"esp"));		# +=tp[0] -	&mov	("eax",&DWP(4,$inp));			# np[1] -	&adc	("edx",0); -	&mov	($j,1); - -&set_label("2ndmadd",16); -	&mov	($carry,"edx"); -	&mul	($word);				# np[j]*m -	&add	($carry,&DWP($frame,"esp",$j,4));	# +=tp[j] -	&lea	($j,&DWP(1,$j)); -	&adc	("edx",0); -	&add	($carry,"eax"); -	&mov	("eax",&DWP(0,$inp,$j,4));		# np[j+1] -	&adc	("edx",0); -	&cmp	($j,$num); -	&mov	(&DWP($frame-8,"esp",$j,4),$carry);	# tp[j-1]= -	&jl	(&label("2ndmadd")); - -	&mov	($carry,"edx"); -	&mul	($word);				# np[j]*m -	&add	($carry,&DWP($frame,"esp",$num,4));	# +=tp[num-1] -	&adc	("edx",0); -	&add	($carry,"eax"); -	&adc	("edx",0); -	&mov	(&DWP($frame-4,"esp",$num,4),$carry);	# tp[num-2]= - -	&xor	("eax","eax"); -	 &mov	($j,$_bp);				# &bp[i] -	&add	("edx",&DWP($frame+4,"esp",$num,4));	# carry+=tp[num] -	&adc	("eax",&DWP($frame+8,"esp",$num,4));	# +=tp[num+1] -	 &lea	($j,&DWP(4,$j)); -	&mov	(&DWP($frame,"esp",$num,4),"edx");	# tp[num-1]= -	 &cmp	($j,$_bpend); -	&mov	(&DWP($frame+4,"esp",$num,4),"eax");	# tp[num]= -	&je	(&label("common_tail")); - -	&mov	($word,&DWP(0,$j));			# bp[i+1] -	&mov	($inp,$_ap); -	&mov	($_bp,$j);				# &bp[++i] -	&xor	($j,$j); -	&xor	
("edx","edx"); -	&mov	("eax",&DWP(0,$inp)); -	&jmp	(&label("1stmadd")); - -&set_label("bn_sqr_mont",16); -$sbit=$num; -	&mov	($_num,$num); -	&mov	($_bp,$j);				# i=0 - -	&mov	("eax",$word);				# ap[0] -	&mul	($word);				# ap[0]*ap[0] -	&mov	(&DWP($frame,"esp"),"eax");		# tp[0]= -	&mov	($sbit,"edx"); -	&shr	("edx",1); -	&and	($sbit,1); -	&inc	($j); -&set_label("sqr",16); -	&mov	("eax",&DWP(0,$inp,$j,4));		# ap[j] -	&mov	($carry,"edx"); -	&mul	($word);				# ap[j]*ap[0] -	&add	("eax",$carry); -	&lea	($j,&DWP(1,$j)); -	&adc	("edx",0); -	&lea	($carry,&DWP(0,$sbit,"eax",2)); -	&shr	("eax",31); -	&cmp	($j,$_num); -	&mov	($sbit,"eax"); -	&mov	(&DWP($frame-4,"esp",$j,4),$carry);	# tp[j]= -	&jl	(&label("sqr")); - -	&mov	("eax",&DWP(0,$inp,$j,4));		# ap[num-1] -	&mov	($carry,"edx"); -	&mul	($word);				# ap[num-1]*ap[0] -	&add	("eax",$carry); -	 &mov	($word,$_n0); -	&adc	("edx",0); -	 &mov	($inp,$_np); -	&lea	($carry,&DWP(0,$sbit,"eax",2)); -	 &imul	($word,&DWP($frame,"esp"));		# n0*tp[0] -	&shr	("eax",31); -	&mov	(&DWP($frame,"esp",$j,4),$carry);	# tp[num-1]= - -	&lea	($carry,&DWP(0,"eax","edx",2)); -	 &mov	("eax",&DWP(0,$inp));			# np[0] -	&shr	("edx",31); -	&mov	(&DWP($frame+4,"esp",$j,4),$carry);	# tp[num]= -	&mov	(&DWP($frame+8,"esp",$j,4),"edx");	# tp[num+1]= - -	&mul	($word);				# np[0]*m -	&add	("eax",&DWP($frame,"esp"));		# +=tp[0] -	&mov	($num,$j); -	&adc	("edx",0); -	&mov	("eax",&DWP(4,$inp));			# np[1] -	&mov	($j,1); - -&set_label("3rdmadd",16); -	&mov	($carry,"edx"); -	&mul	($word);				# np[j]*m -	&add	($carry,&DWP($frame,"esp",$j,4));	# +=tp[j] -	&adc	("edx",0); -	&add	($carry,"eax"); -	&mov	("eax",&DWP(4,$inp,$j,4));		# np[j+1] -	&adc	("edx",0); -	&mov	(&DWP($frame-4,"esp",$j,4),$carry);	# tp[j-1]= - -	&mov	($carry,"edx"); -	&mul	($word);				# np[j+1]*m -	&add	($carry,&DWP($frame+4,"esp",$j,4));	# +=tp[j+1] -	&lea	($j,&DWP(2,$j)); -	&adc	("edx",0); -	&add	($carry,"eax"); -	&mov	("eax",&DWP(0,$inp,$j,4));		# np[j+2] -	&adc	("edx",0); -	&cmp	($j,$num); -	&mov	
(&DWP($frame-8,"esp",$j,4),$carry);	# tp[j]= -	&jl	(&label("3rdmadd")); - -	&mov	($carry,"edx"); -	&mul	($word);				# np[j]*m -	&add	($carry,&DWP($frame,"esp",$num,4));	# +=tp[num-1] -	&adc	("edx",0); -	&add	($carry,"eax"); -	&adc	("edx",0); -	&mov	(&DWP($frame-4,"esp",$num,4),$carry);	# tp[num-2]= - -	&mov	($j,$_bp);				# i -	&xor	("eax","eax"); -	&mov	($inp,$_ap); -	&add	("edx",&DWP($frame+4,"esp",$num,4));	# carry+=tp[num] -	&adc	("eax",&DWP($frame+8,"esp",$num,4));	# +=tp[num+1] -	&mov	(&DWP($frame,"esp",$num,4),"edx");	# tp[num-1]= -	&cmp	($j,$num); -	&mov	(&DWP($frame+4,"esp",$num,4),"eax");	# tp[num]= -	&je	(&label("common_tail")); - -	&mov	($word,&DWP(4,$inp,$j,4));		# ap[i] -	&lea	($j,&DWP(1,$j)); -	&mov	("eax",$word); -	&mov	($_bp,$j);				# ++i -	&mul	($word);				# ap[i]*ap[i] -	&add	("eax",&DWP($frame,"esp",$j,4));	# +=tp[i] -	&adc	("edx",0); -	&mov	(&DWP($frame,"esp",$j,4),"eax");	# tp[i]= -	&xor	($carry,$carry); -	&cmp	($j,$num); -	&lea	($j,&DWP(1,$j)); -	&je	(&label("sqrlast")); - -	&mov	($sbit,"edx");				# zaps $num -	&shr	("edx",1); -	&and	($sbit,1); -&set_label("sqradd",16); -	&mov	("eax",&DWP(0,$inp,$j,4));		# ap[j] -	&mov	($carry,"edx"); -	&mul	($word);				# ap[j]*ap[i] -	&add	("eax",$carry); -	&lea	($carry,&DWP(0,"eax","eax")); -	&adc	("edx",0); -	&shr	("eax",31); -	&add	($carry,&DWP($frame,"esp",$j,4));	# +=tp[j] -	&lea	($j,&DWP(1,$j)); -	&adc	("eax",0); -	&add	($carry,$sbit); -	&adc	("eax",0); -	&cmp	($j,$_num); -	&mov	(&DWP($frame-4,"esp",$j,4),$carry);	# tp[j]= -	&mov	($sbit,"eax"); -	&jle	(&label("sqradd")); - -	&mov	($carry,"edx"); -	&lea	("edx",&DWP(0,$sbit,"edx",2)); -	&shr	($carry,31); -&set_label("sqrlast"); -	&mov	($word,$_n0); -	&mov	($inp,$_np); -	&imul	($word,&DWP($frame,"esp"));		# n0*tp[0] - -	&add	("edx",&DWP($frame,"esp",$j,4));	# +=tp[num] -	&mov	("eax",&DWP(0,$inp));			# np[0] -	&adc	($carry,0); -	&mov	(&DWP($frame,"esp",$j,4),"edx");	# tp[num]= -	&mov	(&DWP($frame+4,"esp",$j,4),$carry);	# tp[num+1]= - -	&mul	($word);				# 
np[0]*m -	&add	("eax",&DWP($frame,"esp"));		# +=tp[0] -	&lea	($num,&DWP(-1,$j)); -	&adc	("edx",0); -	&mov	($j,1); -	&mov	("eax",&DWP(4,$inp));			# np[1] - -	&jmp	(&label("3rdmadd")); -} - -&set_label("common_tail",16); -	&mov	($np,$_np);			# load modulus pointer -	&mov	($rp,$_rp);			# load result pointer -	&lea	($tp,&DWP($frame,"esp"));	# [$ap and $bp are zapped] - -	&mov	("eax",&DWP(0,$tp));		# tp[0] -	&mov	($j,$num);			# j=num-1 -	&xor	($i,$i);			# i=0 and clear CF! - -&set_label("sub",16); -	&sbb	("eax",&DWP(0,$np,$i,4)); -	&mov	(&DWP(0,$rp,$i,4),"eax");	# rp[i]=tp[i]-np[i] -	&dec	($j);				# doesn't affect CF! -	&mov	("eax",&DWP(4,$tp,$i,4));	# tp[i+1] -	&lea	($i,&DWP(1,$i));		# i++ -	&jge	(&label("sub")); - -	&sbb	("eax",0);			# handle upmost overflow bit -	&and	($tp,"eax"); -	&not	("eax"); -	&mov	($np,$rp); -	&and	($np,"eax"); -	&or	($tp,$np);			# tp=carry?tp:rp - -&set_label("copy",16);				# copy or in-place refresh -	&mov	("eax",&DWP(0,$tp,$num,4)); -	&mov	(&DWP(0,$rp,$num,4),"eax");	# rp[i]=tp[i] -	&mov	(&DWP($frame,"esp",$num,4),$j);	# zap temporary vector -	&dec	($num); -	&jge	(&label("copy")); - -	&mov	("esp",$_sp);		# pull saved stack pointer -	&mov	("eax",1); -&set_label("just_leave"); -&function_end("bn_mul_mont"); - -&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>"); - -&asm_finish(); diff --git a/openssl/crypto/bn/asm/ppc.pl b/openssl/crypto/bn/asm/ppc.pl index 08e005347..37c65d351 100644 --- a/openssl/crypto/bn/asm/ppc.pl +++ b/openssl/crypto/bn/asm/ppc.pl @@ -100,9 +100,9 @@  #	me a note at schari@us.ibm.com  # -$opf = shift; +$flavour = shift; -if ($opf =~ /32\.s/) { +if ($flavour =~ /32/) {  	$BITS=	32;  	$BNSZ=	$BITS/8;  	$ISA=	"\"ppc\""; @@ -125,7 +125,7 @@ if ($opf =~ /32\.s/) {  	$INSR=	"insrwi";	# insert right  	$ROTL=	"rotlwi";	# rotate left by immediate  	$TR=	"tw";		# conditional trap -} elsif ($opf =~ /64\.s/) { +} elsif ($flavour =~ /64/) {  	$BITS=	64;  	$BNSZ=	$BITS/8;  	$ISA=	"\"ppc64\""; @@ -149,93 
+149,16 @@ if ($opf =~ /32\.s/) {  	$INSR=	"insrdi";	# insert right   	$ROTL=	"rotldi";	# rotate left by immediate  	$TR=	"td";		# conditional trap -} else { die "nonsense $opf"; } +} else { die "nonsense $flavour"; } -( defined shift || open STDOUT,">$opf" ) || die "can't open $opf: $!"; +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +die "can't locate ppc-xlate.pl"; -# function entry points from the AIX code -# -# There are other, more elegant, ways to handle this. We (IBM) chose -# this approach as it plays well with scripts we run to 'namespace' -# OpenSSL .i.e. we add a prefix to all the public symbols so we can -# co-exist in the same process with other implementations of OpenSSL. -# 'cleverer' ways of doing these substitutions tend to hide data we -# need to be obvious. -# -my @items = ("bn_sqr_comba4", -	     "bn_sqr_comba8", -	     "bn_mul_comba4", -	     "bn_mul_comba8", -	     "bn_sub_words", -	     "bn_add_words", -	     "bn_div_words", -	     "bn_sqr_words", -	     "bn_mul_words", -	     "bn_mul_add_words"); +open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; -if    ($opf =~ /linux/)	{  do_linux();	} -elsif ($opf =~ /aix/)	{  do_aix();	} -elsif ($opf =~ /osx/)	{  do_osx();	} -else			{  do_bsd();	} - -sub do_linux { -    $d=&data(); - -    if ($BITS==64) { -      foreach $t (@items) { -        $d =~ s/\.$t:/\ -\t.section\t".opd","aw"\ -\t.align\t3\ -\t.globl\t$t\ -$t:\ -\t.quad\t.$t,.TOC.\@tocbase,0\ -\t.size\t$t,24\ -\t.previous\n\ -\t.type\t.$t,\@function\ -\t.globl\t.$t\ -.$t:/g; -      } -    } -    else { -      foreach $t (@items) { -        $d=~s/\.$t/$t/g; -      } -    } -    # hide internal labels to avoid pollution of name table... -    $d=~s/Lppcasm_/.Lppcasm_/gm; -    print $d; -} - -sub do_aix { -    # AIX assembler is smart enough to please the linker without -    # making us do something special... 
-    print &data(); -} - -# MacOSX 32 bit -sub do_osx { -    $d=&data(); -    # Change the bn symbol prefix from '.' to '_' -    foreach $t (@items) { -      $d=~s/\.$t/_$t/g; -    } -    # Change .machine to something OS X asm will accept -    $d=~s/\.machine.*/.text/g; -    $d=~s/\#/;/g; # change comment from '#' to ';' -    print $d; -} - -# BSD (Untested) -sub do_bsd { -    $d=&data(); -    foreach $t (@items) { -      $d=~s/\.$t/_$t/g; -    } -    print $d; -} - -sub data { -	local($data)=<<EOF; +$data=<<EOF;  #--------------------------------------------------------------------  #  # @@ -297,33 +220,20 @@ sub data {  #  #	Defines to be used in the assembly code.  #	 -.set r0,0	# we use it as storage for value of 0 -.set SP,1	# preserved -.set RTOC,2	# preserved  -.set r3,3	# 1st argument/return value -.set r4,4	# 2nd argument/volatile register -.set r5,5	# 3rd argument/volatile register -.set r6,6	# ... -.set r7,7 -.set r8,8 -.set r9,9 -.set r10,10 -.set r11,11 -.set r12,12 -.set r13,13	# not used, nor any other "below" it... - -.set BO_IF_NOT,4 -.set BO_IF,12 -.set BO_dCTR_NZERO,16 -.set BO_dCTR_ZERO,18 -.set BO_ALWAYS,20 -.set CR0_LT,0; -.set CR0_GT,1; -.set CR0_EQ,2 -.set CR1_FX,4; -.set CR1_FEX,5; -.set CR1_VX,6 -.set LR,8 +#.set r0,0	# we use it as storage for value of 0 +#.set SP,1	# preserved +#.set RTOC,2	# preserved  +#.set r3,3	# 1st argument/return value +#.set r4,4	# 2nd argument/volatile register +#.set r5,5	# 3rd argument/volatile register +#.set r6,6	# ... +#.set r7,7 +#.set r8,8 +#.set r9,9 +#.set r10,10 +#.set r11,11 +#.set r12,12 +#.set r13,13	# not used, nor any other "below" it...  
#	Declare function names to be global  #	NOTE:	For gcc these names MUST be changed to remove @@ -344,7 +254,7 @@ sub data {  # .text section -	.machine	$ISA +	.machine	"any"  #  #	NOTE:	The following label name should be changed to @@ -478,7 +388,7 @@ sub data {  	$ST		r9,`6*$BNSZ`(r3)	#r[6]=c1  	$ST		r10,`7*$BNSZ`(r3)	#r[7]=c2 -	bclr	BO_ALWAYS,CR0_LT +	blr  	.long	0x00000000  # @@ -903,7 +813,7 @@ sub data {  	$ST		r9, `15*$BNSZ`(r3)	#r[15]=c1; -	bclr	BO_ALWAYS,CR0_LT +	blr  	.long	0x00000000 @@ -1055,7 +965,7 @@ sub data {  	$ST	r10,`6*$BNSZ`(r3)	#r[6]=c1  	$ST	r11,`7*$BNSZ`(r3)	#r[7]=c2 -	bclr	BO_ALWAYS,CR0_LT +	blr  	.long	0x00000000  # @@ -1591,7 +1501,7 @@ sub data {  	adde	r10,r10,r9  	$ST	r12,`14*$BNSZ`(r3)	#r[14]=c3;  	$ST	r10,`15*$BNSZ`(r3)	#r[15]=c1; -	bclr	BO_ALWAYS,CR0_LT +	blr  	.long	0x00000000  # @@ -1623,7 +1533,7 @@ sub data {  	subfc.	r7,r0,r6        # If r6 is 0 then result is 0.  				# if r6 > 0 then result !=0  				# In either case carry bit is set. -	bc	BO_IF,CR0_EQ,Lppcasm_sub_adios +	beq	Lppcasm_sub_adios  	addi	r4,r4,-$BNSZ  	addi	r3,r3,-$BNSZ  	addi	r5,r5,-$BNSZ @@ -1635,11 +1545,11 @@ Lppcasm_sub_mainloop:  				# if carry = 1 this is r7-r8. Else it  				# is r7-r8 -1 as we need.  	$STU	r6,$BNSZ(r3) -	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_sub_mainloop +	bdnz-	Lppcasm_sub_mainloop  Lppcasm_sub_adios:	  	subfze	r3,r0		# if carry bit is set then r3 = 0 else -1  	andi.	r3,r3,1         # keep only last bit. -	bclr	BO_ALWAYS,CR0_LT +	blr  	.long	0x00000000 @@ -1670,7 +1580,7 @@ Lppcasm_sub_adios:  #	check for r6 = 0. Is this needed?  #  	addic.	r6,r6,0		#test r6 and clear carry bit. -	bc	BO_IF,CR0_EQ,Lppcasm_add_adios +	beq	Lppcasm_add_adios  	addi	r4,r4,-$BNSZ  	addi	r3,r3,-$BNSZ  	addi	r5,r5,-$BNSZ @@ -1680,10 +1590,10 @@ Lppcasm_add_mainloop:  	$LDU	r8,$BNSZ(r5)  	adde	r8,r7,r8  	$STU	r8,$BNSZ(r3) -	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_add_mainloop +	bdnz-	Lppcasm_add_mainloop  Lppcasm_add_adios:	  	addze	r3,r0			#return carry bit. 
-	bclr	BO_ALWAYS,CR0_LT +	blr  	.long	0x00000000  # @@ -1707,24 +1617,24 @@ Lppcasm_add_adios:  #	r5 = d  	$UCMPI	0,r5,0			# compare r5 and 0 -	bc	BO_IF_NOT,CR0_EQ,Lppcasm_div1	# proceed if d!=0 +	bne	Lppcasm_div1		# proceed if d!=0  	li	r3,-1			# d=0 return -1 -	bclr	BO_ALWAYS,CR0_LT	 +	blr  Lppcasm_div1:  	xor	r0,r0,r0		#r0=0  	li	r8,$BITS  	$CNTLZ.	r7,r5			#r7 = num leading 0s in d. -	bc	BO_IF,CR0_EQ,Lppcasm_div2	#proceed if no leading zeros +	beq	Lppcasm_div2		#proceed if no leading zeros  	subf	r8,r7,r8		#r8 = BN_num_bits_word(d)  	$SHR.	r9,r3,r8		#are there any bits above r8'th?  	$TR	16,r9,r0		#if there're, signal to dump core...  Lppcasm_div2:  	$UCMP	0,r3,r5			#h>=d? -	bc	BO_IF,CR0_LT,Lppcasm_div3	#goto Lppcasm_div3 if not +	blt	Lppcasm_div3		#goto Lppcasm_div3 if not  	subf	r3,r5,r3		#h-=d ;   Lppcasm_div3:				#r7 = BN_BITS2-i. so r7=i  	cmpi	0,0,r7,0		# is (i == 0)? -	bc	BO_IF,CR0_EQ,Lppcasm_div4 +	beq	Lppcasm_div4  	$SHL	r3,r3,r7		# h = (h<< i)  	$SHR	r8,r4,r8		# r8 = (l >> BN_BITS2 -i)  	$SHL	r5,r5,r7		# d<<=i @@ -1741,7 +1651,7 @@ Lppcasm_divouterloop:  	$SHRI	r11,r4,`$BITS/2`	#r11= (l&BN_MASK2h)>>BN_BITS4  					# compute here for innerloop.  	$UCMP	0,r8,r9			# is (h>>BN_BITS4)==dh -	bc	BO_IF_NOT,CR0_EQ,Lppcasm_div5	# goto Lppcasm_div5 if not +	bne	Lppcasm_div5		# goto Lppcasm_div5 if not  	li	r8,-1  	$CLRU	r8,r8,`$BITS/2`		#q = BN_MASK2l  @@ -1762,9 +1672,9 @@ Lppcasm_divinnerloop:  					# the following 2 instructions do that  	$SHLI	r7,r10,`$BITS/2`	# r7 = (t<<BN_BITS4)  	or	r7,r7,r11		# r7|=((l&BN_MASK2h)>>BN_BITS4) -	$UCMP	1,r6,r7			# compare (tl <= r7) -	bc	BO_IF_NOT,CR0_EQ,Lppcasm_divinnerexit -	bc	BO_IF_NOT,CR1_FEX,Lppcasm_divinnerexit +	$UCMP	cr1,r6,r7		# compare (tl <= r7) +	bne	Lppcasm_divinnerexit +	ble	cr1,Lppcasm_divinnerexit  	addi	r8,r8,-1		#q--  	subf	r12,r9,r12		#th -=dh  	$CLRU	r10,r5,`$BITS/2`	#r10=dl. t is no longer needed in loop. 
@@ -1773,14 +1683,14 @@ Lppcasm_divinnerloop:  Lppcasm_divinnerexit:  	$SHRI	r10,r6,`$BITS/2`	#t=(tl>>BN_BITS4)  	$SHLI	r11,r6,`$BITS/2`	#tl=(tl<<BN_BITS4)&BN_MASK2h; -	$UCMP	1,r4,r11		# compare l and tl +	$UCMP	cr1,r4,r11		# compare l and tl  	add	r12,r12,r10		# th+=t -	bc	BO_IF_NOT,CR1_FX,Lppcasm_div7  # if (l>=tl) goto Lppcasm_div7 +	bge	cr1,Lppcasm_div7	# if (l>=tl) goto Lppcasm_div7  	addi	r12,r12,1		# th++  Lppcasm_div7:  	subf	r11,r11,r4		#r11=l-tl -	$UCMP	1,r3,r12		#compare h and th -	bc	BO_IF_NOT,CR1_FX,Lppcasm_div8	#if (h>=th) goto Lppcasm_div8 +	$UCMP	cr1,r3,r12		#compare h and th +	bge	cr1,Lppcasm_div8	#if (h>=th) goto Lppcasm_div8  	addi	r8,r8,-1		# q--  	add	r3,r5,r3		# h+=d  Lppcasm_div8: @@ -1791,12 +1701,12 @@ Lppcasm_div8:  					# the following 2 instructions will do this.  	$INSR	r11,r12,`$BITS/2`,`$BITS/2`	# r11 is the value we want rotated $BITS/2.  	$ROTL	r3,r11,`$BITS/2`	# rotate by $BITS/2 and store in r3 -	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_div9#if (count==0) break ; +	bdz	Lppcasm_div9		#if (count==0) break ;  	$SHLI	r0,r8,`$BITS/2`		#ret =q<<BN_BITS4  	b	Lppcasm_divouterloop  Lppcasm_div9:  	or	r3,r8,r0 -	bclr	BO_ALWAYS,CR0_LT +	blr  	.long	0x00000000  # @@ -1822,7 +1732,7 @@ Lppcasm_div9:  #	No unrolling done here. Not performance critical.  	addic.	r5,r5,0			#test r5. -	bc	BO_IF,CR0_EQ,Lppcasm_sqr_adios +	beq	Lppcasm_sqr_adios  	addi	r4,r4,-$BNSZ  	addi	r3,r3,-$BNSZ  	mtctr	r5 @@ -1833,9 +1743,9 @@ Lppcasm_sqr_mainloop:  	$UMULH  r8,r6,r6  	$STU	r7,$BNSZ(r3)  	$STU	r8,$BNSZ(r3) -	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_sqr_mainloop +	bdnz-	Lppcasm_sqr_mainloop  Lppcasm_sqr_adios:	 -	bclr	BO_ALWAYS,CR0_LT +	blr  	.long	0x00000000 @@ -1858,7 +1768,7 @@ Lppcasm_sqr_adios:  	xor	r0,r0,r0  	xor	r12,r12,r12		# used for carry  	rlwinm.	
r7,r5,30,2,31		# num >> 2 -	bc	BO_IF,CR0_EQ,Lppcasm_mw_REM +	beq	Lppcasm_mw_REM  	mtctr	r7  Lppcasm_mw_LOOP:	  					#mul(rp[0],ap[0],w,c1); @@ -1896,11 +1806,11 @@ Lppcasm_mw_LOOP:  	addi	r3,r3,`4*$BNSZ`  	addi	r4,r4,`4*$BNSZ` -	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_mw_LOOP +	bdnz-	Lppcasm_mw_LOOP  Lppcasm_mw_REM:  	andi.	r5,r5,0x3 -	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER +	beq	Lppcasm_mw_OVER  					#mul(rp[0],ap[0],w,c1);  	$LD	r8,`0*$BNSZ`(r4)  	$UMULL	r9,r6,r8 @@ -1912,7 +1822,7 @@ Lppcasm_mw_REM:  	addi	r5,r5,-1  	cmpli	0,0,r5,0 -	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER +	beq	Lppcasm_mw_OVER  					#mul(rp[1],ap[1],w,c1); @@ -1926,7 +1836,7 @@ Lppcasm_mw_REM:  	addi	r5,r5,-1  	cmpli	0,0,r5,0 -	bc	BO_IF,CR0_EQ,Lppcasm_mw_OVER +	beq	Lppcasm_mw_OVER  					#mul_add(rp[2],ap[2],w,c1);  	$LD	r8,`2*$BNSZ`(r4) @@ -1939,7 +1849,7 @@ Lppcasm_mw_REM:  Lppcasm_mw_OVER:	  	addi	r3,r12,0 -	bclr	BO_ALWAYS,CR0_LT +	blr  	.long	0x00000000  # @@ -1964,7 +1874,7 @@ Lppcasm_mw_OVER:  	xor	r0,r0,r0		#r0 = 0  	xor	r12,r12,r12  		#r12 = 0 . used for carry		  	rlwinm.	r7,r5,30,2,31		# num >> 2 -	bc	BO_IF,CR0_EQ,Lppcasm_maw_leftover	# if (num < 4) go LPPCASM_maw_leftover +	beq	Lppcasm_maw_leftover	# if (num < 4) go LPPCASM_maw_leftover  	mtctr	r7  Lppcasm_maw_mainloop:	  					#mul_add(rp[0],ap[0],w,c1); @@ -2017,11 +1927,11 @@ Lppcasm_maw_mainloop:  	$ST	r11,`3*$BNSZ`(r3)  	addi	r3,r3,`4*$BNSZ`  	addi	r4,r4,`4*$BNSZ` -	bc	BO_dCTR_NZERO,CR0_EQ,Lppcasm_maw_mainloop +	bdnz-	Lppcasm_maw_mainloop  Lppcasm_maw_leftover:  	andi.	
r5,r5,0x3 -	bc	BO_IF,CR0_EQ,Lppcasm_maw_adios +	beq	Lppcasm_maw_adios  	addi	r3,r3,-$BNSZ  	addi	r4,r4,-$BNSZ  					#mul_add(rp[0],ap[0],w,c1); @@ -2036,7 +1946,7 @@ Lppcasm_maw_leftover:  	addze	r12,r10  	$ST	r9,0(r3) -	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios +	bdz	Lppcasm_maw_adios  					#mul_add(rp[1],ap[1],w,c1);  	$LDU	r8,$BNSZ(r4)	  	$UMULL	r9,r6,r8 @@ -2048,7 +1958,7 @@ Lppcasm_maw_leftover:  	addze	r12,r10  	$ST	r9,0(r3) -	bc	BO_dCTR_ZERO,CR0_EQ,Lppcasm_maw_adios +	bdz	Lppcasm_maw_adios  					#mul_add(rp[2],ap[2],w,c1);  	$LDU	r8,$BNSZ(r4)  	$UMULL	r9,r6,r8 @@ -2062,17 +1972,10 @@ Lppcasm_maw_leftover:  Lppcasm_maw_adios:	  	addi	r3,r12,0 -	bclr	BO_ALWAYS,CR0_LT +	blr  	.long	0x00000000  	.align	4  EOF -	$data =~ s/\`([^\`]*)\`/eval $1/gem; - -	# if some assembler chokes on some simplified mnemonic, -	# this is the spot to fix it up, e.g.: -	# GNU as doesn't seem to accept cmplw, 32-bit unsigned compare -	$data =~ s/^(\s*)cmplw(\s+)([^,]+),(.*)/$1cmpl$2$3,0,$4/gm; -	# assembler X doesn't accept li, load immediate value -	#$data =~ s/^(\s*)li(\s+)([^,]+),(.*)/$1addi$2$3,0,$4/gm; -	return($data); -} +$data =~ s/\`([^\`]*)\`/eval $1/gem; +print $data; +close STDOUT; diff --git a/openssl/crypto/bn/asm/sparcv8plus.S b/openssl/crypto/bn/asm/sparcv8plus.S index 8c56e2e7e..63de1860f 100644 --- a/openssl/crypto/bn/asm/sparcv8plus.S +++ b/openssl/crypto/bn/asm/sparcv8plus.S @@ -144,6 +144,19 @@   *	    }   */ +#if defined(__SUNPRO_C) && defined(__sparcv9) +  /* They've said -xarch=v9 at command line */ +  .register	%g2,#scratch +  .register	%g3,#scratch +# define	FRAME_SIZE	-192 +#elif defined(__GNUC__) && defined(__arch64__) +  /* They've said -m64 at command line */ +  .register	%g2,#scratch +  .register	%g3,#scratch +# define	FRAME_SIZE	-192 +#else  +# define	FRAME_SIZE	-96 +#endif   /*   * GNU assembler can't stand stuw:-(   */ @@ -619,8 +632,6 @@ bn_sub_words:   *							Andy.   
*/ -#define FRAME_SIZE	-96 -  /*   * Here is register usage map for *all* routines below.   */ diff --git a/openssl/crypto/bn/asm/x86_64-gcc.c b/openssl/crypto/bn/asm/x86_64-gcc.c index f13f52dd8..acb0b4011 100644 --- a/openssl/crypto/bn/asm/x86_64-gcc.c +++ b/openssl/crypto/bn/asm/x86_64-gcc.c @@ -1,4 +1,5 @@ -#ifdef __SUNPRO_C +#include "../bn_lcl.h" +#if !(defined(__GNUC__) && __GNUC__>=2)  # include "../bn_asm.c"	/* kind of dirty hack for Sun Studio */  #else  /* @@ -54,7 +55,15 @@   *    machine.   */ +#ifdef _WIN64 +#define BN_ULONG unsigned long long +#else  #define BN_ULONG unsigned long +#endif + +#undef mul +#undef mul_add +#undef sqr  /*   * "m"(a), "+m"(r)	is the way to favor DirectPath µ-code; @@ -97,7 +106,7 @@  		: "a"(a)		\  		: "cc"); -BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)  	{  	BN_ULONG c1=0; @@ -121,7 +130,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)  	return(c1);  	}  -BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w) +BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)  	{  	BN_ULONG c1=0; @@ -144,7 +153,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w)  	return(c1);  	}  -void bn_sqr_words(BN_ULONG *r, BN_ULONG *a, int n) +void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)          {  	if (n <= 0) return; @@ -175,14 +184,14 @@ BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)  	return ret;  } -BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) +BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)  { BN_ULONG ret=0,i=0;  	if (n <= 0) return 0;  	asm (  	"	subq	%2,%2		\n" -	".align 16			\n" +	".p2align 4			\n"  	"1:	movq	(%4,%2,8),%0	\n"  	"	adcq	(%5,%2,8),%0	\n"  	"	movq	%0,(%3,%2,8)	\n" @@ -198,14 +207,14 @@ BN_ULONG bn_add_words (BN_ULONG *rp, BN_ULONG *ap, 
BN_ULONG *bp,int n)  }  #ifndef SIMICS -BN_ULONG bn_sub_words (BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int n) +BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)  { BN_ULONG ret=0,i=0;  	if (n <= 0) return 0;  	asm (  	"	subq	%2,%2		\n" -	".align 16			\n" +	".p2align 4			\n"  	"1:	movq	(%4,%2,8),%0	\n"  	"	sbbq	(%5,%2,8),%0	\n"  	"	movq	%0,(%3,%2,8)	\n" @@ -485,7 +494,7 @@ void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)  	r[7]=c2;  	} -void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)  	{  	BN_ULONG t1,t2;  	BN_ULONG c1,c2,c3; @@ -561,7 +570,7 @@ void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a)  	r[15]=c1;  	} -void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)  	{  	BN_ULONG t1,t2;  	BN_ULONG c1,c2,c3; diff --git a/openssl/crypto/bn/asm/x86_64-mont.pl b/openssl/crypto/bn/asm/x86_64-mont.pl index c43b69592..3b7a6f243 100644 --- a/openssl/crypto/bn/asm/x86_64-mont.pl +++ b/openssl/crypto/bn/asm/x86_64-mont.pl @@ -15,14 +15,18 @@  # respectful 50%. It remains to be seen if loop unrolling and  # dedicated squaring routine can provide further improvement... 
-$output=shift; +$flavour = shift; +$output  = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);  $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;  ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or  ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or  die "can't locate x86_64-xlate.pl"; -open STDOUT,"| $^X $xlate $output"; +open STDOUT,"| $^X $xlate $flavour $output";  # int bn_mul_mont(  $rp="%rdi";	# BN_ULONG *rp, @@ -55,13 +59,14 @@ bn_mul_mont:  	push	%r15  	mov	${num}d,${num}d -	lea	2($num),%rax -	mov	%rsp,%rbp -	neg	%rax -	lea	(%rsp,%rax,8),%rsp	# tp=alloca(8*(num+2)) +	lea	2($num),%r10 +	mov	%rsp,%r11 +	neg	%r10 +	lea	(%rsp,%r10,8),%rsp	# tp=alloca(8*(num+2))  	and	\$-1024,%rsp		# minimize TLB usage -	mov	%rbp,8(%rsp,$num,8)	# tp[num+1]=%rsp +	mov	%r11,8(%rsp,$num,8)	# tp[num+1]=%rsp +.Lprologue:  	mov	%rdx,$bp		# $bp reassigned, remember?  	mov	($n0),$n0		# pull n0[0] value @@ -197,18 +202,129 @@ bn_mul_mont:  	dec	$j  	jge	.Lcopy -	mov	8(%rsp,$num,8),%rsp	# restore %rsp +	mov	8(%rsp,$num,8),%rsi	# restore %rsp  	mov	\$1,%rax +	mov	(%rsi),%r15 +	mov	8(%rsi),%r14 +	mov	16(%rsi),%r13 +	mov	24(%rsi),%r12 +	mov	32(%rsi),%rbp +	mov	40(%rsi),%rbx +	lea	48(%rsi),%rsp +.Lepilogue: +	ret +.size	bn_mul_mont,.-bn_mul_mont +.asciz	"Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>" +.align	16 +___ + +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +#		CONTEXT *context,DISPATCHER_CONTEXT *disp) +if ($win64) { +$rec="%rcx"; +$frame="%rdx"; +$context="%r8"; +$disp="%r9"; + +$code.=<<___; +.extern	__imp_RtlVirtualUnwind +.type	se_handler,\@abi-omnipotent +.align	16 +se_handler: +	push	%rsi +	push	%rdi +	push	%rbx +	push	%rbp +	push	%r12 +	push	%r13 +	push	%r14 +	push	%r15 +	pushfq +	sub	\$64,%rsp + +	mov	120($context),%rax	# pull context->Rax +	mov	248($context),%rbx	# pull context->Rip + +	lea	.Lprologue(%rip),%r10 +	cmp	
%r10,%rbx		# context->Rip<.Lprologue +	jb	.Lin_prologue + +	mov	152($context),%rax	# pull context->Rsp + +	lea	.Lepilogue(%rip),%r10 +	cmp	%r10,%rbx		# context->Rip>=.Lepilogue +	jae	.Lin_prologue + +	mov	192($context),%r10	# pull $num +	mov	8(%rax,%r10,8),%rax	# pull saved stack pointer +	lea	48(%rax),%rax + +	mov	-8(%rax),%rbx +	mov	-16(%rax),%rbp +	mov	-24(%rax),%r12 +	mov	-32(%rax),%r13 +	mov	-40(%rax),%r14 +	mov	-48(%rax),%r15 +	mov	%rbx,144($context)	# restore context->Rbx +	mov	%rbp,160($context)	# restore context->Rbp +	mov	%r12,216($context)	# restore context->R12 +	mov	%r13,224($context)	# restore context->R13 +	mov	%r14,232($context)	# restore context->R14 +	mov	%r15,240($context)	# restore context->R15 + +.Lin_prologue: +	mov	8(%rax),%rdi +	mov	16(%rax),%rsi +	mov	%rax,152($context)	# restore context->Rsp +	mov	%rsi,168($context)	# restore context->Rsi +	mov	%rdi,176($context)	# restore context->Rdi + +	mov	40($disp),%rdi		# disp->ContextRecord +	mov	$context,%rsi		# context +	mov	\$154,%ecx		# sizeof(CONTEXT) +	.long	0xa548f3fc		# cld; rep movsq + +	mov	$disp,%rsi +	xor	%rcx,%rcx		# arg1, UNW_FLAG_NHANDLER +	mov	8(%rsi),%rdx		# arg2, disp->ImageBase +	mov	0(%rsi),%r8		# arg3, disp->ControlPc +	mov	16(%rsi),%r9		# arg4, disp->FunctionEntry +	mov	40(%rsi),%r10		# disp->ContextRecord +	lea	56(%rsi),%r11		# &disp->HandlerData +	lea	24(%rsi),%r12		# &disp->EstablisherFrame +	mov	%r10,32(%rsp)		# arg5 +	mov	%r11,40(%rsp)		# arg6 +	mov	%r12,48(%rsp)		# arg7 +	mov	%rcx,56(%rsp)		# arg8, (NULL) +	call	*__imp_RtlVirtualUnwind(%rip) + +	mov	\$1,%eax		# ExceptionContinueSearch +	add	\$64,%rsp +	popfq  	pop	%r15  	pop	%r14  	pop	%r13  	pop	%r12  	pop	%rbp  	pop	%rbx +	pop	%rdi +	pop	%rsi  	ret -.size	bn_mul_mont,.-bn_mul_mont -.asciz	"Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>" +.size	se_handler,.-se_handler + +.section	.pdata +.align	4 +	.rva	.LSEH_begin_bn_mul_mont +	.rva	.LSEH_end_bn_mul_mont +	.rva	.LSEH_info_bn_mul_mont + 
+.section	.xdata +.align	8 +.LSEH_info_bn_mul_mont: +	.byte	9,0,0,0 +	.rva	se_handler  ___ +}  print $code;  close STDOUT; diff --git a/openssl/crypto/bn/bn.h b/openssl/crypto/bn/bn.h index f1719a587..e484b7fc1 100644 --- a/openssl/crypto/bn/bn.h +++ b/openssl/crypto/bn/bn.h @@ -56,6 +56,59 @@   * [including the GNU Public Licence.]   */  /* ==================================================================== + * Copyright (c) 1998-2006 The OpenSSL Project.  All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer.  + * + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in + *    the documentation and/or other materials provided with the + *    distribution. + * + * 3. All advertising materials mentioning features or use of this + *    software must display the following acknowledgment: + *    "This product includes software developed by the OpenSSL Project + *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + *    endorse or promote products derived from this software without + *    prior written permission. For written permission, please contact + *    openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + *    nor may "OpenSSL" appear in their names without prior written + *    permission of the OpenSSL Project. + * + * 6. 
Redistributions of any form whatsoever must retain the following + *    acknowledgment: + *    "This product includes software developed by the OpenSSL Project + *    for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com).  This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ +/* ====================================================================   * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.   *   * Portions of the attached software ("Contribution") are developed by  @@ -77,6 +130,7 @@  #include <stdio.h> /* FILE */  #endif  #include <openssl/ossl_typ.h> +#include <openssl/crypto.h>  #ifdef  __cplusplus  extern "C" { @@ -94,9 +148,11 @@ extern "C" {  /* #define BN_DEBUG */  /* #define BN_DEBUG_RAND */ +#ifndef OPENSSL_SMALL_FOOTPRINT  #define BN_MUL_COMBA  #define BN_SQR_COMBA  #define BN_RECURSION +#endif  /* This next option uses the C libraries (2 word)/(1 word) function.   * If it is not defined, I use my C version (which is slower). 
@@ -137,6 +193,8 @@ extern "C" {  #define BN_DEC_FMT1	"%lu"  #define BN_DEC_FMT2	"%019lu"  #define BN_DEC_NUM	19 +#define BN_HEX_FMT1	"%lX" +#define BN_HEX_FMT2	"%016lX"  #endif  /* This is where the long long data type is 64 bits, but long is 32. @@ -162,83 +220,37 @@ extern "C" {  #define BN_DEC_FMT1	"%llu"  #define BN_DEC_FMT2	"%019llu"  #define BN_DEC_NUM	19 +#define BN_HEX_FMT1	"%llX" +#define BN_HEX_FMT2	"%016llX"  #endif  #ifdef THIRTY_TWO_BIT  #ifdef BN_LLONG -# if defined(OPENSSL_SYS_WIN32) && !defined(__GNUC__) +# if defined(_WIN32) && !defined(__GNUC__)  #  define BN_ULLONG	unsigned __int64 +#  define BN_MASK	(0xffffffffffffffffI64)  # else  #  define BN_ULLONG	unsigned long long +#  define BN_MASK	(0xffffffffffffffffLL)  # endif  #endif -#define BN_ULONG	unsigned long -#define BN_LONG		long +#define BN_ULONG	unsigned int +#define BN_LONG		int  #define BN_BITS		64  #define BN_BYTES	4  #define BN_BITS2	32  #define BN_BITS4	16 -#ifdef OPENSSL_SYS_WIN32 -/* VC++ doesn't like the LL suffix */ -#define BN_MASK		(0xffffffffffffffffL) -#else -#define BN_MASK		(0xffffffffffffffffLL) -#endif  #define BN_MASK2	(0xffffffffL)  #define BN_MASK2l	(0xffff)  #define BN_MASK2h1	(0xffff8000L)  #define BN_MASK2h	(0xffff0000L)  #define BN_TBIT		(0x80000000L)  #define BN_DEC_CONV	(1000000000L) -#define BN_DEC_FMT1	"%lu" -#define BN_DEC_FMT2	"%09lu" -#define BN_DEC_NUM	9 -#endif - -#ifdef SIXTEEN_BIT -#ifndef BN_DIV2W -#define BN_DIV2W -#endif -#define BN_ULLONG	unsigned long -#define BN_ULONG	unsigned short -#define BN_LONG		short -#define BN_BITS		32 -#define BN_BYTES	2 -#define BN_BITS2	16 -#define BN_BITS4	8 -#define BN_MASK		(0xffffffff) -#define BN_MASK2	(0xffff) -#define BN_MASK2l	(0xff) -#define BN_MASK2h1	(0xff80) -#define BN_MASK2h	(0xff00) -#define BN_TBIT		(0x8000) -#define BN_DEC_CONV	(100000)  #define BN_DEC_FMT1	"%u" -#define BN_DEC_FMT2	"%05u" -#define BN_DEC_NUM	5 -#endif - -#ifdef EIGHT_BIT -#ifndef BN_DIV2W -#define BN_DIV2W -#endif -#define BN_ULLONG	
unsigned short -#define BN_ULONG	unsigned char -#define BN_LONG		char -#define BN_BITS		16 -#define BN_BYTES	1 -#define BN_BITS2	8 -#define BN_BITS4	4 -#define BN_MASK		(0xffff) -#define BN_MASK2	(0xff) -#define BN_MASK2l	(0xf) -#define BN_MASK2h1	(0xf8) -#define BN_MASK2h	(0xf0) -#define BN_TBIT		(0x80) -#define BN_DEC_CONV	(100) -#define BN_DEC_FMT1	"%u" -#define BN_DEC_FMT2	"%02u" -#define BN_DEC_NUM	2 +#define BN_DEC_FMT2	"%09u" +#define BN_DEC_NUM	9 +#define BN_HEX_FMT1	"%X" +#define BN_HEX_FMT2	"%08X"  #endif  #define BN_DEFAULT_BITS	1280 @@ -303,12 +315,8 @@ struct bn_mont_ctx_st  	BIGNUM N;      /* The modulus */  	BIGNUM Ni;     /* R*(1/R mod N) - N*Ni = 1  	                * (Ni is only stored for bignum algorithm) */ -#if 0 -	/* OpenSSL 0.9.9 preview: */ -	BN_ULONG n0[2];/* least significant word(s) of Ni */ -#else -	BN_ULONG n0;   /* least significant word of Ni */ -#endif +	BN_ULONG n0[2];/* least significant word(s) of Ni; +	                  (type changed with 0.9.9, was "BN_ULONG n0;" before) */  	int flags;  	}; @@ -504,6 +512,7 @@ char *	BN_bn2hex(const BIGNUM *a);  char *	BN_bn2dec(const BIGNUM *a);  int 	BN_hex2bn(BIGNUM **a, const char *str);  int 	BN_dec2bn(BIGNUM **a, const char *str); +int	BN_asc2bn(BIGNUM **a, const char *str);  int	BN_gcd(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx);  int	BN_kronecker(const BIGNUM *a,const BIGNUM *b,BN_CTX *ctx); /* returns -2 for error */  BIGNUM *BN_mod_inverse(BIGNUM *ret, @@ -531,17 +540,6 @@ int	BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb);  int	BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx,  		int do_trial_division, BN_GENCB *cb); -int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx); - -int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, -			const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2, -			const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb); -int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, -			
BIGNUM *Xp1, BIGNUM *Xp2, -			const BIGNUM *Xp, -			const BIGNUM *e, BN_CTX *ctx, -			BN_GENCB *cb); -  BN_MONT_CTX *BN_MONT_CTX_new(void );  void BN_MONT_CTX_init(BN_MONT_CTX *ctx);  int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b, @@ -560,19 +558,22 @@ BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,  #define	BN_BLINDING_NO_UPDATE	0x00000001  #define	BN_BLINDING_NO_RECREATE	0x00000002 -BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod); +BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod);  void BN_BLINDING_free(BN_BLINDING *b);  int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx);  int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);  int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);  int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *);  int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *); +#ifndef OPENSSL_NO_DEPRECATED  unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *);  void BN_BLINDING_set_thread_id(BN_BLINDING *, unsigned long); +#endif +CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *);  unsigned long BN_BLINDING_get_flags(const BN_BLINDING *);  void BN_BLINDING_set_flags(BN_BLINDING *, unsigned long);  BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, -	const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx, +	const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,  	int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,  			  const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),  	BN_MONT_CTX *m_ctx); @@ -625,24 +626,24 @@ int	BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,   *     t^p[0] + t^p[1] + ... + t^p[k]   * where m = p[0] > p[1] > ... > p[k] = 0.   
*/ -int	BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]); +int	BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]);  	/* r = a mod p */  int	BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, -	const unsigned int p[], BN_CTX *ctx); /* r = (a * b) mod p */ -int	BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], +	const int p[], BN_CTX *ctx); /* r = (a * b) mod p */ +int	BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[],  	BN_CTX *ctx); /* r = (a * a) mod p */ -int	BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const unsigned int p[], +int	BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *b, const int p[],  	BN_CTX *ctx); /* r = (1 / b) mod p */  int	BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, -	const unsigned int p[], BN_CTX *ctx); /* r = (a / b) mod p */ +	const int p[], BN_CTX *ctx); /* r = (a / b) mod p */  int	BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, -	const unsigned int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */ +	const int p[], BN_CTX *ctx); /* r = (a ^ b) mod p */  int	BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, -	const unsigned int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */ +	const int p[], BN_CTX *ctx); /* r = sqrt(a) mod p */  int	BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a, -	const unsigned int p[], BN_CTX *ctx); /* r^2 + r = a mod p */ -int	BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max); -int	BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a); +	const int p[], BN_CTX *ctx); /* r^2 + r = a mod p */ +int	BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max); +int	BN_GF2m_arr2poly(const int p[], BIGNUM *a);  /* faster mod functions for the 'NIST primes'    * 0 <= a < p^2 */ @@ -751,10 +752,12 @@ int RAND_pseudo_bytes(unsigned char *buf,int num);  #define bn_correct_top(a) \          { \          BN_ULONG *ftl; \ -	if ((a)->top > 0) \ +	int tmp_top = (a)->top; \ +	if (tmp_top > 0) \  		{ \ -		for (ftl= 
&((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \ -		if (*(ftl--)) break; \ +		for (ftl= &((a)->d[tmp_top-1]); tmp_top > 0; tmp_top--) \ +			if (*(ftl--)) break; \ +		(a)->top = tmp_top; \  		} \  	bn_pollute(a); \  	} diff --git a/openssl/crypto/bn/bn_asm.c b/openssl/crypto/bn/bn_asm.c index 99bc2de49..c43c91cc0 100644 --- a/openssl/crypto/bn/bn_asm.c +++ b/openssl/crypto/bn/bn_asm.c @@ -75,6 +75,7 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)  	assert(num >= 0);  	if (num <= 0) return(c1); +#ifndef OPENSSL_SMALL_FOOTPRINT  	while (num&~3)  		{  		mul_add(rp[0],ap[0],w,c1); @@ -83,11 +84,11 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)  		mul_add(rp[3],ap[3],w,c1);  		ap+=4; rp+=4; num-=4;  		} -	if (num) +#endif +	while (num)  		{ -		mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1; -		mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1; -		mul_add(rp[2],ap[2],w,c1); return c1; +		mul_add(rp[0],ap[0],w,c1); +		ap++; rp++; num--;  		}  	return(c1); @@ -100,6 +101,7 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)  	assert(num >= 0);  	if (num <= 0) return(c1); +#ifndef OPENSSL_SMALL_FOOTPRINT  	while (num&~3)  		{  		mul(rp[0],ap[0],w,c1); @@ -108,11 +110,11 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)  		mul(rp[3],ap[3],w,c1);  		ap+=4; rp+=4; num-=4;  		} -	if (num) +#endif +	while (num)  		{ -		mul(rp[0],ap[0],w,c1); if (--num == 0) return c1; -		mul(rp[1],ap[1],w,c1); if (--num == 0) return c1; -		mul(rp[2],ap[2],w,c1); +		mul(rp[0],ap[0],w,c1); +		ap++; rp++; num--;  		}  	return(c1);  	}  @@ -121,6 +123,8 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)          {  	assert(n >= 0);  	if (n <= 0) return; + +#ifndef OPENSSL_SMALL_FOOTPRINT  	while (n&~3)  		{  		sqr(r[0],r[1],a[0]); @@ -129,11 +133,11 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)  		sqr(r[6],r[7],a[3]);  		a+=4; r+=8; n-=4;  		
} -	if (n) +#endif +	while (n)  		{ -		sqr(r[0],r[1],a[0]); if (--n == 0) return; -		sqr(r[2],r[3],a[1]); if (--n == 0) return; -		sqr(r[4],r[5],a[2]); +		sqr(r[0],r[1],a[0]); +		a++; r+=2; n--;  		}  	} @@ -150,18 +154,20 @@ BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)  	bl=LBITS(w);  	bh=HBITS(w); -	for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT +	while (num&~3)  		{  		mul_add(rp[0],ap[0],bl,bh,c); -		if (--num == 0) break;  		mul_add(rp[1],ap[1],bl,bh,c); -		if (--num == 0) break;  		mul_add(rp[2],ap[2],bl,bh,c); -		if (--num == 0) break;  		mul_add(rp[3],ap[3],bl,bh,c); -		if (--num == 0) break; -		ap+=4; -		rp+=4; +		ap+=4; rp+=4; num-=4; +		} +#endif +	while (num) +		{ +		mul_add(rp[0],ap[0],bl,bh,c); +		ap++; rp++; num--;  		}  	return(c);  	}  @@ -177,18 +183,20 @@ BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)  	bl=LBITS(w);  	bh=HBITS(w); -	for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT +	while (num&~3)  		{  		mul(rp[0],ap[0],bl,bh,carry); -		if (--num == 0) break;  		mul(rp[1],ap[1],bl,bh,carry); -		if (--num == 0) break;  		mul(rp[2],ap[2],bl,bh,carry); -		if (--num == 0) break;  		mul(rp[3],ap[3],bl,bh,carry); -		if (--num == 0) break; -		ap+=4; -		rp+=4; +		ap+=4; rp+=4; num-=4; +		} +#endif +	while (num) +		{ +		mul(rp[0],ap[0],bl,bh,carry); +		ap++; rp++; num--;  		}  	return(carry);  	}  @@ -197,22 +205,21 @@ void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)          {  	assert(n >= 0);  	if (n <= 0) return; -	for (;;) + +#ifndef OPENSSL_SMALL_FOOTPRINT +	while (n&~3)  		{  		sqr64(r[0],r[1],a[0]); -		if (--n == 0) break; -  		sqr64(r[2],r[3],a[1]); -		if (--n == 0) break; -  		sqr64(r[4],r[5],a[2]); -		if (--n == 0) break; -  		sqr64(r[6],r[7],a[3]); -		if (--n == 0) break; - -		a+=4; -		r+=8; +		a+=4; r+=8; n-=4; +		} +#endif +	while (n) +		{ +		sqr64(r[0],r[1],a[0]); +		a++; r+=2; n--;  		}  	} @@ -303,31 +310,30 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const 
BN_ULONG *b, int n)  	assert(n >= 0);  	if (n <= 0) return((BN_ULONG)0); -	for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT +	while (n&~3)  		{  		ll+=(BN_ULLONG)a[0]+b[0];  		r[0]=(BN_ULONG)ll&BN_MASK2;  		ll>>=BN_BITS2; -		if (--n <= 0) break; -  		ll+=(BN_ULLONG)a[1]+b[1];  		r[1]=(BN_ULONG)ll&BN_MASK2;  		ll>>=BN_BITS2; -		if (--n <= 0) break; -  		ll+=(BN_ULLONG)a[2]+b[2];  		r[2]=(BN_ULONG)ll&BN_MASK2;  		ll>>=BN_BITS2; -		if (--n <= 0) break; -  		ll+=(BN_ULLONG)a[3]+b[3];  		r[3]=(BN_ULONG)ll&BN_MASK2;  		ll>>=BN_BITS2; -		if (--n <= 0) break; - -		a+=4; -		b+=4; -		r+=4; +		a+=4; b+=4; r+=4; n-=4; +		} +#endif +	while (n) +		{ +		ll+=(BN_ULLONG)a[0]+b[0]; +		r[0]=(BN_ULONG)ll&BN_MASK2; +		ll>>=BN_BITS2; +		a++; b++; r++; n--;  		}  	return((BN_ULONG)ll);  	} @@ -340,7 +346,8 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)  	if (n <= 0) return((BN_ULONG)0);  	c=0; -	for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT +	while (n&~3)  		{  		t=a[0];  		t=(t+c)&BN_MASK2; @@ -348,35 +355,36 @@ BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)  		l=(t+b[0])&BN_MASK2;  		c+=(l < t);  		r[0]=l; -		if (--n <= 0) break; -  		t=a[1];  		t=(t+c)&BN_MASK2;  		c=(t < c);  		l=(t+b[1])&BN_MASK2;  		c+=(l < t);  		r[1]=l; -		if (--n <= 0) break; -  		t=a[2];  		t=(t+c)&BN_MASK2;  		c=(t < c);  		l=(t+b[2])&BN_MASK2;  		c+=(l < t);  		r[2]=l; -		if (--n <= 0) break; -  		t=a[3];  		t=(t+c)&BN_MASK2;  		c=(t < c);  		l=(t+b[3])&BN_MASK2;  		c+=(l < t);  		r[3]=l; -		if (--n <= 0) break; - -		a+=4; -		b+=4; -		r+=4; +		a+=4; b+=4; r+=4; n-=4; +		} +#endif +	while(n) +		{ +		t=a[0]; +		t=(t+c)&BN_MASK2; +		c=(t < c); +		l=(t+b[0])&BN_MASK2; +		c+=(l < t); +		r[0]=l; +		a++; b++; r++; n--;  		}  	return((BN_ULONG)c);  	} @@ -390,36 +398,35 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)  	assert(n >= 0);  	if (n <= 0) return((BN_ULONG)0); -	for (;;) +#ifndef OPENSSL_SMALL_FOOTPRINT +	while 
(n&~3)  		{  		t1=a[0]; t2=b[0];  		r[0]=(t1-t2-c)&BN_MASK2;  		if (t1 != t2) c=(t1 < t2); -		if (--n <= 0) break; -  		t1=a[1]; t2=b[1];  		r[1]=(t1-t2-c)&BN_MASK2;  		if (t1 != t2) c=(t1 < t2); -		if (--n <= 0) break; -  		t1=a[2]; t2=b[2];  		r[2]=(t1-t2-c)&BN_MASK2;  		if (t1 != t2) c=(t1 < t2); -		if (--n <= 0) break; -  		t1=a[3]; t2=b[3];  		r[3]=(t1-t2-c)&BN_MASK2;  		if (t1 != t2) c=(t1 < t2); -		if (--n <= 0) break; - -		a+=4; -		b+=4; -		r+=4; +		a+=4; b+=4; r+=4; n-=4; +		} +#endif +	while (n) +		{ +		t1=a[0]; t2=b[0]; +		r[0]=(t1-t2-c)&BN_MASK2; +		if (t1 != t2) c=(t1 < t2); +		a++; b++; r++; n--;  		}  	return(c);  	} -#ifdef BN_MUL_COMBA +#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)  #undef bn_mul_comba8  #undef bn_mul_comba4 @@ -820,18 +827,134 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)  	r[6]=c1;  	r[7]=c2;  	} + +#ifdef OPENSSL_NO_ASM +#ifdef OPENSSL_BN_ASM_MONT +#include <alloca.h> +/* + * This is essentially reference implementation, which may or may not + * result in performance improvement. E.g. on IA-32 this routine was + * observed to give 40% faster rsa1024 private key operations and 10% + * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only + * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a + * reference implementation, one to be used as starting point for + * platform-specific assembler. Mentioned numbers apply to compiler + * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and + * can vary not only from platform to platform, but even for compiler + * versions. Assembler vs. assembler improvement coefficients can + * [and are known to] differ and are to be documented elsewhere. 
+ */ +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num) +	{ +	BN_ULONG c0,c1,ml,*tp,n0; +#ifdef mul64 +	BN_ULONG mh; +#endif +	volatile BN_ULONG *vp; +	int i=0,j; + +#if 0	/* template for platform-specific implementation */ +	if (ap==bp)	return bn_sqr_mont(rp,ap,np,n0p,num); +#endif +	vp = tp = alloca((num+2)*sizeof(BN_ULONG)); + +	n0 = *n0p; + +	c0 = 0; +	ml = bp[0]; +#ifdef mul64 +	mh = HBITS(ml); +	ml = LBITS(ml); +	for (j=0;j<num;++j) +		mul(tp[j],ap[j],ml,mh,c0); +#else +	for (j=0;j<num;++j) +		mul(tp[j],ap[j],ml,c0); +#endif + +	tp[num]   = c0; +	tp[num+1] = 0; +	goto enter; + +	for(i=0;i<num;i++) +		{ +		c0 = 0; +		ml = bp[i]; +#ifdef mul64 +		mh = HBITS(ml); +		ml = LBITS(ml); +		for (j=0;j<num;++j) +			mul_add(tp[j],ap[j],ml,mh,c0); +#else +		for (j=0;j<num;++j) +			mul_add(tp[j],ap[j],ml,c0); +#endif +		c1 = (tp[num] + c0)&BN_MASK2; +		tp[num]   = c1; +		tp[num+1] = (c1<c0?1:0); +	enter: +		c1  = tp[0]; +		ml = (c1*n0)&BN_MASK2; +		c0 = 0; +#ifdef mul64 +		mh = HBITS(ml); +		ml = LBITS(ml); +		mul_add(c1,np[0],ml,mh,c0); +#else +		mul_add(c1,ml,np[0],c0); +#endif +		for(j=1;j<num;j++) +			{ +			c1 = tp[j]; +#ifdef mul64 +			mul_add(c1,np[j],ml,mh,c0); +#else +			mul_add(c1,ml,np[j],c0); +#endif +			tp[j-1] = c1&BN_MASK2; +			} +		c1        = (tp[num] + c0)&BN_MASK2; +		tp[num-1] = c1; +		tp[num]   = tp[num+1] + (c1<c0?1:0); +		} + +	if (tp[num]!=0 || tp[num-1]>=np[num-1]) +		{ +		c0 = bn_sub_words(rp,tp,np,num); +		if (tp[num]!=0 || c0==0) +			{ +			for(i=0;i<num+2;i++)	vp[i] = 0; +			return 1; +			} +		} +	for(i=0;i<num;i++)	rp[i] = tp[i],	vp[i] = 0; +	vp[num]   = 0; +	vp[num+1] = 0; +	return 1; +	} +#else +/* + * Return value of 0 indicates that multiplication/convolution was not + * performed to signal the caller to fall down to alternative/original + * code-path. 
+ */ +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) +{	return 0;	} +#endif /* OPENSSL_BN_ASM_MONT */ +#endif +  #else /* !BN_MUL_COMBA */  /* hmm... is it faster just to do a multiply? */  #undef bn_sqr_comba4 -void bn_sqr_comba4(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)  	{  	BN_ULONG t[8];  	bn_sqr_normal(r,a,4,t);  	}  #undef bn_sqr_comba8 -void bn_sqr_comba8(BN_ULONG *r, BN_ULONG *a) +void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)  	{  	BN_ULONG t[16];  	bn_sqr_normal(r,a,8,t); @@ -857,4 +980,51 @@ void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)  	r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);  	} +#ifdef OPENSSL_NO_ASM +#ifdef OPENSSL_BN_ASM_MONT +#include <alloca.h> +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num) +	{ +	BN_ULONG c0,c1,*tp,n0=*n0p; +	volatile BN_ULONG *vp; +	int i=0,j; + +	vp = tp = alloca((num+2)*sizeof(BN_ULONG)); + +	for(i=0;i<=num;i++)	tp[i]=0; + +	for(i=0;i<num;i++) +		{ +		c0         = bn_mul_add_words(tp,ap,num,bp[i]); +		c1         = (tp[num] + c0)&BN_MASK2; +		tp[num]    = c1; +		tp[num+1]  = (c1<c0?1:0); + +		c0         = bn_mul_add_words(tp,np,num,tp[0]*n0); +		c1         = (tp[num] + c0)&BN_MASK2; +		tp[num]    = c1; +		tp[num+1] += (c1<c0?1:0); +		for(j=0;j<=num;j++)	tp[j]=tp[j+1]; +		} + +	if (tp[num]!=0 || tp[num-1]>=np[num-1]) +		{ +		c0 = bn_sub_words(rp,tp,np,num); +		if (tp[num]!=0 || c0==0) +			{ +			for(i=0;i<num+2;i++)	vp[i] = 0; +			return 1; +			} +		} +	for(i=0;i<num;i++)	rp[i] = tp[i],	vp[i] = 0; +	vp[num]   = 0; +	vp[num+1] = 0; +	return 1; +	} +#else +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num) +{	return 0;	} +#endif /* OPENSSL_BN_ASM_MONT */ +#endif +  #endif /* !BN_MUL_COMBA */ diff --git a/openssl/crypto/bn/bn_blind.c b/openssl/crypto/bn/bn_blind.c index 
c11fb4ccc..e060592fd 100644 --- a/openssl/crypto/bn/bn_blind.c +++ b/openssl/crypto/bn/bn_blind.c @@ -1,6 +1,6 @@  /* crypto/bn/bn_blind.c */  /* ==================================================================== - * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved. + * Copyright (c) 1998-2006 The OpenSSL Project.  All rights reserved.   *   * Redistribution and use in source and binary forms, with or without   * modification, are permitted provided that the following conditions @@ -121,8 +121,11 @@ struct bn_blinding_st  	BIGNUM *Ai;  	BIGNUM *e;  	BIGNUM *mod; /* just a reference */ +#ifndef OPENSSL_NO_DEPRECATED  	unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b;  				  * used only by crypto/rsa/rsa_eay.c, rsa_lib.c */ +#endif +	CRYPTO_THREADID tid;  	unsigned int  counter;  	unsigned long flags;  	BN_MONT_CTX *m_ctx; @@ -131,7 +134,7 @@ struct bn_blinding_st  			  BN_MONT_CTX *m_ctx);  	}; -BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGNUM *mod) +BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)  	{  	BN_BLINDING *ret=NULL; @@ -158,6 +161,7 @@ BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, /* const */ BIGN  		BN_set_flags(ret->mod, BN_FLG_CONSTTIME);  	ret->counter = BN_BLINDING_COUNTER; +	CRYPTO_THREADID_current(&ret->tid);  	return(ret);  err:  	if (ret != NULL) BN_BLINDING_free(ret); @@ -263,6 +267,7 @@ int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *ct  	return(ret);  	} +#ifndef OPENSSL_NO_DEPRECATED  unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b)  	{  	return b->thread_id; @@ -272,6 +277,12 @@ void BN_BLINDING_set_thread_id(BN_BLINDING *b, unsigned long n)  	{  	b->thread_id = n;  	} +#endif + +CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *b) +	{ +	return &b->tid; +	}  unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b)  	{ @@ -284,7 +295,7 @@ void BN_BLINDING_set_flags(BN_BLINDING *b, 
unsigned long flags)  	}  BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b, -	const BIGNUM *e, /* const */ BIGNUM *m, BN_CTX *ctx, +	const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,  	int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,  			  const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),  	BN_MONT_CTX *m_ctx) diff --git a/openssl/crypto/bn/bn_ctx.c b/openssl/crypto/bn/bn_ctx.c index b3452f1a9..3f2256f67 100644 --- a/openssl/crypto/bn/bn_ctx.c +++ b/openssl/crypto/bn/bn_ctx.c @@ -161,7 +161,7 @@ static void ctxdbg(BN_CTX *ctx)  	fprintf(stderr,"(%08x): ", (unsigned int)ctx);  	while(bnidx < ctx->used)  		{ -		fprintf(stderr,"%02x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax); +		fprintf(stderr,"%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax);  		if(!(bnidx % BN_CTX_POOL_SIZE))  			item = item->next;  		} @@ -171,8 +171,8 @@ static void ctxdbg(BN_CTX *ctx)  	while(fpidx < stack->depth)  		{  		while(bnidx++ < stack->indexes[fpidx]) -			fprintf(stderr,"   "); -		fprintf(stderr,"^^ "); +			fprintf(stderr,"    "); +		fprintf(stderr,"^^^ ");  		bnidx++;  		fpidx++;  		} diff --git a/openssl/crypto/bn/bn_div.c b/openssl/crypto/bn/bn_div.c index 1e8e57626..802a43d64 100644 --- a/openssl/crypto/bn/bn_div.c +++ b/openssl/crypto/bn/bn_div.c @@ -102,7 +102,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,  	/* The next 2 are needed so we can do a dv->d[0]|=1 later  	 * since BN_lshift1 will only work once there is a value :-) */  	BN_zero(dv); -	bn_wexpand(dv,1); +	if(bn_wexpand(dv,1) == NULL) goto end;  	dv->top=1;  	if (!BN_lshift(D,D,nm-nd)) goto end; @@ -229,7 +229,8 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,  	if (dv == NULL)  		res=BN_CTX_get(ctx);  	else	res=dv; -	if (sdiv == NULL || res == NULL) goto err; +	if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL) +		goto err;  	/* First we normalise the numbers */  	norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2); @@ -336,7 
+337,7 @@ X) -> 0x%08X\n",  				t2 -= d1;  				}  #else /* !BN_LLONG */ -			BN_ULONG t2l,t2h,ql,qh; +			BN_ULONG t2l,t2h;  			q=bn_div_words(n0,n1,d0);  #ifdef BN_DEBUG_LEVITTE @@ -354,9 +355,12 @@ X) -> 0x%08X\n",  			t2l = d1 * q;  			t2h = BN_UMULT_HIGH(d1,q);  #else +			{ +			BN_ULONG ql, qh;  			t2l=LBITS(d1); t2h=HBITS(d1);  			ql =LBITS(q);  qh =HBITS(q);  			mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ +			}  #endif  			for (;;) @@ -560,7 +564,7 @@ X) -> 0x%08X\n",  				t2 -= d1;  				}  #else /* !BN_LLONG */ -			BN_ULONG t2l,t2h,ql,qh; +			BN_ULONG t2l,t2h;  			q=bn_div_words(n0,n1,d0);  #ifdef BN_DEBUG_LEVITTE @@ -578,9 +582,12 @@ X) -> 0x%08X\n",  			t2l = d1 * q;  			t2h = BN_UMULT_HIGH(d1,q);  #else +			{ +			BN_ULONG ql, qh;  			t2l=LBITS(d1); t2h=HBITS(d1);  			ql =LBITS(q);  qh =HBITS(q);  			mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */ +			}  #endif  			for (;;) diff --git a/openssl/crypto/bn/bn_exp.c b/openssl/crypto/bn/bn_exp.c index 70a33f0d9..d9b6c737f 100644 --- a/openssl/crypto/bn/bn_exp.c +++ b/openssl/crypto/bn/bn_exp.c @@ -134,7 +134,8 @@ int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)  		rr = BN_CTX_get(ctx);  	else  		rr = r; -	if ((v = BN_CTX_get(ctx)) == NULL) goto err; +	v = BN_CTX_get(ctx); +	if (rr == NULL || v == NULL) goto err;  	if (BN_copy(v,a) == NULL) goto err;  	bits=BN_num_bits(p); diff --git a/openssl/crypto/bn/bn_gf2m.c b/openssl/crypto/bn/bn_gf2m.c index 306f029f2..527b0fa15 100644 --- a/openssl/crypto/bn/bn_gf2m.c +++ b/openssl/crypto/bn/bn_gf2m.c @@ -121,74 +121,12 @@ static const BN_ULONG SQR_tb[16] =      SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >>  8 & 0xF] << 16 | \      SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF]  #endif -#ifdef SIXTEEN_BIT -#define SQR1(w) \ -    SQR_tb[(w) >> 12 & 0xF] <<  8 | SQR_tb[(w) >>  8 & 0xF] -#define SQR0(w) \ -    SQR_tb[(w) >>  4 & 0xF] <<  8 | SQR_tb[(w)       & 0xF] -#endif -#ifdef EIGHT_BIT -#define SQR1(w) \ -    SQR_tb[(w) >>  4 & 0xF] 
-#define SQR0(w) \ -    SQR_tb[(w)       & 15] -#endif  /* Product of two polynomials a, b each with degree < BN_BITS2 - 1,   * result is a polynomial r with degree < 2 * BN_BITS - 1   * The caller MUST ensure that the variables have the right amount   * of space allocated.   */ -#ifdef EIGHT_BIT -static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) -	{ -	register BN_ULONG h, l, s; -	BN_ULONG tab[4], top1b = a >> 7; -	register BN_ULONG a1, a2; - -	a1 = a & (0x7F); a2 = a1 << 1; - -	tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2; - -	s = tab[b      & 0x3]; l  = s; -	s = tab[b >> 2 & 0x3]; l ^= s << 2; h  = s >> 6; -	s = tab[b >> 4 & 0x3]; l ^= s << 4; h ^= s >> 4; -	s = tab[b >> 6      ]; l ^= s << 6; h ^= s >> 2; -	 -	/* compensate for the top bit of a */ - -	if (top1b & 01) { l ^= b << 7; h ^= b >> 1; }  - -	*r1 = h; *r0 = l; -	}  -#endif -#ifdef SIXTEEN_BIT -static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) -	{ -	register BN_ULONG h, l, s; -	BN_ULONG tab[4], top1b = a >> 15;  -	register BN_ULONG a1, a2; - -	a1 = a & (0x7FFF); a2 = a1 << 1; - -	tab[0] = 0; tab[1] = a1; tab[2] = a2; tab[3] = a1^a2; - -	s = tab[b      & 0x3]; l  = s; -	s = tab[b >> 2 & 0x3]; l ^= s <<  2; h  = s >> 14; -	s = tab[b >> 4 & 0x3]; l ^= s <<  4; h ^= s >> 12; -	s = tab[b >> 6 & 0x3]; l ^= s <<  6; h ^= s >> 10; -	s = tab[b >> 8 & 0x3]; l ^= s <<  8; h ^= s >>  8; -	s = tab[b >>10 & 0x3]; l ^= s << 10; h ^= s >>  6; -	s = tab[b >>12 & 0x3]; l ^= s << 12; h ^= s >>  4; -	s = tab[b >>14      ]; l ^= s << 14; h ^= s >>  2; - -	/* compensate for the top bit of a */ - -	if (top1b & 01) { l ^= b << 15; h ^= b >> 1; }  - -	*r1 = h; *r0 = l; -	}  -#endif  #ifdef THIRTY_TWO_BIT  static void bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b)  	{ @@ -294,7 +232,8 @@ int	BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)  	if (a->top < b->top) { at = b; bt = a; }  	else { at = a; 
bt = b; } -	bn_wexpand(r, at->top); +	if(bn_wexpand(r, at->top) == NULL) +		return 0;  	for (i = 0; i < bt->top; i++)  		{ @@ -320,7 +259,7 @@ int	BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)  /* Performs modular reduction of a and store result in r.  r could be a. */ -int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[]) +int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[])  	{  	int j, k;  	int n, dN, d0, d1; @@ -421,11 +360,11 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[])  int	BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p)  	{  	int ret = 0; -	const int max = BN_num_bits(p); -	unsigned int *arr=NULL; +	const int max = BN_num_bits(p) + 1; +	int *arr=NULL;  	bn_check_top(a);  	bn_check_top(p); -	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; +	if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;  	ret = BN_GF2m_poly2arr(p, arr, max);  	if (!ret || ret > max)  		{ @@ -443,7 +382,7 @@ err:  /* Compute the product of two polynomials a and b, reduce modulo p, and store   * the result in r.  r could be a or b; a could be b.   
*/ -int	BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx) +int	BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)  	{  	int zlen, i, j, k, ret = 0;  	BIGNUM *s; @@ -499,12 +438,12 @@ err:  int	BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)  	{  	int ret = 0; -	const int max = BN_num_bits(p); -	unsigned int *arr=NULL; +	const int max = BN_num_bits(p) + 1; +	int *arr=NULL;  	bn_check_top(a);  	bn_check_top(b);  	bn_check_top(p); -	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; +	if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;  	ret = BN_GF2m_poly2arr(p, arr, max);  	if (!ret || ret > max)  		{ @@ -520,7 +459,7 @@ err:  /* Square a, reduce the result mod p, and store it in a.  r could be a. */ -int	BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx) +int	BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)  	{  	int i, ret = 0;  	BIGNUM *s; @@ -555,12 +494,12 @@ err:  int	BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)  	{  	int ret = 0; -	const int max = BN_num_bits(p); -	unsigned int *arr=NULL; +	const int max = BN_num_bits(p) + 1; +	int *arr=NULL;  	bn_check_top(a);  	bn_check_top(p); -	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; +	if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;  	ret = BN_GF2m_poly2arr(p, arr, max);  	if (!ret || ret > max)  		{ @@ -642,7 +581,7 @@ err:   * function is only provided for convenience; for best performance, use the    * BN_GF2m_mod_inv function.   
*/ -int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], BN_CTX *ctx)  	{  	BIGNUM *field;  	int ret = 0; @@ -768,7 +707,7 @@ err:   * function is only provided for convenience; for best performance, use the    * BN_GF2m_mod_div function.   */ -int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, const int p[], BN_CTX *ctx)  	{  	BIGNUM *field;  	int ret = 0; @@ -793,7 +732,7 @@ err:   * the result in r.  r could be a.   * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363.   */ -int	BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const unsigned int p[], BN_CTX *ctx) +int	BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], BN_CTX *ctx)  	{  	int ret = 0, i, n;  	BIGNUM *u; @@ -839,12 +778,12 @@ err:  int BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, BN_CTX *ctx)  	{  	int ret = 0; -	const int max = BN_num_bits(p); -	unsigned int *arr=NULL; +	const int max = BN_num_bits(p) + 1; +	int *arr=NULL;  	bn_check_top(a);  	bn_check_top(b);  	bn_check_top(p); -	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; +	if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;  	ret = BN_GF2m_poly2arr(p, arr, max);  	if (!ret || ret > max)  		{ @@ -862,7 +801,7 @@ err:   * the result in r.  r could be a.   * Uses exponentiation as in algorithm A.4.1 from IEEE P1363.   
*/ -int	BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const unsigned int p[], BN_CTX *ctx) +int	BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx)  	{  	int ret = 0;  	BIGNUM *u; @@ -898,11 +837,11 @@ err:  int BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)  	{  	int ret = 0; -	const int max = BN_num_bits(p); -	unsigned int *arr=NULL; +	const int max = BN_num_bits(p) + 1; +	int *arr=NULL;  	bn_check_top(a);  	bn_check_top(p); -	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * max)) == NULL) goto err; +	if ((arr = (int *)OPENSSL_malloc(sizeof(int) * max)) == NULL) goto err;  	ret = BN_GF2m_poly2arr(p, arr, max);  	if (!ret || ret > max)  		{ @@ -919,10 +858,9 @@ err:  /* Find r such that r^2 + r = a mod p.  r could be a. If no r exists returns 0.   * Uses algorithms A.4.7 and A.4.6 from IEEE P1363.   */ -int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const unsigned int p[], BN_CTX *ctx) +int BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], BN_CTX *ctx)  	{ -	int ret = 0, count = 0; -	unsigned int j; +	int ret = 0, count = 0, j;  	BIGNUM *a, *z, *rho, *w, *w2, *tmp;  	bn_check_top(a_); @@ -1017,11 +955,11 @@ err:  int BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)  	{  	int ret = 0; -	const int max = BN_num_bits(p); -	unsigned int *arr=NULL; +	const int max = BN_num_bits(p) + 1; +	int *arr=NULL;  	bn_check_top(a);  	bn_check_top(p); -	if ((arr = (unsigned int *)OPENSSL_malloc(sizeof(unsigned int) * +	if ((arr = (int *)OPENSSL_malloc(sizeof(int) *  						max)) == NULL) goto err;  	ret = BN_GF2m_poly2arr(p, arr, max);  	if (!ret || ret > max) @@ -1037,20 +975,17 @@ err:  	}  /* Convert the bit-string representation of a polynomial - * ( \sum_{i=0}^n a_i * x^i , where a_0 is *not* zero) into an array - * of integers corresponding to the bits with non-zero coefficient. 
+ * ( \sum_{i=0}^n a_i * x^i) into an array of integers corresponding  + * to the bits with non-zero coefficient.  Array is terminated with -1.   * Up to max elements of the array will be filled.  Return value is total - * number of coefficients that would be extracted if array was large enough. + * number of array elements that would be filled if array was large enough.   */ -int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max) +int BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max)  	{  	int i, j, k = 0;  	BN_ULONG mask; -	if (BN_is_zero(a) || !BN_is_bit_set(a, 0)) -		/* a_0 == 0 => return error (the unsigned int array -		 * must be terminated by 0) -		 */ +	if (BN_is_zero(a))  		return 0;  	for (i = a->top - 1; i >= 0; i--) @@ -1070,24 +1005,28 @@ int BN_GF2m_poly2arr(const BIGNUM *a, unsigned int p[], int max)  			}  		} +	if (k < max) { +		p[k] = -1; +		k++; +	} +  	return k;  	}  /* Convert the coefficient array representation of a polynomial to a  - * bit-string.  The array must be terminated by 0. + * bit-string.  The array must be terminated by -1.   
*/ -int BN_GF2m_arr2poly(const unsigned int p[], BIGNUM *a) +int BN_GF2m_arr2poly(const int p[], BIGNUM *a)  	{  	int i;  	bn_check_top(a);  	BN_zero(a); -	for (i = 0; p[i] != 0; i++) +	for (i = 0; p[i] != -1; i++)  		{  		if (BN_set_bit(a, p[i]) == 0)  			return 0;  		} -	BN_set_bit(a, 0);  	bn_check_top(a);  	return 1; diff --git a/openssl/crypto/bn/bn_lcl.h b/openssl/crypto/bn/bn_lcl.h index 27ac4397a..8e5e98e3f 100644 --- a/openssl/crypto/bn/bn_lcl.h +++ b/openssl/crypto/bn/bn_lcl.h @@ -255,7 +255,8 @@ extern "C" {  	     : "r"(a), "r"(b));		\  	ret;			})  #  endif	/* compiler */ -# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) +# elif (defined(__x86_64) || defined(__x86_64__)) && \ +       (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))  #  if defined(__GNUC__)  #   define BN_UMULT_HIGH(a,b)	({	\  	register BN_ULONG ret,discard;	\ diff --git a/openssl/crypto/bn/bn_lib.c b/openssl/crypto/bn/bn_lib.c index 32a8fbaf5..5470fbe6e 100644 --- a/openssl/crypto/bn/bn_lib.c +++ b/openssl/crypto/bn/bn_lib.c @@ -133,15 +133,34 @@ int BN_get_params(int which)  const BIGNUM *BN_value_one(void)  	{ -	static BN_ULONG data_one=1L; -	static BIGNUM const_one={&data_one,1,1,0,BN_FLG_STATIC_DATA}; +	static const BN_ULONG data_one=1L; +	static const BIGNUM const_one={(BN_ULONG *)&data_one,1,1,0,BN_FLG_STATIC_DATA};  	return(&const_one);  	} +char *BN_options(void) +	{ +	static int init=0; +	static char data[16]; + +	if (!init) +		{ +		init++; +#ifdef BN_LLONG +		BIO_snprintf(data,sizeof data,"bn(%d,%d)", +			     (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8); +#else +		BIO_snprintf(data,sizeof data,"bn(%d,%d)", +			     (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8); +#endif +		} +	return(data); +	} +  int BN_num_bits_word(BN_ULONG l)  	{ -	static const char bits[256]={ +	static const unsigned char bits[256]={  		0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,  		5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,  		6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, @@ -216,7 +235,7 @@ int 
BN_num_bits_word(BN_ULONG l)  		else  #endif  			{ -#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG) +#if defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)  			if (l & 0xff00L)  				return(bits[(int)(l>>8)]+8);  			else	 @@ -744,7 +763,7 @@ int BN_is_bit_set(const BIGNUM *a, int n)  	i=n/BN_BITS2;  	j=n%BN_BITS2;  	if (a->top <= i) return 0; -	return(((a->d[i])>>j)&((BN_ULONG)1)); +	return (int)(((a->d[i])>>j)&((BN_ULONG)1));  	}  int BN_mask_bits(BIGNUM *a, int n) diff --git a/openssl/crypto/bn/bn_mont.c b/openssl/crypto/bn/bn_mont.c index 4799b152d..7224637ab 100644 --- a/openssl/crypto/bn/bn_mont.c +++ b/openssl/crypto/bn/bn_mont.c @@ -122,26 +122,10 @@  #define MONT_WORD /* use the faster word-based algorithm */ -#if defined(MONT_WORD) && defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) -/* This condition means we have a specific non-default build: - * In the 0.9.8 branch, OPENSSL_BN_ASM_MONT is normally not set for any - * BN_BITS2<=32 platform; an explicit "enable-montasm" is required. - * I.e., if we are here, the user intentionally deviates from the - * normal stable build to get better Montgomery performance from - * the 0.9.9-dev backport. - * - * In this case only, we also enable BN_from_montgomery_word() - * (another non-stable feature from 0.9.9-dev). 
- */ -#define MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD -#endif - -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD  static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);  #endif - -  int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,  			  BN_MONT_CTX *mont, BN_CTX *ctx)  	{ @@ -153,11 +137,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,  	if (num>1 && a->top==num && b->top==num)  		{  		if (bn_wexpand(r,num) == NULL) return(0); -#if 0 /* for OpenSSL 0.9.9 mont->n0 */  		if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num)) -#else -		if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,&mont->n0,num)) -#endif  			{  			r->neg = a->neg^b->neg;  			r->top = num; @@ -181,7 +161,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,  		if (!BN_mul(tmp,a,b,ctx)) goto err;  		}  	/* reduce from aRR to aR */ -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD  	if (!BN_from_montgomery_word(r,tmp,mont)) goto err;  #else  	if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err; @@ -193,7 +173,7 @@ err:  	return(ret);  	} -#ifdef MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD +#ifdef MONT_WORD  static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)  	{  	BIGNUM *n; @@ -217,15 +197,15 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)  	nrp= &(r->d[nl]);  	/* clear the top words of T */ +#if 1  	for (i=r->top; i<max; i++) /* memset? 
XXX */  		r->d[i]=0; +#else +	memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));  +#endif  	r->top=max; -#if 0 /* for OpenSSL 0.9.9 mont->n0 */  	n0=mont->n0[0]; -#else -	n0=mont->n0; -#endif  #ifdef BN_COUNT  	fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl); @@ -270,6 +250,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)  		}  	al=r->top-ri; +#define BRANCH_FREE 1 +#if BRANCH_FREE  	if (bn_wexpand(ret,ri) == NULL) return(0);  	x=0-(((al-ri)>>(sizeof(al)*8-1))&1);  	ret->top=x=(ri&~x)|(al&x);	/* min(ri,al) */ @@ -317,164 +299,8 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)  		rp[i]=nrp[i], ap[i]=0;  	bn_correct_top(r);  	bn_correct_top(ret); -	bn_check_top(ret); - -	return(1); -	} - -int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, -	     BN_CTX *ctx) -	{ -	int retn=0; -	BIGNUM *t; - -	BN_CTX_start(ctx); -	if ((t = BN_CTX_get(ctx)) && BN_copy(t,a)) -		retn = BN_from_montgomery_word(ret,t,mont); -	BN_CTX_end(ctx); -	return retn; -	} - -#else /* !MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */ - -int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, -	     BN_CTX *ctx) -	{ -	int retn=0; - -#ifdef MONT_WORD -	BIGNUM *n,*r; -	BN_ULONG *ap,*np,*rp,n0,v,*nrp; -	int al,nl,max,i,x,ri; - -	BN_CTX_start(ctx); -	if ((r = BN_CTX_get(ctx)) == NULL) goto err; - -	if (!BN_copy(r,a)) goto err; -	n= &(mont->N); - -	ap=a->d; -	/* mont->ri is the size of mont->N in bits (rounded up -	   to the word size) */ -	al=ri=mont->ri/BN_BITS2; -	 -	nl=n->top; -	if ((al == 0) || (nl == 0)) { r->top=0; return(1); } - -	max=(nl+al+1); /* allow for overflow (no?) XXX */ -	if (bn_wexpand(r,max) == NULL) goto err; - -	r->neg=a->neg^n->neg; -	np=n->d; -	rp=r->d; -	nrp= &(r->d[nl]); - -	/* clear the top words of T */ -#if 1 -	for (i=r->top; i<max; i++) /* memset? 
XXX */ -		r->d[i]=0;  #else -	memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));  -#endif - -	r->top=max; -	n0=mont->n0; - -#ifdef BN_COUNT -	fprintf(stderr,"word BN_from_montgomery %d * %d\n",nl,nl); -#endif -	for (i=0; i<nl; i++) -		{ -#ifdef __TANDEM -                { -                   long long t1; -                   long long t2; -                   long long t3; -                   t1 = rp[0] * (n0 & 0177777); -                   t2 = 037777600000l; -                   t2 = n0 & t2; -                   t3 = rp[0] & 0177777; -                   t2 = (t3 * t2) & BN_MASK2; -                   t1 = t1 + t2; -                   v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1); -                } -#else -		v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2); -#endif -		nrp++; -		rp++; -		if (((nrp[-1]+=v)&BN_MASK2) >= v) -			continue; -		else -			{ -			if (((++nrp[0])&BN_MASK2) != 0) continue; -			if (((++nrp[1])&BN_MASK2) != 0) continue; -			for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ; -			} -		} -	bn_correct_top(r); -	 -	/* mont->ri will be a multiple of the word size and below code -	 * is kind of BN_rshift(ret,r,mont->ri) equivalent */ -	if (r->top <= ri) -		{ -		ret->top=0; -		retn=1; -		goto err; -		} -	al=r->top-ri; - -# define BRANCH_FREE 1 -# if BRANCH_FREE -	if (bn_wexpand(ret,ri) == NULL) goto err; -	x=0-(((al-ri)>>(sizeof(al)*8-1))&1); -	ret->top=x=(ri&~x)|(al&x);	/* min(ri,al) */ -	ret->neg=r->neg; - -	rp=ret->d; -	ap=&(r->d[ri]); - -	{ -	size_t m1,m2; - -	v=bn_sub_words(rp,ap,np,ri); -	/* this ----------------^^ works even in al<ri case -	 * thanks to zealous zeroing of top of the vector in the -	 * beginning. */ - -	/* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */ -	/* in other words if subtraction result is real, then -	 * trick unconditional memcpy below to perform in-place -	 * "refresh" instead of actual copy. 
*/ -	m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1);	/* al<ri */ -	m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1);	/* al>ri */ -	m1|=m2;			/* (al!=ri) */ -	m1|=(0-(size_t)v);	/* (al!=ri || v) */ -	m1&=~m2;		/* (al!=ri || v) && !al>ri */ -	nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1)); -	} - -	/* 'i<ri' is chosen to eliminate dependency on input data, even -	 * though it results in redundant copy in al<ri case. */ -	for (i=0,ri-=4; i<ri; i+=4) -		{ -		BN_ULONG t1,t2,t3,t4; -		 -		t1=nrp[i+0]; -		t2=nrp[i+1]; -		t3=nrp[i+2];	ap[i+0]=0; -		t4=nrp[i+3];	ap[i+1]=0; -		rp[i+0]=t1;	ap[i+2]=0; -		rp[i+1]=t2;	ap[i+3]=0; -		rp[i+2]=t3; -		rp[i+3]=t4; -		} -	for (ri+=4; i<ri; i++) -		rp[i]=nrp[i], ap[i]=0; -	bn_correct_top(r); -	bn_correct_top(ret); -# else -	if (bn_wexpand(ret,al) == NULL) goto err; +	if (bn_wexpand(ret,al) == NULL) return(0);  	ret->top=al;  	ret->neg=r->neg; @@ -497,8 +323,30 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,  	al+=4;  	for (; i<al; i++)  		rp[i]=ap[i]; -# endif -#else /* !MONT_WORD */  + +	if (BN_ucmp(ret, &(mont->N)) >= 0) +		{ +		if (!BN_usub(ret,ret,&(mont->N))) return(0); +		} +#endif +	bn_check_top(ret); + +	return(1); +	} +#endif	/* MONT_WORD */ + +int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, +	     BN_CTX *ctx) +	{ +	int retn=0; +#ifdef MONT_WORD +	BIGNUM *t; + +	BN_CTX_start(ctx); +	if ((t = BN_CTX_get(ctx)) && BN_copy(t,a)) +		retn = BN_from_montgomery_word(ret,t,mont); +	BN_CTX_end(ctx); +#else /* !MONT_WORD */  	BIGNUM *t1,*t2;  	BN_CTX_start(ctx); @@ -515,21 +363,18 @@ int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,  	if (!BN_mul(t1,t2,&mont->N,ctx)) goto err;  	if (!BN_add(t2,a,t1)) goto err;  	if (!BN_rshift(ret,t2,mont->ri)) goto err; -#endif /* MONT_WORD */ -#if !defined(BRANCH_FREE) || BRANCH_FREE==0  	if (BN_ucmp(ret, &(mont->N)) >= 0)  		{  		if (!BN_usub(ret,ret,&(mont->N))) goto err;  		} -#endif  	retn=1;  	bn_check_top(ret);   err:  	
BN_CTX_end(ctx); +#endif /* MONT_WORD */  	return(retn);  	} -#endif /* MONT_FROM_WORD___NON_DEFAULT_0_9_8_BUILD */  BN_MONT_CTX *BN_MONT_CTX_new(void)  	{ @@ -549,11 +394,7 @@ void BN_MONT_CTX_init(BN_MONT_CTX *ctx)  	BN_init(&(ctx->RR));  	BN_init(&(ctx->N));  	BN_init(&(ctx->Ni)); -#if 0 /* for OpenSSL 0.9.9 mont->n0 */  	ctx->n0[0] = ctx->n0[1] = 0; -#else -	ctx->n0 = 0; -#endif  	ctx->flags=0;  	} @@ -585,26 +426,22 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)  		BIGNUM tmod;  		BN_ULONG buf[2]; -		mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; -		BN_zero(R); -#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)", -         only certain BN_BITS2<=32 platforms actually need this */ -		if (!(BN_set_bit(R,2*BN_BITS2))) goto err;	/* R */ -#else -		if (!(BN_set_bit(R,BN_BITS2))) goto err;	/* R */ -#endif - -		buf[0]=mod->d[0]; /* tmod = N mod word size */ -		buf[1]=0; -  		BN_init(&tmod);  		tmod.d=buf; -		tmod.top = buf[0] != 0 ? 1 : 0;  		tmod.dmax=2;  		tmod.neg=0; -#if 0 /* for OpenSSL 0.9.9 mont->n0, would be "#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)"; -         only certain BN_BITS2<=32 platforms actually need this */ +		mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; + +#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) +		/* Only certain BN_BITS2<=32 platforms actually make use of +		 * n0[1], and we could use the #else case (with a shorter R +		 * value) for the others.  However, currently only the assembler +		 * files do know which is which. */ + +		BN_zero(R); +		if (!(BN_set_bit(R,2*BN_BITS2))) goto err; +  								tmod.top=0;  		if ((buf[0] = mod->d[0]))			tmod.top=1;  		if ((buf[1] = mod->top>1 ? mod->d[1] : 0))	tmod.top=2; @@ -632,6 +469,12 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)  		mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;  		mont->n0[1] = (Ri->top > 1) ? 
Ri->d[1] : 0;  #else +		BN_zero(R); +		if (!(BN_set_bit(R,BN_BITS2))) goto err;	/* R */ + +		buf[0]=mod->d[0]; /* tmod = N mod word size */ +		buf[1]=0; +		tmod.top = buf[0] != 0 ? 1 : 0;  							/* Ri = R^-1 mod N*/  		if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)  			goto err; @@ -647,12 +490,8 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)  		if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;  		/* Ni = (R*Ri-1)/N,  		 * keep only least significant word: */ -# if 0 /* for OpenSSL 0.9.9 mont->n0 */  		mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;  		mont->n0[1] = 0; -# else -		mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0; -# endif  #endif  		}  #else /* !MONT_WORD */ @@ -689,12 +528,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)  	if (!BN_copy(&(to->N),&(from->N))) return NULL;  	if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;  	to->ri=from->ri; -#if 0 /* for OpenSSL 0.9.9 mont->n0 */  	to->n0[0]=from->n0[0];  	to->n0[1]=from->n0[1]; -#else -	to->n0=from->n0; -#endif  	return(to);  	} diff --git a/openssl/crypto/bn/bn_mul.c b/openssl/crypto/bn/bn_mul.c index b848c8cc6..a0e9ec3b4 100644 --- a/openssl/crypto/bn/bn_mul.c +++ b/openssl/crypto/bn/bn_mul.c @@ -1028,17 +1028,19 @@ int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)  			assert(j <= al || j <= bl);  			k = j+j;  			t = BN_CTX_get(ctx); +			if (t == NULL) +				goto err;  			if (al > j || bl > j)  				{ -				bn_wexpand(t,k*4); -				bn_wexpand(rr,k*4); +				if (bn_wexpand(t,k*4) == NULL) goto err; +				if (bn_wexpand(rr,k*4) == NULL) goto err;  				bn_mul_part_recursive(rr->d,a->d,b->d,  					j,al-j,bl-j,t->d);  				}  			else	/* al <= j || bl <= j */  				{ -				bn_wexpand(t,k*2); -				bn_wexpand(rr,k*2); +				if (bn_wexpand(t,k*2) == NULL) goto err; +				if (bn_wexpand(rr,k*2) == NULL) goto err;  				bn_mul_recursive(rr->d,a->d,b->d,  					j,al-j,bl-j,t->d);  				} diff --git a/openssl/crypto/bn/bn_opt.c b/openssl/crypto/bn/bn_opt.c deleted file mode 
100644 index 21cbb38f6..000000000 --- a/openssl/crypto/bn/bn_opt.c +++ /dev/null @@ -1,87 +0,0 @@ -/* crypto/bn/bn_opt.c */ -/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) - * All rights reserved. - * - * This package is an SSL implementation written - * by Eric Young (eay@cryptsoft.com). - * The implementation was written so as to conform with Netscapes SSL. - *  - * This library is free for commercial and non-commercial use as long as - * the following conditions are aheared to.  The following conditions - * apply to all code found in this distribution, be it the RC4, RSA, - * lhash, DES, etc., code; not just the SSL code.  The SSL documentation - * included with this distribution is covered by the same copyright terms - * except that the holder is Tim Hudson (tjh@cryptsoft.com). - *  - * Copyright remains Eric Young's, and as such any Copyright notices in - * the code are not to be removed. - * If this package is used in a product, Eric Young should be given attribution - * as the author of the parts of the library used. - * This can be in the form of a textual message at program startup or - * in documentation (online or textual) provided with the package. - *  - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the copyright - *    notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - *    notice, this list of conditions and the following disclaimer in the - *    documentation and/or other materials provided with the distribution. - * 3. 
All advertising materials mentioning features or use of this software - *    must display the following acknowledgement: - *    "This product includes cryptographic software written by - *     Eric Young (eay@cryptsoft.com)" - *    The word 'cryptographic' can be left out if the rouines from the library - *    being used are not cryptographic related :-). - * 4. If you include any Windows specific code (or a derivative thereof) from  - *    the apps directory (application code) you must include an acknowledgement: - *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" - *  - * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - *  - * The licence and distribution terms for any publically available version or - * derivative of this code cannot be changed.  i.e. this code cannot simply be - * copied and put under another distribution licence - * [including the GNU Public Licence.] 
- */ - -#ifndef BN_DEBUG -# undef NDEBUG /* avoid conflicting definitions */ -# define NDEBUG -#endif - -#include <assert.h> -#include <limits.h> -#include <stdio.h> -#include "cryptlib.h" -#include "bn_lcl.h" - -char *BN_options(void) -	{ -	static int init=0; -	static char data[16]; - -	if (!init) -		{ -		init++; -#ifdef BN_LLONG -		BIO_snprintf(data,sizeof data,"bn(%d,%d)", -			     (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8); -#else -		BIO_snprintf(data,sizeof data,"bn(%d,%d)", -			     (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8); -#endif -		} -	return(data); -	} diff --git a/openssl/crypto/bn/bn_print.c b/openssl/crypto/bn/bn_print.c index 810dde34e..bebb466d0 100644 --- a/openssl/crypto/bn/bn_print.c +++ b/openssl/crypto/bn/bn_print.c @@ -294,6 +294,27 @@ err:  	return(0);  	} +int BN_asc2bn(BIGNUM **bn, const char *a) +	{ +	const char *p = a; +	if (*p == '-') +		p++; + +	if (p[0] == '0' && (p[1] == 'X' || p[1] == 'x')) +		{		 +		if (!BN_hex2bn(bn, p + 2)) +			return 0; +		} +	else +		{ +		if (!BN_dec2bn(bn, p)) +			return 0; +		} +	if (*a == '-') +		(*bn)->neg = 1; +	return 1; +	} +  #ifndef OPENSSL_NO_BIO  #ifndef OPENSSL_NO_FP_API  int BN_print_fp(FILE *fp, const BIGNUM *a) diff --git a/openssl/crypto/bn/bn_x931p.c b/openssl/crypto/bn/bn_x931p.c deleted file mode 100644 index 04c5c874e..000000000 --- a/openssl/crypto/bn/bn_x931p.c +++ /dev/null @@ -1,272 +0,0 @@ -/* bn_x931p.c */ -/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL - * project 2005. - */ -/* ==================================================================== - * Copyright (c) 2005 The OpenSSL Project.  All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - *    notice, this list of conditions and the following disclaimer.  - * - * 2. 
Redistributions in binary form must reproduce the above copyright - *    notice, this list of conditions and the following disclaimer in - *    the documentation and/or other materials provided with the - *    distribution. - * - * 3. All advertising materials mentioning features or use of this - *    software must display the following acknowledgment: - *    "This product includes software developed by the OpenSSL Project - *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" - * - * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to - *    endorse or promote products derived from this software without - *    prior written permission. For written permission, please contact - *    licensing@OpenSSL.org. - * - * 5. Products derived from this software may not be called "OpenSSL" - *    nor may "OpenSSL" appear in their names without prior written - *    permission of the OpenSSL Project. - * - * 6. Redistributions of any form whatsoever must retain the following - *    acknowledgment: - *    "This product includes software developed by the OpenSSL Project - *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" - * - * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY - * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, - * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. 
- * ==================================================================== - * - * This product includes cryptographic software written by Eric Young - * (eay@cryptsoft.com).  This product includes software written by Tim - * Hudson (tjh@cryptsoft.com). - * - */ - -#include <stdio.h> -#include <openssl/bn.h> - -/* X9.31 routines for prime derivation */ - -/* X9.31 prime derivation. This is used to generate the primes pi - * (p1, p2, q1, q2) from a parameter Xpi by checking successive odd - * integers. - */ - -static int bn_x931_derive_pi(BIGNUM *pi, const BIGNUM *Xpi, BN_CTX *ctx, -			BN_GENCB *cb) -	{ -	int i = 0; -	if (!BN_copy(pi, Xpi)) -		return 0; -	if (!BN_is_odd(pi) && !BN_add_word(pi, 1)) -		return 0; -	for(;;) -		{ -		i++; -		BN_GENCB_call(cb, 0, i); -		/* NB 27 MR is specificed in X9.31 */ -		if (BN_is_prime_fasttest_ex(pi, 27, ctx, 1, cb)) -			break; -		if (!BN_add_word(pi, 2)) -			return 0; -		} -	BN_GENCB_call(cb, 2, i); -	return 1; -	} - -/* This is the main X9.31 prime derivation function. From parameters - * Xp1, Xp2 and Xp derive the prime p. If the parameters p1 or p2 are - * not NULL they will be returned too: this is needed for testing. 
- */ - -int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, -			const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2, -			const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb) -	{ -	int ret = 0; - -	BIGNUM *t, *p1p2, *pm1; - -	/* Only even e supported */ -	if (!BN_is_odd(e)) -		return 0; - -	BN_CTX_start(ctx); -	if (!p1) -		p1 = BN_CTX_get(ctx); - -	if (!p2) -		p2 = BN_CTX_get(ctx); - -	t = BN_CTX_get(ctx); - -	p1p2 = BN_CTX_get(ctx); - -	pm1 = BN_CTX_get(ctx); - -	if (!bn_x931_derive_pi(p1, Xp1, ctx, cb)) -		goto err; - -	if (!bn_x931_derive_pi(p2, Xp2, ctx, cb)) -		goto err; - -	if (!BN_mul(p1p2, p1, p2, ctx)) -		goto err; - -	/* First set p to value of Rp */ - -	if (!BN_mod_inverse(p, p2, p1, ctx)) -		goto err; - -	if (!BN_mul(p, p, p2, ctx)) -		goto err; - -	if (!BN_mod_inverse(t, p1, p2, ctx)) -		goto err; - -	if (!BN_mul(t, t, p1, ctx)) -		goto err; - -	if (!BN_sub(p, p, t)) -		goto err; - -	if (p->neg && !BN_add(p, p, p1p2)) -		goto err; - -	/* p now equals Rp */ - -	if (!BN_mod_sub(p, p, Xp, p1p2, ctx)) -		goto err; - -	if (!BN_add(p, p, Xp)) -		goto err; - -	/* p now equals Yp0 */ - -	for (;;) -		{ -		int i = 1; -		BN_GENCB_call(cb, 0, i++); -		if (!BN_copy(pm1, p)) -			goto err; -		if (!BN_sub_word(pm1, 1)) -			goto err; -		if (!BN_gcd(t, pm1, e, ctx)) -			goto err; -		if (BN_is_one(t) -		/* X9.31 specifies 8 MR and 1 Lucas test or any prime test -		 * offering similar or better guarantees 50 MR is considerably  -		 * better. -		 */ -			&& BN_is_prime_fasttest_ex(p, 50, ctx, 1, cb)) -			break; -		if (!BN_add(p, p, p1p2)) -			goto err; -		} - -	BN_GENCB_call(cb, 3, 0); - -	ret = 1; - -	err: - -	BN_CTX_end(ctx); - -	return ret; -	} - -/* Generate pair of paramters Xp, Xq for X9.31 prime generation. - * Note: nbits paramter is sum of number of bits in both. - */ - -int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx) -	{ -	BIGNUM *t; -	int i; -	/* Number of bits for each prime is of the form -	 * 512+128s for s = 0, 1, ... 
-	 */ -	if ((nbits < 1024) || (nbits & 0xff)) -		return 0; -	nbits >>= 1; -	/* The random value Xp must be between sqrt(2) * 2^(nbits-1) and -	 * 2^nbits - 1. By setting the top two bits we ensure that the lower -	 * bound is exceeded. -	 */ -	if (!BN_rand(Xp, nbits, 1, 0)) -		return 0; - -	BN_CTX_start(ctx); -	t = BN_CTX_get(ctx); - -	for (i = 0; i < 1000; i++) -		{ -		if (!BN_rand(Xq, nbits, 1, 0)) -			return 0; -		/* Check that |Xp - Xq| > 2^(nbits - 100) */ -		BN_sub(t, Xp, Xq); -		if (BN_num_bits(t) > (nbits - 100)) -			break; -		} - -	BN_CTX_end(ctx); - -	if (i < 1000) -		return 1; - -	return 0; - -	} - -/* Generate primes using X9.31 algorithm. Of the values p, p1, p2, Xp1 - * and Xp2 only 'p' needs to be non-NULL. If any of the others are not NULL - * the relevant parameter will be stored in it. - * - * Due to the fact that |Xp - Xq| > 2^(nbits - 100) must be satisfied Xp and Xq - * are generated using the previous function and supplied as input. - */ - -int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2, -			BIGNUM *Xp1, BIGNUM *Xp2, -			const BIGNUM *Xp, -			const BIGNUM *e, BN_CTX *ctx, -			BN_GENCB *cb) -	{ -	int ret = 0; - -	BN_CTX_start(ctx); -	if (!Xp1) -		Xp1 = BN_CTX_get(ctx); -	if (!Xp2) -		Xp2 = BN_CTX_get(ctx); - -	if (!BN_rand(Xp1, 101, 0, 0)) -		goto error; -	if (!BN_rand(Xp2, 101, 0, 0)) -		goto error; -	if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb)) -		goto error; - -	ret = 1; - -	error: -	BN_CTX_end(ctx); - -	return ret; - -	} - diff --git a/openssl/crypto/bn/bntest.c b/openssl/crypto/bn/bntest.c index cf190380f..0cd99c5b4 100644 --- a/openssl/crypto/bn/bntest.c +++ b/openssl/crypto/bn/bntest.c @@ -486,7 +486,7 @@ static void print_word(BIO *bp,BN_ULONG w)  		return;  		}  #endif -	BIO_printf(bp,"%lX",w); +	BIO_printf(bp,BN_HEX_FMT1,w);  	}  int test_div_word(BIO *bp) @@ -732,6 +732,8 @@ int test_mont(BIO *bp, BN_CTX *ctx)  	BN_init(&n);  	mont=BN_MONT_CTX_new(); +	if (mont == NULL) +		return 0;  	
BN_bntest_rand(&a,100,0,0); /**/  	BN_bntest_rand(&b,100,0,0); /**/ @@ -1027,7 +1029,7 @@ int test_exp(BIO *bp, BN_CTX *ctx)  		BN_bntest_rand(a,20+i*5,0,0); /**/  		BN_bntest_rand(b,2+i,0,0); /**/ -		if (!BN_exp(d,a,b,ctx)) +		if (BN_exp(d,a,b,ctx) <= 0)  			return(0);  		if (bp != NULL) @@ -1116,8 +1118,8 @@ int test_gf2m_mod(BIO *bp)  	{  	BIGNUM *a,*b[2],*c,*d,*e;  	int i, j, ret = 0; -	unsigned int p0[] = {163,7,6,3,0}; -	unsigned int p1[] = {193,15,0}; +	int p0[] = {163,7,6,3,0,-1}; +	int p1[] = {193,15,0,-1};  	a=BN_new();  	b[0]=BN_new(); @@ -1174,8 +1176,8 @@ int test_gf2m_mod_mul(BIO *bp,BN_CTX *ctx)  	{  	BIGNUM *a,*b[2],*c,*d,*e,*f,*g,*h;  	int i, j, ret = 0; -	unsigned int p0[] = {163,7,6,3,0}; -	unsigned int p1[] = {193,15,0}; +	int p0[] = {163,7,6,3,0,-1}; +	int p1[] = {193,15,0,-1};  	a=BN_new();  	b[0]=BN_new(); @@ -1245,8 +1247,8 @@ int test_gf2m_mod_sqr(BIO *bp,BN_CTX *ctx)  	{  	BIGNUM *a,*b[2],*c,*d;  	int i, j, ret = 0; -	unsigned int p0[] = {163,7,6,3,0}; -	unsigned int p1[] = {193,15,0}; +	int p0[] = {163,7,6,3,0,-1}; +	int p1[] = {193,15,0,-1};  	a=BN_new();  	b[0]=BN_new(); @@ -1304,8 +1306,8 @@ int test_gf2m_mod_inv(BIO *bp,BN_CTX *ctx)  	{  	BIGNUM *a,*b[2],*c,*d;  	int i, j, ret = 0; -	unsigned int p0[] = {163,7,6,3,0}; -	unsigned int p1[] = {193,15,0}; +	int p0[] = {163,7,6,3,0,-1}; +	int p1[] = {193,15,0,-1};  	a=BN_new();  	b[0]=BN_new(); @@ -1359,8 +1361,8 @@ int test_gf2m_mod_div(BIO *bp,BN_CTX *ctx)  	{  	BIGNUM *a,*b[2],*c,*d,*e,*f;  	int i, j, ret = 0; -	unsigned int p0[] = {163,7,6,3,0}; -	unsigned int p1[] = {193,15,0}; +	int p0[] = {163,7,6,3,0,-1}; +	int p1[] = {193,15,0,-1};  	a=BN_new();  	b[0]=BN_new(); @@ -1422,8 +1424,8 @@ int test_gf2m_mod_exp(BIO *bp,BN_CTX *ctx)  	{  	BIGNUM *a,*b[2],*c,*d,*e,*f;  	int i, j, ret = 0; -	unsigned int p0[] = {163,7,6,3,0}; -	unsigned int p1[] = {193,15,0}; +	int p0[] = {163,7,6,3,0,-1}; +	int p1[] = {193,15,0,-1};  	a=BN_new();  	b[0]=BN_new(); @@ -1493,8 +1495,8 @@ int 
test_gf2m_mod_sqrt(BIO *bp,BN_CTX *ctx)  	{  	BIGNUM *a,*b[2],*c,*d,*e,*f;  	int i, j, ret = 0; -	unsigned int p0[] = {163,7,6,3,0}; -	unsigned int p1[] = {193,15,0}; +	int p0[] = {163,7,6,3,0,-1}; +	int p1[] = {193,15,0,-1};  	a=BN_new();  	b[0]=BN_new(); @@ -1552,8 +1554,8 @@ int test_gf2m_mod_solve_quad(BIO *bp,BN_CTX *ctx)  	{  	BIGNUM *a,*b[2],*c,*d,*e;  	int i, j, s = 0, t, ret = 0; -	unsigned int p0[] = {163,7,6,3,0}; -	unsigned int p1[] = {193,15,0}; +	int p0[] = {163,7,6,3,0,-1}; +	int p1[] = {193,15,0,-1};  	a=BN_new();  	b[0]=BN_new(); diff --git a/openssl/crypto/bn/exptest.c b/openssl/crypto/bn/exptest.c index f598a07cf..074a8e882 100644 --- a/openssl/crypto/bn/exptest.c +++ b/openssl/crypto/bn/exptest.c @@ -163,7 +163,7 @@ int main(int argc, char *argv[])  		  	{  			if (BN_cmp(r_simple,r_mont) != 0)  				printf("\nsimple and mont results differ\n"); -			if (BN_cmp(r_simple,r_mont) != 0) +			if (BN_cmp(r_simple,r_mont_const) != 0)  				printf("\nsimple and mont const time results differ\n");  			if (BN_cmp(r_simple,r_recp) != 0)  				printf("\nsimple and recp results differ\n"); @@ -187,7 +187,7 @@ int main(int argc, char *argv[])  	BN_free(b);  	BN_free(m);  	BN_CTX_free(ctx); -	ERR_remove_state(0); +	ERR_remove_thread_state(NULL);  	CRYPTO_mem_leaks(out);  	BIO_free(out);  	printf(" done\n"); | 
