aboutsummaryrefslogtreecommitdiff
path: root/openssl/crypto/bn/asm/bn-586.pl
diff options
context:
space:
mode:
authormarha <marha@users.sourceforge.net>2009-06-28 22:07:26 +0000
committermarha <marha@users.sourceforge.net>2009-06-28 22:07:26 +0000
commit3562e78743202e43aec8727005182a2558117eca (patch)
tree8f9113a77d12470c5c851a2a8e4cb02e89df7d43 /openssl/crypto/bn/asm/bn-586.pl
downloadvcxsrv-3562e78743202e43aec8727005182a2558117eca.tar.gz
vcxsrv-3562e78743202e43aec8727005182a2558117eca.tar.bz2
vcxsrv-3562e78743202e43aec8727005182a2558117eca.zip
Checked in the following released items:
xkeyboard-config-1.4.tar.gz ttf-bitstream-vera-1.10.tar.gz font-alias-1.0.1.tar.gz font-sun-misc-1.0.0.tar.gz font-sun-misc-1.0.0.tar.gz font-sony-misc-1.0.0.tar.gz font-schumacher-misc-1.0.0.tar.gz font-mutt-misc-1.0.0.tar.gz font-misc-misc-1.0.0.tar.gz font-misc-meltho-1.0.0.tar.gz font-micro-misc-1.0.0.tar.gz font-jis-misc-1.0.0.tar.gz font-isas-misc-1.0.0.tar.gz font-dec-misc-1.0.0.tar.gz font-daewoo-misc-1.0.0.tar.gz font-cursor-misc-1.0.0.tar.gz font-arabic-misc-1.0.0.tar.gz font-winitzki-cyrillic-1.0.0.tar.gz font-misc-cyrillic-1.0.0.tar.gz font-cronyx-cyrillic-1.0.0.tar.gz font-screen-cyrillic-1.0.1.tar.gz font-xfree86-type1-1.0.1.tar.gz font-adobe-utopia-type1-1.0.1.tar.gz font-ibm-type1-1.0.0.tar.gz font-bitstream-type1-1.0.0.tar.gz font-bitstream-speedo-1.0.0.tar.gz font-bh-ttf-1.0.0.tar.gz font-bh-type1-1.0.0.tar.gz font-bitstream-100dpi-1.0.0.tar.gz font-bh-lucidatypewriter-100dpi-1.0.0.tar.gz font-bh-100dpi-1.0.0.tar.gz font-adobe-utopia-100dpi-1.0.1.tar.gz font-adobe-100dpi-1.0.0.tar.gz font-util-1.0.1.tar.gz font-bitstream-75dpi-1.0.0.tar.gz font-bh-lucidatypewriter-75dpi-1.0.0.tar.gz font-adobe-utopia-75dpi-1.0.1.tar.gz font-bh-75dpi-1.0.0.tar.gz bdftopcf-1.0.1.tar.gz font-adobe-75dpi-1.0.0.tar.gz mkfontscale-1.0.6.tar.gz openssl-0.9.8k.tar.gz bigreqsproto-1.0.2.tar.gz xtrans-1.2.2.tar.gz resourceproto-1.0.2.tar.gz inputproto-1.4.4.tar.gz compositeproto-0.4.tar.gz damageproto-1.1.0.tar.gz zlib-1.2.3.tar.gz xkbcomp-1.0.5.tar.gz freetype-2.3.9.tar.gz pthreads-w32-2-8-0-release.tar.gz pixman-0.12.0.tar.gz kbproto-1.0.3.tar.gz evieext-1.0.2.tar.gz fixesproto-4.0.tar.gz recordproto-1.13.2.tar.gz randrproto-1.2.2.tar.gz scrnsaverproto-1.1.0.tar.gz renderproto-0.9.3.tar.gz xcmiscproto-1.1.2.tar.gz fontsproto-2.0.2.tar.gz xextproto-7.0.3.tar.gz xproto-7.0.14.tar.gz libXdmcp-1.0.2.tar.gz libxkbfile-1.0.5.tar.gz libfontenc-1.0.4.tar.gz libXfont-1.3.4.tar.gz libX11-1.1.5.tar.gz libXau-1.0.4.tar.gz libxcb-1.1.tar.gz xorg-server-1.5.3.tar.gz
Diffstat (limited to 'openssl/crypto/bn/asm/bn-586.pl')
-rw-r--r--openssl/crypto/bn/asm/bn-586.pl675
1 files changed, 675 insertions, 0 deletions
diff --git a/openssl/crypto/bn/asm/bn-586.pl b/openssl/crypto/bn/asm/bn-586.pl
new file mode 100644
index 000000000..26c2685a7
--- /dev/null
+++ b/openssl/crypto/bn/asm/bn-586.pl
@@ -0,0 +1,675 @@
+#!/usr/local/bin/perl
+
+push(@INC,"perlasm","../../perlasm");
+require "x86asm.pl";
+
+&asm_init($ARGV[0],$0);
+
+$sse2=0;
+for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
+
+&external_label("OPENSSL_ia32cap_P") if ($sse2);
+
+&bn_mul_add_words("bn_mul_add_words");
+&bn_mul_words("bn_mul_words");
+&bn_sqr_words("bn_sqr_words");
+&bn_div_words("bn_div_words");
+&bn_add_words("bn_add_words");
+&bn_sub_words("bn_sub_words");
+&bn_sub_part_words("bn_sub_part_words");
+
+&asm_finish();
+
+sub bn_mul_add_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
+
+ &comment("");
+ $Low="eax";
+ $High="edx";
+ $a="ebx";
+ $w="ebp";
+ $r="edi";
+ $c="esi";
+
+ &xor($c,$c); # clear carry
+ &mov($r,&wparam(0)); #
+
+ &mov("ecx",&wparam(2)); #
+ &mov($a,&wparam(1)); #
+
+ &and("ecx",0xfffffff8); # num / 8
+ &mov($w,&wparam(3)); #
+
+ &push("ecx"); # Up the stack for a tmp variable
+
+ &jz(&label("maw_finish"));
+
+ if ($sse2) {
+ &picmeup("eax","OPENSSL_ia32cap_P");
+ &bt(&DWP(0,"eax"),26);
+ &jnc(&label("maw_loop"));
+
+ &movd("mm0",$w); # mm0 = w
+ &pxor("mm1","mm1"); # mm1 = carry_in
+
+ &set_label("maw_sse2_loop",0);
+ &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
+ &paddq("mm1","mm3"); # mm1 = carry_in + r[0]
+ &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0]
+ &pmuludq("mm2","mm0"); # mm2 = w*a[0]
+ &movd("mm4",&DWP(4,$a,"",0)); # mm4 = a[1]
+ &pmuludq("mm4","mm0"); # mm4 = w*a[1]
+ &movd("mm6",&DWP(8,$a,"",0)); # mm6 = a[2]
+ &pmuludq("mm6","mm0"); # mm6 = w*a[2]
+ &movd("mm7",&DWP(12,$a,"",0)); # mm7 = a[3]
+ &pmuludq("mm7","mm0"); # mm7 = w*a[3]
+ &paddq("mm1","mm2"); # mm1 = carry_in + r[0] + w*a[0]
+ &movd("mm3",&DWP(4,$r,"",0)); # mm3 = r[1]
+ &paddq("mm3","mm4"); # mm3 = r[1] + w*a[1]
+ &movd("mm5",&DWP(8,$r,"",0)); # mm5 = r[2]
+ &paddq("mm5","mm6"); # mm5 = r[2] + w*a[2]
+ &movd("mm4",&DWP(12,$r,"",0)); # mm4 = r[3]
+ &paddq("mm7","mm4"); # mm7 = r[3] + w*a[3]
+ &movd(&DWP(0,$r,"",0),"mm1");
+ &movd("mm2",&DWP(16,$a,"",0)); # mm2 = a[4]
+ &pmuludq("mm2","mm0"); # mm2 = w*a[4]
+ &psrlq("mm1",32); # mm1 = carry0
+ &movd("mm4",&DWP(20,$a,"",0)); # mm4 = a[5]
+ &pmuludq("mm4","mm0"); # mm4 = w*a[5]
+ &paddq("mm1","mm3"); # mm1 = carry0 + r[1] + w*a[1]
+ &movd("mm6",&DWP(24,$a,"",0)); # mm6 = a[6]
+ &pmuludq("mm6","mm0"); # mm6 = w*a[6]
+ &movd(&DWP(4,$r,"",0),"mm1");
+ &psrlq("mm1",32); # mm1 = carry1
+ &movd("mm3",&DWP(28,$a,"",0)); # mm3 = a[7]
+ &add($a,32);
+ &pmuludq("mm3","mm0"); # mm3 = w*a[7]
+ &paddq("mm1","mm5"); # mm1 = carry1 + r[2] + w*a[2]
+ &movd("mm5",&DWP(16,$r,"",0)); # mm5 = r[4]
+ &paddq("mm2","mm5"); # mm2 = r[4] + w*a[4]
+ &movd(&DWP(8,$r,"",0),"mm1");
+ &psrlq("mm1",32); # mm1 = carry2
+ &paddq("mm1","mm7"); # mm1 = carry2 + r[3] + w*a[3]
+ &movd("mm5",&DWP(20,$r,"",0)); # mm5 = r[5]
+ &paddq("mm4","mm5"); # mm4 = r[5] + w*a[5]
+ &movd(&DWP(12,$r,"",0),"mm1");
+ &psrlq("mm1",32); # mm1 = carry3
+ &paddq("mm1","mm2"); # mm1 = carry3 + r[4] + w*a[4]
+ &movd("mm5",&DWP(24,$r,"",0)); # mm5 = r[6]
+ &paddq("mm6","mm5"); # mm6 = r[6] + w*a[6]
+ &movd(&DWP(16,$r,"",0),"mm1");
+ &psrlq("mm1",32); # mm1 = carry4
+ &paddq("mm1","mm4"); # mm1 = carry4 + r[5] + w*a[5]
+ &movd("mm5",&DWP(28,$r,"",0)); # mm5 = r[7]
+ &paddq("mm3","mm5"); # mm3 = r[7] + w*a[7]
+ &movd(&DWP(20,$r,"",0),"mm1");
+ &psrlq("mm1",32); # mm1 = carry5
+ &paddq("mm1","mm6"); # mm1 = carry5 + r[6] + w*a[6]
+ &movd(&DWP(24,$r,"",0),"mm1");
+ &psrlq("mm1",32); # mm1 = carry6
+ &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7]
+ &movd(&DWP(28,$r,"",0),"mm1");
+ &add($r,32);
+ &psrlq("mm1",32); # mm1 = carry_out
+
+ &sub("ecx",8);
+ &jnz(&label("maw_sse2_loop"));
+
+ &movd($c,"mm1"); # c = carry_out
+ &emms();
+
+ &jmp(&label("maw_finish"));
+ }
+
+ &set_label("maw_loop",0);
+
+ &mov(&swtmp(0),"ecx"); #
+
+ for ($i=0; $i<32; $i+=4)
+ {
+ &comment("Round $i");
+
+ &mov("eax",&DWP($i,$a,"",0)); # *a
+ &mul($w); # *a * w
+ &add("eax",$c); # L(t)+= *r
+ &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r
+ &adc("edx",0); # H(t)+=carry
+ &add("eax",$c); # L(t)+=c
+ &adc("edx",0); # H(t)+=carry
+ &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
+ &mov($c,"edx"); # c= H(t);
+ }
+
+ &comment("");
+ &mov("ecx",&swtmp(0)); #
+ &add($a,32);
+ &add($r,32);
+ &sub("ecx",8);
+ &jnz(&label("maw_loop"));
+
+ &set_label("maw_finish",0);
+ &mov("ecx",&wparam(2)); # get num
+ &and("ecx",7);
+ &jnz(&label("maw_finish2")); # helps branch prediction
+ &jmp(&label("maw_end"));
+
+ &set_label("maw_finish2",1);
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov("eax",&DWP($i*4,$a,"",0));# *a
+ &mul($w); # *a * w
+ &add("eax",$c); # L(t)+=c
+ &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
+ &adc("edx",0); # H(t)+=carry
+ &add("eax",$c);
+ &adc("edx",0); # H(t)+=carry
+ &dec("ecx") if ($i != 7-1);
+ &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
+ &mov($c,"edx"); # c= H(t);
+ &jz(&label("maw_end")) if ($i != 7-1);
+ }
+ &set_label("maw_end",0);
+ &mov("eax",$c);
+
+ &pop("ecx"); # clear variable from
+
+ &function_end($name);
+ }
+
+sub bn_mul_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+
+ &comment("");
+ $Low="eax";
+ $High="edx";
+ $a="ebx";
+ $w="ecx";
+ $r="edi";
+ $c="esi";
+ $num="ebp";
+
+ &xor($c,$c); # clear carry
+ &mov($r,&wparam(0)); #
+ &mov($a,&wparam(1)); #
+ &mov($num,&wparam(2)); #
+ &mov($w,&wparam(3)); #
+
+ &and($num,0xfffffff8); # num / 8
+ &jz(&label("mw_finish"));
+
+ &set_label("mw_loop",0);
+ for ($i=0; $i<32; $i+=4)
+ {
+ &comment("Round $i");
+
+ &mov("eax",&DWP($i,$a,"",0)); # *a
+ &mul($w); # *a * w
+ &add("eax",$c); # L(t)+=c
+ # XXX
+
+ &adc("edx",0); # H(t)+=carry
+ &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
+
+ &mov($c,"edx"); # c= H(t);
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($r,32);
+ &sub($num,8);
+ &jz(&label("mw_finish"));
+ &jmp(&label("mw_loop"));
+
+ &set_label("mw_finish",0);
+ &mov($num,&wparam(2)); # get num
+ &and($num,7);
+ &jnz(&label("mw_finish2"));
+ &jmp(&label("mw_end"));
+
+ &set_label("mw_finish2",1);
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov("eax",&DWP($i*4,$a,"",0));# *a
+ &mul($w); # *a * w
+ &add("eax",$c); # L(t)+=c
+ # XXX
+ &adc("edx",0); # H(t)+=carry
+ &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
+ &mov($c,"edx"); # c= H(t);
+ &dec($num) if ($i != 7-1);
+ &jz(&label("mw_end")) if ($i != 7-1);
+ }
+ &set_label("mw_end",0);
+ &mov("eax",$c);
+
+ &function_end($name);
+ }
+
+sub bn_sqr_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+
+ &comment("");
+ $r="esi";
+ $a="edi";
+ $num="ebx";
+
+ &mov($r,&wparam(0)); #
+ &mov($a,&wparam(1)); #
+ &mov($num,&wparam(2)); #
+
+ &and($num,0xfffffff8); # num / 8
+ &jz(&label("sw_finish"));
+
+ &set_label("sw_loop",0);
+ for ($i=0; $i<32; $i+=4)
+ {
+ &comment("Round $i");
+ &mov("eax",&DWP($i,$a,"",0)); # *a
+ # XXX
+ &mul("eax"); # *a * *a
+ &mov(&DWP($i*2,$r,"",0),"eax"); #
+ &mov(&DWP($i*2+4,$r,"",0),"edx");#
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($r,64);
+ &sub($num,8);
+ &jnz(&label("sw_loop"));
+
+ &set_label("sw_finish",0);
+ &mov($num,&wparam(2)); # get num
+ &and($num,7);
+ &jz(&label("sw_end"));
+
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov("eax",&DWP($i*4,$a,"",0)); # *a
+ # XXX
+ &mul("eax"); # *a * *a
+ &mov(&DWP($i*8,$r,"",0),"eax"); #
+ &dec($num) if ($i != 7-1);
+ &mov(&DWP($i*8+4,$r,"",0),"edx");
+ &jz(&label("sw_end")) if ($i != 7-1);
+ }
+ &set_label("sw_end",0);
+
+ &function_end($name);
+ }
+
+sub bn_div_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+ &mov("edx",&wparam(0)); #
+ &mov("eax",&wparam(1)); #
+ &mov("ebx",&wparam(2)); #
+ &div("ebx");
+ &function_end($name);
+ }
+
+sub bn_add_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+
+ &comment("");
+ $a="esi";
+ $b="edi";
+ $c="eax";
+ $r="ebx";
+ $tmp1="ecx";
+ $tmp2="edx";
+ $num="ebp";
+
+ &mov($r,&wparam(0)); # get r
+ &mov($a,&wparam(1)); # get a
+ &mov($b,&wparam(2)); # get b
+ &mov($num,&wparam(3)); # get num
+ &xor($c,$c); # clear carry
+ &and($num,0xfffffff8); # num / 8
+
+ &jz(&label("aw_finish"));
+
+ &set_label("aw_loop",0);
+ for ($i=0; $i<8; $i++)
+ {
+ &comment("Round $i");
+
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
+ &add($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &add($tmp1,$tmp2);
+ &adc($c,0);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($b,32);
+ &add($r,32);
+ &sub($num,8);
+ &jnz(&label("aw_loop"));
+
+ &set_label("aw_finish",0);
+ &mov($num,&wparam(3)); # get num
+ &and($num,7);
+ &jz(&label("aw_end"));
+
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov($tmp2,&DWP($i*4,$b,"",0));# *b
+ &add($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &add($tmp1,$tmp2);
+ &adc($c,0);
+ &dec($num) if ($i != 6);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ &jz(&label("aw_end")) if ($i != 6);
+ }
+ &set_label("aw_end",0);
+
+# &mov("eax",$c); # $c is "eax"
+
+ &function_end($name);
+ }
+
+sub bn_sub_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+
+ &comment("");
+ $a="esi";
+ $b="edi";
+ $c="eax";
+ $r="ebx";
+ $tmp1="ecx";
+ $tmp2="edx";
+ $num="ebp";
+
+ &mov($r,&wparam(0)); # get r
+ &mov($a,&wparam(1)); # get a
+ &mov($b,&wparam(2)); # get b
+ &mov($num,&wparam(3)); # get num
+ &xor($c,$c); # clear carry
+ &and($num,0xfffffff8); # num / 8
+
+ &jz(&label("aw_finish"));
+
+ &set_label("aw_loop",0);
+ for ($i=0; $i<8; $i++)
+ {
+ &comment("Round $i");
+
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
+ &sub($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &sub($tmp1,$tmp2);
+ &adc($c,0);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($b,32);
+ &add($r,32);
+ &sub($num,8);
+ &jnz(&label("aw_loop"));
+
+ &set_label("aw_finish",0);
+ &mov($num,&wparam(3)); # get num
+ &and($num,7);
+ &jz(&label("aw_end"));
+
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov($tmp2,&DWP($i*4,$b,"",0));# *b
+ &sub($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &sub($tmp1,$tmp2);
+ &adc($c,0);
+ &dec($num) if ($i != 6);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ &jz(&label("aw_end")) if ($i != 6);
+ }
+ &set_label("aw_end",0);
+
+# &mov("eax",$c); # $c is "eax"
+
+ &function_end($name);
+ }
+
+sub bn_sub_part_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+
+ &comment("");
+ $a="esi";
+ $b="edi";
+ $c="eax";
+ $r="ebx";
+ $tmp1="ecx";
+ $tmp2="edx";
+ $num="ebp";
+
+ &mov($r,&wparam(0)); # get r
+ &mov($a,&wparam(1)); # get a
+ &mov($b,&wparam(2)); # get b
+ &mov($num,&wparam(3)); # get num
+ &xor($c,$c); # clear carry
+ &and($num,0xfffffff8); # num / 8
+
+ &jz(&label("aw_finish"));
+
+ &set_label("aw_loop",0);
+ for ($i=0; $i<8; $i++)
+ {
+ &comment("Round $i");
+
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
+ &sub($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &sub($tmp1,$tmp2);
+ &adc($c,0);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($b,32);
+ &add($r,32);
+ &sub($num,8);
+ &jnz(&label("aw_loop"));
+
+ &set_label("aw_finish",0);
+ &mov($num,&wparam(3)); # get num
+ &and($num,7);
+ &jz(&label("aw_end"));
+
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov($tmp1,&DWP(0,$a,"",0)); # *a
+ &mov($tmp2,&DWP(0,$b,"",0));# *b
+ &sub($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &sub($tmp1,$tmp2);
+ &adc($c,0);
+ &mov(&DWP(0,$r,"",0),$tmp1); # *r
+ &add($a, 4);
+ &add($b, 4);
+ &add($r, 4);
+ &dec($num) if ($i != 6);
+ &jz(&label("aw_end")) if ($i != 6);
+ }
+ &set_label("aw_end",0);
+
+ &cmp(&wparam(4),0);
+ &je(&label("pw_end"));
+
+ &mov($num,&wparam(4)); # get dl
+ &cmp($num,0);
+ &je(&label("pw_end"));
+ &jge(&label("pw_pos"));
+
+ &comment("pw_neg");
+ &mov($tmp2,0);
+ &sub($tmp2,$num);
+ &mov($num,$tmp2);
+ &and($num,0xfffffff8); # num / 8
+ &jz(&label("pw_neg_finish"));
+
+ &set_label("pw_neg_loop",0);
+ for ($i=0; $i<8; $i++)
+ {
+ &comment("dl<0 Round $i");
+
+ &mov($tmp1,0);
+ &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
+ &sub($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &sub($tmp1,$tmp2);
+ &adc($c,0);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ }
+
+ &comment("");
+ &add($b,32);
+ &add($r,32);
+ &sub($num,8);
+ &jnz(&label("pw_neg_loop"));
+
+ &set_label("pw_neg_finish",0);
+ &mov($tmp2,&wparam(4)); # get dl
+ &mov($num,0);
+ &sub($num,$tmp2);
+ &and($num,7);
+ &jz(&label("pw_end"));
+
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("dl<0 Tail Round $i");
+ &mov($tmp1,0);
+ &mov($tmp2,&DWP($i*4,$b,"",0));# *b
+ &sub($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &sub($tmp1,$tmp2);
+ &adc($c,0);
+ &dec($num) if ($i != 6);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ &jz(&label("pw_end")) if ($i != 6);
+ }
+
+ &jmp(&label("pw_end"));
+
+ &set_label("pw_pos",0);
+
+ &and($num,0xfffffff8); # num / 8
+ &jz(&label("pw_pos_finish"));
+
+ &set_label("pw_pos_loop",0);
+
+ for ($i=0; $i<8; $i++)
+ {
+ &comment("dl>0 Round $i");
+
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &sub($tmp1,$c);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ &jnc(&label("pw_nc".$i));
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($r,32);
+ &sub($num,8);
+ &jnz(&label("pw_pos_loop"));
+
+ &set_label("pw_pos_finish",0);
+ &mov($num,&wparam(4)); # get dl
+ &and($num,7);
+ &jz(&label("pw_end"));
+
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("dl>0 Tail Round $i");
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &sub($tmp1,$c);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ &jnc(&label("pw_tail_nc".$i));
+ &dec($num) if ($i != 6);
+ &jz(&label("pw_end")) if ($i != 6);
+ }
+ &mov($c,1);
+ &jmp(&label("pw_end"));
+
+ &set_label("pw_nc_loop",0);
+ for ($i=0; $i<8; $i++)
+ {
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ &set_label("pw_nc".$i,0);
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($r,32);
+ &sub($num,8);
+ &jnz(&label("pw_nc_loop"));
+
+ &mov($num,&wparam(4)); # get dl
+ &and($num,7);
+ &jz(&label("pw_nc_end"));
+
+ for ($i=0; $i<7; $i++)
+ {
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ &set_label("pw_tail_nc".$i,0);
+ &dec($num) if ($i != 6);
+ &jz(&label("pw_nc_end")) if ($i != 6);
+ }
+
+ &set_label("pw_nc_end",0);
+ &mov($c,0);
+
+ &set_label("pw_end",0);
+
+# &mov("eax",$c); # $c is "eax"
+
+ &function_end($name);
+ }
+