diff options
Diffstat (limited to 'openssl/crypto/whrlpool/asm')
-rw-r--r-- | openssl/crypto/whrlpool/asm/wp-mmx.pl | 42 | ||||
-rw-r--r-- | openssl/crypto/whrlpool/asm/wp-x86_64.pl | 43 |
2 files changed, 45 insertions, 40 deletions
diff --git a/openssl/crypto/whrlpool/asm/wp-mmx.pl b/openssl/crypto/whrlpool/asm/wp-mmx.pl index cb2381c22..c584e5b92 100644 --- a/openssl/crypto/whrlpool/asm/wp-mmx.pl +++ b/openssl/crypto/whrlpool/asm/wp-mmx.pl @@ -118,34 +118,36 @@ $tbl="ebp"; &movq (@mm[0],&QWP(2048*$SCALE,$tbl,"esi",8)); # rc[r] &mov ("eax",&DWP(0,"esp")); &mov ("ebx",&DWP(4,"esp")); + &movz ("ecx",&LB("eax")); + &movz ("edx",&HB("eax")); for($i=0;$i<8;$i++) { my $func = ($i==0)? \&movq : \&pxor; - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); + &shr ("eax",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); - &shr ("eax",16); + &movz ("edx",&HB("eax")); &pxor (@mm[0],&QWP(&row(0),$tbl,"esi",8)); &$func (@mm[1],&QWP(&row(1),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); &mov ("eax",&DWP(($i+1)*8,"esp")); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); + &movz ("edx",&HB("ebx")); &$func (@mm[2],&QWP(&row(2),$tbl,"esi",8)); &$func (@mm[3],&QWP(&row(3),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); + &shr ("ebx",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); - &shr ("ebx",16); + &movz ("edx",&HB("ebx")); &$func (@mm[4],&QWP(&row(4),$tbl,"esi",8)); &$func (@mm[5],&QWP(&row(5),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); &mov ("ebx",&DWP(($i+1)*8+4,"esp")); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); + &movz ("edx",&HB("eax")); &$func (@mm[6],&QWP(&row(6),$tbl,"esi",8)); &$func (@mm[7],&QWP(&row(7),$tbl,"edi",8)); push(@mm,shift(@mm)); @@ -154,32 +156,32 @@ for($i=0;$i<8;$i++) { for($i=0;$i<8;$i++) { &movq(&QWP($i*8,"esp"),@mm[$i]); } # K=L for($i=0;$i<8;$i++) { - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); + &shr ("eax",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); - &shr ("eax",16); + &movz ("edx",&HB("eax")); &pxor (@mm[0],&QWP(&row(0),$tbl,"esi",8)); &pxor (@mm[1],&QWP(&row(1),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("eax")); - &movb (&LB("edx"),&HB("eax")); &mov ("eax",&DWP(64+($i+1)*8,"esp")) if ($i<7); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); + &movz ("edx",&HB("ebx")); &pxor (@mm[2],&QWP(&row(2),$tbl,"esi",8)); &pxor (@mm[3],&QWP(&row(3),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); + &shr ("ebx",16); &scale ("esi","ecx"); + &movz ("ecx",&LB("ebx")); &scale ("edi","edx"); - &shr ("ebx",16); + &movz ("edx",&HB("ebx")); &pxor (@mm[4],&QWP(&row(4),$tbl,"esi",8)); &pxor (@mm[5],&QWP(&row(5),$tbl,"edi",8)); - &movb (&LB("ecx"),&LB("ebx")); - &movb (&LB("edx"),&HB("ebx")); &mov ("ebx",&DWP(64+($i+1)*8+4,"esp")) if ($i<7); &scale ("esi","ecx"); + &movz ("ecx",&LB("eax")); &scale ("edi","edx"); + &movz ("edx",&HB("eax")); &pxor (@mm[6],&QWP(&row(6),$tbl,"esi",8)); &pxor (@mm[7],&QWP(&row(7),$tbl,"edi",8)); push(@mm,shift(@mm)); diff --git a/openssl/crypto/whrlpool/asm/wp-x86_64.pl b/openssl/crypto/whrlpool/asm/wp-x86_64.pl index 24b2ff60c..5a3bdbcf2 100644 --- a/openssl/crypto/whrlpool/asm/wp-x86_64.pl +++ b/openssl/crypto/whrlpool/asm/wp-x86_64.pl @@ -91,41 +91,44 @@ for($i=0;$i<8;$i++) { $code.="mov @mm[$i],64+$i*8(%rsp)\n"; } # S=L $code.=<<___; xor %rsi,%rsi mov %rsi,24(%rbx) # zero round counter + jmp .Lround .align 16 .Lround: mov 4096(%rbp,%rsi,8),@mm[0] # rc[r] mov 0(%rsp),%eax mov 4(%rsp),%ebx + movz %al,%ecx + movz %ah,%edx ___ for($i=0;$i<8;$i++) { my $func = ($i==0)? "mov" : "xor"; $code.=<<___; - mov %al,%cl - mov %ah,%dl + shr \$16,%eax lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%eax + movz %ah,%edx xor 0(%rbp,%rsi,8),@mm[0] $func 7(%rbp,%rdi,8),@mm[1] - mov %al,%cl - mov %ah,%dl mov $i*8+8(%rsp),%eax # ($i+1)*8 lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi + movz %bh,%edx $func 6(%rbp,%rsi,8),@mm[2] $func 5(%rbp,%rdi,8),@mm[3] - mov %bl,%cl - mov %bh,%dl + shr \$16,%ebx lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%ebx + movz %bh,%edx $func 4(%rbp,%rsi,8),@mm[4] $func 3(%rbp,%rdi,8),@mm[5] - mov %bl,%cl - mov %bh,%dl mov $i*8+8+4(%rsp),%ebx # ($i+1)*8+4 lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi + movz %ah,%edx $func 2(%rbp,%rsi,8),@mm[6] $func 1(%rbp,%rdi,8),@mm[7] ___ @@ -134,32 +137,32 @@ ___ for($i=0;$i<8;$i++) { $code.="mov @mm[$i],$i*8(%rsp)\n"; } # K=L for($i=0;$i<8;$i++) { $code.=<<___; - mov %al,%cl - mov %ah,%dl + shr \$16,%eax lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%eax + movz %ah,%edx xor 0(%rbp,%rsi,8),@mm[0] xor 7(%rbp,%rdi,8),@mm[1] - mov %al,%cl - mov %ah,%dl `"mov 64+$i*8+8(%rsp),%eax" if($i<7);` # 64+($i+1)*8 lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi + movz %bh,%edx xor 6(%rbp,%rsi,8),@mm[2] xor 5(%rbp,%rdi,8),@mm[3] - mov %bl,%cl - mov %bh,%dl + shr \$16,%ebx lea (%rcx,%rcx),%rsi + movz %bl,%ecx lea (%rdx,%rdx),%rdi - shr \$16,%ebx + movz %bh,%edx xor 4(%rbp,%rsi,8),@mm[4] xor 3(%rbp,%rdi,8),@mm[5] - mov %bl,%cl - mov %bh,%dl `"mov 64+$i*8+8+4(%rsp),%ebx" if($i<7);` # 64+($i+1)*8+4 lea (%rcx,%rcx),%rsi + movz %al,%ecx lea (%rdx,%rdx),%rdi + movz %ah,%edx xor 2(%rbp,%rsi,8),@mm[6] xor 1(%rbp,%rdi,8),@mm[7] ___ |