diff options
author | marha <marha@users.sourceforge.net> | 2011-01-23 20:17:10 +0000 |
---|---|---|
committer | marha <marha@users.sourceforge.net> | 2011-01-23 20:17:10 +0000 |
commit | 1b639dab951fb73d5031aa0c1afb13e8480d1dae (patch) | |
tree | 4e13a4d1e4c609bd01d0bf5d65c6a8acdc3d0c50 /openssl/crypto/sha/asm/sha512-armv4.pl | |
parent | 51181fb7f4d135d214974bb6611d51f21475eec8 (diff) | |
parent | b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b (diff) | |
download | vcxsrv-1b639dab951fb73d5031aa0c1afb13e8480d1dae.tar.gz vcxsrv-1b639dab951fb73d5031aa0c1afb13e8480d1dae.tar.bz2 vcxsrv-1b639dab951fb73d5031aa0c1afb13e8480d1dae.zip |
svn merge ^/branches/released .
Diffstat (limited to 'openssl/crypto/sha/asm/sha512-armv4.pl')
-rw-r--r-- | openssl/crypto/sha/asm/sha512-armv4.pl | 32 |
1 files changed, 18 insertions, 14 deletions
diff --git a/openssl/crypto/sha/asm/sha512-armv4.pl b/openssl/crypto/sha/asm/sha512-armv4.pl index 4fbb94a91..3a35861ac 100644 --- a/openssl/crypto/sha/asm/sha512-armv4.pl +++ b/openssl/crypto/sha/asm/sha512-armv4.pl @@ -10,7 +10,13 @@ # SHA512 block procedure for ARMv4. September 2007. # This code is ~4.5 (four and a half) times faster than code generated -# by gcc 3.4 and it spends ~72 clock cycles per byte. +# by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue +# Xscale PXA250 core]. +# +# July 2010. +# +# Rescheduling for dual-issue pipeline resulted in 6% improvement on +# Cortex A8 core and ~40 cycles per processed byte. # Byte order [in]dependence. ========================================= # @@ -22,7 +28,7 @@ $hi=0; $lo=4; # ==================================================================== -$output=shift; +while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; $ctx="r0"; @@ -73,33 +79,31 @@ $code.=<<___; eor $t0,$t0,$Elo,lsl#23 eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e) adds $Tlo,$Tlo,$t0 - adc $Thi,$Thi,$t1 @ T += Sigma1(e) - adds $Tlo,$Tlo,$t2 - adc $Thi,$Thi,$t3 @ T += h - ldr $t0,[sp,#$Foff+0] @ f.lo + adc $Thi,$Thi,$t1 @ T += Sigma1(e) ldr $t1,[sp,#$Foff+4] @ f.hi + adds $Tlo,$Tlo,$t2 ldr $t2,[sp,#$Goff+0] @ g.lo + adc $Thi,$Thi,$t3 @ T += h ldr $t3,[sp,#$Goff+4] @ g.hi - str $Elo,[sp,#$Eoff+0] - str $Ehi,[sp,#$Eoff+4] - str $Alo,[sp,#$Aoff+0] - str $Ahi,[sp,#$Aoff+4] eor $t0,$t0,$t2 + str $Elo,[sp,#$Eoff+0] eor $t1,$t1,$t3 + str $Ehi,[sp,#$Eoff+4] and $t0,$t0,$Elo + str $Alo,[sp,#$Aoff+0] and $t1,$t1,$Ehi + str $Ahi,[sp,#$Aoff+4] eor $t0,$t0,$t2 - eor $t1,$t1,$t3 @ Ch(e,f,g) - ldr $t2,[$Ktbl,#4] @ K[i].lo + eor $t1,$t1,$t3 @ Ch(e,f,g) ldr $t3,[$Ktbl,#0] @ K[i].hi - ldr $Elo,[sp,#$Doff+0] @ d.lo - ldr $Ehi,[sp,#$Doff+4] @ d.hi adds $Tlo,$Tlo,$t0 + ldr $Elo,[sp,#$Doff+0] @ d.lo adc $Thi,$Thi,$t1 @ T += Ch(e,f,g) + ldr $Ehi,[sp,#$Doff+4] @ d.hi adds $Tlo,$Tlo,$t2 adc $Thi,$Thi,$t3 @ T += K[i] adds $Elo,$Elo,$Tlo |