aboutsummaryrefslogtreecommitdiff
path: root/openssl/crypto/sha/asm/sha512-armv4.pl
diff options
context:
space:
mode:
authormarha <marha@users.sourceforge.net>2011-01-23 19:50:13 +0000
committermarha <marha@users.sourceforge.net>2011-01-23 19:50:13 +0000
commitb680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b (patch)
tree4722cd31e41fdda28e5c2b37bdf8500d27868384 /openssl/crypto/sha/asm/sha512-armv4.pl
parent8cd59857a99c534c560f58c931f5c2466d4c1f9b (diff)
downloadvcxsrv-b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b.tar.gz
vcxsrv-b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b.tar.bz2
vcxsrv-b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b.zip
Updated to openssl-1.0.0c
Diffstat (limited to 'openssl/crypto/sha/asm/sha512-armv4.pl')
-rw-r--r--openssl/crypto/sha/asm/sha512-armv4.pl32
1 files changed, 18 insertions, 14 deletions
diff --git a/openssl/crypto/sha/asm/sha512-armv4.pl b/openssl/crypto/sha/asm/sha512-armv4.pl
index 4fbb94a91..3a35861ac 100644
--- a/openssl/crypto/sha/asm/sha512-armv4.pl
+++ b/openssl/crypto/sha/asm/sha512-armv4.pl
@@ -10,7 +10,13 @@
# SHA512 block procedure for ARMv4. September 2007.
# This code is ~4.5 (four and a half) times faster than code generated
-# by gcc 3.4 and it spends ~72 clock cycles per byte.
+# by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
+# Xscale PXA250 core].
+#
+# July 2010.
+#
+# Rescheduling for dual-issue pipeline resulted in 6% improvement on
+# Cortex A8 core and ~40 cycles per processed byte.
# Byte order [in]dependence. =========================================
#
@@ -22,7 +28,7 @@ $hi=0;
$lo=4;
# ====================================================================
-$output=shift;
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$ctx="r0";
@@ -73,33 +79,31 @@ $code.=<<___;
eor $t0,$t0,$Elo,lsl#23
eor $t1,$t1,$Ehi,lsl#23 @ Sigma1(e)
adds $Tlo,$Tlo,$t0
- adc $Thi,$Thi,$t1 @ T += Sigma1(e)
- adds $Tlo,$Tlo,$t2
- adc $Thi,$Thi,$t3 @ T += h
-
ldr $t0,[sp,#$Foff+0] @ f.lo
+ adc $Thi,$Thi,$t1 @ T += Sigma1(e)
ldr $t1,[sp,#$Foff+4] @ f.hi
+ adds $Tlo,$Tlo,$t2
ldr $t2,[sp,#$Goff+0] @ g.lo
+ adc $Thi,$Thi,$t3 @ T += h
ldr $t3,[sp,#$Goff+4] @ g.hi
- str $Elo,[sp,#$Eoff+0]
- str $Ehi,[sp,#$Eoff+4]
- str $Alo,[sp,#$Aoff+0]
- str $Ahi,[sp,#$Aoff+4]
eor $t0,$t0,$t2
+ str $Elo,[sp,#$Eoff+0]
eor $t1,$t1,$t3
+ str $Ehi,[sp,#$Eoff+4]
and $t0,$t0,$Elo
+ str $Alo,[sp,#$Aoff+0]
and $t1,$t1,$Ehi
+ str $Ahi,[sp,#$Aoff+4]
eor $t0,$t0,$t2
- eor $t1,$t1,$t3 @ Ch(e,f,g)
-
ldr $t2,[$Ktbl,#4] @ K[i].lo
+ eor $t1,$t1,$t3 @ Ch(e,f,g)
ldr $t3,[$Ktbl,#0] @ K[i].hi
- ldr $Elo,[sp,#$Doff+0] @ d.lo
- ldr $Ehi,[sp,#$Doff+4] @ d.hi
adds $Tlo,$Tlo,$t0
+ ldr $Elo,[sp,#$Doff+0] @ d.lo
adc $Thi,$Thi,$t1 @ T += Ch(e,f,g)
+ ldr $Ehi,[sp,#$Doff+4] @ d.hi
adds $Tlo,$Tlo,$t2
adc $Thi,$Thi,$t3 @ T += K[i]
adds $Elo,$Elo,$Tlo