Updated to openssl-1.0.0c

author: marha <marha@users.sourceforge.net> 2011-01-23 19:50:13 +0000
committer: marha <marha@users.sourceforge.net> 2011-01-23 19:50:13 +0000
commit: b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b (patch)
tree: 4722cd31e41fdda28e5c2b37bdf8500d27868384 /openssl/crypto/sha/asm/sha512-armv4.pl
parent: 8cd59857a99c534c560f58c931f5c2466d4c1f9b (diff)
download: vcxsrv-b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b.tar.gz
vcxsrv-b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b.tar.bz2
vcxsrv-b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b.zip
1 file changed, 18 insertions, 14 deletions
diff --git a/openssl/crypto/sha/asm/sha512-armv4.pl b/openssl/crypto/sha/asm/sha512-armv4.pl
index 4fbb94a91..3a35861ac 100644
--- a/openssl/crypto/sha/asm/sha512-armv4.pl
+++ b/openssl/crypto/sha/asm/sha512-armv4.pl
@@ -10,7 +10,13 @@
 # SHA512 block procedure for ARMv4. September 2007.
 
 # This code is ~4.5 (four and a half) times faster than code generated
-# by gcc 3.4 and it spends ~72 clock cycles per byte. 
+# by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
+# Xscale PXA250 core].
+#
+# July 2010.
+#
+# Rescheduling for dual-issue pipeline resulted in 6% improvement on
+# Cortex A8 core and ~40 cycles per processed byte.
 
 # Byte order [in]dependence. =========================================
 #
@@ -22,7 +28,7 @@ $hi=0;
 $lo=4;
 # ====================================================================
 
-$output=shift;
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
 open STDOUT,">$output";
 
 $ctx="r0";
@@ -73,33 +79,31 @@ $code.=<<___;
 	eor	$t0,$t0,$Elo,lsl#23
 	eor	$t1,$t1,$Ehi,lsl#23	@ Sigma1(e)
 	adds	$Tlo,$Tlo,$t0
-	adc	$Thi,$Thi,$t1		@ T += Sigma1(e)
-	adds	$Tlo,$Tlo,$t2
-	adc	$Thi,$Thi,$t3		@ T += h
-
 	ldr	$t0,[sp,#$Foff+0]	@ f.lo
+	adc	$Thi,$Thi,$t1		@ T += Sigma1(e)
 	ldr	$t1,[sp,#$Foff+4]	@ f.hi
+	adds	$Tlo,$Tlo,$t2
 	ldr	$t2,[sp,#$Goff+0]	@ g.lo
+	adc	$Thi,$Thi,$t3		@ T += h
 	ldr	$t3,[sp,#$Goff+4]	@ g.hi
-	str	$Elo,[sp,#$Eoff+0]
-	str	$Ehi,[sp,#$Eoff+4]
-	str	$Alo,[sp,#$Aoff+0]
-	str	$Ahi,[sp,#$Aoff+4]
 
 	eor	$t0,$t0,$t2
+	str	$Elo,[sp,#$Eoff+0]
 	eor	$t1,$t1,$t3
+	str	$Ehi,[sp,#$Eoff+4]
 	and	$t0,$t0,$Elo
+	str	$Alo,[sp,#$Aoff+0]
 	and	$t1,$t1,$Ehi
+	str	$Ahi,[sp,#$Aoff+4]
 	eor	$t0,$t0,$t2
-	eor	$t1,$t1,$t3		@ Ch(e,f,g)
-
 	ldr	$t2,[$Ktbl,#4]		@ K[i].lo
+	eor	$t1,$t1,$t3		@ Ch(e,f,g)
 	ldr	$t3,[$Ktbl,#0]		@ K[i].hi
-	ldr	$Elo,[sp,#$Doff+0]	@ d.lo
-	ldr	$Ehi,[sp,#$Doff+4]	@ d.hi
 
 	adds	$Tlo,$Tlo,$t0
+	ldr	$Elo,[sp,#$Doff+0]	@ d.lo
 	adc	$Thi,$Thi,$t1		@ T += Ch(e,f,g)
+	ldr	$Ehi,[sp,#$Doff+4]	@ d.hi
 	adds	$Tlo,$Tlo,$t2
 	adc	$Thi,$Thi,$t3		@ T += K[i]
 	adds	$Elo,$Elo,$Tlo
author	marha <marha@users.sourceforge.net>	2011-01-23 19:50:13 +0000
committer	marha <marha@users.sourceforge.net>	2011-01-23 19:50:13 +0000
commit	b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b (patch)
tree	4722cd31e41fdda28e5c2b37bdf8500d27868384 /openssl/crypto/sha/asm/sha512-armv4.pl
parent	8cd59857a99c534c560f58c931f5c2466d4c1f9b (diff)
download	vcxsrv-b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b.tar.gz vcxsrv-b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b.tar.bz2 vcxsrv-b680cf39ed5bc37e0eb7eb86ad8599bf92df3f2b.zip