aboutsummaryrefslogtreecommitdiff
path: root/openssl/crypto/bn/asm
diff options
context:
space:
mode:
Diffstat (limited to 'openssl/crypto/bn/asm')
-rw-r--r--openssl/crypto/bn/asm/alpha-mont.pl36
-rw-r--r--openssl/crypto/bn/asm/s390x.S86
2 files changed, 63 insertions, 59 deletions
diff --git a/openssl/crypto/bn/asm/alpha-mont.pl b/openssl/crypto/bn/asm/alpha-mont.pl
index f7e0ca164..c63458e94 100644
--- a/openssl/crypto/bn/asm/alpha-mont.pl
+++ b/openssl/crypto/bn/asm/alpha-mont.pl
@@ -41,8 +41,12 @@ $j="s4";
$m1="s5";
$code=<<___;
+#indef __linux__
+#include <asm/regdef.h>
+#else
#include <asm.h>
#include <regdef.h>
+#endif
.text
@@ -76,7 +80,7 @@ bn_mul_mont:
ldq $aj,8($ap)
subq sp,AT,sp
ldq $bi,0($bp) # bp[0]
- mov -4096,AT
+ lda AT,-4096(zero) # mov -4096,AT
ldq $n0,0($n0)
and sp,AT,sp
@@ -106,9 +110,9 @@ bn_mul_mont:
.align 4
.L1st:
.set noreorder
- ldq $aj,($aj)
+ ldq $aj,0($aj)
addl $j,1,$j
- ldq $nj,($nj)
+ ldq $nj,0($nj)
lda $tp,8($tp)
addq $alo,$hi0,$lo0
@@ -159,12 +163,12 @@ bn_mul_mont:
.align 4
.Louter:
s8addq $i,$bp,$bi
- ldq $hi0,($ap)
+ ldq $hi0,0($ap)
ldq $aj,8($ap)
- ldq $bi,($bi)
- ldq $hi1,($np)
+ ldq $bi,0($bi)
+ ldq $hi1,0($np)
ldq $nj,8($np)
- ldq $tj,(sp)
+ ldq $tj,0(sp)
mulq $hi0,$bi,$lo0
umulh $hi0,$bi,$hi0
@@ -195,10 +199,10 @@ bn_mul_mont:
.set noreorder
ldq $tj,8($tp) #L0
nop #U1
- ldq $aj,($aj) #L1
+ ldq $aj,0($aj) #L1
s8addq $j,$np,$nj #U0
- ldq $nj,($nj) #L0
+ ldq $nj,0($nj) #L0
nop #U1
addq $alo,$hi0,$lo0 #L1
lda $tp,8($tp)
@@ -247,7 +251,7 @@ bn_mul_mont:
addq $hi1,v0,$hi1
addq $hi1,$hi0,$lo1
- stq $j,($tp)
+ stq $j,0($tp)
cmpult $lo1,$hi0,$hi1
addq $lo1,$tj,$lo1
cmpult $lo1,$tj,AT
@@ -265,8 +269,8 @@ bn_mul_mont:
mov 0,$hi0 # clear borrow bit
.align 4
-.Lsub: ldq $lo0,($tp)
- ldq $lo1,($np)
+.Lsub: ldq $lo0,0($tp)
+ ldq $lo1,0($np)
lda $tp,8($tp)
lda $np,8($np)
subq $lo0,$lo1,$lo1 # tp[i]-np[i]
@@ -274,7 +278,7 @@ bn_mul_mont:
subq $lo1,$hi0,$lo0
cmpult $lo1,$lo0,$hi0
or $hi0,AT,$hi0
- stq $lo0,($rp)
+ stq $lo0,0($rp)
cmpult $tp,$tj,v0
lda $rp,8($rp)
bne v0,.Lsub
@@ -288,7 +292,7 @@ bn_mul_mont:
bis $bp,$ap,$ap # ap=borrow?tp:rp
.align 4
-.Lcopy: ldq $aj,($ap) # copy or in-place refresh
+.Lcopy: ldq $aj,0($ap) # copy or in-place refresh
lda $tp,8($tp)
lda $rp,8($rp)
lda $ap,8($ap)
@@ -309,8 +313,8 @@ bn_mul_mont:
lda sp,48(sp)
ret (ra)
.end bn_mul_mont
-.rdata
-.asciiz "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
+.ascii "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
___
print $code;
diff --git a/openssl/crypto/bn/asm/s390x.S b/openssl/crypto/bn/asm/s390x.S
index 8f45f5d51..43fcb79bc 100644
--- a/openssl/crypto/bn/asm/s390x.S
+++ b/openssl/crypto/bn/asm/s390x.S
@@ -1,4 +1,4 @@
-.ident "s390x.S, version 1.0"
+.ident "s390x.S, version 1.1"
// ====================================================================
// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
// project.
@@ -24,67 +24,67 @@ bn_mul_add_words:
bler %r14 // if (len<=0) return 0;
stmg %r6,%r10,48(%r15)
+ lghi %r10,3
lghi %r8,0 // carry = 0
- srag %r10,%r4,2 // cnt=len/4
- jz .Loop1_madd
+ nr %r10,%r4 // len%4
+ sra %r4,2 // cnt=len/4
+ jz .Loop1_madd // carry is incidentally cleared if branch taken
+ algr zero,zero // clear carry
.Loop4_madd:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
- algr %r7,%r8 // +=carry
+ alcgr %r7,%r8 // +=carry
alcgr %r6,zero
alg %r7,0(%r2,%r1) // +=rp[i]
- alcgr %r6,zero
stg %r7,0(%r2,%r1) // rp[i]=
lg %r9,8(%r2,%r3)
mlgr %r8,%r5
- algr %r9,%r6
+ alcgr %r9,%r6
alcgr %r8,zero
alg %r9,8(%r2,%r1)
- alcgr %r8,zero
stg %r9,8(%r2,%r1)
lg %r7,16(%r2,%r3)
mlgr %r6,%r5
- algr %r7,%r8
+ alcgr %r7,%r8
alcgr %r6,zero
alg %r7,16(%r2,%r1)
- alcgr %r6,zero
stg %r7,16(%r2,%r1)
lg %r9,24(%r2,%r3)
mlgr %r8,%r5
- algr %r9,%r6
+ alcgr %r9,%r6
alcgr %r8,zero
alg %r9,24(%r2,%r1)
- alcgr %r8,zero
stg %r9,24(%r2,%r1)
la %r2,32(%r2) // i+=4
- brct %r10,.Loop4_madd
+ brct %r4,.Loop4_madd
- lghi %r10,3
- nr %r4,%r10 // cnt=len%4
- jz .Lend_madd
+ la %r10,1(%r10) // see if len%4 is zero ...
+ brct %r10,.Loop1_madd // without touching condition code:-)
+
+.Lend_madd:
+ alcgr %r8,zero // collect carry bit
+ lgr %r2,%r8
+ lmg %r6,%r10,48(%r15)
+ br %r14
.Loop1_madd:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
- algr %r7,%r8 // +=carry
+ alcgr %r7,%r8 // +=carry
alcgr %r6,zero
alg %r7,0(%r2,%r1) // +=rp[i]
- alcgr %r6,zero
stg %r7,0(%r2,%r1) // rp[i]=
lgr %r8,%r6
la %r2,8(%r2) // i++
- brct %r4,.Loop1_madd
+ brct %r10,.Loop1_madd
-.Lend_madd:
- lgr %r2,%r8
- lmg %r6,%r10,48(%r15)
- br %r14
+ j .Lend_madd
.size bn_mul_add_words,.-bn_mul_add_words
// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
@@ -99,57 +99,57 @@ bn_mul_words:
bler %r14 // if (len<=0) return 0;
stmg %r6,%r10,48(%r15)
+ lghi %r10,3
lghi %r8,0 // carry = 0
- srag %r10,%r4,2 // cnt=len/4
- jz .Loop1_mul
+ nr %r10,%r4 // len%4
+ sra %r4,2 // cnt=len/4
+ jz .Loop1_mul // carry is incidentally cleared if branch taken
+ algr zero,zero // clear carry
.Loop4_mul:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
- algr %r7,%r8 // +=carry
- alcgr %r6,zero
+ alcgr %r7,%r8 // +=carry
stg %r7,0(%r2,%r1) // rp[i]=
lg %r9,8(%r2,%r3)
mlgr %r8,%r5
- algr %r9,%r6
- alcgr %r8,zero
+ alcgr %r9,%r6
stg %r9,8(%r2,%r1)
lg %r7,16(%r2,%r3)
mlgr %r6,%r5
- algr %r7,%r8
- alcgr %r6,zero
+ alcgr %r7,%r8
stg %r7,16(%r2,%r1)
lg %r9,24(%r2,%r3)
mlgr %r8,%r5
- algr %r9,%r6
- alcgr %r8,zero
+ alcgr %r9,%r6
stg %r9,24(%r2,%r1)
la %r2,32(%r2) // i+=4
- brct %r10,.Loop4_mul
+ brct %r4,.Loop4_mul
- lghi %r10,3
- nr %r4,%r10 // cnt=len%4
- jz .Lend_mul
+ la %r10,1(%r10) // see if len%4 is zero ...
+ brct %r10,.Loop1_mul // without touching condition code:-)
+
+.Lend_mul:
+ alcgr %r8,zero // collect carry bit
+ lgr %r2,%r8
+ lmg %r6,%r10,48(%r15)
+ br %r14
.Loop1_mul:
lg %r7,0(%r2,%r3) // ap[i]
mlgr %r6,%r5 // *=w
- algr %r7,%r8 // +=carry
- alcgr %r6,zero
+ alcgr %r7,%r8 // +=carry
stg %r7,0(%r2,%r1) // rp[i]=
lgr %r8,%r6
la %r2,8(%r2) // i++
- brct %r4,.Loop1_mul
+ brct %r10,.Loop1_mul
-.Lend_mul:
- lgr %r2,%r8
- lmg %r6,%r10,48(%r15)
- br %r14
+ j .Lend_mul
.size bn_mul_words,.-bn_mul_words
// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4)