Diffstat (limited to 'openssl/crypto/aes/asm')
-rw-r--r--  openssl/crypto/aes/asm/aes-armv4.pl    |   5
-rw-r--r--  openssl/crypto/aes/asm/aes-s390x.pl    |  37
-rw-r--r--  openssl/crypto/aes/asm/bsaes-x86_64.pl | 118
-rw-r--r--  openssl/crypto/aes/asm/vpaes-x86.pl    |   4
-rw-r--r--  openssl/crypto/aes/asm/vpaes-x86_64.pl |   6
5 files changed, 106 insertions, 64 deletions
diff --git a/openssl/crypto/aes/asm/aes-armv4.pl b/openssl/crypto/aes/asm/aes-armv4.pl
index 943ce45ff..86b86c4a0 100644
--- a/openssl/crypto/aes/asm/aes-armv4.pl
+++ b/openssl/crypto/aes/asm/aes-armv4.pl
@@ -408,6 +408,7 @@ _armv4_AES_encrypt:
.type private_AES_set_encrypt_key,%function
.align 5
private_AES_set_encrypt_key:
+_armv4_AES_set_encrypt_key:
sub r3,pc,#8 @ AES_set_encrypt_key
teq r0,#0
moveq r0,#-1
@@ -425,7 +426,7 @@ private_AES_set_encrypt_key:
bne .Labrt
.Lok: stmdb sp!,{r4-r12,lr}
- sub $tbl,r3,#private_AES_set_encrypt_key-AES_Te-1024 @ Te4
+ sub $tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4
mov $rounds,r0 @ inp
mov lr,r1 @ bits
@@ -685,7 +686,7 @@ private_AES_set_encrypt_key:
.align 5
private_AES_set_decrypt_key:
str lr,[sp,#-4]! @ push lr
- bl private_AES_set_encrypt_key
+ bl _armv4_AES_set_encrypt_key
teq r0,#0
ldrne lr,[sp],#4 @ pop lr
bne .Labrt
diff --git a/openssl/crypto/aes/asm/aes-s390x.pl b/openssl/crypto/aes/asm/aes-s390x.pl
index f749a52d7..445a1e676 100644
--- a/openssl/crypto/aes/asm/aes-s390x.pl
+++ b/openssl/crypto/aes/asm/aes-s390x.pl
@@ -783,6 +783,7 @@ $code.=<<___;
.type private_AES_set_encrypt_key,\@function
.align 16
private_AES_set_encrypt_key:
+_s390x_AES_set_encrypt_key:
lghi $t0,0
cl${g}r $inp,$t0
je .Lminus1
@@ -836,7 +837,8 @@ $code.=<<___ if (!$softonly);
je 1f
lg %r1,24($inp)
stg %r1,24($key)
-1: st $bits,236($key) # save bits
+1: st $bits,236($key) # save bits [for debugging purposes]
+ lgr $t0,%r5
st %r5,240($key) # save km code
lghi %r2,0
br %r14
@@ -844,7 +846,7 @@ ___
$code.=<<___;
.align 16
.Lekey_internal:
- stm${g} %r6,%r13,6*$SIZE_T($sp) # all non-volatile regs
+ stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key
larl $tbl,AES_Te+2048
@@ -904,8 +906,9 @@ $code.=<<___;
la $key,16($key) # key+=4
la $t3,4($t3) # i++
brct $rounds,.L128_loop
+ lghi $t0,10
lghi %r2,0
- lm${g} %r6,%r13,6*$SIZE_T($sp)
+ lm${g} %r4,%r13,4*$SIZE_T($sp)
br $ra
.align 16
@@ -952,8 +955,9 @@ $code.=<<___;
st $s2,32($key)
st $s3,36($key)
brct $rounds,.L192_continue
+ lghi $t0,12
lghi %r2,0
- lm${g} %r6,%r13,6*$SIZE_T($sp)
+ lm${g} %r4,%r13,4*$SIZE_T($sp)
br $ra
.align 16
@@ -1014,8 +1018,9 @@ $code.=<<___;
st $s2,40($key)
st $s3,44($key)
brct $rounds,.L256_continue
+ lghi $t0,14
lghi %r2,0
- lm${g} %r6,%r13,6*$SIZE_T($sp)
+ lm${g} %r4,%r13,4*$SIZE_T($sp)
br $ra
.align 16
@@ -1066,34 +1071,26 @@ $code.=<<___;
.type private_AES_set_decrypt_key,\@function
.align 16
private_AES_set_decrypt_key:
- st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
- st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers!
- bras $ra,AES_set_encrypt_key
- l${g} $key,4*$SIZE_T($sp)
+ #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
+ st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key!
+ bras $ra,_s390x_AES_set_encrypt_key
+ #l${g} $key,4*$SIZE_T($sp)
l${g} $ra,14*$SIZE_T($sp)
ltgr %r2,%r2
bnzr $ra
___
$code.=<<___ if (!$softonly);
- l $t0,240($key)
+ #l $t0,240($key)
lhi $t1,16
cr $t0,$t1
jl .Lgo
oill $t0,0x80 # set "decrypt" bit
st $t0,240($key)
br $ra
-
-.align 16
-.Ldkey_internal:
- st${g} $key,4*$SIZE_T($sp)
- st${g} $ra,14*$SIZE_T($sp)
- bras $ra,.Lekey_internal
- l${g} $key,4*$SIZE_T($sp)
- l${g} $ra,14*$SIZE_T($sp)
___
$code.=<<___;
-
-.Lgo: llgf $rounds,240($key)
+.align 16
+.Lgo: lgr $rounds,$t0 #llgf $rounds,240($key)
la $i1,0($key)
sllg $i2,$rounds,4
la $i2,0($i2,$key)
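
Note: the s390x hunks above change the internal calling convention rather than the key schedule itself. .Lekey_internal now saves %r4-%r13, so $key (%r4) survives the call, and each .L*_loop exit leaves the round count (or, in the km path, the km function code) in $t0, which private_AES_set_decrypt_key uses directly instead of spilling $key and re-reading 240($key). A minimal Perl model of the new contract (an illustrative sketch with hypothetical names, not the patch's code):

    # Sketch: the internal encrypt-key entry now preserves its $key
    # argument and hands the round count back in a register ($t0),
    # so the decrypt-key path needs no spill/reload of its own.
    sub _set_encrypt_key_internal {          # hypothetical stand-in
        my ($inp, $bits, $key) = @_;
        my $rounds = { 128 => 10, 192 => 12, 256 => 14 }->{$bits};
        # ... expand the schedule into $key ...
        return $rounds;                      # lghi $t0,10/12/14
    }
    sub set_decrypt_key {
        my ($inp, $bits, $key) = @_;
        my $rounds = _set_encrypt_key_internal($inp, $bits, $key);
        # ... invert the schedule in place using $rounds,
        #     without touching 240($key) ...
    }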
diff --git a/openssl/crypto/aes/asm/bsaes-x86_64.pl b/openssl/crypto/aes/asm/bsaes-x86_64.pl
index ff7e3afe8..c9c6312fa 100644
--- a/openssl/crypto/aes/asm/bsaes-x86_64.pl
+++ b/openssl/crypto/aes/asm/bsaes-x86_64.pl
@@ -65,12 +65,12 @@
# function is:
#
# conversion conversion/8x block
-# Core 2 410 0.37
-# Nehalem 310 0.35
-# Atom 570 0.26
+# Core 2 240 0.22
+# Nehalem 180 0.20
+# Atom 430 0.19
#
# The ratio values mean that 128-byte blocks will be processed
-# 21-27% slower, 256-byte blocks - 12-16%, 384-byte blocks - 8-11%,
+# 16-18% slower, 256-byte blocks - 9-10%, 384-byte blocks - 6-7%,
# etc. Then keep in mind that input sizes not divisible by 128 are
# *effectively* slower, especially shortest ones, e.g. consecutive
# 144-byte blocks are processed 44% slower than one would expect,
@@ -85,6 +85,7 @@
#
# Core 2 11.0
# Nehalem 9.16
+# Atom 20.9
#
# November 2011.
#
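
One way to read the updated ratio table: with a conversion/8x-block ratio r, an input of n 128-byte chunks pays r/n in amortized conversion overhead, and the quoted slowdowns match (r/n)/(1+r/n). A few lines of Perl reproduce the ranges in the comment (a back-of-envelope sketch, not part of the module):

    # Reproduce the quoted slowdown ranges from the conversion ratios.
    my %ratio = ('Core 2' => 0.22, 'Nehalem' => 0.20, 'Atom' => 0.19);
    for my $cpu (sort keys %ratio) {
        for my $n (1, 2, 3) {                 # 128-, 256-, 384-byte inputs
            my $o = $ratio{$cpu} / $n;        # amortized conversion cost
            printf "%-8s %3d bytes: %4.1f%% slower\n",
                   $cpu, 128 * $n, 100 * $o / (1 + $o);
        }
    }

For 128 bytes this prints 16.0-18.0% across the three CPUs, matching the "16-18% slower" figure above; 256 and 384 bytes land near the 9-10% and 6-7% ranges.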
@@ -754,7 +755,7 @@ _bsaes_encrypt8:
movdqa ($key), @XMM[9] # round 0 key
lea 0x10($key), $key
- movdqa 0x60($const), @XMM[8] # .LM0SR
+ movdqa 0x50($const), @XMM[8] # .LM0SR
pxor @XMM[9], @XMM[0] # xor with round0 key
pxor @XMM[9], @XMM[1]
pshufb @XMM[8], @XMM[0]
@@ -905,46 +906,82 @@ $code.=<<___;
.type _bsaes_key_convert,\@abi-omnipotent
.align 16
_bsaes_key_convert:
- lea .LBS1(%rip), $const
+ lea .Lmasks(%rip), $const
movdqu ($inp), %xmm7 # load round 0 key
- movdqa -0x10($const), %xmm8 # .LBS0
- movdqa 0x00($const), %xmm9 # .LBS1
- movdqa 0x10($const), %xmm10 # .LBS2
- movdqa 0x40($const), %xmm13 # .LM0
- movdqa 0x60($const), %xmm14 # .LNOT
-
- movdqu 0x10($inp), %xmm6 # load round 1 key
lea 0x10($inp), $inp
+ movdqa 0x00($const), %xmm0 # 0x01...
+ movdqa 0x10($const), %xmm1 # 0x02...
+ movdqa 0x20($const), %xmm2 # 0x04...
+ movdqa 0x30($const), %xmm3 # 0x08...
+ movdqa 0x40($const), %xmm4 # .LM0
+ pcmpeqd %xmm5, %xmm5 # .LNOT
+
+ movdqu ($inp), %xmm6 # load round 1 key
movdqa %xmm7, ($out) # save round 0 key
lea 0x10($out), $out
dec $rounds
jmp .Lkey_loop
.align 16
.Lkey_loop:
- pshufb %xmm13, %xmm6 # .LM0
- movdqa %xmm6, %xmm7
-___
- &bitslice_key (map("%xmm$_",(0..7, 8..12)));
-$code.=<<___;
- pxor %xmm14, %xmm5 # "pnot"
- pxor %xmm14, %xmm6
- pxor %xmm14, %xmm0
- pxor %xmm14, %xmm1
- lea 0x10($inp), $inp
- movdqa %xmm0, 0x00($out) # write bit-sliced round key
- movdqa %xmm1, 0x10($out)
- movdqa %xmm2, 0x20($out)
- movdqa %xmm3, 0x30($out)
- movdqa %xmm4, 0x40($out)
- movdqa %xmm5, 0x50($out)
- movdqa %xmm6, 0x60($out)
- movdqa %xmm7, 0x70($out)
+ pshufb %xmm4, %xmm6 # .LM0
+
+ movdqa %xmm0, %xmm8
+ movdqa %xmm1, %xmm9
+
+ pand %xmm6, %xmm8
+ pand %xmm6, %xmm9
+ movdqa %xmm2, %xmm10
+ pcmpeqb %xmm0, %xmm8
+ psllq \$4, %xmm0 # 0x10...
+ movdqa %xmm3, %xmm11
+ pcmpeqb %xmm1, %xmm9
+ psllq \$4, %xmm1 # 0x20...
+
+ pand %xmm6, %xmm10
+ pand %xmm6, %xmm11
+ movdqa %xmm0, %xmm12
+ pcmpeqb %xmm2, %xmm10
+ psllq \$4, %xmm2 # 0x40...
+ movdqa %xmm1, %xmm13
+ pcmpeqb %xmm3, %xmm11
+ psllq \$4, %xmm3 # 0x80...
+
+ movdqa %xmm2, %xmm14
+ movdqa %xmm3, %xmm15
+ pxor %xmm5, %xmm8 # "pnot"
+ pxor %xmm5, %xmm9
+
+ pand %xmm6, %xmm12
+ pand %xmm6, %xmm13
+ movdqa %xmm8, 0x00($out) # write bit-sliced round key
+ pcmpeqb %xmm0, %xmm12
+ psrlq \$4, %xmm0 # 0x01...
+ movdqa %xmm9, 0x10($out)
+ pcmpeqb %xmm1, %xmm13
+ psrlq \$4, %xmm1 # 0x02...
+ lea 0x10($inp), $inp
+
+ pand %xmm6, %xmm14
+ pand %xmm6, %xmm15
+ movdqa %xmm10, 0x20($out)
+ pcmpeqb %xmm2, %xmm14
+ psrlq \$4, %xmm2 # 0x04...
+ movdqa %xmm11, 0x30($out)
+ pcmpeqb %xmm3, %xmm15
+ psrlq \$4, %xmm3 # 0x08...
+ movdqu ($inp), %xmm6 # load next round key
+
+ pxor %xmm5, %xmm13 # "pnot"
+ pxor %xmm5, %xmm14
+ movdqa %xmm12, 0x40($out)
+ movdqa %xmm13, 0x50($out)
+ movdqa %xmm14, 0x60($out)
+ movdqa %xmm15, 0x70($out)
lea 0x80($out),$out
- movdqu ($inp), %xmm6 # load next round key
dec $rounds
jnz .Lkey_loop
- movdqa 0x70($const), %xmm7 # .L63
+ movdqa 0x50($const), %xmm7 # .L63
#movdqa %xmm6, ($out) # don't save last round key
ret
.size _bsaes_key_convert,.-_bsaes_key_convert
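
The rewritten _bsaes_key_convert no longer calls &bitslice_key. Instead it transposes each round key with the four .Lmasks constants plus pand/pcmpeqb, derives the 0x10..0x80 byte masks from the 0x01..0x08 ones with psllq/psrlq so nothing extra is loaded, and complements planes 0, 1, 5 and 6 by pxor with the all-ones register. Per byte the effect is equivalent to this scalar sketch (illustrative Perl, not the module's code; the .LM0 pshufb permutation is omitted):

    # Scalar model of the mask/compare trick: for each single-bit mask m,
    # pand gives (b & m), and pcmpeqb against m yields 0xff iff that bit
    # of the key byte is set.
    sub bitslice_byte {
        my ($b) = @_;
        my @planes;
        for my $i (0 .. 7) {
            my $m = 1 << $i;                          # .Lmasks / psllq result
            push @planes, (($b & $m) == $m) ? 0xff : 0x00;
        }
        # planes 0, 1, 5 and 6 are stored complemented ("pnot")
        $planes[$_] ^= 0xff for (0, 1, 5, 6);
        return @planes;   # one byte of each of the eight bit-sliced planes
    }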
@@ -2800,14 +2837,8 @@ _bsaes_const:
.quad 0x0504070600030201, 0x0f0e0d0c0a09080b
.LSRM0:
.quad 0x0304090e00050a0f, 0x01060b0c0207080d
-.LM0:
- .quad 0x02060a0e03070b0f, 0x0004080c0105090d
.LM0SR:
.quad 0x0a0e02060f03070b, 0x0004080c05090d01
-.LNOT: # magic constants
- .quad 0xffffffffffffffff, 0xffffffffffffffff
-.L63:
- .quad 0x6363636363636363, 0x6363636363636363
.LSWPUP: # byte-swap upper dword
.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908
.LSWPUPM0SR:
@@ -2830,6 +2861,15 @@ _bsaes_const:
.quad 0x0000000000000000, 0x0000000800000000
.Lxts_magic:
.long 0x87,0,1,0
+.Lmasks:
+ .quad 0x0101010101010101, 0x0101010101010101
+ .quad 0x0202020202020202, 0x0202020202020202
+ .quad 0x0404040404040404, 0x0404040404040404
+ .quad 0x0808080808080808, 0x0808080808080808
+.LM0:
+ .quad 0x02060a0e03070b0f, 0x0004080c0105090d
+.L63:
+ .quad 0x6363636363636363, 0x6363636363636363
.asciz "Bit-sliced AES for x86_64/SSSE3, Emilia Käsper, Peter Schwabe, Andy Polyakov"
.align 64
.size _bsaes_const,.-_bsaes_const
diff --git a/openssl/crypto/aes/asm/vpaes-x86.pl b/openssl/crypto/aes/asm/vpaes-x86.pl
index 84a6f6d33..1533e2c30 100644
--- a/openssl/crypto/aes/asm/vpaes-x86.pl
+++ b/openssl/crypto/aes/asm/vpaes-x86.pl
@@ -843,6 +843,8 @@ $k_dsbo=0x2c0; # decryption sbox final output
&mov ($out,&wparam(1)); # out
&mov ($round,&wparam(2)); # len
&mov ($key,&wparam(3)); # key
+ &sub ($round,16);
+ &jc (&label("cbc_abort"));
&lea ($base,&DWP(-56,"esp"));
&mov ($const,&wparam(4)); # ivp
&and ($base,-16);
@@ -853,7 +855,6 @@ $k_dsbo=0x2c0; # decryption sbox final output
&mov (&DWP(48,"esp"),$base);
&mov (&DWP(0,"esp"),$out); # save out
- &sub ($round,16);
&mov (&DWP(4,"esp"),$key) # save key
&mov (&DWP(8,"esp"),$const); # save ivp
&mov ($out,$round); # $out works as $len
@@ -896,6 +897,7 @@ $k_dsbo=0x2c0; # decryption sbox final output
&mov ($base,&DWP(8,"esp")); # restore ivp
&mov ("esp",&DWP(48,"esp"));
&movdqu (&QWP(0,$base),"xmm1"); # write IV
+&set_label("cbc_abort");
&function_end("${PREFIX}_cbc_encrypt");
&asm_finish();
diff --git a/openssl/crypto/aes/asm/vpaes-x86_64.pl b/openssl/crypto/aes/asm/vpaes-x86_64.pl
index 025470223..37998db5e 100644
--- a/openssl/crypto/aes/asm/vpaes-x86_64.pl
+++ b/openssl/crypto/aes/asm/vpaes-x86_64.pl
@@ -263,7 +263,7 @@ _vpaes_decrypt_core:
pshufb %xmm2, %xmm4 # 4 = sbou
pxor %xmm0, %xmm4 # 4 = sb1u + k
movdqa 0x70(%r10), %xmm0 # 0 : sbot
- movdqa .Lk_sr-.Lk_dsbd(%r11), %xmm2
+ movdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160
pshufb %xmm3, %xmm0 # 0 = sb1t
pxor %xmm4, %xmm0 # 0 = A
pshufb %xmm2, %xmm0
@@ -869,6 +869,8 @@ ${PREFIX}_cbc_encrypt:
___
($len,$key)=($key,$len);
$code.=<<___;
+ sub \$16,$len
+ jc .Lcbc_abort
___
$code.=<<___ if ($win64);
lea -0xb8(%rsp),%rsp
@@ -887,7 +889,6 @@ ___
$code.=<<___;
movdqu ($ivp),%xmm6 # load IV
sub $inp,$out
- sub \$16,$len
call _vpaes_preheat
cmp \$0,${enc}d
je .Lcbc_dec_loop
@@ -932,6 +933,7 @@ $code.=<<___ if ($win64);
.Lcbc_epilogue:
___
$code.=<<___;
+.Lcbc_abort:
ret
.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt
___
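
Both vpaes files get the same defensive change: the "sub \$16,$len" is hoisted to the very top of ${PREFIX}_cbc_encrypt and paired with a jump-on-carry to a new cbc_abort label, so a call with len < 16 (notably len == 0) returns before any stack setup or IV access rather than continuing with an underflowed length. The control flow amounts to this illustrative Perl sketch (not the generated code):

    # Sketch of the hoisted length guard in the CBC entry points.
    sub cbc_encrypt {
        my ($inp, $out, $len, $key, $ivp, $enc) = @_;
        $len -= 16;
        return if $len < 0;   # jc .Lcbc_abort: borrow means len was < 16
        # ... set up the frame, load the IV, run the encrypt/decrypt loop ...
    }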