diff options
Diffstat (limited to 'openssl/crypto/aes/asm')
-rw-r--r-- | openssl/crypto/aes/asm/aes-armv4.pl | 5 | ||||
-rw-r--r-- | openssl/crypto/aes/asm/aes-s390x.pl | 37 | ||||
-rw-r--r-- | openssl/crypto/aes/asm/bsaes-x86_64.pl | 118 | ||||
-rw-r--r-- | openssl/crypto/aes/asm/vpaes-x86.pl | 4 | ||||
-rw-r--r-- | openssl/crypto/aes/asm/vpaes-x86_64.pl | 6 |
5 files changed, 106 insertions, 64 deletions
diff --git a/openssl/crypto/aes/asm/aes-armv4.pl b/openssl/crypto/aes/asm/aes-armv4.pl index 943ce45ff..86b86c4a0 100644 --- a/openssl/crypto/aes/asm/aes-armv4.pl +++ b/openssl/crypto/aes/asm/aes-armv4.pl @@ -408,6 +408,7 @@ _armv4_AES_encrypt: .type private_AES_set_encrypt_key,%function .align 5 private_AES_set_encrypt_key: +_armv4_AES_set_encrypt_key: sub r3,pc,#8 @ AES_set_encrypt_key teq r0,#0 moveq r0,#-1 @@ -425,7 +426,7 @@ private_AES_set_encrypt_key: bne .Labrt .Lok: stmdb sp!,{r4-r12,lr} - sub $tbl,r3,#private_AES_set_encrypt_key-AES_Te-1024 @ Te4 + sub $tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4 mov $rounds,r0 @ inp mov lr,r1 @ bits @@ -685,7 +686,7 @@ private_AES_set_encrypt_key: .align 5 private_AES_set_decrypt_key: str lr,[sp,#-4]! @ push lr - bl private_AES_set_encrypt_key + bl _armv4_AES_set_encrypt_key teq r0,#0 ldrne lr,[sp],#4 @ pop lr bne .Labrt diff --git a/openssl/crypto/aes/asm/aes-s390x.pl b/openssl/crypto/aes/asm/aes-s390x.pl index f749a52d7..445a1e676 100644 --- a/openssl/crypto/aes/asm/aes-s390x.pl +++ b/openssl/crypto/aes/asm/aes-s390x.pl @@ -783,6 +783,7 @@ $code.=<<___; .type private_AES_set_encrypt_key,\@function .align 16 private_AES_set_encrypt_key: +_s390x_AES_set_encrypt_key: lghi $t0,0 cl${g}r $inp,$t0 je .Lminus1 @@ -836,7 +837,8 @@ $code.=<<___ if (!$softonly); je 1f lg %r1,24($inp) stg %r1,24($key) -1: st $bits,236($key) # save bits +1: st $bits,236($key) # save bits [for debugging purposes] + lgr $t0,%r5 st %r5,240($key) # save km code lghi %r2,0 br %r14 @@ -844,7 +846,7 @@ ___ $code.=<<___; .align 16 .Lekey_internal: - stm${g} %r6,%r13,6*$SIZE_T($sp) # all non-volatile regs + stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key larl $tbl,AES_Te+2048 @@ -904,8 +906,9 @@ $code.=<<___; la $key,16($key) # key+=4 la $t3,4($t3) # i++ brct $rounds,.L128_loop + lghi $t0,10 lghi %r2,0 - lm${g} %r6,%r13,6*$SIZE_T($sp) + lm${g} %r4,%r13,4*$SIZE_T($sp) br $ra .align 16 @@ -952,8 +955,9 @@ $code.=<<___; st $s2,32($key) st $s3,36($key) brct $rounds,.L192_continue + lghi $t0,12 lghi %r2,0 - lm${g} %r6,%r13,6*$SIZE_T($sp) + lm${g} %r4,%r13,4*$SIZE_T($sp) br $ra .align 16 @@ -1014,8 +1018,9 @@ $code.=<<___; st $s2,40($key) st $s3,44($key) brct $rounds,.L256_continue + lghi $t0,14 lghi %r2,0 - lm${g} %r6,%r13,6*$SIZE_T($sp) + lm${g} %r4,%r13,4*$SIZE_T($sp) br $ra .align 16 @@ -1066,34 +1071,26 @@ $code.=<<___; .type private_AES_set_decrypt_key,\@function .align 16 private_AES_set_decrypt_key: - st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to - st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers! - bras $ra,AES_set_encrypt_key - l${g} $key,4*$SIZE_T($sp) + #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to + st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key! + bras $ra,_s390x_AES_set_encrypt_key + #l${g} $key,4*$SIZE_T($sp) l${g} $ra,14*$SIZE_T($sp) ltgr %r2,%r2 bnzr $ra ___ $code.=<<___ if (!$softonly); - l $t0,240($key) + #l $t0,240($key) lhi $t1,16 cr $t0,$t1 jl .Lgo oill $t0,0x80 # set "decrypt" bit st $t0,240($key) br $ra - -.align 16 -.Ldkey_internal: - st${g} $key,4*$SIZE_T($sp) - st${g} $ra,14*$SIZE_T($sp) - bras $ra,.Lekey_internal - l${g} $key,4*$SIZE_T($sp) - l${g} $ra,14*$SIZE_T($sp) ___ $code.=<<___; - -.Lgo: llgf $rounds,240($key) +.align 16 +.Lgo: lgr $rounds,$t0 #llgf $rounds,240($key) la $i1,0($key) sllg $i2,$rounds,4 la $i2,0($i2,$key) diff --git a/openssl/crypto/aes/asm/bsaes-x86_64.pl b/openssl/crypto/aes/asm/bsaes-x86_64.pl index ff7e3afe8..c9c6312fa 100644 --- a/openssl/crypto/aes/asm/bsaes-x86_64.pl +++ b/openssl/crypto/aes/asm/bsaes-x86_64.pl @@ -65,12 +65,12 @@ # function is: # # conversion conversion/8x block -# Core 2 410 0.37 -# Nehalem 310 0.35 -# Atom 570 0.26 +# Core 2 240 0.22 +# Nehalem 180 0.20 +# Atom 430 0.19 # # The ratio values mean that 128-byte blocks will be processed -# 21-27% slower, 256-byte blocks - 12-16%, 384-byte blocks - 8-11%, +# 16-18% slower, 256-byte blocks - 9-10%, 384-byte blocks - 6-7%, # etc. Then keep in mind that input sizes not divisible by 128 are # *effectively* slower, especially shortest ones, e.g. consecutive # 144-byte blocks are processed 44% slower than one would expect, @@ -85,6 +85,7 @@ # # Core 2 11.0 # Nehalem 9.16 +# Atom 20.9 # # November 2011. # @@ -754,7 +755,7 @@ _bsaes_encrypt8: movdqa ($key), @XMM[9] # round 0 key lea 0x10($key), $key - movdqa 0x60($const), @XMM[8] # .LM0SR + movdqa 0x50($const), @XMM[8] # .LM0SR pxor @XMM[9], @XMM[0] # xor with round0 key pxor @XMM[9], @XMM[1] pshufb @XMM[8], @XMM[0] @@ -905,46 +906,82 @@ $code.=<<___; .type _bsaes_key_convert,\@abi-omnipotent .align 16 _bsaes_key_convert: - lea .LBS1(%rip), $const + lea .Lmasks(%rip), $const movdqu ($inp), %xmm7 # load round 0 key - movdqa -0x10($const), %xmm8 # .LBS0 - movdqa 0x00($const), %xmm9 # .LBS1 - movdqa 0x10($const), %xmm10 # .LBS2 - movdqa 0x40($const), %xmm13 # .LM0 - movdqa 0x60($const), %xmm14 # .LNOT - - movdqu 0x10($inp), %xmm6 # load round 1 key lea 0x10($inp), $inp + movdqa 0x00($const), %xmm0 # 0x01... + movdqa 0x10($const), %xmm1 # 0x02... + movdqa 0x20($const), %xmm2 # 0x04... + movdqa 0x30($const), %xmm3 # 0x08... + movdqa 0x40($const), %xmm4 # .LM0 + pcmpeqd %xmm5, %xmm5 # .LNOT + + movdqu ($inp), %xmm6 # load round 1 key movdqa %xmm7, ($out) # save round 0 key lea 0x10($out), $out dec $rounds jmp .Lkey_loop .align 16 .Lkey_loop: - pshufb %xmm13, %xmm6 # .LM0 - movdqa %xmm6, %xmm7 -___ - &bitslice_key (map("%xmm$_",(0..7, 8..12))); -$code.=<<___; - pxor %xmm14, %xmm5 # "pnot" - pxor %xmm14, %xmm6 - pxor %xmm14, %xmm0 - pxor %xmm14, %xmm1 - lea 0x10($inp), $inp - movdqa %xmm0, 0x00($out) # write bit-sliced round key - movdqa %xmm1, 0x10($out) - movdqa %xmm2, 0x20($out) - movdqa %xmm3, 0x30($out) - movdqa %xmm4, 0x40($out) - movdqa %xmm5, 0x50($out) - movdqa %xmm6, 0x60($out) - movdqa %xmm7, 0x70($out) + pshufb %xmm4, %xmm6 # .LM0 + + movdqa %xmm0, %xmm8 + movdqa %xmm1, %xmm9 + + pand %xmm6, %xmm8 + pand %xmm6, %xmm9 + movdqa %xmm2, %xmm10 + pcmpeqb %xmm0, %xmm8 + psllq \$4, %xmm0 # 0x10... + movdqa %xmm3, %xmm11 + pcmpeqb %xmm1, %xmm9 + psllq \$4, %xmm1 # 0x20... + + pand %xmm6, %xmm10 + pand %xmm6, %xmm11 + movdqa %xmm0, %xmm12 + pcmpeqb %xmm2, %xmm10 + psllq \$4, %xmm2 # 0x40... + movdqa %xmm1, %xmm13 + pcmpeqb %xmm3, %xmm11 + psllq \$4, %xmm3 # 0x80... + + movdqa %xmm2, %xmm14 + movdqa %xmm3, %xmm15 + pxor %xmm5, %xmm8 # "pnot" + pxor %xmm5, %xmm9 + + pand %xmm6, %xmm12 + pand %xmm6, %xmm13 + movdqa %xmm8, 0x00($out) # write bit-sliced round key + pcmpeqb %xmm0, %xmm12 + psrlq \$4, %xmm0 # 0x01... + movdqa %xmm9, 0x10($out) + pcmpeqb %xmm1, %xmm13 + psrlq \$4, %xmm1 # 0x02... + lea 0x10($inp), $inp + + pand %xmm6, %xmm14 + pand %xmm6, %xmm15 + movdqa %xmm10, 0x20($out) + pcmpeqb %xmm2, %xmm14 + psrlq \$4, %xmm2 # 0x04... + movdqa %xmm11, 0x30($out) + pcmpeqb %xmm3, %xmm15 + psrlq \$4, %xmm3 # 0x08... + movdqu ($inp), %xmm6 # load next round key + + pxor %xmm5, %xmm13 # "pnot" + pxor %xmm5, %xmm14 + movdqa %xmm12, 0x40($out) + movdqa %xmm13, 0x50($out) + movdqa %xmm14, 0x60($out) + movdqa %xmm15, 0x70($out) lea 0x80($out),$out - movdqu ($inp), %xmm6 # load next round key dec $rounds jnz .Lkey_loop - movdqa 0x70($const), %xmm7 # .L63 + movdqa 0x50($const), %xmm7 # .L63 #movdqa %xmm6, ($out) # don't save last round key ret .size _bsaes_key_convert,.-_bsaes_key_convert @@ -2800,14 +2837,8 @@ _bsaes_const: .quad 0x0504070600030201, 0x0f0e0d0c0a09080b .LSRM0: .quad 0x0304090e00050a0f, 0x01060b0c0207080d -.LM0: - .quad 0x02060a0e03070b0f, 0x0004080c0105090d .LM0SR: .quad 0x0a0e02060f03070b, 0x0004080c05090d01 -.LNOT: # magic constants - .quad 0xffffffffffffffff, 0xffffffffffffffff -.L63: - .quad 0x6363636363636363, 0x6363636363636363 .LSWPUP: # byte-swap upper dword .quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 .LSWPUPM0SR: @@ -2830,6 +2861,15 @@ _bsaes_const: .quad 0x0000000000000000, 0x0000000800000000 .Lxts_magic: .long 0x87,0,1,0 +.Lmasks: + .quad 0x0101010101010101, 0x0101010101010101 + .quad 0x0202020202020202, 0x0202020202020202 + .quad 0x0404040404040404, 0x0404040404040404 + .quad 0x0808080808080808, 0x0808080808080808 +.LM0: + .quad 0x02060a0e03070b0f, 0x0004080c0105090d +.L63: + .quad 0x6363636363636363, 0x6363636363636363 .asciz "Bit-sliced AES for x86_64/SSSE3, Emilia Käsper, Peter Schwabe, Andy Polyakov" .align 64 .size _bsaes_const,.-_bsaes_const diff --git a/openssl/crypto/aes/asm/vpaes-x86.pl b/openssl/crypto/aes/asm/vpaes-x86.pl index 84a6f6d33..1533e2c30 100644 --- a/openssl/crypto/aes/asm/vpaes-x86.pl +++ b/openssl/crypto/aes/asm/vpaes-x86.pl @@ -843,6 +843,8 @@ $k_dsbo=0x2c0; # decryption sbox final output &mov ($out,&wparam(1)); # out &mov ($round,&wparam(2)); # len &mov ($key,&wparam(3)); # key + &sub ($round,16); + &jc (&label("cbc_abort")); &lea ($base,&DWP(-56,"esp")); &mov ($const,&wparam(4)); # ivp &and ($base,-16); @@ -853,7 +855,6 @@ $k_dsbo=0x2c0; # decryption sbox final output &mov (&DWP(48,"esp"),$base); &mov (&DWP(0,"esp"),$out); # save out - &sub ($round,16); &mov (&DWP(4,"esp"),$key) # save key &mov (&DWP(8,"esp"),$const); # save ivp &mov ($out,$round); # $out works as $len @@ -896,6 +897,7 @@ $k_dsbo=0x2c0; # decryption sbox final output &mov ($base,&DWP(8,"esp")); # restore ivp &mov ("esp",&DWP(48,"esp")); &movdqu (&QWP(0,$base),"xmm1"); # write IV +&set_label("cbc_abort"); &function_end("${PREFIX}_cbc_encrypt"); &asm_finish(); diff --git a/openssl/crypto/aes/asm/vpaes-x86_64.pl b/openssl/crypto/aes/asm/vpaes-x86_64.pl index 025470223..37998db5e 100644 --- a/openssl/crypto/aes/asm/vpaes-x86_64.pl +++ b/openssl/crypto/aes/asm/vpaes-x86_64.pl @@ -263,7 +263,7 @@ _vpaes_decrypt_core: pshufb %xmm2, %xmm4 # 4 = sbou pxor %xmm0, %xmm4 # 4 = sb1u + k movdqa 0x70(%r10), %xmm0 # 0 : sbot - movdqa .Lk_sr-.Lk_dsbd(%r11), %xmm2 + movdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 pshufb %xmm3, %xmm0 # 0 = sb1t pxor %xmm4, %xmm0 # 0 = A pshufb %xmm2, %xmm0 @@ -869,6 +869,8 @@ ${PREFIX}_cbc_encrypt: ___ ($len,$key)=($key,$len); $code.=<<___; + sub \$16,$len + jc .Lcbc_abort ___ $code.=<<___ if ($win64); lea -0xb8(%rsp),%rsp @@ -887,7 +889,6 @@ ___ $code.=<<___; movdqu ($ivp),%xmm6 # load IV sub $inp,$out - sub \$16,$len call _vpaes_preheat cmp \$0,${enc}d je .Lcbc_dec_loop @@ -932,6 +933,7 @@ $code.=<<___ if ($win64); .Lcbc_epilogue: ___ $code.=<<___; +.Lcbc_abort: ret .size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt ___ |