x86_64-mont5.pl (openssl-1.1.1o) | : | x86_64-mont5.pl (openssl-1.1.1p) | ||
---|---|---|---|---|
#! /usr/bin/env perl | #! /usr/bin/env perl | |||
# Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved. | # Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved. | |||
# | # | |||
# Licensed under the OpenSSL license (the "License"). You may not use | # Licensed under the OpenSSL license (the "License"). You may not use | |||
# this file except in compliance with the License. You can obtain a copy | # this file except in compliance with the License. You can obtain a copy | |||
# in the file LICENSE in the source distribution or at | # in the file LICENSE in the source distribution or at | |||
# https://www.openssl.org/source/license.html | # https://www.openssl.org/source/license.html | |||
# ==================================================================== | # ==================================================================== | |||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | |||
# project. The module is, however, dual licensed under OpenSSL and | # project. The module is, however, dual licensed under OpenSSL and | |||
# CRYPTOGAMS licenses depending on where you obtain it. For further | # CRYPTOGAMS licenses depending on where you obtain it. For further | |||
skipping to change at line 2097 | skipping to change at line 2097 | |||
inc %rcx # pass %cf | inc %rcx # pass %cf | |||
jnz .Lsqr4x_sub | jnz .Lsqr4x_sub | |||
mov $num,%r10 # prepare for back-to-back call | mov $num,%r10 # prepare for back-to-back call | |||
neg $num # restore $num | neg $num # restore $num | |||
ret | ret | |||
.cfi_endproc | .cfi_endproc | |||
.size __bn_post4x_internal,.-__bn_post4x_internal | .size __bn_post4x_internal,.-__bn_post4x_internal | |||
___ | ___ | |||
} | } | |||
{ | ||||
$code.=<<___; | ||||
.globl bn_from_montgomery | ||||
.type bn_from_montgomery,\@abi-omnipotent | ||||
.align 32 | ||||
bn_from_montgomery: | ||||
.cfi_startproc | ||||
testl \$7,`($win64?"48(%rsp)":"%r9d")` | ||||
jz bn_from_mont8x | ||||
xor %eax,%eax | ||||
ret | ||||
.cfi_endproc | ||||
.size bn_from_montgomery,.-bn_from_montgomery | ||||
.type bn_from_mont8x,\@function,6 | ||||
.align 32 | ||||
bn_from_mont8x: | ||||
.cfi_startproc | ||||
.byte 0x67 | ||||
mov %rsp,%rax | ||||
.cfi_def_cfa_register %rax | ||||
push %rbx | ||||
.cfi_push %rbx | ||||
push %rbp | ||||
.cfi_push %rbp | ||||
push %r12 | ||||
.cfi_push %r12 | ||||
push %r13 | ||||
.cfi_push %r13 | ||||
push %r14 | ||||
.cfi_push %r14 | ||||
push %r15 | ||||
.cfi_push %r15 | ||||
.Lfrom_prologue: | ||||
shl \$3,${num}d # convert $num to bytes | ||||
lea ($num,$num,2),%r10 # 3*$num in bytes | ||||
neg $num | ||||
mov ($n0),$n0 # *n0 | ||||
############################################################## | ||||
# Ensure that stack frame doesn't alias with $rptr+3*$num | ||||
# modulo 4096, which covers ret[num], am[num] and n[num] | ||||
# (see bn_exp.c). The stack is allocated to aligned with | ||||
# bn_power5's frame, and as bn_from_montgomery happens to be | ||||
# last operation, we use the opportunity to cleanse it. | ||||
# | ||||
lea -320(%rsp,$num,2),%r11 | ||||
mov %rsp,%rbp | ||||
sub $rptr,%r11 | ||||
and \$4095,%r11 | ||||
cmp %r11,%r10 | ||||
jb .Lfrom_sp_alt | ||||
sub %r11,%rbp # align with $aptr | ||||
lea -320(%rbp,$num,2),%rbp # future alloca(frame+2*$num*8+256) | ||||
jmp .Lfrom_sp_done | ||||
.align 32 | ||||
.Lfrom_sp_alt: | ||||
lea 4096-320(,$num,2),%r10 | ||||
lea -320(%rbp,$num,2),%rbp # future alloca(frame+2*$num*8+256) | ||||
sub %r10,%r11 | ||||
mov \$0,%r10 | ||||
cmovc %r10,%r11 | ||||
sub %r11,%rbp | ||||
.Lfrom_sp_done: | ||||
and \$-64,%rbp | ||||
mov %rsp,%r11 | ||||
sub %rbp,%r11 | ||||
and \$-4096,%r11 | ||||
lea (%rbp,%r11),%rsp | ||||
mov (%rsp),%r10 | ||||
cmp %rbp,%rsp | ||||
ja .Lfrom_page_walk | ||||
jmp .Lfrom_page_walk_done | ||||
.Lfrom_page_walk: | ||||
lea -4096(%rsp),%rsp | ||||
mov (%rsp),%r10 | ||||
cmp %rbp,%rsp | ||||
ja .Lfrom_page_walk | ||||
.Lfrom_page_walk_done: | ||||
mov $num,%r10 | ||||
neg $num | ||||
############################################################## | ||||
# Stack layout | ||||
# | ||||
# +0 saved $num, used in reduction section | ||||
# +8 &t[2*$num], used in reduction section | ||||
# +32 saved *n0 | ||||
# +40 saved %rsp | ||||
# +48 t[2*$num] | ||||
# | ||||
mov $n0, 32(%rsp) | ||||
mov %rax, 40(%rsp) # save original %rsp | ||||
.cfi_cfa_expression %rsp+40,deref,+8 | ||||
.Lfrom_body: | ||||
mov $num,%r11 | ||||
lea 48(%rsp),%rax | ||||
pxor %xmm0,%xmm0 | ||||
jmp .Lmul_by_1 | ||||
.align 32 | ||||
.Lmul_by_1: | ||||
movdqu ($aptr),%xmm1 | ||||
movdqu 16($aptr),%xmm2 | ||||
movdqu 32($aptr),%xmm3 | ||||
movdqa %xmm0,(%rax,$num) | ||||
movdqu 48($aptr),%xmm4 | ||||
movdqa %xmm0,16(%rax,$num) | ||||
.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 # lea 64($aptr),$aptr | ||||
movdqa %xmm1,(%rax) | ||||
movdqa %xmm0,32(%rax,$num) | ||||
movdqa %xmm2,16(%rax) | ||||
movdqa %xmm0,48(%rax,$num) | ||||
movdqa %xmm3,32(%rax) | ||||
movdqa %xmm4,48(%rax) | ||||
lea 64(%rax),%rax | ||||
sub \$64,%r11 | ||||
jnz .Lmul_by_1 | ||||
movq $rptr,%xmm1 | ||||
movq $nptr,%xmm2 | ||||
.byte 0x67 | ||||
mov $nptr,%rbp | ||||
movq %r10, %xmm3 # -num | ||||
___ | ||||
$code.=<<___ if ($addx); | ||||
mov OPENSSL_ia32cap_P+8(%rip),%r11d | ||||
and \$0x80108,%r11d | ||||
cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1 | ||||
jne .Lfrom_mont_nox | ||||
lea (%rax,$num),$rptr | ||||
call __bn_sqrx8x_reduction | ||||
call __bn_postx4x_internal | ||||
pxor %xmm0,%xmm0 | ||||
lea 48(%rsp),%rax | ||||
jmp .Lfrom_mont_zero | ||||
.align 32 | ||||
.Lfrom_mont_nox: | ||||
___ | ||||
$code.=<<___; | ||||
call __bn_sqr8x_reduction | ||||
call __bn_post4x_internal | ||||
pxor %xmm0,%xmm0 | ||||
lea 48(%rsp),%rax | ||||
jmp .Lfrom_mont_zero | ||||
.align 32 | ||||
.Lfrom_mont_zero: | ||||
mov 40(%rsp),%rsi # restore %rsp | ||||
.cfi_def_cfa %rsi,8 | ||||
movdqa %xmm0,16*0(%rax) | ||||
movdqa %xmm0,16*1(%rax) | ||||
movdqa %xmm0,16*2(%rax) | ||||
movdqa %xmm0,16*3(%rax) | ||||
lea 16*4(%rax),%rax | ||||
sub \$32,$num | ||||
jnz .Lfrom_mont_zero | ||||
mov \$1,%rax | ||||
mov -48(%rsi),%r15 | ||||
.cfi_restore %r15 | ||||
mov -40(%rsi),%r14 | ||||
.cfi_restore %r14 | ||||
mov -32(%rsi),%r13 | ||||
.cfi_restore %r13 | ||||
mov -24(%rsi),%r12 | ||||
.cfi_restore %r12 | ||||
mov -16(%rsi),%rbp | ||||
.cfi_restore %rbp | ||||
mov -8(%rsi),%rbx | ||||
.cfi_restore %rbx | ||||
lea (%rsi),%rsp | ||||
.cfi_def_cfa_register %rsp | ||||
.Lfrom_epilogue: | ||||
ret | ||||
.cfi_endproc | ||||
.size bn_from_mont8x,.-bn_from_mont8x | ||||
___ | ||||
} | ||||
}}} | }}} | |||
if ($addx) {{{ | if ($addx) {{{ | |||
my $bp="%rdx"; # restore original value | my $bp="%rdx"; # restore original value | |||
$code.=<<___; | $code.=<<___; | |||
.type bn_mulx4x_mont_gather5,\@function,6 | .type bn_mulx4x_mont_gather5,\@function,6 | |||
.align 32 | .align 32 | |||
bn_mulx4x_mont_gather5: | bn_mulx4x_mont_gather5: | |||
.cfi_startproc | .cfi_startproc | |||
mov %rsp,%rax | mov %rsp,%rax | |||
skipping to change at line 3884 | skipping to change at line 3697 | |||
.rva .LSEH_end_bn_mul_mont_gather5 | .rva .LSEH_end_bn_mul_mont_gather5 | |||
.rva .LSEH_info_bn_mul_mont_gather5 | .rva .LSEH_info_bn_mul_mont_gather5 | |||
.rva .LSEH_begin_bn_mul4x_mont_gather5 | .rva .LSEH_begin_bn_mul4x_mont_gather5 | |||
.rva .LSEH_end_bn_mul4x_mont_gather5 | .rva .LSEH_end_bn_mul4x_mont_gather5 | |||
.rva .LSEH_info_bn_mul4x_mont_gather5 | .rva .LSEH_info_bn_mul4x_mont_gather5 | |||
.rva .LSEH_begin_bn_power5 | .rva .LSEH_begin_bn_power5 | |||
.rva .LSEH_end_bn_power5 | .rva .LSEH_end_bn_power5 | |||
.rva .LSEH_info_bn_power5 | .rva .LSEH_info_bn_power5 | |||
.rva .LSEH_begin_bn_from_mont8x | ||||
.rva .LSEH_end_bn_from_mont8x | ||||
.rva .LSEH_info_bn_from_mont8x | ||||
___ | ___ | |||
$code.=<<___ if ($addx); | $code.=<<___ if ($addx); | |||
.rva .LSEH_begin_bn_mulx4x_mont_gather5 | .rva .LSEH_begin_bn_mulx4x_mont_gather5 | |||
.rva .LSEH_end_bn_mulx4x_mont_gather5 | .rva .LSEH_end_bn_mulx4x_mont_gather5 | |||
.rva .LSEH_info_bn_mulx4x_mont_gather5 | .rva .LSEH_info_bn_mulx4x_mont_gather5 | |||
.rva .LSEH_begin_bn_powerx5 | .rva .LSEH_begin_bn_powerx5 | |||
.rva .LSEH_end_bn_powerx5 | .rva .LSEH_end_bn_powerx5 | |||
.rva .LSEH_info_bn_powerx5 | .rva .LSEH_info_bn_powerx5 | |||
___ | ___ | |||
skipping to change at line 3919 | skipping to change at line 3728 | |||
.align 8 | .align 8 | |||
.LSEH_info_bn_mul4x_mont_gather5: | .LSEH_info_bn_mul4x_mont_gather5: | |||
.byte 9,0,0,0 | .byte 9,0,0,0 | |||
.rva mul_handler | .rva mul_handler | |||
.rva .Lmul4x_prologue,.Lmul4x_body,.Lmul4x_epilogue # Handler Data[] | .rva .Lmul4x_prologue,.Lmul4x_body,.Lmul4x_epilogue # Handler Data[] | |||
.align 8 | .align 8 | |||
.LSEH_info_bn_power5: | .LSEH_info_bn_power5: | |||
.byte 9,0,0,0 | .byte 9,0,0,0 | |||
.rva mul_handler | .rva mul_handler | |||
.rva .Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue # Handler Data[] | .rva .Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue # Handler Data[] | |||
.align 8 | ||||
.LSEH_info_bn_from_mont8x: | ||||
.byte 9,0,0,0 | ||||
.rva mul_handler | ||||
.rva .Lfrom_prologue,.Lfrom_body,.Lfrom_epilogue # Handler Data[] | ||||
___ | ___ | |||
$code.=<<___ if ($addx); | $code.=<<___ if ($addx); | |||
.align 8 | .align 8 | |||
.LSEH_info_bn_mulx4x_mont_gather5: | .LSEH_info_bn_mulx4x_mont_gather5: | |||
.byte 9,0,0,0 | .byte 9,0,0,0 | |||
.rva mul_handler | .rva mul_handler | |||
.rva .Lmulx4x_prologue,.Lmulx4x_body,.Lmulx4x_epilogue # Handler Data[] | .rva .Lmulx4x_prologue,.Lmulx4x_body,.Lmulx4x_epilogue # Handler Data[] | |||
.align 8 | .align 8 | |||
.LSEH_info_bn_powerx5: | .LSEH_info_bn_powerx5: | |||
.byte 9,0,0,0 | .byte 9,0,0,0 | |||
End of changes. 4 change blocks. | ||||
198 lines changed or deleted | 1 line changed or added |