"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "crypto/bn/asm/x86_64-mont5.pl" between
openssl-1.1.1o.tar.gz and openssl-1.1.1p.tar.gz

About: OpenSSL is a toolkit implementing the Transport Layer Security (TLS) protocols (including SSLv3) as well as a full-strength general purpose cryptographic library. Long Term Support (LTS) version (includes support for TLSv1.3).

x86_64-mont5.pl  (openssl-1.1.1o):x86_64-mont5.pl  (openssl-1.1.1p)
#! /usr/bin/env perl #! /usr/bin/env perl
# Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved. # Copyright 2011-2022 The OpenSSL Project Authors. All Rights Reserved.
# #
# Licensed under the OpenSSL license (the "License"). You may not use # Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy # this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at # in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html # https://www.openssl.org/source/license.html
# ==================================================================== # ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and # project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further # CRYPTOGAMS licenses depending on where you obtain it. For further
skipping to change at line 2097 skipping to change at line 2097
inc %rcx # pass %cf inc %rcx # pass %cf
jnz .Lsqr4x_sub jnz .Lsqr4x_sub
mov $num,%r10 # prepare for back-to-back call mov $num,%r10 # prepare for back-to-back call
neg $num # restore $num neg $num # restore $num
ret ret
.cfi_endproc .cfi_endproc
.size __bn_post4x_internal,.-__bn_post4x_internal .size __bn_post4x_internal,.-__bn_post4x_internal
___ ___
} }
{
$code.=<<___;
# bn_from_montgomery: thin dispatcher placed in front of bn_from_mont8x.
# The symbol type is abi-omnipotent and the code locates the sixth
# argument itself: 48(%rsp) on Win64, %r9d otherwise.
.globl bn_from_montgomery
.type bn_from_montgomery,\@abi-omnipotent
.align 32
bn_from_montgomery:
.cfi_startproc
# Check the low three bits of the sixth argument. When they are all
# clear (the value is a multiple of 8) control is handed to
# bn_from_mont8x with a plain jump, so that routine returns directly
# to our caller.
testl \$7,`($win64?"48(%rsp)":"%r9d")`
jz bn_from_mont8x
# Otherwise no work is done and 0 is returned in %eax.
# NOTE(review): presumably 0 tells the caller to use another code
# path; confirm at the call site.
xor %eax,%eax
ret
.cfi_endproc
.size bn_from_montgomery,.-bn_from_montgomery
# bn_from_mont8x: six-argument worker reached from bn_from_montgomery.
# Outline of what the code below does:
#   - save the incoming %rsp in %rax and push the six callee-saved
#     registers;
#   - place a 64-byte aligned scratch frame below the current stack,
#     touching the stack in 4096-byte steps on the way down;
#   - copy the input at $aptr into the lower half of the scratch area
#     and zero the upper half;
#   - run the reduction and post-processing helpers (the AD*X/BMI
#     flavour when the capability bits are set and the build has it);
#   - wipe the scratch area, restore the saved registers and the
#     original %rsp, and return 1 in %rax.
.type bn_from_mont8x,\@function,6
.align 32
bn_from_mont8x:
.cfi_startproc
.byte 0x67
# %rax keeps the entry value of %rsp for the whole prologue; it is
# stored in the frame further down and used to unwind.
mov %rsp,%rax
.cfi_def_cfa_register %rax
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15
.Lfrom_prologue:
shl \$3,${num}d # convert $num to bytes
lea ($num,$num,2),%r10 # 3*$num in bytes
# From here until the end of the page walk $num holds the negated
# byte count, so the scaled-index forms below subtract 2*num bytes.
neg $num
mov ($n0),$n0 # *n0
##############################################################
# Ensure that stack frame doesn't alias with $rptr+3*$num
# modulo 4096, which covers ret[num], am[num] and n[num]
# (see bn_exp.c). The stack is allocated to align with
# bn_power5's frame, and as bn_from_montgomery happens to be
# last operation, we use the opportunity to cleanse it.
#
# %r11 = (candidate frame address - $rptr) modulo 4096.
lea -320(%rsp,$num,2),%r11
mov %rsp,%rbp
sub $rptr,%r11
and \$4095,%r11
# Unsigned compare: take the alternative placement when the
# 3*num byte span in %r10 is below the page offset in %r11.
cmp %r11,%r10
jb .Lfrom_sp_alt
sub %r11,%rbp # align with $aptr
lea -320(%rbp,$num,2),%rbp # future alloca(frame+2*$num*8+256)
jmp .Lfrom_sp_done
.align 32
.Lfrom_sp_alt:
lea 4096-320(,$num,2),%r10
lea -320(%rbp,$num,2),%rbp # future alloca(frame+2*$num*8+256)
sub %r10,%r11
# A mov is used to load zero so that the carry produced by the sub
# above is still intact for the cmovc; on borrow %r11 becomes 0.
mov \$0,%r10
cmovc %r10,%r11
sub %r11,%rbp
.Lfrom_sp_done:
# Round the frame address down to a 64-byte boundary.
and \$-64,%rbp
# Start at the frame address plus the whole number of 4096-byte
# steps separating it from the current %rsp, then walk down one step
# at a time, reading one word per step, until %rsp reaches %rbp.
mov %rsp,%r11
sub %rbp,%r11
and \$-4096,%r11
lea (%rbp,%r11),%rsp
mov (%rsp),%r10
cmp %rbp,%rsp
ja .Lfrom_page_walk
jmp .Lfrom_page_walk_done
.Lfrom_page_walk:
lea -4096(%rsp),%rsp
mov (%rsp),%r10
cmp %rbp,%rsp
ja .Lfrom_page_walk
.Lfrom_page_walk_done:
# %r10 keeps the negated byte count; $num becomes positive again.
mov $num,%r10
neg $num
##############################################################
# Stack layout
#
# +0 saved $num, used in reduction section
# +8 &t[2*$num], used in reduction section
# +32 saved *n0
# +40 saved %rsp
# +48 t[2*$num]
#
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
.Lfrom_body:
# %r11 counts the bytes left to copy, %rax walks t[] from offset 48,
# %xmm0 supplies the zeros for the upper half.
mov $num,%r11
lea 48(%rsp),%rax
pxor %xmm0,%xmm0
jmp .Lmul_by_1
.align 32
.Lmul_by_1:
# Each pass moves 64 bytes from $aptr into t[] (unaligned loads,
# aligned stores) and clears the matching 64 bytes located $num
# bytes further up. The loop ends when %r11 reaches exactly zero,
# so the byte count has to be a multiple of 64.
movdqu ($aptr),%xmm1
movdqu 16($aptr),%xmm2
movdqu 32($aptr),%xmm3
movdqa %xmm0,(%rax,$num)
movdqu 48($aptr),%xmm4
movdqa %xmm0,16(%rax,$num)
.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 # lea 64($aptr),$aptr
movdqa %xmm1,(%rax)
movdqa %xmm0,32(%rax,$num)
movdqa %xmm2,16(%rax)
movdqa %xmm0,48(%rax,$num)
movdqa %xmm3,32(%rax)
movdqa %xmm4,48(%rax)
lea 64(%rax),%rax
sub \$64,%r11
jnz .Lmul_by_1
# Stash the result pointer, the modulus pointer and the negated byte
# count in %xmm1..%xmm3; %rbp also receives the modulus pointer.
# NOTE(review): the helpers called below are outside this view;
# presumably they read these registers - confirm against them.
movq $rptr,%xmm1
movq $nptr,%xmm2
.byte 0x67
mov $nptr,%rbp
movq %r10, %xmm3 # -num
___
$code.=<<___ if ($addx);
mov OPENSSL_ia32cap_P+8(%rip),%r11d
and \$0x80108,%r11d
cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1
jne .Lfrom_mont_nox
# All three capability bits are set: use the x-flavoured helpers.
# $rptr is set to %rax + $num before the call.
lea (%rax,$num),$rptr
call __bn_sqrx8x_reduction
call __bn_postx4x_internal
# Prepare the wipe loop: zero source in %xmm0, %rax back at t[].
pxor %xmm0,%xmm0
lea 48(%rsp),%rax
jmp .Lfrom_mont_zero
.align 32
.Lfrom_mont_nox:
___
$code.=<<___;
call __bn_sqr8x_reduction
call __bn_post4x_internal
# Prepare the wipe loop: zero source in %xmm0, %rax back at t[].
pxor %xmm0,%xmm0
lea 48(%rsp),%rax
jmp .Lfrom_mont_zero
.align 32
.Lfrom_mont_zero:
# Wipe loop: 64 bytes are cleared per pass while $num drops by 32,
# so 2*$num bytes starting at t[] are zeroed in total.
# NOTE(review): relies on $num holding the positive byte count when
# the helpers return - confirm against the helper epilogues.
# The saved %rsp is reloaded into %rsi on every pass.
mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
movdqa %xmm0,16*0(%rax)
movdqa %xmm0,16*1(%rax)
movdqa %xmm0,16*2(%rax)
movdqa %xmm0,16*3(%rax)
lea 16*4(%rax),%rax
sub \$32,$num
jnz .Lfrom_mont_zero
# Return value 1, then reload the six registers pushed in the
# prologue from just below the original stack pointer held in %rsi.
mov \$1,%rax
mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lfrom_epilogue:
ret
.cfi_endproc
.size bn_from_mont8x,.-bn_from_mont8x
___
}
}}} }}}
if ($addx) {{{ if ($addx) {{{
my $bp="%rdx"; # restore original value my $bp="%rdx"; # restore original value
$code.=<<___; $code.=<<___;
.type bn_mulx4x_mont_gather5,\@function,6 .type bn_mulx4x_mont_gather5,\@function,6
.align 32 .align 32
bn_mulx4x_mont_gather5: bn_mulx4x_mont_gather5:
.cfi_startproc .cfi_startproc
mov %rsp,%rax mov %rsp,%rax
skipping to change at line 3884 skipping to change at line 3697
.rva .LSEH_end_bn_mul_mont_gather5 .rva .LSEH_end_bn_mul_mont_gather5
.rva .LSEH_info_bn_mul_mont_gather5 .rva .LSEH_info_bn_mul_mont_gather5
.rva .LSEH_begin_bn_mul4x_mont_gather5 .rva .LSEH_begin_bn_mul4x_mont_gather5
.rva .LSEH_end_bn_mul4x_mont_gather5 .rva .LSEH_end_bn_mul4x_mont_gather5
.rva .LSEH_info_bn_mul4x_mont_gather5 .rva .LSEH_info_bn_mul4x_mont_gather5
.rva .LSEH_begin_bn_power5 .rva .LSEH_begin_bn_power5
.rva .LSEH_end_bn_power5 .rva .LSEH_end_bn_power5
.rva .LSEH_info_bn_power5 .rva .LSEH_info_bn_power5
.rva .LSEH_begin_bn_from_mont8x
.rva .LSEH_end_bn_from_mont8x
.rva .LSEH_info_bn_from_mont8x
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
.rva .LSEH_begin_bn_mulx4x_mont_gather5 .rva .LSEH_begin_bn_mulx4x_mont_gather5
.rva .LSEH_end_bn_mulx4x_mont_gather5 .rva .LSEH_end_bn_mulx4x_mont_gather5
.rva .LSEH_info_bn_mulx4x_mont_gather5 .rva .LSEH_info_bn_mulx4x_mont_gather5
.rva .LSEH_begin_bn_powerx5 .rva .LSEH_begin_bn_powerx5
.rva .LSEH_end_bn_powerx5 .rva .LSEH_end_bn_powerx5
.rva .LSEH_info_bn_powerx5 .rva .LSEH_info_bn_powerx5
___ ___
skipping to change at line 3919 skipping to change at line 3728
.align 8 .align 8
.LSEH_info_bn_mul4x_mont_gather5: .LSEH_info_bn_mul4x_mont_gather5:
.byte 9,0,0,0 .byte 9,0,0,0
.rva mul_handler .rva mul_handler
.rva .Lmul4x_prologue,.Lmul4x_body,.Lmul4x_epilogue # Handler Data[] .rva .Lmul4x_prologue,.Lmul4x_body,.Lmul4x_epilogue # Handler Data[]
.align 8 .align 8
.LSEH_info_bn_power5: .LSEH_info_bn_power5:
.byte 9,0,0,0 .byte 9,0,0,0
.rva mul_handler .rva mul_handler
.rva .Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue # Handler Data[] .rva .Lpower5_prologue,.Lpower5_body,.Lpower5_epilogue # Handler Data[]
.align 8
.LSEH_info_bn_from_mont8x:
.byte 9,0,0,0
.rva mul_handler
.rva .Lfrom_prologue,.Lfrom_body,.Lfrom_epilogue # Handler Data[]
___ ___
$code.=<<___ if ($addx); $code.=<<___ if ($addx);
.align 8 .align 8
.LSEH_info_bn_mulx4x_mont_gather5: .LSEH_info_bn_mulx4x_mont_gather5:
.byte 9,0,0,0 .byte 9,0,0,0
.rva mul_handler .rva mul_handler
.rva .Lmulx4x_prologue,.Lmulx4x_body,.Lmulx4x_epilogue # Handler Data[] .rva .Lmulx4x_prologue,.Lmulx4x_body,.Lmulx4x_epilogue # Handler Data[]
.align 8 .align 8
.LSEH_info_bn_powerx5: .LSEH_info_bn_powerx5:
.byte 9,0,0,0 .byte 9,0,0,0
 End of changes. 4 change blocks. 
198 lines changed or deleted 1 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)