"Fossies" - the Fresh Open Source Software Archive

Member "gmp-6.2.1/mpn/x86_64/addaddmul_1msb0.asm" (14 Nov 2020, 3203 Bytes) of package /linux/misc/gmp-6.2.1.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Generic Assembler source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 dnl  AMD64 mpn_addaddmul_1msb0, R = Au + Bv, u,v < 2^63.
    2 
    3 dnl  Copyright 2008 Free Software Foundation, Inc.
    4 
    5 dnl  This file is part of the GNU MP Library.
    6 dnl
    7 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    8 dnl  it under the terms of either:
    9 dnl
   10 dnl    * the GNU Lesser General Public License as published by the Free
   11 dnl      Software Foundation; either version 3 of the License, or (at your
   12 dnl      option) any later version.
   13 dnl
   14 dnl  or
   15 dnl
   16 dnl    * the GNU General Public License as published by the Free Software
   17 dnl      Foundation; either version 2 of the License, or (at your option) any
   18 dnl      later version.
   19 dnl
   20 dnl  or both in parallel, as here.
   21 dnl
   22 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
   23 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   24 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   25 dnl  for more details.
   26 dnl
   27 dnl  You should have received copies of the GNU General Public License and the
   28 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
   29 dnl  see https://www.gnu.org/licenses/.
   30 
   31 include(`../config.m4')
   32 
   33 C        cycles/limb
   34 C AMD K8,K9  2.167
   35 C AMD K10    2.167
   36 C Intel P4  12.0
   37 C Intel core2    4.0
   38 C Intel corei    ?
   39 C Intel atom     ?
   40 C VIA nano   ?
   41 
   42 C TODO
   43 C  * Perhaps handle various n mod 3 sizes better.  The code now is too large.
   44 
   45 C INPUT PARAMETERS
   46 define(`rp',    `%rdi')           C destination pointer; result limbs are stored through it
   47 define(`ap',    `%rsi')           C first source pointer; its limbs are multiplied by %r8
   48 define(`bp_param', `%rdx')        C second source pointer as passed in; only read once, to set up bp
   49 define(`n', `%rcx')               C limb count on entry; negated and used as a running index
   50 define(`u0',    `%r8')            C multiplier for the ap limbs (the code below writes %r8 directly)
   51 define(`v0',    `%r9')            C multiplier for the bp limbs (the code below writes %r9 directly)
   52 
   53 
   54 define(`bp', `%rbp')              C working copy of bp_param, needed because mul overwrites %rdx
   55 
   56 ASM_START()
   57     TEXT
   58     ALIGN(16)
      C mpn_addaddmul_1msb0 -- per the description at the top of this file,
      C R = A*u + B*v with u, v < 2^63.
      C
      C Inputs, in the registers named by the defines above (this is the
      C System V AMD64 integer argument order):
      C   rp (%rdi)        destination; n limbs are stored
      C   ap (%rsi)        first source; each limb is multiplied by %r8
      C   bp_param (%rdx)  second source; each limb is multiplied by %r9
      C   n (%rcx)         limb count; n = 0 is not handled by this code
      C   %r8, %r9         the two multipliers
      C Returns in %rax the last high accumulator, i.e. the limb that lies
      C above the n limbs stored at rp.
      C
      C %r12, %r10 and %r11 rotate as accumulators for successive limb
      C positions.  For each position the low halves of a*u and b*v are added
      C with carry into the next accumulator, while the two high halves are
      C summed with a plain adc whose carry-out is never captured.  That is
      C safe only because u, v < 2^63: each high half is then below 2^63, so
      C two high halves plus two carry bits still fit in 64 bits.
      C
      C NOTE(review): arguments are read straight from the registers listed
      C above; no adaptation for any other calling convention is visible in
      C this block.
   59 PROLOGUE(mpn_addaddmul_1msb0)
   60     push    %r12                  C %r12 and %rbp are overwritten below and
   61     push    %rbp                  C popped again before each ret
   62 
   63     lea (ap,n,8), ap              C ap, bp and rp are moved to just past their last limb
   64     lea (bp_param,n,8), bp        C bp lives in %rbp because mul overwrites %rdx
   65     lea (rp,n,8), rp
   66     neg n                         C negative index: (ptr,n,8) now addresses limb 0
   67 
   68     mov (ap,n,8), %rax
   69     mul %r8                       C first limb of A times u
   70     mov %rax, %r12                C low half  -> %r12
   71     mov (bp,n,8), %rax            C preload first limb of B for the mul by %r9
   72     mov %rdx, %r10                C high half -> %r10
   73     add $3, n                     C n = 3 - (limb count)
   74     jns L(end)                    C three limbs or fewer: skip the loop, use the tail code
   75 
   76     ALIGN(16)
      C Main loop: three limbs per pass.  On entry %r12:%r10 hold the a*u
      C product for the first limb of the group and %rax holds the matching
      C limb of B.  n has already been advanced by 3, hence the -24/-16/-8
      C displacements.
   77 L(top): mul %r9                   C limb 0 of the group: b*v
   78     add %rax, %r12
   79     mov -16(ap,n,8), %rax
   80     adc %rdx, %r10                C high halves plus carries; cannot overflow (see header)
   81     mov %r12, -24(rp,n,8)         C store limb 0
   82     mul %r8                       C limb 1: a*u
   83     add %rax, %r10
   84     mov -16(bp,n,8), %rax
   85     mov $0, R32(%r11)             C mov leaves the carry from the add intact for the adc
   86     adc %rdx, %r11
   87     mul %r9                       C limb 1: b*v
   88     add %rax, %r10
   89     mov -8(ap,n,8), %rax
   90     adc %rdx, %r11
   91     mov %r10, -16(rp,n,8)         C store limb 1
   92     mul %r8                       C limb 2: a*u
   93     add %rax, %r11
   94     mov -8(bp,n,8), %rax
   95     mov $0, R32(%r12)             C again mov, not a flag-changing zeroing
   96     adc %rdx, %r12
   97     mul %r9                       C limb 2: b*v
   98     add %rax, %r11
   99     adc %rdx, %r12
  100     mov (ap,n,8), %rax
  101     mul %r8                       C a*u for the first limb after this group
  102     add %rax, %r12
  103     mov %r11, -8(rp,n,8)          C store limb 2
  104     mov (bp,n,8), %rax            C preload the matching limb of B
  105     mov $0, R32(%r10)
  106     adc %rdx, %r10
  107     add $3, n
  108     js  L(top)                    C n still negative: more than three limbs remain
  109 
      C Tail.  %r12:%r10 and %rax are set up exactly as at L(top).
  110 L(end): cmp $1, R32(n)            C n is 0, 1 or 2 here: 3, 2 or 1 limbs remain
  111     ja  2f                        C n = 2: one limb left
  112     jz  1f                        C n = 1: two limbs left
  113     C n = 0: three limbs left, same steps as one loop pass without the preload
  114     mul %r9
  115     add %rax, %r12
  116     mov -16(ap), %rax
  117     adc %rdx, %r10
  118     mov %r12, -24(rp)
  119     mul %r8
  120     add %rax, %r10
  121     mov -16(bp), %rax
  122     mov $0, R32(%r11)
  123     adc %rdx, %r11
  124     mul %r9
  125     add %rax, %r10
  126     mov -8(ap), %rax
  127     adc %rdx, %r11
  128     mov %r10, -16(rp)
  129     mul %r8
  130     add %rax, %r11
  131     mov -8(bp), %rax
  132     mov $0, R32(%r12)
  133     adc %rdx, %r12
  134     mul %r9
  135     add %rax, %r11
  136     adc %rdx, %r12
  137     mov %r11, -8(rp)              C last result limb
  138     mov %r12, %rax                C return the final high accumulator
  139     pop %rbp
  140     pop %r12
  141     ret
  142 
  143 1:  mul %r9                       C two limbs left
  144     add %rax, %r12
  145     mov -8(ap), %rax
  146     adc %rdx, %r10
  147     mov %r12, -16(rp)
  148     mul %r8
  149     add %rax, %r10
  150     mov -8(bp), %rax
  151     mov $0, R32(%r11)
  152     adc %rdx, %r11
  153     mul %r9
  154     add %rax, %r10
  155     adc %rdx, %r11
  156     mov %r10, -8(rp)              C last result limb
  157     mov %r11, %rax                C return the final high accumulator
  158     pop %rbp
  159     pop %r12
  160     ret
  161 
  162 2:  mul %r9                       C one limb left
  163     add %rax, %r12
  164     mov %r12, -8(rp)              C mov does not touch the carry needed by the adc below
  165     adc %rdx, %r10
  166     mov %r10, %rax                C return the final high accumulator
  167     pop %rbp
  168     pop %r12
  169     ret
  170 EPILOGUE()