"Fossies" - the Fresh Open Source Software Archive

Member "gmp-6.2.1/mpn/sparc64/ultrasparct3/addmul_1.asm" (14 Nov 2020, 3847 Bytes) of package /linux/misc/gmp-6.2.1.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Generic Assembler source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 dnl  SPARC v9 mpn_addmul_1 for T3/T4/T5.
    2 
    3 dnl  Contributed to the GNU project by David Miller and Torbjörn Granlund.
    4 
    5 dnl  Copyright 2013 Free Software Foundation, Inc.
    6 
    7 dnl  This file is part of the GNU MP Library.
    8 dnl
    9 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
   10 dnl  it under the terms of either:
   11 dnl
   12 dnl    * the GNU Lesser General Public License as published by the Free
   13 dnl      Software Foundation; either version 3 of the License, or (at your
   14 dnl      option) any later version.
   15 dnl
   16 dnl  or
   17 dnl
   18 dnl    * the GNU General Public License as published by the Free Software
   19 dnl      Foundation; either version 2 of the License, or (at your option) any
   20 dnl      later version.
   21 dnl
   22 dnl  or both in parallel, as here.
   23 dnl
   24 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
   25 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   26 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   27 dnl  for more details.
   28 dnl
   29 dnl  You should have received copies of the GNU General Public License and the
   30 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
   31 dnl  see https://www.gnu.org/licenses/.
   32 
   33 include(`../config.m4')
   34 
   35 C          cycles/limb
   36 C UltraSPARC T3:    26
   37 C UltraSPARC T4:    4.5
   38 
   39 C INPUT PARAMETERS
   40 define(`rp', `%i0')         C destination pointer: limbs are loaded from and stored back to [rp+...]
   41 define(`up', `%i1')         C source pointer: limbs are only loaded from [up+...]
   42 define(`n',  `%i2')         C limb count, decremented as limbs are consumed
   43 define(`v0', `%i3')         C multiplier applied to every source limb
   44 
   45 define(`u0',  `%l0')        C u0-u3: source limbs loaded from up for one 4-limb group
   46 define(`u1',  `%l1')
   47 define(`u2',  `%l2')
   48 define(`u3',  `%l3')
   49 define(`r0',  `%l4')        C r0-r3: destination limbs loaded from rp, updated, then stored back
   50 define(`r1',  `%l5')
   51 define(`r2',  `%l6')
   52 define(`r3',  `%l7')
   53 
   54 ASM_START()
   55     REGISTER(%g2,#scratch)          C declared scratch, although no instruction below references %g2
   56     REGISTER(%g3,#scratch)          C %g3 holds n mod 4 during the entry dispatch
   57 PROLOGUE(mpn_addmul_1)              C rp[i] += up[i] * v0 over n limbs; the final carry limb is written to %i0 at L(xit)
   58     save    %sp, -176, %sp          C open a new register window with a 176-byte frame
   59     ldx [up+0], %g1                 C %g1 = up[0], needed by every entry path
   60 
   61     and n, 3, %g3                   C %g3 = n mod 4 selects how many limbs are peeled off before the loop
   62     brz %g3, L(b0)                  C n mod 4 == 0: nothing to peel, go set up the 4-limb loop
   63      addcc  %g0, %g0, %g5           C clear carry limb, flag
   64     cmp %g3, 2
   65     bcs %xcc, L(b01)                C unsigned %g3 < 2 and %g3 != 0 here, so n mod 4 == 1
   66      nop
   67     be  %xcc, L(b10)                C n mod 4 == 2
   68      ldx    [up+8], %g5             C delay slot: %g5 = up[1], used by both L(b10) and L(b11)
   69 
   70 L(b11): ldx [up+16], u3             C n mod 4 == 3: peel three limbs
   71     mulx    %g1, v0, %o2            C %o2 = low half of up[0]*v0
   72     umulxhi(%g1, v0, %o3)           C %o3 = high half of up[0]*v0 (umulxhi macro is defined outside this file)
   73     ldx [rp+0], r1
   74     mulx    %g5, v0, %o4            C up[1]*v0 -> %o5:%o4
   75     ldx [rp+8], r2
   76     umulxhi(%g5, v0, %o5)
   77     ldx [rp+16], r3
   78     mulx    u3, v0, %g4             C up[2]*v0 -> %g5:%g4; %g5 now becomes the carry limb
   79     umulxhi(u3, v0, %g5)
   80     addcc   %o3, %o4, %o4           C add each high half into the low half of the next product
   81     addxccc(%o5, %g4, %g4)
   82     addxc(  %g0, %g5, %g5)          C fold the carry out of that chain into the carry limb
   83     addcc   r1, %o2, r1             C add the three product limbs to rp[0..2]
   84     stx r1, [rp+0]
   85     addxccc(r2, %o4, r2)
   86     stx r2, [rp+8]
   87     addxccc(r3, %g4, r3)            C carry flag from here stays pending until L(xit) or L(lo0) consumes it
   88     stx r3, [rp+16]
   89     add n, -3, n                    C plain add, so the pending carry flag is not disturbed
   90     add up, 24, up
   91     brz n, L(xit)                   C n was exactly 3: only the carry limb remains to be returned
   92      add    rp, 24, rp              C delay slot: runs whether or not the branch is taken
   93     b   L(com)
   94      nop
   95 
   96 L(b10): mulx    %g1, v0, %o4        C n mod 4 == 2: peel two limbs; up[0]*v0 -> %o5:%o4
   97     ldx [rp+0], r2
   98     umulxhi(%g1, v0, %o5)
   99     ldx [rp+8], r3
  100     mulx    %g5, v0, %g4            C up[1]*v0 -> %g5:%g4; the high half overwrites up[1] in %g5
  101     umulxhi(%g5, v0, %g5)
  102     addcc   %o5, %g4, %g4           C high half of the first product joins the low half of the second
  103     addxc(  %g0, %g5, %g5)          C fold that carry into the carry limb
  104     addcc   r2, %o4, r2             C add the two product limbs to rp[0..1]
  105     stx r2, [rp+0]
  106     addxccc(r3, %g4, r3)            C carry flag stays pending for L(xit) or L(lo0)
  107     stx r3, [rp+8]
  108     add n, -2, n
  109     add up, 16, up
  110     brz n, L(xit)                   C n was exactly 2
  111      add    rp, 16, rp              C delay slot: runs on both paths
  112     b   L(com)
  113      nop
  114 
  115 L(b01): ldx [rp+0], r3              C n mod 4 == 1: peel one limb
  116     mulx    %g1, v0, %g4
  117     umulxhi(%g1, v0, %g5)           C high half becomes the carry limb
  118     addcc   r3, %g4, r3             C carry flag stays pending for L(xit) or L(lo0)
  119     stx r3, [rp+0]
  120     add n, -1, n
  121     add up, 8, up
  122     brz n, L(xit)                   C n was exactly 1
  123      add    rp, 8, rp               C delay slot: runs on both paths
  124 
  125 L(com): ldx [up+0], %g1             C peeled paths rejoin here: %g1 = first limb of the remaining multiple-of-4 block
  126 L(b0):  ldx [up+8], u1
  127     ldx [up+16], u2
  128     ldx [up+24], u3
  129     mulx    %g1, v0, %o0            C first product of the group -> %o1:%o0
  130     umulxhi(%g1, v0, %o1)
  131     b   L(lo0)                      C enter the loop at its second half
  132      nop
  133 
  134     ALIGN(16)
  135 L(top): ldx [up+0], u0              C loop body handles 4 limbs per pass; this first half completes the previous group
  136     addxc(  %g0, %g5, %g5)      C propagate carry into carry limb
  137     ldx [up+8], u1
  138     addcc   r0, %o0, r0             C add the previous group of product limbs to the rp limbs loaded at L(lo0)
  139     ldx [up+16], u2
  140     addxccc(r1, %o2, r1)
  141     ldx [up+24], u3
  142     addxccc(r2, %o4, r2)
  143     stx r0, [rp-32]                 C rp was already advanced by 32, hence the negative offsets
  144     addxccc(r3, %g4, r3)            C carry out of this group is consumed by the addxccc at L(lo0)
  145     stx r1, [rp-24]
  146     mulx    u0, v0, %o0             C start the next group: first product -> %o1:%o0
  147     stx r2, [rp-16]
  148     umulxhi(u0, v0, %o1)
  149     stx r3, [rp-8]
  150 L(lo0): mulx    u1, v0, %o2         C second half: remaining three products, interleaved with loads of the next rp limbs
  151     ldx [rp+0], r0
  152     umulxhi(u1, v0, %o3)
  153     ldx [rp+8], r1
  154     mulx    u2, v0, %o4
  155     ldx [rp+16], r2
  156     umulxhi(u2, v0, %o5)
  157     ldx [rp+24], r3
  158     mulx    u3, v0, %g4
  159     addxccc(%g5, %o0, %o0)          C add the old carry limb plus the pending carry flag into the lowest product limb
  160     umulxhi(u3, v0, %g5)            C new carry limb, written only after the old one was consumed on the line above
  161     add up, 32, up
  162     addxccc(%o1, %o2, %o2)          C chain each high half into the next low half
  163     add rp, 32, rp
  164     addxccc(%o3, %o4, %o4)
  165     add n, -4, n
  166     addxccc(%o5, %g4, %g4)
  167     brgz    n, L(top)               C loop while limbs remain
  168      nop
  169 
  170     addxc(  %g0, %g5, %g5)          C wind-down: same add and store sequence as the first half of the loop, with no new loads
  171     addcc   r0, %o0, r0
  172     stx r0, [rp-32]
  173     addxccc(r1, %o2, r1)
  174     stx r1, [rp-24]
  175     addxccc(r2, %o4, r2)
  176     stx r2, [rp-16]
  177     addxccc(r3, %g4, r3)
  178     stx r3, [rp-8]
  179 L(xit): addxc(  %g0, %g5, %i0)      C result = carry limb plus the still-pending carry flag
  180     ret
  181      restore                        C delay slot of ret: closes the register window opened by save
  182 EPILOGUE()