"Fossies" - the Fresh Open Source Software Archive

Member "gmp-6.2.1/mpn/pa32/hppa1_1/pa7100/addmul_1.asm" (14 Nov 2020, 4729 Bytes) of package /linux/misc/gmp-6.2.1.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Generic Assembler source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 dnl  HP-PA 7100/7200 mpn_addmul_1 -- Multiply a limb vector with a limb and
    2 dnl  add the result to a second limb vector.
    3 
    4 dnl  Copyright 1995, 2000-2003 Free Software Foundation, Inc.
    5 
    6 dnl  This file is part of the GNU MP Library.
    7 dnl
    8 dnl  The GNU MP Library is free software; you can redistribute it and/or modify
    9 dnl  it under the terms of either:
   10 dnl
   11 dnl    * the GNU Lesser General Public License as published by the Free
   12 dnl      Software Foundation; either version 3 of the License, or (at your
   13 dnl      option) any later version.
   14 dnl
   15 dnl  or
   16 dnl
   17 dnl    * the GNU General Public License as published by the Free Software
   18 dnl      Foundation; either version 2 of the License, or (at your option) any
   19 dnl      later version.
   20 dnl
   21 dnl  or both in parallel, as here.
   22 dnl
   23 dnl  The GNU MP Library is distributed in the hope that it will be useful, but
   24 dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   25 dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   26 dnl  for more details.
   27 dnl
   28 dnl  You should have received copies of the GNU General Public License and the
   29 dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
   30 dnl  see https://www.gnu.org/licenses/.
   31 
   32 include(`../config.m4')
   33 
   34 C INPUT PARAMETERS
   35 define(`res_ptr',`%r26')            C limbs that are read, added to and stored back
   36 define(`s1_ptr',`%r25')             C limbs that are multiplied by s2_limb
   37 define(`size_param',`%r24')         C limb count; used as a down-counter by the code
   38 define(`s2_limb',`%r23')            C the single multiplier limb
   39 
      C WORKING REGISTERS
      C   cylimb   running carry limb; it is the value left in %r28 at return
      C   s0-s3    limbs loaded from res_ptr
      C   loN/hiN  the two 32-bit words of product N after it is reloaded from
      C            the staging area (hiN is the word at the lower address)
      C %r3-%r7 (s2, s3, lo1, lo2, lo3) are the registers the routine saves in
      C its frame before use and reloads before returning.
   40 define(`cylimb',`%r28')
   41 define(`s0',`%r19')
   42 define(`s1',`%r20')
   43 define(`s2',`%r3')
   44 define(`s3',`%r4')
   45 define(`lo0',`%r21')
   46 define(`lo1',`%r5')
   47 define(`lo2',`%r6')
   48 define(`lo3',`%r7')
   49 define(`hi0',`%r22')
   50 define(`hi1',`%r23')                C safe to reuse: s2_limb is spilled for %fr31R before hi1 is first written
   51 define(`hi2',`%r29')
   52 define(`hi3',`%r1')
   53 
   54 ASM_START()
      C mpn_addmul_1: multiply the size_param limbs at s1_ptr by s2_limb, add the
      C products into the limbs at res_ptr, and leave the final carry limb in
      C cylimb (%r28) at return (presumably the ABI return-value register --
      C confirm against the calling convention).
      C
      C Structure, as visible below:
      C   * entry: make a 128-byte frame and copy s2_limb through memory into
      C     %fr31R, the multiplier operand of every xmpyu.
      C   * fewer than 4 limbs: handled entirely by the one-limb loop at loop2.
      C   * otherwise: save %r3-%r7, optionally peel one limb (see the bb below),
      C     run a 4-limbs-per-pass software pipeline (0 / loop / end), then
      C     finish the 0-3 leftover limbs in loop2.
      C
      C Carry convention: a single carry chain in the PSW carry bit threads through
      C the whole routine.  Each add/addc leaves a carry that a later addc
      C consumes, so the code relies on the intervening addib, load and store
      C instructions leaving that bit alone.  Do not reorder the add/addc
      C instructions relative to each other.
      C
      C Count convention: while the 4-limb code runs, size_param holds the number
      C of limbs not yet loaded minus 4; few_limbs adds the 4 back before the
      C one-limb loop counts it down to zero.
   55 PROLOGUE(mpn_addmul_1)
   56 C   .callinfo   frame=128,no_calls
   57 
   58     ldo 128(%r30),%r30                C %r30 += 128: make the frame
   59     stws    s2_limb,-16(%r30)         C through memory, so fldws can fetch it
   60     add  %r0,%r0,cylimb         C clear cy and cylimb
   61     addib,< -4,size_param,L(few_limbs)  C bias the count by -4; n < 4: one-limb loop
   62     fldws   -16(%r30),%fr31R          C delay slot: %fr31R = s2_limb (loop2 needs it too)
   63 
      C Four or more limbs.  %r31 addresses a 32-byte staging area, -16..15(%r31),
      C through which products move from FP registers to general registers.
   64     ldo -112(%r30),%r31
   65     stw %r3,-96(%r30)                 C %r3-%r7 are reloaded after L(end)
   66     stw %r4,-92(%r30)
   67     stw %r5,-88(%r30)
   68     stw %r6,-84(%r30)
   69     stw %r7,-80(%r30)
   70 
      C Peel one limb unless the branch below is taken on bit 29 of s1_ptr.  The
      C peel advances s1_ptr by 4 before the fldds doubleword loads; presumably
      C this makes s1_ptr 8-byte aligned for them (PA-RISC numbers bits from the
      C most significant end, so bit 29 has weight 4) -- confirm against the
      C architecture manual.  With ,n the fldws,ma that follows is nullified when
      C the branch is taken, so s1_ptr is not advanced on that path.
   71     bb,>=,n  s1_ptr,29,L(0)
   72 
   73     fldws,ma 4(s1_ptr),%fr4           C one s1 limb, s1_ptr += 4
   74     ldws     0(res_ptr),s0
   75     xmpyu    %fr4,%fr31R,%fr5         C 64-bit product of the limb and s2_limb
   76     fstds    %fr5,-16(%r31)
   77     ldws    -16(%r31),cylimb          C word at the lower address starts the carry limb
   78     ldws    -12(%r31),lo0
   79     add  s0,lo0,s0                    C carry out stays pending for the next addc
   80     addib,< -1,size_param,L(few_limbs)  C fewer than 4 limbs remain: one-limb loop
   81     stws,ma  s0,4(res_ptr)            C delay slot: store the sum, res_ptr += 4
   82 
   83 C start software pipeline ----------------------------------------------------
   84 LDEF(0)
      C Pipeline fill: multiply the first four s1 limbs.  No res limbs are touched
      C in this stage; the low-word sums built here are added to res one stage
      C later, in loop or in end.
   85     fldds,ma 8(s1_ptr),%fr4
   86     fldds,ma 8(s1_ptr),%fr8
   87 
   88     xmpyu    %fr4L,%fr31R,%fr5
   89     xmpyu    %fr4R,%fr31R,%fr6
   90     xmpyu    %fr8L,%fr31R,%fr9
   91     xmpyu    %fr8R,%fr31R,%fr10
   92 
   93     fstds    %fr5,-16(%r31)
   94     fstds    %fr6,-8(%r31)
   95     fstds    %fr9,0(%r31)
   96     fstds    %fr10,8(%r31)
   97 
      C Reload each 8-byte product as two words: hiN from the lower address, loN
      C from the next word.
   98     ldws   -16(%r31),hi0
   99     ldws   -12(%r31),lo0
  100     ldws    -8(%r31),hi1
  101     ldws    -4(%r31),lo1
  102     ldws     0(%r31),hi2
  103     ldws     4(%r31),lo2
  104     ldws     8(%r31),hi3
  105     ldws    12(%r31),lo3
  106 
      C loN += high word of the previous product (cylimb for lo0) + pending carry.
      C The carry entering here is the one left by the peel add, or the cleared
      C carry from the entry add when the peel was skipped.
  107     addc     lo0,cylimb,lo0
  108     addc     lo1,hi0,lo1
  109     addc     lo2,hi1,lo2
  110     addc     lo3,hi2,lo3
  111 
  112     addib,<  -4,size_param,L(end)     C fewer than 4 limbs left to load: drain
  113     addc     %r0,hi3,cylimb         C propagate carry into cylimb
  114 C main loop ------------------------------------------------------------------
  115 LDEF(loop)
      C Steady state: start the next four products while the previous four
      C low-word sums (lo0-lo3) are added to the res limbs and stored.
  116     fldds,ma 8(s1_ptr),%fr4
  117     fldds,ma 8(s1_ptr),%fr8
  118 
  119     ldws     0(res_ptr),s0
  120     xmpyu    %fr4L,%fr31R,%fr5
  121     ldws     4(res_ptr),s1
  122     xmpyu    %fr4R,%fr31R,%fr6
  123     ldws     8(res_ptr),s2
  124     xmpyu    %fr8L,%fr31R,%fr9
  125     ldws    12(res_ptr),s3
  126     xmpyu    %fr8R,%fr31R,%fr10
  127 
      C The add below (not addc) opens a new carry chain, dropping the carry out of
      C the cylimb update; that carry is always clear because a product high word
      C is at most 2^32-2, so hi3 plus a carry cannot wrap.
  128     fstds    %fr5,-16(%r31)
  129     add  s0,lo0,s0
  130     fstds    %fr6,-8(%r31)
  131     addc     s1,lo1,s1
  132     fstds    %fr9,0(%r31)
  133     addc     s2,lo2,s2
  134     fstds    %fr10,8(%r31)
  135     addc     s3,lo3,s3
  136 
  137     ldws   -16(%r31),hi0
  138     ldws   -12(%r31),lo0
  139     ldws    -8(%r31),hi1
  140     ldws    -4(%r31),lo1
  141     ldws     0(%r31),hi2
  142     ldws     4(%r31),lo2
  143     ldws     8(%r31),hi3
  144     ldws    12(%r31),lo3
  145 
      C lo0 takes cylimb plus the carry out of the s3 addc above; the chain then
      C continues as in the fill stage while the finished res limbs are stored.
  146     addc     lo0,cylimb,lo0
  147     stws,ma  s0,4(res_ptr)
  148     addc     lo1,hi0,lo1
  149     stws,ma  s1,4(res_ptr)
  150     addc     lo2,hi1,lo2
  151     stws,ma  s2,4(res_ptr)
  152     addc     lo3,hi2,lo3
  153     stws,ma  s3,4(res_ptr)
  154 
  155     addib,>= -4,size_param,L(loop)    C at least 4 more limbs to load: go again
  156     addc     %r0,hi3,cylimb         C propagate carry into cylimb
  157 C finish software pipeline ---------------------------------------------------
  158 LDEF(end)
      C Drain: add the last four low-word sums to res.  cylimb and the carry out of
      C the s3 addc stay live into few_limbs.
  159     ldws     0(res_ptr),s0
  160     ldws     4(res_ptr),s1
  161     ldws     8(res_ptr),s2
  162     ldws    12(res_ptr),s3
  163 
  164     add  s0,lo0,s0
  165     stws,ma  s0,4(res_ptr)
  166     addc     s1,lo1,s1
  167     stws,ma  s1,4(res_ptr)
  168     addc     s2,lo2,s2
  169     stws,ma  s2,4(res_ptr)
  170     addc     s3,lo3,s3
  171     stws,ma  s3,4(res_ptr)
  172 
  173 C restore callee-saves registers ---------------------------------------------
  174     ldw -96(%r30),%r3
  175     ldw -92(%r30),%r4
  176     ldw -88(%r30),%r5
  177     ldw -84(%r30),%r6
  178     ldw -80(%r30),%r7
  179 
      C few_limbs is reached from the entry test (n < 4), from the peel, and by
      C falling through from the restore code above.  On every path cylimb and the
      C PSW carry together hold the running carry, and size_param is biased by -4.
  180 LDEF(few_limbs)
  181     addib,=,n 4,size_param,L(ret)     C remove the bias; no limbs left: return (fldws nullified)
  182 
      C One limb per pass.  The product is staged at -16(%r30), the slot that held
      C the s2_limb spill; the multiplier itself is already in %fr31R.
  183 LDEF(loop2)
  184     fldws,ma 4(s1_ptr),%fr4
  185     ldws     0(res_ptr),s0
  186     xmpyu    %fr4,%fr31R,%fr5
  187     fstds    %fr5,-16(%r30)
  188     ldws    -16(%r30),hi0
  189     ldws    -12(%r30),lo0
  190     addc     lo0,cylimb,lo0           C low word + carry limb + pending carry
  191     addc     %r0,hi0,cylimb           C next carry limb = high word + carry
  192     add  s0,lo0,s0                    C this carry feeds the next pass, or ret
  193     stws,ma  s0,4(res_ptr)
  194     addib,<> -1,size_param,L(loop2)   C count down the remaining limbs
  195     nop                               C delay slot
  196 
  197 LDEF(ret)
  198     addc     %r0,cylimb,cylimb        C fold the pending carry into the carry limb
  199     bv   0(%r2)                       C return through %r2
  200     ldo  -128(%r30),%r30              C delay slot: release the 128-byte frame
  201 EPILOGUE(mpn_addmul_1)