"Fossies" - the Fresh Open Source Software Archive

Member "mapm_4.9.5a/mapmfmul.c" (21 Feb 2010, 19871 Bytes) of package /linux/misc/old/mapm-4.9.5a.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "mapmfmul.c" see the Fossies "Dox" file reference documentation.

    1 
    2 /* 
    3  *  M_APM  -  mapmfmul.c
    4  *
    5  *  Copyright (C) 1999 - 2007   Michael C. Ring
    6  *
    7  *  Permission to use, copy, and distribute this software and its
    8  *  documentation for any purpose with or without fee is hereby granted,
    9  *  provided that the above copyright notice appear in all copies and
   10  *  that both that copyright notice and this permission notice appear
   11  *  in supporting documentation.
   12  *
   13  *  Permission to modify the software is granted. Permission to distribute
   14  *  the modified code is granted. Modifications are to be distributed by
   15  *  using the file 'license.txt' as a template to modify the file header.
   16  *  'license.txt' is available in the official MAPM distribution.
   17  *
   18  *  This software is provided "as is" without express or implied warranty.
   19  */
   20 
   21 /*
   22  *      $Id: mapmfmul.c,v 1.33 2007/12/03 01:52:22 mike Exp $
   23  *
   24  *      This file contains the divide-and-conquer FAST MULTIPLICATION 
   25  *  function as well as its support functions.
   26  *
   27  *      $Log: mapmfmul.c,v $
   28  *      Revision 1.33  2007/12/03 01:52:22  mike
   29  *      Update license
   30  *
   31  *      Revision 1.32  2004/02/18 03:16:15  mike
   32  *      optimize 4 byte multiply (when FFT is disabled)
   33  *
   34  *      Revision 1.31  2003/12/04 01:14:06  mike
   35  *      redo math on 'borrow'
   36  *
   37  *      Revision 1.30  2003/07/21 20:34:18  mike
   38  *      Modify error messages to be in a consistent format.
   39  *
   40  *      Revision 1.29  2003/03/31 21:55:07  mike
   41  *      call generic error handling function
   42  *
   43  *      Revision 1.28  2002/11/03 22:38:15  mike
   44  *      Updated function parameters to use the modern style
   45  *
   46  *      Revision 1.27  2002/02/14 19:53:32  mike
   47  *      add conditional compiler option to disable use
   48  *      of FFT multiply if the user so chooses.
   49  *
   50  *      Revision 1.26  2001/07/26 20:56:38  mike
   51  *      fix comment, no code change
   52  *
   53  *      Revision 1.25  2001/07/16 19:43:45  mike
   54  *      add function M_free_all_fmul
   55  *
   56  *      Revision 1.24  2001/02/11 22:34:47  mike
   57  *      modify parameters to REALLOC
   58  *
   59  *      Revision 1.23  2000/10/20 19:23:26  mike
   60  *      adjust power_of_2 function so it should work with
   61  *      64 bit processors and beyond.
   62  *
   63  *      Revision 1.22  2000/08/23 22:27:34  mike
   64  *      no real code change, re-named 2 local functions
   65  *      so they make more sense.
   66  *
   67  *      Revision 1.21  2000/08/01 22:24:38  mike
   68  *      use sizeof(int) function call to stop some
   69  *      compilers from complaining.
   70  *
   71  *      Revision 1.20  2000/07/19 17:12:00  mike
   72  *      lower the number of bytes that the FFT can handle. worst case
   73  *      testing indicated math overflow when >= 1048576
   74  *
   75  *      Revision 1.19  2000/07/08 18:29:03  mike
   76  *      increase define so FFT can handle bigger numbers
   77  *
   78  *      Revision 1.18  2000/07/06 23:20:12  mike
   79  *      changed my mind. use static local MAPM numbers
   80  *      for temp data storage
   81  *
   82  *      Revision 1.17  2000/07/06 20:52:34  mike
   83  *      use init function to get local writable copies
   84  *      instead of using the stack
   85  *
   86  *      Revision 1.16  2000/07/04 17:25:09  mike
   87  *      guarantee 16 bit compilers still work OK
   88  *
   89  *      Revision 1.15  2000/07/04 15:40:02  mike
   90  *      add call to use FFT algorithm
   91  *
   92  *      Revision 1.14  2000/05/05 21:10:46  mike
   93  *      add comment indicating availability of assembly language
   94  *      version of M_4_byte_multiply for Linux on x86 platforms.
   95  *
   96  *      Revision 1.13  2000/04/20 19:30:45  mike
   97  *      minor optimization to 4 byte multiply
   98  *
   99  *      Revision 1.12  2000/04/14 15:39:30  mike
  100  *      optimize the fast multiply function. don't re-curse down
  101  *      to a size of 1. recurse down to a size of '4' and then
  102  *      call a special 4 byte multiply function.
  103  *
  104  *      Revision 1.11  2000/02/03 23:02:13  mike
  105  *      put in RCS for real...
  106  *
  107  *      Revision 1.10  2000/02/03 22:59:08  mike
  108  *      remove the extra recursive function. not needed any
  109  *      longer since all current compilers should not have
  110  *      any problem with true recursive calls.
  111  *
  112  *      Revision 1.9  2000/02/03 22:47:39  mike
  113  *      use MAPM_* generic memory function
  114  *
  115  *      Revision 1.8  1999/09/19 21:13:44  mike
  116  *      eliminate unneeded local int in _split
  117  *
  118  *      Revision 1.7  1999/08/12 22:36:23  mike
  119  *      move the 3 'simple' function to the top of file
  120  *      so GCC can in-line the code.
  121  *
  122  *      Revision 1.6  1999/08/12 22:01:14  mike
  123  *      more minor optimizations
  124  *
  125  *      Revision 1.5  1999/08/12 02:02:06  mike
  126  *      minor optimization
  127  *
  128  *      Revision 1.4  1999/08/10 22:51:59  mike
  129  *      minor tweak
  130  *
  131  *      Revision 1.3  1999/08/10 00:45:47  mike
  132  *      added more comments and a few minor tweaks
  133  *
  134  *      Revision 1.2  1999/08/09 02:50:02  mike
  135  *      add some comments
  136  *
  137  *      Revision 1.1  1999/08/08 18:27:57  mike
  138  *      Initial revision
  139  */
  140 
  141 #include "m_apm_lc.h"
  142 
  143 static int M_firsttimef = TRUE;
  144 
  145 /*
  146  *      specify the max size the FFT routine can handle 
  147  *      (in MAPM, #digits = 2 * #bytes)
  148  *
  149  *      this number *must* be an exact power of 2.
  150  *
  151  *      **WORST** case input numbers (all 9's) has shown that
  152  *  the FFT math will overflow if the #define here is 
  153  *      >= 1048576. On my system, 524,288 worked OK. I will
  154  *      factor down another factor of 2 to safeguard against 
  155  *  other computers have less precise floating point math. 
  156  *  If you are confident in your system, 524288 will 
  157  *  theoretically work fine.
  158  *
  159  *      the define here allows the FFT algorithm to multiply two
  160  *      524,288 digit numbers yielding a 1,048,576 digit result.
  161  */
  162 
  163 #define MAX_FFT_BYTES 262144
  164 
  165 /*
  166  *      the Divide-and-Conquer multiplication kicks in when the size of
  167  *  the numbers exceed the capability of the FFT (#define just above).
  168  *
  169  *  #bytes    D&C call depth
  170  *  ------    --------------
  171  *       512K           1
  172  *        1M            2
  173  *        2M            3
  174  *        4M            4
  175  *       ...           ...
  176  *    2.1990E+12       23 
  177  *
  178  *  the following stack sizes are sized to meet the 
  179  *      above 2.199E+12 example, though I wouldn't want to
  180  *  wait for it to finish...
  181  *
  182  *      Each call requires 7 stack variables to be saved so 
  183  *  we need a stack depth of 23 * 7 + PAD.  (we use 164)
  184  *
  185  *      For 'exp_stack', 3 integers also are required to be saved 
  186  *      for each recursive call so we need a stack depth of 
  187  *      23 * 3 + PAD. (we use 72)
  188  *
  189  *
  190  *      If the FFT multiply is disabled, resize the arrays
  191  *  as follows:
  192  *
  193  *      the following stack sizes are sized to meet the 
  194  *      worst case expected assuming we are multiplying 
  195  *      numbers with 2.14E+9 (2 ^ 31) digits. 
  196  *
  197  *      For sizeof(int) == 4 (32 bits) there may be up to 32 recursive
  198  *      calls. Each call requires 7 stack variables so we need a
  199  *      stack depth of 32 * 7 + PAD.  (we use 240)
  200  *
  201  *      For 'exp_stack', 3 integers also are required to be saved 
  202  *      for each recursive call so we need a stack depth of 
  203  *      32 * 3 + PAD. (we use 100)
  204  */
  205 
  206 #ifdef NO_FFT_MULTIPLY
  207 #define M_STACK_SIZE 240
  208 #define M_ISTACK_SIZE 100
  209 #else
  210 #define M_STACK_SIZE 164
  211 #define M_ISTACK_SIZE 72
  212 #endif
  213 
  214 static int    exp_stack[M_ISTACK_SIZE];
  215 static int    exp_stack_ptr;
  216 
  217 static UCHAR  *mul_stack_data[M_STACK_SIZE];
  218 static int    mul_stack_data_size[M_STACK_SIZE];
  219 static int    M_mul_stack_ptr;
  220 
  221 static UCHAR  *fmul_a1, *fmul_a0, *fmul_a9, *fmul_b1, *fmul_b0, 
  222           *fmul_b9, *fmul_t0;
  223 
  224 static int    size_flag, bit_limit, stmp, itmp, mii;
  225 
  226 static M_APM  M_ain;
  227 static M_APM  M_bin;
  228 
  229 static char   *M_stack_ptr_error_msg = "\'M_get_stack_ptr\', Out of memory";
  230 
  231 extern void   M_fast_multiply(M_APM, M_APM, M_APM);
  232 extern void   M_fmul_div_conq(UCHAR *, UCHAR *, UCHAR *, int);
  233 extern void   M_fmul_add(UCHAR *, UCHAR *, int, int);
  234 extern int    M_fmul_subtract(UCHAR *, UCHAR *, UCHAR *, int);
  235 extern void   M_fmul_split(UCHAR *, UCHAR *, UCHAR *, int);
  236 extern int    M_next_power_of_2(int);
  237 extern int    M_get_stack_ptr(int);
  238 extern void   M_push_mul_int(int);
  239 extern int    M_pop_mul_int(void);
  240 
  241 #ifdef NO_FFT_MULTIPLY
  242 extern void   M_4_byte_multiply(UCHAR *, UCHAR *, UCHAR *);
  243 #else
  244 extern void   M_fast_mul_fft(UCHAR *, UCHAR *, UCHAR *, int);
  245 #endif
  246 
  247 /*
  248  *      the following algorithm is used in this fast multiply routine
  249  *  (sometimes called the divide-and-conquer technique.)
  250  *
  251  *  assume we have 2 numbers (a & b) with 2N digits. 
  252  *
  253  *      let : a = (2^N) * A1 + A0 , b = (2^N) * B1 + B0      
  254  *
  255  *  where 'A1' is the 'most significant half' of 'a' and 
  256  *      'A0' is the 'least significant half' of 'a'. Same for 
  257  *  B1 and B0.
  258  *
  259  *  Now use the identity :
  260  *
  261  *               2N   N            N                    N
  262  *  ab  =  (2  + 2 ) A1B1  +  2 (A1-A0)(B0-B1)  + (2 + 1)A0B0
  263  *
  264  *
  265  *      The original problem of multiplying 2 (2N) digit numbers has
  266  *  been reduced to 3 multiplications of N digit numbers plus some
  267  *  additions, subtractions, and shifts.
  268  *
  269  *  The fast multiplication algorithm used here uses the above 
  270  *  identity in a recursive process. This algorithm results in
  271  *  O(n ^ 1.585) growth.
  272  */
  273 
  274 
  275 /****************************************************************************/
  276 void    M_free_all_fmul()
  277 {
  278 int k;
  279 
  280 if (M_firsttimef == FALSE)
  281   {
  282    m_apm_free(M_ain);
  283    m_apm_free(M_bin);
  284 
  285    for (k=0; k < M_STACK_SIZE; k++)
  286      {
  287       if (mul_stack_data_size[k] != 0)
  288         {
  289          MAPM_FREE(mul_stack_data[k]);
  290     }
  291      }
  292 
  293    M_firsttimef = TRUE;
  294   }
  295 }
  296 /****************************************************************************/
  297 void    M_push_mul_int(int val)
  298 {
  299 exp_stack[++exp_stack_ptr] = val;
  300 }
  301 /****************************************************************************/
  302 int M_pop_mul_int()
  303 {
  304 return(exp_stack[exp_stack_ptr--]);
  305 }
  306 /****************************************************************************/
  307 void    M_fmul_split(UCHAR *x1, UCHAR *x0, UCHAR *xin, int nbytes)
  308 {
  309 memcpy(x1, xin, nbytes);
  310 memcpy(x0, (xin + nbytes), nbytes);
  311 }
  312 /****************************************************************************/
  313 void    M_fast_multiply(M_APM rr, M_APM aa, M_APM bb)
  314 {
  315 void    *vp;
  316 int ii, k, nexp, sign;
  317 
  318 if (M_firsttimef)
  319   {
  320    M_firsttimef = FALSE;
  321 
  322    for (k=0; k < M_STACK_SIZE; k++)
  323      mul_stack_data_size[k] = 0;
  324 
  325    size_flag = M_get_sizeof_int();
  326    bit_limit = 8 * size_flag + 1;
  327 
  328    M_ain = m_apm_init();
  329    M_bin = m_apm_init();
  330   }
  331 
  332 exp_stack_ptr   = -1;
  333 M_mul_stack_ptr = -1;
  334 
  335 m_apm_copy(M_ain, aa);
  336 m_apm_copy(M_bin, bb);
  337 
  338 sign = M_ain->m_apm_sign * M_bin->m_apm_sign;
  339 nexp = M_ain->m_apm_exponent + M_bin->m_apm_exponent;
  340 
  341 if (M_ain->m_apm_datalength >= M_bin->m_apm_datalength)
  342   ii = M_ain->m_apm_datalength;
  343 else
  344   ii = M_bin->m_apm_datalength;
  345 
  346 ii = (ii + 1) >> 1;
  347 ii = M_next_power_of_2(ii);
  348 
  349 /* Note: 'ii' must be >= 4 here. this is guaranteed 
  350    by the caller: m_apm_multiply
  351 */
  352 
  353 k = 2 * ii;                   /* required size of result, in bytes  */
  354 
  355 M_apm_pad(M_ain, k);          /* fill out the data so the number of */
  356 M_apm_pad(M_bin, k);          /* bytes is an exact power of 2       */
  357 
  358 if (k > rr->m_apm_malloclength)
  359   {
  360    if ((vp = MAPM_REALLOC(rr->m_apm_data, (k + 32))) == NULL)
  361      {
  362       /* fatal, this does not return */
  363 
  364       M_apm_log_error_msg(M_APM_FATAL, "\'M_fast_multiply\', Out of memory");
  365      }
  366   
  367    rr->m_apm_malloclength = k + 28;
  368    rr->m_apm_data = (UCHAR *)vp;
  369   }
  370 
  371 #ifdef NO_FFT_MULTIPLY
  372 
  373 M_fmul_div_conq(rr->m_apm_data, M_ain->m_apm_data, 
  374                                 M_bin->m_apm_data, ii);
  375 #else
  376 
  377 /*
  378  *     if the numbers are *really* big, use the divide-and-conquer
  379  *     routine first until the numbers are small enough to be handled 
  380  *     by the FFT algorithm. If the numbers are already small enough,
  381  *     call the FFT multiplication now.
  382  *
  383  *     Note that 'ii' here is (and must be) an exact power of 2.
  384  */
  385 
  386 if (size_flag == 2)   /* if still using 16 bit compilers .... */
  387   {
  388    M_fast_mul_fft(rr->m_apm_data, M_ain->m_apm_data, 
  389                                   M_bin->m_apm_data, ii);
  390   }
  391 else                  /* >= 32 bit compilers */
  392   {
  393    if (ii > (MAX_FFT_BYTES + 2))
  394      {
  395       M_fmul_div_conq(rr->m_apm_data, M_ain->m_apm_data, 
  396                                       M_bin->m_apm_data, ii);
  397      }
  398    else
  399      {
  400       M_fast_mul_fft(rr->m_apm_data, M_ain->m_apm_data, 
  401                                      M_bin->m_apm_data, ii);
  402      }
  403   }
  404 
  405 #endif
  406 
  407 rr->m_apm_sign       = sign;
  408 rr->m_apm_exponent   = nexp;
  409 rr->m_apm_datalength = 4 * ii;
  410 
  411 M_apm_normalize(rr);
  412 }
  413 /****************************************************************************/
  414 /*
  415  *      This is the recursive function to perform the multiply. The 
  416  *      design intent here is to have no local variables. Any local
  417  *      data that needs to be saved is saved on one of the two stacks.
  418  */
  419 void    M_fmul_div_conq(UCHAR *rr, UCHAR *aa, UCHAR *bb, int sz)
  420 {
  421 
  422 #ifdef NO_FFT_MULTIPLY
  423 
  424 if (sz == 4)                /* multiply 4x4 yielding an 8 byte result */
  425   {
  426    M_4_byte_multiply(rr, aa, bb);
  427    return;
  428   }
  429 
  430 #else
  431 
  432 /*
  433  *  if the numbers are now small enough, let the FFT algorithm
  434  *  finish up.
  435  */
  436 
  437 if (sz == MAX_FFT_BYTES)     
  438   {
  439    M_fast_mul_fft(rr, aa, bb, sz);
  440    return;
  441   }
  442 
  443 #endif
  444 
  445 memset(rr, 0, (2 * sz));    /* zero out the result */
  446 mii = sz >> 1;
  447 
  448 itmp = M_get_stack_ptr(mii);
  449 M_push_mul_int(itmp);
  450 
  451 fmul_a1 = mul_stack_data[itmp];
  452 
  453 itmp    = M_get_stack_ptr(mii);
  454 fmul_a0 = mul_stack_data[itmp];
  455 
  456 itmp    = M_get_stack_ptr(2 * sz);
  457 fmul_a9 = mul_stack_data[itmp];
  458 
  459 itmp    = M_get_stack_ptr(mii);
  460 fmul_b1 = mul_stack_data[itmp];
  461 
  462 itmp    = M_get_stack_ptr(mii);
  463 fmul_b0 = mul_stack_data[itmp];
  464 
  465 itmp    = M_get_stack_ptr(2 * sz);
  466 fmul_b9 = mul_stack_data[itmp];
  467 
  468 itmp    = M_get_stack_ptr(2 * sz);
  469 fmul_t0 = mul_stack_data[itmp];
  470 
  471 M_fmul_split(fmul_a1, fmul_a0, aa, mii);
  472 M_fmul_split(fmul_b1, fmul_b0, bb, mii);
  473 
  474 stmp  = M_fmul_subtract(fmul_a9, fmul_a1, fmul_a0, mii);
  475 stmp *= M_fmul_subtract(fmul_b9, fmul_b0, fmul_b1, mii);
  476 
  477 M_push_mul_int(stmp);
  478 M_push_mul_int(mii);
  479 
  480 M_fmul_div_conq(fmul_t0, fmul_a0, fmul_b0, mii);
  481 
  482 mii  = M_pop_mul_int();
  483 stmp = M_pop_mul_int();
  484 itmp = M_pop_mul_int();
  485 
  486 M_push_mul_int(itmp);
  487 M_push_mul_int(stmp);
  488 M_push_mul_int(mii);
  489 
  490 /*   to restore all stack variables ...
  491 fmul_a1 = mul_stack_data[itmp];
  492 fmul_a0 = mul_stack_data[itmp+1];
  493 fmul_a9 = mul_stack_data[itmp+2];
  494 fmul_b1 = mul_stack_data[itmp+3];
  495 fmul_b0 = mul_stack_data[itmp+4];
  496 fmul_b9 = mul_stack_data[itmp+5];
  497 fmul_t0 = mul_stack_data[itmp+6];
  498 */
  499 
  500 fmul_a1 = mul_stack_data[itmp];
  501 fmul_b1 = mul_stack_data[itmp+3];
  502 fmul_t0 = mul_stack_data[itmp+6];
  503 
  504 memcpy((rr + sz), fmul_t0, sz);    /* first 'add', result is now zero */
  505                    /* so we just copy in the bytes    */
  506 M_fmul_add(rr, fmul_t0, mii, sz);
  507 
  508 M_fmul_div_conq(fmul_t0, fmul_a1, fmul_b1, mii);
  509 
  510 mii  = M_pop_mul_int();
  511 stmp = M_pop_mul_int();
  512 itmp = M_pop_mul_int();
  513 
  514 M_push_mul_int(itmp);
  515 M_push_mul_int(stmp);
  516 M_push_mul_int(mii);
  517 
  518 fmul_a9 = mul_stack_data[itmp+2];
  519 fmul_b9 = mul_stack_data[itmp+5];
  520 fmul_t0 = mul_stack_data[itmp+6];
  521 
  522 M_fmul_add(rr, fmul_t0, 0, sz);
  523 M_fmul_add(rr, fmul_t0, mii, sz);
  524 
  525 if (stmp != 0)
  526   M_fmul_div_conq(fmul_t0, fmul_a9, fmul_b9, mii);
  527 
  528 mii  = M_pop_mul_int();
  529 stmp = M_pop_mul_int();
  530 itmp = M_pop_mul_int();
  531 
  532 fmul_t0 = mul_stack_data[itmp+6];
  533 
  534 /*
  535  *  if the sign of (A1 - A0)(B0 - B1) is positive, ADD to
  536  *  the result. if it is negative, SUBTRACT from the result.
  537  */
  538 
  539 if (stmp < 0)
  540   {
  541    fmul_a9 = mul_stack_data[itmp+2];
  542    fmul_b9 = mul_stack_data[itmp+5];
  543 
  544    memset(fmul_b9, 0, (2 * sz)); 
  545    memcpy((fmul_b9 + mii), fmul_t0, sz); 
  546    M_fmul_subtract(fmul_a9, rr, fmul_b9, (2 * sz));
  547    memcpy(rr, fmul_a9, (2 * sz));
  548   }
  549 
  550 if (stmp > 0)
  551   M_fmul_add(rr, fmul_t0, mii, sz);
  552 
  553 M_mul_stack_ptr -= 7;
  554 }
  555 /****************************************************************************/
  556 /*
  557  *  special addition function for use with the fast multiply operation
  558  */
  559 void    M_fmul_add(UCHAR *r, UCHAR *a, int offset, int sz)
  560 {
  561 int i, j;
  562 UCHAR   carry;
  563 
  564 carry = 0;
  565 j = offset + sz;
  566 i = sz;
  567 
  568 while (TRUE)
  569   {
  570    r[--j] += carry + a[--i];
  571 
  572    if (r[j] >= 100)
  573      {
  574       r[j] -= 100;
  575       carry = 1;
  576      }
  577    else
  578       carry = 0;
  579 
  580    if (i == 0)
  581      break;
  582   }
  583 
  584 if (carry)
  585   {
  586    while (TRUE)
  587      {
  588       r[--j] += 1;
  589    
  590       if (r[j] < 100)
  591         break;
  592 
  593       r[j] -= 100;
  594      }
  595   }
  596 }
  597 /****************************************************************************/
  598 /*
  599  *  special subtraction function for use with the fast multiply operation
  600  */
  601 int     M_fmul_subtract(UCHAR *r, UCHAR *a, UCHAR *b, int sz)
  602 {
  603 int k, jtmp, sflag, nb, borrow;
  604 
  605 nb    = sz;
  606 sflag = 0;      /* sign flag: assume the numbers are equal */
  607 
  608 /*
  609  *   find if a > b (so we perform a-b)
  610  *   or      a < b (so we perform b-a)
  611  */
  612 
  613 for (k=0; k < nb; k++)
  614   {
  615    if (a[k] < b[k])
  616      {
  617       sflag = -1;
  618       break;
  619      }
  620 
  621    if (a[k] > b[k])
  622      {
  623       sflag = 1;
  624       break;
  625      }
  626   }
  627 
  628 if (sflag == 0)
  629   {
  630    memset(r, 0, nb);            /* zero out the result */
  631   }
  632 else
  633   {
  634    k = nb;
  635    borrow = 0;
  636 
  637    while (TRUE)
  638      {
  639       k--;
  640 
  641       if (sflag == 1)
  642         jtmp = (int)a[k] - ((int)b[k] + borrow);
  643       else
  644         jtmp = (int)b[k] - ((int)a[k] + borrow);
  645 
  646       if (jtmp >= 0)
  647         {
  648          r[k] = (UCHAR)jtmp;
  649          borrow = 0;
  650         }
  651       else
  652         {
  653          r[k] = (UCHAR)(100 + jtmp);
  654          borrow = 1;
  655         }
  656 
  657       if (k == 0)
  658         break;
  659      }
  660   }
  661 
  662 return(sflag);
  663 }
  664 /****************************************************************************/
  665 int     M_next_power_of_2(int n)
  666 {
  667 int     ct, k;
  668 
  669 if (n <= 2)
  670   return(n);
  671 
  672 k  = 2;
  673 ct = 0;
  674 
  675 while (TRUE)
  676   {
  677    if (k >= n)
  678      break;
  679 
  680    k = k << 1;
  681 
  682    if (++ct == bit_limit)
  683      {
  684       /* fatal, this does not return */
  685 
  686       M_apm_log_error_msg(M_APM_FATAL, 
  687                "\'M_next_power_of_2\', ERROR :sizeof(int) too small ??");
  688      }
  689   }
  690 
  691 return(k);
  692 }
  693 /****************************************************************************/
  694 int M_get_stack_ptr(int sz)
  695 {
  696 int i, k;
  697 UCHAR   *cp;
  698 
  699 k = ++M_mul_stack_ptr;
  700 
  701 /* if size is 0, just need to malloc and return */
  702 if (mul_stack_data_size[k] == 0)
  703   {
  704    if ((i = sz) < 16)
  705      i = 16;
  706 
  707    if ((cp = (UCHAR *)MAPM_MALLOC(i + 4)) == NULL)
  708      {
  709       /* fatal, this does not return */
  710 
  711       M_apm_log_error_msg(M_APM_FATAL, M_stack_ptr_error_msg);
  712      }
  713 
  714    mul_stack_data[k]      = cp;
  715    mul_stack_data_size[k] = i;
  716   }
  717 else        /* it has been malloc'ed, see if it's big enough */
  718   {
  719    if (sz > mul_stack_data_size[k])
  720      {
  721       cp = mul_stack_data[k];
  722 
  723       if ((cp = (UCHAR *)MAPM_REALLOC(cp, (sz + 4))) == NULL)
  724         {
  725          /* fatal, this does not return */
  726    
  727          M_apm_log_error_msg(M_APM_FATAL, M_stack_ptr_error_msg);
  728         }
  729    
  730       mul_stack_data[k]      = cp;
  731       mul_stack_data_size[k] = sz;
  732      }
  733   }
  734 
  735 return(k);
  736 }
  737 /****************************************************************************/
  738 
  739 #ifdef NO_FFT_MULTIPLY
  740 
  741 /*
  742  *      multiply a 4 byte number by a 4 byte number
  743  *      yielding an 8 byte result. each byte contains
  744  *      a base 100 'digit', i.e.: range from 0-99.
  745  *
  746  *             MSB         LSB
  747  *
  748  *      a,b    [0] [1] [2] [3]
  749  *   result    [0]  .....  [7]
  750  */
  751 
  752 void    M_4_byte_multiply(UCHAR *r, UCHAR *a, UCHAR *b)
  753 {
  754 int       jj;
  755 unsigned int  *ip, t1, rr[8];
  756 
  757 memset(rr, 0, (8 * sizeof(int)));        /* zero out result */
  758 jj = 3;
  759 ip = rr + 5;
  760 
  761 /*
  762  *   loop for one number [b], un-roll the inner 'loop' [a]
  763  *
  764  *   accumulate partial sums in UINT array, release carries 
  765  *   and convert back to base 100 at the end
  766  */
  767 
  768 while (1)
  769   {
  770    t1  = (unsigned int)b[jj];
  771    ip += 2;
  772 
  773    *ip-- += t1 * a[3];
  774    *ip-- += t1 * a[2];
  775    *ip-- += t1 * a[1];
  776    *ip   += t1 * a[0];
  777    
  778    if (jj-- == 0)
  779      break;
  780   }
  781 
  782 jj = 7;
  783 
  784 while (1)
  785   {
  786    t1 = rr[jj] / 100;
  787    r[jj] = (UCHAR)(rr[jj] - 100 * t1);
  788 
  789    if (jj == 0)
  790      break;
  791 
  792    rr[--jj] += t1;
  793   }
  794 }
  795 
  796 #endif
  797 
  798 /****************************************************************************/