"Fossies" - the Fresh Open Source Software Archive 
Member "mapm_4.9.5a/mapmfmul.c" (21 Feb 2010, 19871 Bytes) of package /linux/misc/old/mapm-4.9.5a.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "mapmfmul.c" see the
Fossies "Dox" file reference documentation.
1
2 /*
3 * M_APM - mapmfmul.c
4 *
5 * Copyright (C) 1999 - 2007 Michael C. Ring
6 *
7 * Permission to use, copy, and distribute this software and its
8 * documentation for any purpose with or without fee is hereby granted,
9 * provided that the above copyright notice appear in all copies and
10 * that both that copyright notice and this permission notice appear
11 * in supporting documentation.
12 *
13 * Permission to modify the software is granted. Permission to distribute
14 * the modified code is granted. Modifications are to be distributed by
15 * using the file 'license.txt' as a template to modify the file header.
16 * 'license.txt' is available in the official MAPM distribution.
17 *
18 * This software is provided "as is" without express or implied warranty.
19 */
20
21 /*
22 * $Id: mapmfmul.c,v 1.33 2007/12/03 01:52:22 mike Exp $
23 *
24 * This file contains the divide-and-conquer FAST MULTIPLICATION
25 * function as well as its support functions.
26 *
27 * $Log: mapmfmul.c,v $
28 * Revision 1.33 2007/12/03 01:52:22 mike
29 * Update license
30 *
31 * Revision 1.32 2004/02/18 03:16:15 mike
32 * optimize 4 byte multiply (when FFT is disabled)
33 *
34 * Revision 1.31 2003/12/04 01:14:06 mike
35 * redo math on 'borrow'
36 *
37 * Revision 1.30 2003/07/21 20:34:18 mike
38 * Modify error messages to be in a consistent format.
39 *
40 * Revision 1.29 2003/03/31 21:55:07 mike
41 * call generic error handling function
42 *
43 * Revision 1.28 2002/11/03 22:38:15 mike
44 * Updated function parameters to use the modern style
45 *
46 * Revision 1.27 2002/02/14 19:53:32 mike
47 * add conditional compiler option to disable use
48 * of FFT multiply if the user so chooses.
49 *
50 * Revision 1.26 2001/07/26 20:56:38 mike
51 * fix comment, no code change
52 *
53 * Revision 1.25 2001/07/16 19:43:45 mike
54 * add function M_free_all_fmul
55 *
56 * Revision 1.24 2001/02/11 22:34:47 mike
57 * modify parameters to REALLOC
58 *
59 * Revision 1.23 2000/10/20 19:23:26 mike
60 * adjust power_of_2 function so it should work with
61 * 64 bit processors and beyond.
62 *
63 * Revision 1.22 2000/08/23 22:27:34 mike
64 * no real code change, re-named 2 local functions
65 * so they make more sense.
66 *
67 * Revision 1.21 2000/08/01 22:24:38 mike
68 * use sizeof(int) function call to stop some
69 * compilers from complaining.
70 *
71 * Revision 1.20 2000/07/19 17:12:00 mike
72 * lower the number of bytes that the FFT can handle. worst case
73 * testing indicated math overflow when >= 1048576
74 *
75 * Revision 1.19 2000/07/08 18:29:03 mike
76 * increase define so FFT can handle bigger numbers
77 *
78 * Revision 1.18 2000/07/06 23:20:12 mike
79 * changed my mind. use static local MAPM numbers
80 * for temp data storage
81 *
82 * Revision 1.17 2000/07/06 20:52:34 mike
83 * use init function to get local writable copies
84 * instead of using the stack
85 *
86 * Revision 1.16 2000/07/04 17:25:09 mike
87 * guarantee 16 bit compilers still work OK
88 *
89 * Revision 1.15 2000/07/04 15:40:02 mike
90 * add call to use FFT algorithm
91 *
92 * Revision 1.14 2000/05/05 21:10:46 mike
93 * add comment indicating availability of assembly language
94 * version of M_4_byte_multiply for Linux on x86 platforms.
95 *
96 * Revision 1.13 2000/04/20 19:30:45 mike
97 * minor optimization to 4 byte multiply
98 *
99 * Revision 1.12 2000/04/14 15:39:30 mike
100 * optimize the fast multiply function. don't re-curse down
101 * to a size of 1. recurse down to a size of '4' and then
102 * call a special 4 byte multiply function.
103 *
104 * Revision 1.11 2000/02/03 23:02:13 mike
105 * put in RCS for real...
106 *
107 * Revision 1.10 2000/02/03 22:59:08 mike
108 * remove the extra recursive function. not needed any
109 * longer since all current compilers should not have
110 * any problem with true recursive calls.
111 *
112 * Revision 1.9 2000/02/03 22:47:39 mike
113 * use MAPM_* generic memory function
114 *
115 * Revision 1.8 1999/09/19 21:13:44 mike
116 * eliminate unneeded local int in _split
117 *
118 * Revision 1.7 1999/08/12 22:36:23 mike
119 * move the 3 'simple' function to the top of file
120 * so GCC can in-line the code.
121 *
122 * Revision 1.6 1999/08/12 22:01:14 mike
123 * more minor optimizations
124 *
125 * Revision 1.5 1999/08/12 02:02:06 mike
126 * minor optimization
127 *
128 * Revision 1.4 1999/08/10 22:51:59 mike
129 * minor tweak
130 *
131 * Revision 1.3 1999/08/10 00:45:47 mike
132 * added more comments and a few minor tweaks
133 *
134 * Revision 1.2 1999/08/09 02:50:02 mike
135 * add some comments
136 *
137 * Revision 1.1 1999/08/08 18:27:57 mike
138 * Initial revision
139 */
140
141 #include "m_apm_lc.h"
142
143 static int M_firsttimef = TRUE;
144
145 /*
146 * specify the max size the FFT routine can handle
147 * (in MAPM, #digits = 2 * #bytes)
148 *
149 * this number *must* be an exact power of 2.
150 *
151 * **WORST** case input numbers (all 9's) has shown that
152 * the FFT math will overflow if the #define here is
153 * >= 1048576. On my system, 524,288 worked OK. I will
154 * factor down another factor of 2 to safeguard against
155 * other computers have less precise floating point math.
156 * If you are confident in your system, 524288 will
157 * theoretically work fine.
158 *
159 * the define here allows the FFT algorithm to multiply two
160 * 524,288 digit numbers yielding a 1,048,576 digit result.
161 */
162
163 #define MAX_FFT_BYTES 262144
164
165 /*
166 * the Divide-and-Conquer multiplication kicks in when the size of
167 * the numbers exceed the capability of the FFT (#define just above).
168 *
169 * #bytes D&C call depth
170 * ------ --------------
171 * 512K 1
172 * 1M 2
173 * 2M 3
174 * 4M 4
175 * ... ...
176 * 2.1990E+12 23
177 *
178 * the following stack sizes are sized to meet the
179 * above 2.199E+12 example, though I wouldn't want to
180 * wait for it to finish...
181 *
182 * Each call requires 7 stack variables to be saved so
183 * we need a stack depth of 23 * 7 + PAD. (we use 164)
184 *
185 * For 'exp_stack', 3 integers also are required to be saved
186 * for each recursive call so we need a stack depth of
187 * 23 * 3 + PAD. (we use 72)
188 *
189 *
190 * If the FFT multiply is disabled, resize the arrays
191 * as follows:
192 *
193 * the following stack sizes are sized to meet the
194 * worst case expected assuming we are multiplying
195 * numbers with 2.14E+9 (2 ^ 31) digits.
196 *
197 * For sizeof(int) == 4 (32 bits) there may be up to 32 recursive
198 * calls. Each call requires 7 stack variables so we need a
199 * stack depth of 32 * 7 + PAD. (we use 240)
200 *
201 * For 'exp_stack', 3 integers also are required to be saved
202 * for each recursive call so we need a stack depth of
203 * 32 * 3 + PAD. (we use 100)
204 */
205
206 #ifdef NO_FFT_MULTIPLY
207 #define M_STACK_SIZE 240
208 #define M_ISTACK_SIZE 100
209 #else
210 #define M_STACK_SIZE 164
211 #define M_ISTACK_SIZE 72
212 #endif
213
214 static int exp_stack[M_ISTACK_SIZE];
215 static int exp_stack_ptr;
216
217 static UCHAR *mul_stack_data[M_STACK_SIZE];
218 static int mul_stack_data_size[M_STACK_SIZE];
219 static int M_mul_stack_ptr;
220
221 static UCHAR *fmul_a1, *fmul_a0, *fmul_a9, *fmul_b1, *fmul_b0,
222 *fmul_b9, *fmul_t0;
223
224 static int size_flag, bit_limit, stmp, itmp, mii;
225
226 static M_APM M_ain;
227 static M_APM M_bin;
228
229 static char *M_stack_ptr_error_msg = "\'M_get_stack_ptr\', Out of memory";
230
231 extern void M_fast_multiply(M_APM, M_APM, M_APM);
232 extern void M_fmul_div_conq(UCHAR *, UCHAR *, UCHAR *, int);
233 extern void M_fmul_add(UCHAR *, UCHAR *, int, int);
234 extern int M_fmul_subtract(UCHAR *, UCHAR *, UCHAR *, int);
235 extern void M_fmul_split(UCHAR *, UCHAR *, UCHAR *, int);
236 extern int M_next_power_of_2(int);
237 extern int M_get_stack_ptr(int);
238 extern void M_push_mul_int(int);
239 extern int M_pop_mul_int(void);
240
241 #ifdef NO_FFT_MULTIPLY
242 extern void M_4_byte_multiply(UCHAR *, UCHAR *, UCHAR *);
243 #else
244 extern void M_fast_mul_fft(UCHAR *, UCHAR *, UCHAR *, int);
245 #endif
246
247 /*
248 * the following algorithm is used in this fast multiply routine
249 * (sometimes called the divide-and-conquer technique.)
250 *
251 * assume we have 2 numbers (a & b) with 2N digits.
252 *
253 * let : a = (2^N) * A1 + A0 , b = (2^N) * B1 + B0
254 *
255 * where 'A1' is the 'most significant half' of 'a' and
256 * 'A0' is the 'least significant half' of 'a'. Same for
257 * B1 and B0.
258 *
259 * Now use the identity :
260 *
261 * 2N N N N
262 * ab = (2 + 2 ) A1B1 + 2 (A1-A0)(B0-B1) + (2 + 1)A0B0
263 *
264 *
265 * The original problem of multiplying 2 (2N) digit numbers has
266 * been reduced to 3 multiplications of N digit numbers plus some
267 * additions, subtractions, and shifts.
268 *
269 * The fast multiplication algorithm used here uses the above
270 * identity in a recursive process. This algorithm results in
271 * O(n ^ 1.585) growth.
272 */
273
274
275 /****************************************************************************/
276 void M_free_all_fmul()
277 {
278 int k;
279
280 if (M_firsttimef == FALSE)
281 {
282 m_apm_free(M_ain);
283 m_apm_free(M_bin);
284
285 for (k=0; k < M_STACK_SIZE; k++)
286 {
287 if (mul_stack_data_size[k] != 0)
288 {
289 MAPM_FREE(mul_stack_data[k]);
290 }
291 }
292
293 M_firsttimef = TRUE;
294 }
295 }
296 /****************************************************************************/
297 void M_push_mul_int(int val)
298 {
299 exp_stack[++exp_stack_ptr] = val;
300 }
301 /****************************************************************************/
302 int M_pop_mul_int()
303 {
304 return(exp_stack[exp_stack_ptr--]);
305 }
306 /****************************************************************************/
307 void M_fmul_split(UCHAR *x1, UCHAR *x0, UCHAR *xin, int nbytes)
308 {
309 memcpy(x1, xin, nbytes);
310 memcpy(x0, (xin + nbytes), nbytes);
311 }
312 /****************************************************************************/
313 void M_fast_multiply(M_APM rr, M_APM aa, M_APM bb)
314 {
315 void *vp;
316 int ii, k, nexp, sign;
317
318 if (M_firsttimef)
319 {
320 M_firsttimef = FALSE;
321
322 for (k=0; k < M_STACK_SIZE; k++)
323 mul_stack_data_size[k] = 0;
324
325 size_flag = M_get_sizeof_int();
326 bit_limit = 8 * size_flag + 1;
327
328 M_ain = m_apm_init();
329 M_bin = m_apm_init();
330 }
331
332 exp_stack_ptr = -1;
333 M_mul_stack_ptr = -1;
334
335 m_apm_copy(M_ain, aa);
336 m_apm_copy(M_bin, bb);
337
338 sign = M_ain->m_apm_sign * M_bin->m_apm_sign;
339 nexp = M_ain->m_apm_exponent + M_bin->m_apm_exponent;
340
341 if (M_ain->m_apm_datalength >= M_bin->m_apm_datalength)
342 ii = M_ain->m_apm_datalength;
343 else
344 ii = M_bin->m_apm_datalength;
345
346 ii = (ii + 1) >> 1;
347 ii = M_next_power_of_2(ii);
348
349 /* Note: 'ii' must be >= 4 here. this is guaranteed
350 by the caller: m_apm_multiply
351 */
352
353 k = 2 * ii; /* required size of result, in bytes */
354
355 M_apm_pad(M_ain, k); /* fill out the data so the number of */
356 M_apm_pad(M_bin, k); /* bytes is an exact power of 2 */
357
358 if (k > rr->m_apm_malloclength)
359 {
360 if ((vp = MAPM_REALLOC(rr->m_apm_data, (k + 32))) == NULL)
361 {
362 /* fatal, this does not return */
363
364 M_apm_log_error_msg(M_APM_FATAL, "\'M_fast_multiply\', Out of memory");
365 }
366
367 rr->m_apm_malloclength = k + 28;
368 rr->m_apm_data = (UCHAR *)vp;
369 }
370
371 #ifdef NO_FFT_MULTIPLY
372
373 M_fmul_div_conq(rr->m_apm_data, M_ain->m_apm_data,
374 M_bin->m_apm_data, ii);
375 #else
376
377 /*
378 * if the numbers are *really* big, use the divide-and-conquer
379 * routine first until the numbers are small enough to be handled
380 * by the FFT algorithm. If the numbers are already small enough,
381 * call the FFT multiplication now.
382 *
383 * Note that 'ii' here is (and must be) an exact power of 2.
384 */
385
386 if (size_flag == 2) /* if still using 16 bit compilers .... */
387 {
388 M_fast_mul_fft(rr->m_apm_data, M_ain->m_apm_data,
389 M_bin->m_apm_data, ii);
390 }
391 else /* >= 32 bit compilers */
392 {
393 if (ii > (MAX_FFT_BYTES + 2))
394 {
395 M_fmul_div_conq(rr->m_apm_data, M_ain->m_apm_data,
396 M_bin->m_apm_data, ii);
397 }
398 else
399 {
400 M_fast_mul_fft(rr->m_apm_data, M_ain->m_apm_data,
401 M_bin->m_apm_data, ii);
402 }
403 }
404
405 #endif
406
407 rr->m_apm_sign = sign;
408 rr->m_apm_exponent = nexp;
409 rr->m_apm_datalength = 4 * ii;
410
411 M_apm_normalize(rr);
412 }
413 /****************************************************************************/
414 /*
415 * This is the recursive function to perform the multiply. The
416 * design intent here is to have no local variables. Any local
417 * data that needs to be saved is saved on one of the two stacks.
418 */
419 void M_fmul_div_conq(UCHAR *rr, UCHAR *aa, UCHAR *bb, int sz)
420 {
421
422 #ifdef NO_FFT_MULTIPLY
423
424 if (sz == 4) /* multiply 4x4 yielding an 8 byte result */
425 {
426 M_4_byte_multiply(rr, aa, bb);
427 return;
428 }
429
430 #else
431
432 /*
433 * if the numbers are now small enough, let the FFT algorithm
434 * finish up.
435 */
436
437 if (sz == MAX_FFT_BYTES)
438 {
439 M_fast_mul_fft(rr, aa, bb, sz);
440 return;
441 }
442
443 #endif
444
445 memset(rr, 0, (2 * sz)); /* zero out the result */
446 mii = sz >> 1;
447
448 itmp = M_get_stack_ptr(mii);
449 M_push_mul_int(itmp);
450
451 fmul_a1 = mul_stack_data[itmp];
452
453 itmp = M_get_stack_ptr(mii);
454 fmul_a0 = mul_stack_data[itmp];
455
456 itmp = M_get_stack_ptr(2 * sz);
457 fmul_a9 = mul_stack_data[itmp];
458
459 itmp = M_get_stack_ptr(mii);
460 fmul_b1 = mul_stack_data[itmp];
461
462 itmp = M_get_stack_ptr(mii);
463 fmul_b0 = mul_stack_data[itmp];
464
465 itmp = M_get_stack_ptr(2 * sz);
466 fmul_b9 = mul_stack_data[itmp];
467
468 itmp = M_get_stack_ptr(2 * sz);
469 fmul_t0 = mul_stack_data[itmp];
470
471 M_fmul_split(fmul_a1, fmul_a0, aa, mii);
472 M_fmul_split(fmul_b1, fmul_b0, bb, mii);
473
474 stmp = M_fmul_subtract(fmul_a9, fmul_a1, fmul_a0, mii);
475 stmp *= M_fmul_subtract(fmul_b9, fmul_b0, fmul_b1, mii);
476
477 M_push_mul_int(stmp);
478 M_push_mul_int(mii);
479
480 M_fmul_div_conq(fmul_t0, fmul_a0, fmul_b0, mii);
481
482 mii = M_pop_mul_int();
483 stmp = M_pop_mul_int();
484 itmp = M_pop_mul_int();
485
486 M_push_mul_int(itmp);
487 M_push_mul_int(stmp);
488 M_push_mul_int(mii);
489
490 /* to restore all stack variables ...
491 fmul_a1 = mul_stack_data[itmp];
492 fmul_a0 = mul_stack_data[itmp+1];
493 fmul_a9 = mul_stack_data[itmp+2];
494 fmul_b1 = mul_stack_data[itmp+3];
495 fmul_b0 = mul_stack_data[itmp+4];
496 fmul_b9 = mul_stack_data[itmp+5];
497 fmul_t0 = mul_stack_data[itmp+6];
498 */
499
500 fmul_a1 = mul_stack_data[itmp];
501 fmul_b1 = mul_stack_data[itmp+3];
502 fmul_t0 = mul_stack_data[itmp+6];
503
504 memcpy((rr + sz), fmul_t0, sz); /* first 'add', result is now zero */
505 /* so we just copy in the bytes */
506 M_fmul_add(rr, fmul_t0, mii, sz);
507
508 M_fmul_div_conq(fmul_t0, fmul_a1, fmul_b1, mii);
509
510 mii = M_pop_mul_int();
511 stmp = M_pop_mul_int();
512 itmp = M_pop_mul_int();
513
514 M_push_mul_int(itmp);
515 M_push_mul_int(stmp);
516 M_push_mul_int(mii);
517
518 fmul_a9 = mul_stack_data[itmp+2];
519 fmul_b9 = mul_stack_data[itmp+5];
520 fmul_t0 = mul_stack_data[itmp+6];
521
522 M_fmul_add(rr, fmul_t0, 0, sz);
523 M_fmul_add(rr, fmul_t0, mii, sz);
524
525 if (stmp != 0)
526 M_fmul_div_conq(fmul_t0, fmul_a9, fmul_b9, mii);
527
528 mii = M_pop_mul_int();
529 stmp = M_pop_mul_int();
530 itmp = M_pop_mul_int();
531
532 fmul_t0 = mul_stack_data[itmp+6];
533
534 /*
535 * if the sign of (A1 - A0)(B0 - B1) is positive, ADD to
536 * the result. if it is negative, SUBTRACT from the result.
537 */
538
539 if (stmp < 0)
540 {
541 fmul_a9 = mul_stack_data[itmp+2];
542 fmul_b9 = mul_stack_data[itmp+5];
543
544 memset(fmul_b9, 0, (2 * sz));
545 memcpy((fmul_b9 + mii), fmul_t0, sz);
546 M_fmul_subtract(fmul_a9, rr, fmul_b9, (2 * sz));
547 memcpy(rr, fmul_a9, (2 * sz));
548 }
549
550 if (stmp > 0)
551 M_fmul_add(rr, fmul_t0, mii, sz);
552
553 M_mul_stack_ptr -= 7;
554 }
555 /****************************************************************************/
556 /*
557 * special addition function for use with the fast multiply operation
558 */
559 void M_fmul_add(UCHAR *r, UCHAR *a, int offset, int sz)
560 {
561 int i, j;
562 UCHAR carry;
563
564 carry = 0;
565 j = offset + sz;
566 i = sz;
567
568 while (TRUE)
569 {
570 r[--j] += carry + a[--i];
571
572 if (r[j] >= 100)
573 {
574 r[j] -= 100;
575 carry = 1;
576 }
577 else
578 carry = 0;
579
580 if (i == 0)
581 break;
582 }
583
584 if (carry)
585 {
586 while (TRUE)
587 {
588 r[--j] += 1;
589
590 if (r[j] < 100)
591 break;
592
593 r[j] -= 100;
594 }
595 }
596 }
597 /****************************************************************************/
598 /*
599 * special subtraction function for use with the fast multiply operation
600 */
601 int M_fmul_subtract(UCHAR *r, UCHAR *a, UCHAR *b, int sz)
602 {
603 int k, jtmp, sflag, nb, borrow;
604
605 nb = sz;
606 sflag = 0; /* sign flag: assume the numbers are equal */
607
608 /*
609 * find if a > b (so we perform a-b)
610 * or a < b (so we perform b-a)
611 */
612
613 for (k=0; k < nb; k++)
614 {
615 if (a[k] < b[k])
616 {
617 sflag = -1;
618 break;
619 }
620
621 if (a[k] > b[k])
622 {
623 sflag = 1;
624 break;
625 }
626 }
627
628 if (sflag == 0)
629 {
630 memset(r, 0, nb); /* zero out the result */
631 }
632 else
633 {
634 k = nb;
635 borrow = 0;
636
637 while (TRUE)
638 {
639 k--;
640
641 if (sflag == 1)
642 jtmp = (int)a[k] - ((int)b[k] + borrow);
643 else
644 jtmp = (int)b[k] - ((int)a[k] + borrow);
645
646 if (jtmp >= 0)
647 {
648 r[k] = (UCHAR)jtmp;
649 borrow = 0;
650 }
651 else
652 {
653 r[k] = (UCHAR)(100 + jtmp);
654 borrow = 1;
655 }
656
657 if (k == 0)
658 break;
659 }
660 }
661
662 return(sflag);
663 }
664 /****************************************************************************/
665 int M_next_power_of_2(int n)
666 {
667 int ct, k;
668
669 if (n <= 2)
670 return(n);
671
672 k = 2;
673 ct = 0;
674
675 while (TRUE)
676 {
677 if (k >= n)
678 break;
679
680 k = k << 1;
681
682 if (++ct == bit_limit)
683 {
684 /* fatal, this does not return */
685
686 M_apm_log_error_msg(M_APM_FATAL,
687 "\'M_next_power_of_2\', ERROR :sizeof(int) too small ??");
688 }
689 }
690
691 return(k);
692 }
693 /****************************************************************************/
694 int M_get_stack_ptr(int sz)
695 {
696 int i, k;
697 UCHAR *cp;
698
699 k = ++M_mul_stack_ptr;
700
701 /* if size is 0, just need to malloc and return */
702 if (mul_stack_data_size[k] == 0)
703 {
704 if ((i = sz) < 16)
705 i = 16;
706
707 if ((cp = (UCHAR *)MAPM_MALLOC(i + 4)) == NULL)
708 {
709 /* fatal, this does not return */
710
711 M_apm_log_error_msg(M_APM_FATAL, M_stack_ptr_error_msg);
712 }
713
714 mul_stack_data[k] = cp;
715 mul_stack_data_size[k] = i;
716 }
717 else /* it has been malloc'ed, see if it's big enough */
718 {
719 if (sz > mul_stack_data_size[k])
720 {
721 cp = mul_stack_data[k];
722
723 if ((cp = (UCHAR *)MAPM_REALLOC(cp, (sz + 4))) == NULL)
724 {
725 /* fatal, this does not return */
726
727 M_apm_log_error_msg(M_APM_FATAL, M_stack_ptr_error_msg);
728 }
729
730 mul_stack_data[k] = cp;
731 mul_stack_data_size[k] = sz;
732 }
733 }
734
735 return(k);
736 }
737 /****************************************************************************/
738
739 #ifdef NO_FFT_MULTIPLY
740
741 /*
742 * multiply a 4 byte number by a 4 byte number
743 * yielding an 8 byte result. each byte contains
744 * a base 100 'digit', i.e.: range from 0-99.
745 *
746 * MSB LSB
747 *
748 * a,b [0] [1] [2] [3]
749 * result [0] ..... [7]
750 */
751
752 void M_4_byte_multiply(UCHAR *r, UCHAR *a, UCHAR *b)
753 {
754 int jj;
755 unsigned int *ip, t1, rr[8];
756
757 memset(rr, 0, (8 * sizeof(int))); /* zero out result */
758 jj = 3;
759 ip = rr + 5;
760
761 /*
762 * loop for one number [b], un-roll the inner 'loop' [a]
763 *
764 * accumulate partial sums in UINT array, release carries
765 * and convert back to base 100 at the end
766 */
767
768 while (1)
769 {
770 t1 = (unsigned int)b[jj];
771 ip += 2;
772
773 *ip-- += t1 * a[3];
774 *ip-- += t1 * a[2];
775 *ip-- += t1 * a[1];
776 *ip += t1 * a[0];
777
778 if (jj-- == 0)
779 break;
780 }
781
782 jj = 7;
783
784 while (1)
785 {
786 t1 = rr[jj] / 100;
787 r[jj] = (UCHAR)(rr[jj] - 100 * t1);
788
789 if (jj == 0)
790 break;
791
792 rr[--jj] += t1;
793 }
794 }
795
796 #endif
797
798 /****************************************************************************/