w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz (unofficial, experimental doxygen-generated source code documentation)

sqrtrem.c
Go to the documentation of this file.
1 /* mpn_sqrtrem -- square root and remainder
2 
3  Contributed to the GNU project by Paul Zimmermann (most code),
4  Torbjorn Granlund (mpn_sqrtrem1) and Marco Bodrato (mpn_dc_sqrt).
5 
6  THE FUNCTIONS IN THIS FILE EXCEPT mpn_sqrtrem ARE INTERNAL WITH MUTABLE
7  INTERFACES. IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
8  IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A
9  FUTURE GMP RELEASE.
10 
11 Copyright 1999-2002, 2004, 2005, 2008, 2010, 2012, 2015, 2017 Free Software
12 Foundation, Inc.
13 
14 This file is part of the GNU MP Library.
15 
16 The GNU MP Library is free software; you can redistribute it and/or modify
17 it under the terms of either:
18 
19  * the GNU Lesser General Public License as published by the Free
20  Software Foundation; either version 3 of the License, or (at your
21  option) any later version.
22 
23 or
24 
25  * the GNU General Public License as published by the Free Software
26  Foundation; either version 2 of the License, or (at your option) any
27  later version.
28 
29 or both in parallel, as here.
30 
31 The GNU MP Library is distributed in the hope that it will be useful, but
32 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
33 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
34 for more details.
35 
36 You should have received copies of the GNU General Public License and the
37 GNU Lesser General Public License along with the GNU MP Library. If not,
38 see https://www.gnu.org/licenses/. */
39 
40 
41 /* See "Karatsuba Square Root", reference in gmp.texi. */
42 
43 
44 #include <stdio.h>
45 #include <stdlib.h>
46 
47 #include "gmp-impl.h"
48 #include "longlong.h"
49 #define USE_DIVAPPR_Q 1
50 #define TRACE(x)
51 
52 static const unsigned char invsqrttab[384] = /* The common 0x100 was removed */
53 {
54  0xff,0xfd,0xfb,0xf9,0xf7,0xf5,0xf3,0xf2, /* sqrt(1/80)..sqrt(1/87) */
55  0xf0,0xee,0xec,0xea,0xe9,0xe7,0xe5,0xe4, /* sqrt(1/88)..sqrt(1/8f) */
56  0xe2,0xe0,0xdf,0xdd,0xdb,0xda,0xd8,0xd7, /* sqrt(1/90)..sqrt(1/97) */
57  0xd5,0xd4,0xd2,0xd1,0xcf,0xce,0xcc,0xcb, /* sqrt(1/98)..sqrt(1/9f) */
58  0xc9,0xc8,0xc6,0xc5,0xc4,0xc2,0xc1,0xc0, /* sqrt(1/a0)..sqrt(1/a7) */
59  0xbe,0xbd,0xbc,0xba,0xb9,0xb8,0xb7,0xb5, /* sqrt(1/a8)..sqrt(1/af) */
60  0xb4,0xb3,0xb2,0xb0,0xaf,0xae,0xad,0xac, /* sqrt(1/b0)..sqrt(1/b7) */
61  0xaa,0xa9,0xa8,0xa7,0xa6,0xa5,0xa4,0xa3, /* sqrt(1/b8)..sqrt(1/bf) */
62  0xa2,0xa0,0x9f,0x9e,0x9d,0x9c,0x9b,0x9a, /* sqrt(1/c0)..sqrt(1/c7) */
63  0x99,0x98,0x97,0x96,0x95,0x94,0x93,0x92, /* sqrt(1/c8)..sqrt(1/cf) */
64  0x91,0x90,0x8f,0x8e,0x8d,0x8c,0x8c,0x8b, /* sqrt(1/d0)..sqrt(1/d7) */
65  0x8a,0x89,0x88,0x87,0x86,0x85,0x84,0x83, /* sqrt(1/d8)..sqrt(1/df) */
66  0x83,0x82,0x81,0x80,0x7f,0x7e,0x7e,0x7d, /* sqrt(1/e0)..sqrt(1/e7) */
67  0x7c,0x7b,0x7a,0x79,0x79,0x78,0x77,0x76, /* sqrt(1/e8)..sqrt(1/ef) */
68  0x76,0x75,0x74,0x73,0x72,0x72,0x71,0x70, /* sqrt(1/f0)..sqrt(1/f7) */
69  0x6f,0x6f,0x6e,0x6d,0x6d,0x6c,0x6b,0x6a, /* sqrt(1/f8)..sqrt(1/ff) */
70  0x6a,0x69,0x68,0x68,0x67,0x66,0x66,0x65, /* sqrt(1/100)..sqrt(1/107) */
71  0x64,0x64,0x63,0x62,0x62,0x61,0x60,0x60, /* sqrt(1/108)..sqrt(1/10f) */
72  0x5f,0x5e,0x5e,0x5d,0x5c,0x5c,0x5b,0x5a, /* sqrt(1/110)..sqrt(1/117) */
73  0x5a,0x59,0x59,0x58,0x57,0x57,0x56,0x56, /* sqrt(1/118)..sqrt(1/11f) */
74  0x55,0x54,0x54,0x53,0x53,0x52,0x52,0x51, /* sqrt(1/120)..sqrt(1/127) */
75  0x50,0x50,0x4f,0x4f,0x4e,0x4e,0x4d,0x4d, /* sqrt(1/128)..sqrt(1/12f) */
76  0x4c,0x4b,0x4b,0x4a,0x4a,0x49,0x49,0x48, /* sqrt(1/130)..sqrt(1/137) */
77  0x48,0x47,0x47,0x46,0x46,0x45,0x45,0x44, /* sqrt(1/138)..sqrt(1/13f) */
78  0x44,0x43,0x43,0x42,0x42,0x41,0x41,0x40, /* sqrt(1/140)..sqrt(1/147) */
79  0x40,0x3f,0x3f,0x3e,0x3e,0x3d,0x3d,0x3c, /* sqrt(1/148)..sqrt(1/14f) */
80  0x3c,0x3b,0x3b,0x3a,0x3a,0x39,0x39,0x39, /* sqrt(1/150)..sqrt(1/157) */
81  0x38,0x38,0x37,0x37,0x36,0x36,0x35,0x35, /* sqrt(1/158)..sqrt(1/15f) */
82  0x35,0x34,0x34,0x33,0x33,0x32,0x32,0x32, /* sqrt(1/160)..sqrt(1/167) */
83  0x31,0x31,0x30,0x30,0x2f,0x2f,0x2f,0x2e, /* sqrt(1/168)..sqrt(1/16f) */
84  0x2e,0x2d,0x2d,0x2d,0x2c,0x2c,0x2b,0x2b, /* sqrt(1/170)..sqrt(1/177) */
85  0x2b,0x2a,0x2a,0x29,0x29,0x29,0x28,0x28, /* sqrt(1/178)..sqrt(1/17f) */
86  0x27,0x27,0x27,0x26,0x26,0x26,0x25,0x25, /* sqrt(1/180)..sqrt(1/187) */
87  0x24,0x24,0x24,0x23,0x23,0x23,0x22,0x22, /* sqrt(1/188)..sqrt(1/18f) */
88  0x21,0x21,0x21,0x20,0x20,0x20,0x1f,0x1f, /* sqrt(1/190)..sqrt(1/197) */
89  0x1f,0x1e,0x1e,0x1e,0x1d,0x1d,0x1d,0x1c, /* sqrt(1/198)..sqrt(1/19f) */
90  0x1c,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x19, /* sqrt(1/1a0)..sqrt(1/1a7) */
91  0x19,0x19,0x18,0x18,0x18,0x18,0x17,0x17, /* sqrt(1/1a8)..sqrt(1/1af) */
92  0x17,0x16,0x16,0x16,0x15,0x15,0x15,0x14, /* sqrt(1/1b0)..sqrt(1/1b7) */
93  0x14,0x14,0x13,0x13,0x13,0x12,0x12,0x12, /* sqrt(1/1b8)..sqrt(1/1bf) */
94  0x12,0x11,0x11,0x11,0x10,0x10,0x10,0x0f, /* sqrt(1/1c0)..sqrt(1/1c7) */
95  0x0f,0x0f,0x0f,0x0e,0x0e,0x0e,0x0d,0x0d, /* sqrt(1/1c8)..sqrt(1/1cf) */
96  0x0d,0x0c,0x0c,0x0c,0x0c,0x0b,0x0b,0x0b, /* sqrt(1/1d0)..sqrt(1/1d7) */
97  0x0a,0x0a,0x0a,0x0a,0x09,0x09,0x09,0x09, /* sqrt(1/1d8)..sqrt(1/1df) */
98  0x08,0x08,0x08,0x07,0x07,0x07,0x07,0x06, /* sqrt(1/1e0)..sqrt(1/1e7) */
99  0x06,0x06,0x06,0x05,0x05,0x05,0x04,0x04, /* sqrt(1/1e8)..sqrt(1/1ef) */
100  0x04,0x04,0x03,0x03,0x03,0x03,0x02,0x02, /* sqrt(1/1f0)..sqrt(1/1f7) */
101  0x02,0x02,0x01,0x01,0x01,0x01,0x00,0x00 /* sqrt(1/1f8)..sqrt(1/1ff) */
102 };
103 
104 /* Compute s = floor(sqrt(a0)), and *rp = a0 - s^2. */
105 
106 #if GMP_NUMB_BITS > 32
107 #define MAGIC CNST_LIMB(0x10000000000) /* 0xffe7debbfc < MAGIC < 0x232b1850f410 */
108 #else
109 #define MAGIC CNST_LIMB(0x100000) /* 0xfee6f < MAGIC < 0x29cbc8 */
110 #endif
111 
112 static mp_limb_t
114 {
115 #if GMP_NUMB_BITS > 32
116  mp_limb_t a1;
117 #endif
118  mp_limb_t x0, t2, t, x2;
119  unsigned abits;
120 
122  ASSERT_ALWAYS (GMP_LIMB_BITS == 32 || GMP_LIMB_BITS == 64);
123  ASSERT (a0 >= GMP_NUMB_HIGHBIT / 2);
124 
125  /* Use Newton iterations for approximating 1/sqrt(a) instead of sqrt(a),
126  since we can do the former without division. As part of the last
127  iteration convert from 1/sqrt(a) to sqrt(a). */
128 
129  abits = a0 >> (GMP_LIMB_BITS - 1 - 8); /* extract bits for table lookup */
130  x0 = 0x100 | invsqrttab[abits - 0x80]; /* initial 1/sqrt(a) */
131 
132  /* x0 is now an 8 bits approximation of 1/sqrt(a0) */
133 
134 #if GMP_NUMB_BITS > 32
135  a1 = a0 >> (GMP_LIMB_BITS - 1 - 32);
136  t = (mp_limb_signed_t) (CNST_LIMB(0x2000000000000) - 0x30000 - a1 * x0 * x0) >> 16;
137  x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));
138 
139  /* x0 is now a 16 bits approximation of 1/sqrt(a0) */
140 
141  t2 = x0 * (a0 >> (32-8));
142  t = t2 >> 25;
143  t = ((mp_limb_signed_t) ((a0 << 14) - t * t - MAGIC) >> (32-8));
144  x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 15);
145  x0 >>= 32;
146 #else
147  t2 = x0 * (a0 >> (16-8));
148  t = t2 >> 13;
149  t = ((mp_limb_signed_t) ((a0 << 6) - t * t - MAGIC) >> (16-8));
150  x0 = t2 + ((mp_limb_signed_t) (x0 * t) >> 7);
151  x0 >>= 16;
152 #endif
153 
154  /* x0 is now a full limb approximation of sqrt(a0) */
155 
156  x2 = x0 * x0;
157  if (x2 + 2*x0 <= a0 - 1)
158  {
159  x2 += 2*x0 + 1;
160  x0++;
161  }
162 
163  *rp = a0 - x2;
164  return x0;
165 }
166 
167 
168 #define Prec (GMP_NUMB_BITS >> 1)
169 #if ! defined(SQRTREM2_INPLACE)
170 #define SQRTREM2_INPLACE 0
171 #endif
172 
173 /* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized
174  return cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0] */
175 #if SQRTREM2_INPLACE
176 #define CALL_SQRTREM2_INPLACE(sp,rp) mpn_sqrtrem2 (sp, rp)
177 static mp_limb_t
179 {
180  mp_srcptr np = rp;
181 #else
182 #define CALL_SQRTREM2_INPLACE(sp,rp) mpn_sqrtrem2 (sp, rp, rp)
183 static mp_limb_t
185 {
186 #endif
187  mp_limb_t q, u, np0, sp0, rp0, q2;
188  int cc;
189 
190  ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);
191 
192  np0 = np[0];
193  sp0 = mpn_sqrtrem1 (rp, np[1]);
194  rp0 = rp[0];
195  /* rp0 <= 2*sp0 < 2^(Prec + 1) */
196  rp0 = (rp0 << (Prec - 1)) + (np0 >> (Prec + 1));
197  q = rp0 / sp0;
198  /* q <= 2^Prec, if q = 2^Prec, reduce the overestimate. */
199  q -= q >> Prec;
200  /* now we have q < 2^Prec */
201  u = rp0 - q * sp0;
202  /* now we have (rp[0]<<Prec + np0>>Prec)/2 = q * sp0 + u */
203  sp0 = (sp0 << Prec) | q;
204  cc = u >> (Prec - 1);
205  rp0 = ((u << (Prec + 1)) & GMP_NUMB_MASK) + (np0 & ((CNST_LIMB (1) << (Prec + 1)) - 1));
206  /* subtract q * q from rp */
207  q2 = q * q;
208  cc -= rp0 < q2;
209  rp0 -= q2;
210  if (cc < 0)
211  {
212  rp0 += sp0;
213  cc += rp0 < sp0;
214  --sp0;
215  rp0 += sp0;
216  cc += rp0 < sp0;
217  }
218 
219  rp[0] = rp0;
220  sp[0] = sp0;
221  return cc;
222 }
223 
224 /* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
225  and in {np, n} the low n limbs of the remainder, returns the high
226  limb of the remainder (which is 0 or 1).
227  Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
228  where B=2^GMP_NUMB_BITS.
229  Needs a scratch of n/2+1 limbs. */
230 static mp_limb_t
232 {
233  mp_limb_t q; /* carry out of {sp, n} */
234  int c, b; /* carry out of remainder */
235  mp_size_t l, h;
236 
237  ASSERT (n > 1);
238  ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);
239 
240  l = n / 2;
241  h = n - l;
242  if (h == 1)
243  q = CALL_SQRTREM2_INPLACE (sp + l, np + 2 * l);
244  else
245  q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h, 0, scratch);
246  if (q != 0)
247  ASSERT_CARRY (mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h));
248  TRACE(printf("tdiv_qr(,,,,%u,,%u) -> %u\n", (unsigned) n, (unsigned) h, (unsigned) (n - h + 1)));
249  mpn_tdiv_qr (scratch, np + l, 0, np + l, n, sp + l, h);
250  q += scratch[l];
251  c = scratch[0] & 1;
252  mpn_rshift (sp, scratch, l, 1);
253  sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
254  if (UNLIKELY ((sp[0] & approx) != 0)) /* (sp[0] & mask) > 1 */
255  return 1; /* Remainder is non-zero */
256  q >>= 1;
257  if (c != 0)
258  c = mpn_add_n (np + l, np + l, sp + l, h);
259  TRACE(printf("sqr(,,%u)\n", (unsigned) l));
260  mpn_sqr (np + n, sp, l);
261  b = q + mpn_sub_n (np, np, np + n, 2 * l);
262  c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);
263 
264  if (c < 0)
265  {
266  q = mpn_add_1 (sp + l, sp + l, h, q);
267 #if HAVE_NATIVE_mpn_addlsh1_n_ip1 || HAVE_NATIVE_mpn_addlsh1_n
268  c += mpn_addlsh1_n_ip1 (np, sp, n) + 2 * q;
269 #else
270  c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
271 #endif
272  c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
273  q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));
274  }
275 
276  return c;
277 }
278 
279 #if USE_DIVAPPR_Q
280 static void
282 {
283  gmp_pi1_t inv;
284  mp_limb_t qh;
285  ASSERT (dn > 2);
286  ASSERT (nn >= dn);
287  ASSERT ((dp[dn-1] & GMP_NUMB_HIGHBIT) != 0);
288 
289  MPN_COPY (scratch, np, nn);
290  invert_pi1 (inv, dp[dn-1], dp[dn-2]);
292  qh = mpn_sbpi1_divappr_q (qp, scratch, nn, dp, dn, inv.inv32);
294  qh = mpn_dcpi1_divappr_q (qp, scratch, nn, dp, dn, &inv);
295  else
296  {
297  mp_size_t itch = mpn_mu_divappr_q_itch (nn, dn, 0);
298  TMP_DECL;
299  TMP_MARK;
300  /* Sadly, scratch is too small. */
301  qh = mpn_mu_divappr_q (qp, np, nn, dp, dn, TMP_ALLOC_LIMBS (itch));
302  TMP_FREE;
303  }
304  qp [nn - dn] = qh;
305 }
306 #endif
307 
308 /* writes in {sp, n} the square root (rounded towards zero) of {np, 2n-odd},
309  returns zero if the operand was a perfect square, one otherwise.
310  Assumes {np, 2n-odd}*4^nsh is normalized, i.e. B > np[2n-1-odd]*4^nsh >= B/4
311  where B=2^GMP_NUMB_BITS.
312  THINK: In the odd case, three more (dummy) limbs are taken into account,
313  when nsh is maximal, two limbs are discarded from the result of the
314  division. Too much? Is a single dummy limb enough? */
315 static int
316 mpn_dc_sqrt (mp_ptr sp, mp_srcptr np, mp_size_t n, unsigned nsh, unsigned odd)
317 {
318  mp_limb_t q; /* carry out of {sp, n} */
319  int c; /* carry out of remainder */
320  mp_size_t l, h;
321  mp_ptr qp, tp, scratch;
322  TMP_DECL;
323  TMP_MARK;
324 
325  ASSERT (np[2 * n - 1 - odd] != 0);
326  ASSERT (n > 4);
327  ASSERT (nsh < GMP_NUMB_BITS / 2);
328 
329  l = (n - 1) / 2;
330  h = n - l;
331  ASSERT (n >= l + 2 && l + 2 >= h && h > l && l >= 1 + odd);
332  scratch = TMP_ALLOC_LIMBS (l + 2 * n + 5 - USE_DIVAPPR_Q); /* n + 2-USE_DIVAPPR_Q */
333  tp = scratch + n + 2 - USE_DIVAPPR_Q; /* n + h + 1, but tp [-1] is writable */
334  if (nsh != 0)
335  {
336  /* o is used to exactly set the lowest bits of the dividend, is it needed? */
337  int o = l > (1 + odd);
338  ASSERT_NOCARRY (mpn_lshift (tp - o, np + l - 1 - o - odd, n + h + 1 + o, 2 * nsh));
339  }
340  else
341  MPN_COPY (tp, np + l - 1 - odd, n + h + 1);
342  q = mpn_dc_sqrtrem (sp + l, tp + l + 1, h, 0, scratch);
343  if (q != 0)
344  ASSERT_CARRY (mpn_sub_n (tp + l + 1, tp + l + 1, sp + l, h));
345  qp = tp + n + 1; /* l + 2 */
346  TRACE(printf("div(appr)_q(,,%u,,%u) -> %u \n", (unsigned) n+1, (unsigned) h, (unsigned) (n + 1 - h + 1)));
347 #if USE_DIVAPPR_Q
348  mpn_divappr_q (qp, tp, n + 1, sp + l, h, scratch);
349 #else
350  mpn_div_q (qp, tp, n + 1, sp + l, h, scratch);
351 #endif
352  q += qp [l + 1];
353  c = 1;
354  if (q > 1)
355  {
356  /* FIXME: if s!=0 we will shift later, a noop on this area. */
358  }
359  else
360  {
361  /* FIXME: if s!=0 we will shift again later, shift just once. */
362  mpn_rshift (sp, qp + 1, l, 1);
363  sp[l - 1] |= q << (GMP_NUMB_BITS - 1);
364  if (((qp[0] >> (2 + USE_DIVAPPR_Q)) | /* < 3 + 4*USE_DIVAPPR_Q */
365  (qp[1] & (GMP_NUMB_MASK >> ((GMP_NUMB_BITS >> odd)- nsh - 1)))) == 0)
366  {
367  mp_limb_t cy;
368  /* Approximation is not good enough, the extra limb(+ nsh bits)
369  is smaller than needed to absorb the possible error. */
370  /* {qp + 1, l + 1} equals 2*{sp, l} */
371  /* FIXME: use mullo or wrap-around, or directly evaluate
372  remainder with a single sqrmod_bnm1. */
373  TRACE(printf("mul(,,%u,,%u)\n", (unsigned) h, (unsigned) (l+1)));
374  ASSERT_NOCARRY (mpn_mul (scratch, sp + l, h, qp + 1, l + 1));
375  /* Compute the remainder of the previous mpn_div(appr)_q. */
376  cy = mpn_sub_n (tp + 1, tp + 1, scratch, h);
377 #if USE_DIVAPPR_Q || WANT_ASSERT
378  MPN_DECR_U (tp + 1 + h, l, cy);
379 #if USE_DIVAPPR_Q
380  ASSERT (mpn_cmp (tp + 1 + h, scratch + h, l) <= 0);
381  if (mpn_cmp (tp + 1 + h, scratch + h, l) < 0)
382  {
383  /* May happen only if div result was not exact. */
384 #if HAVE_NATIVE_mpn_addlsh1_n_ip1 || HAVE_NATIVE_mpn_addlsh1_n
385  cy = mpn_addlsh1_n_ip1 (tp + 1, sp + l, h);
386 #else
387  cy = mpn_addmul_1 (tp + 1, sp + l, h, CNST_LIMB(2));
388 #endif
389  ASSERT_NOCARRY (mpn_add_1 (tp + 1 + h, tp + 1 + h, l, cy));
390  MPN_DECR_U (sp, l, 1);
391  }
392  /* Can the root be exact when a correction was needed? We
393  did not find an example, but it depends on divappr
394  internals, and we can not assume it true in general...*/
395  /* else */
396 #else /* WANT_ASSERT */
397  ASSERT (mpn_cmp (tp + 1 + h, scratch + h, l) == 0);
398 #endif
399 #endif
400  if (mpn_zero_p (tp + l + 1, h - l))
401  {
402  TRACE(printf("sqr(,,%u)\n", (unsigned) l));
403  mpn_sqr (scratch, sp, l);
404  c = mpn_cmp (tp + 1, scratch + l, l);
405  if (c == 0)
406  {
407  if (nsh != 0)
408  {
409  mpn_lshift (tp, np, l, 2 * nsh);
410  np = tp;
411  }
412  c = mpn_cmp (np, scratch + odd, l - odd);
413  }
414  if (c < 0)
415  {
416  MPN_DECR_U (sp, l, 1);
417  c = 1;
418  }
419  }
420  }
421  }
422  TMP_FREE;
423 
424  if ((odd | nsh) != 0)
425  mpn_rshift (sp, sp, n, nsh + (odd ? GMP_NUMB_BITS / 2 : 0));
426  return c;
427 }
428 
429 
430 mp_size_t
432 {
433  mp_limb_t cc, high, rl;
434  int c;
435  mp_size_t rn, tn;
436  TMP_DECL;
437 
438  ASSERT (nn > 0);
439  ASSERT_MPN (np, nn);
440 
441  ASSERT (np[nn - 1] != 0);
443  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
444  ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));
445 
446  high = np[nn - 1];
447  if (high & (GMP_NUMB_HIGHBIT | (GMP_NUMB_HIGHBIT / 2)))
448  c = 0;
449  else
450  {
452  c -= GMP_NAIL_BITS;
453 
454  c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
455  }
456  if (nn == 1) {
457  if (c == 0)
458  {
459  sp[0] = mpn_sqrtrem1 (&rl, high);
460  if (rp != NULL)
461  rp[0] = rl;
462  }
463  else
464  {
465  cc = mpn_sqrtrem1 (&rl, high << (2*c)) >> c;
466  sp[0] = cc;
467  if (rp != NULL)
468  rp[0] = rl = high - cc*cc;
469  }
470  return rl != 0;
471  }
472  if (nn == 2) {
473  mp_limb_t tp [2];
474  if (rp == NULL) rp = tp;
475  if (c == 0)
476  {
477 #if SQRTREM2_INPLACE
478  rp[1] = high;
479  rp[0] = np[0];
480  cc = CALL_SQRTREM2_INPLACE (sp, rp);
481 #else
482  cc = mpn_sqrtrem2 (sp, rp, np);
483 #endif
484  rp[1] = cc;
485  return ((rp[0] | cc) != 0) + cc;
486  }
487  else
488  {
489  rl = np[0];
490  rp[1] = (high << (2*c)) | (rl >> (GMP_NUMB_BITS - 2*c));
491  rp[0] = rl << (2*c);
493  cc = sp[0] >>= c; /* c != 0, the highest bit of the root cc is 0. */
494  rp[0] = rl -= cc*cc; /* Computed modulo 2^GMP_LIMB_BITS, because it's smaller. */
495  return rl != 0;
496  }
497  }
498  tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */
499 
500  if ((rp == NULL) && (nn > 8))
501  return mpn_dc_sqrt (sp, np, tn, c, nn & 1);
502  TMP_MARK;
503  if (((nn & 1) | c) != 0)
504  {
505  mp_limb_t s0[1], mask;
506  mp_ptr tp, scratch;
507  TMP_ALLOC_LIMBS_2 (tp, 2 * tn, scratch, tn / 2 + 1);
508  tp[0] = 0; /* needed only when 2*tn > nn, but saves a test */
509  if (c != 0)
510  mpn_lshift (tp + (nn & 1), np, nn, 2 * c);
511  else
512  MPN_COPY (tp + (nn & 1), np, nn);
513  c += (nn & 1) ? GMP_NUMB_BITS / 2 : 0; /* c now represents k */
514  mask = (CNST_LIMB (1) << c) - 1;
515  rl = mpn_dc_sqrtrem (sp, tp, tn, (rp == NULL) ? mask - 1 : 0, scratch);
516  /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
517  thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
518  s0[0] = sp[0] & mask; /* S mod 2^k */
519  rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]); /* R = R + 2*s0*S */
520  cc = mpn_submul_1 (tp, s0, 1, s0[0]);
521  rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
522  mpn_rshift (sp, sp, tn, c);
523  tp[tn] = rl;
524  if (rp == NULL)
525  rp = tp;
526  c = c << 1;
527  if (c < GMP_NUMB_BITS)
528  tn++;
529  else
530  {
531  tp++;
532  c -= GMP_NUMB_BITS;
533  }
534  if (c != 0)
535  mpn_rshift (rp, tp, tn, c);
536  else
537  MPN_COPY_INCR (rp, tp, tn);
538  rn = tn;
539  }
540  else
541  {
542  if (rp != np)
543  {
544  if (rp == NULL) /* nn <= 8 */
545  rp = TMP_SALLOC_LIMBS (nn);
546  MPN_COPY (rp, np, nn);
547  }
548  rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn, 0, TMP_ALLOC_LIMBS(tn / 2 + 1)));
549  }
550 
551  MPN_NORMALIZE (rp, rn);
552 
553  TMP_FREE;
554  return rn;
555 }
rp
Definition: action.c:992
q
Definition: afm2pl.c:2287
#define x0
#define GMP_LIMB_BITS
Definition: asl.h:44
#define GMP_NUMB_MASK
Definition: asl.h:47
Definition: asl.h:63
#define n
Definition: t4ht.c:1290
#define b
Definition: jpegint.h:372
#define dn
Definition: devnag.c:324
#define q2
int h
Definition: dviconv.c:9
static int np
Definition: bifont.c:64
int printf()
#define tp
#define a0
#define a1
#define t
Definition: afcover.h:96
#define TMP_ALLOC_LIMBS(n)
Definition: gmp-impl.h:487
#define mpn_mu_divappr_q
Definition: gmp-impl.h:1554
#define TMP_MARK
Definition: gmp-impl.h:375
#define ASSERT_ALWAYS(expr)
Definition: gmp-impl.h:2435
#define MU_DIVAPPR_Q_THRESHOLD
Definition: gmp-impl.h:2245
#define ASSERT_NOCARRY(expr)
Definition: gmp-impl.h:2459
#define mpn_mu_divappr_q_itch
Definition: gmp-impl.h:1556
#define MPN_COPY(d, s, n)
Definition: gmp-impl.h:1849
#define invert_pi1(dinv, d1, d0)
Definition: gmp-impl.h:3064
#define MPN_DECR_U(ptr, size, n)
Definition: gmp-impl.h:2880
#define MPN_FILL(dst, n, f)
Definition: gmp-impl.h:1908
#define MPN_NORMALIZE(DST, NLIMBS)
Definition: gmp-impl.h:1939
#define MPN_OVERLAP_P(xp, xsize, yp, ysize)
Definition: gmp-impl.h:2387
#define mpn_addlsh1_n_ip1
Definition: gmp-impl.h:977
#define TMP_DECL
Definition: gmp-impl.h:373
#define TMP_ALLOC_LIMBS_2(xp, xsize, yp, ysize)
Definition: gmp-impl.h:499
#define ASSERT_CARRY(expr)
Definition: gmp-impl.h:2458
#define UNLIKELY(cond)
Definition: gmp-impl.h:532
#define DC_DIVAPPR_Q_THRESHOLD
Definition: gmp-impl.h:2213
#define TMP_FREE
Definition: gmp-impl.h:382
#define mpn_sbpi1_divappr_q
Definition: gmp-impl.h:1530
#define BELOW_THRESHOLD(size, thresh)
Definition: gmp-impl.h:1368
#define mpn_dcpi1_divappr_q
Definition: gmp-impl.h:1541
#define CNST_LIMB(C)
Definition: gmp-impl.h:3935
#define MPN_SAME_OR_SEPARATE_P(xp, yp, size)
Definition: gmp-impl.h:2395
#define ASSERT_MPN(ptr, size)
Definition: gmp-impl.h:2519
#define mpn_div_q
Definition: gmp-impl.h:1564
#define TMP_SALLOC_LIMBS(n)
Definition: gmp-impl.h:488
#define MPN_COPY_INCR(dst, src, n)
Definition: gmp-impl.h:1769
#define GMP_NUMB_HIGHBIT
Definition: gmp-impl.h:593
#define mpn_addmul_1
Definition: gmp.h:1476
#define mpn_add_n
Definition: gmp.h:1473
#define mpn_cmp
Definition: gmp.h:1479
#define GMP_NAIL_BITS
Definition: gmp.h:44
#define mpn_sqrtrem
Definition: gmp.h:1600
#define mpn_sub_n
Definition: gmp.h:1613
#define mpn_sqr
Definition: gmp.h:1552
#define mpn_zero_p
Definition: gmp.h:1484
long long int mp_limb_signed_t
Definition: gmp.h:139
#define mpn_submul_1
Definition: gmp.h:1616
#define mpn_add_1
Definition: gmp.h:1468
#define mpn_mul
Definition: gmp.h:1543
#define mpn_rshift
Definition: gmp.h:1585
#define GMP_NUMB_MAX
Definition: gmp.h:48
#define GMP_NUMB_BITS
Definition: gmp.h:46
#define mpn_sub_1
Definition: gmp.h:1608
long int mp_size_t
Definition: gmp.h:175
#define mpn_lshift
Definition: gmp.h:1537
#define c(n)
Definition: gpos-common.c:150
#define NULL
Definition: ftobjs.h:61
small capitals from c petite p scientific f u
Definition: afcover.h:88
int int cy
Definition: gdfx.h:13
#define ASSERT(e)
Definition: error.h:48
#define count_leading_zeros(count, x)
Definition: longlong.h:2202
int high
Definition: combiners.h:904
void mpn_tdiv_qr(mp_limb_t *, mp_limb_t *, mp_size_t, const mp_limb_t *, mp_size_t, const mp_limb_t *, mp_size_t)
#define CALL_SQRTREM2_INPLACE(sp, rp)
Definition: sqrtrem.c:182
#define TRACE(x)
Definition: sqrtrem.c:50
static mp_limb_t mpn_dc_sqrtrem(mp_ptr sp, mp_ptr np, mp_size_t n, mp_limb_t approx, mp_ptr scratch)
Definition: sqrtrem.c:231
#define MAGIC
Definition: sqrtrem.c:107
#define USE_DIVAPPR_Q
Definition: sqrtrem.c:49
static mp_limb_t mpn_sqrtrem2(mp_ptr sp, mp_ptr rp, mp_srcptr np)
Definition: sqrtrem.c:184
static const unsigned char invsqrttab[384]
Definition: sqrtrem.c:52
static int mpn_dc_sqrt(mp_ptr sp, mp_srcptr np, mp_size_t n, unsigned nsh, unsigned odd)
Definition: sqrtrem.c:316
#define Prec
Definition: sqrtrem.c:168
static mp_limb_t mpn_sqrtrem1(mp_ptr rp, mp_limb_t a0)
Definition: sqrtrem.c:113
static void mpn_divappr_q(mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_ptr scratch)
Definition: sqrtrem.c:281
int tn
Definition: fc-lang.py:225
#define odd(n)
Definition: pbmplus.h:227
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported abits
#define t2
integer nn[24]
Definition: pmxab.c:90
#define x2
#define mask(n)
Definition: lbitlib.c:93
RETTYPE mp_ptr mp_size_t mp_srcptr dp
Definition: sec_div.c:70
mp_limb_t inv32
Definition: gmp-impl.h:256
Definition: dvips.h:235
#define sp
Definition: stack.c:11
#define s0
Definition: tokst.h:45