w32tex About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.   Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)
exp3.c
Go to the documentation of this file.
1 /* mpfr_exp -- exponential of a floating-point number
2
3 Copyright 1999, 2001-2020 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramba projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 #define MPFR_NEED_LONGLONG_H /* for MPFR_MPZ_SIZEINBASE2 */
24 #include "mpfr-impl.h"
25
26 /* y <- exp(p/2^r) within 1 ulp, using 2^m terms from the series
27  Assume |p/2^r| < 1.
28  We use the following binary splitting formula:
29  P(a,b) = p if a+1=b, P(a,c)*P(c,b) otherwise
30  Q(a,b) = a*2^r if a+1=b [except Q(0,1)=1], Q(a,c)*Q(c,b) otherwise
31  T(a,b) = P(a,b) if a+1=b, Q(c,b)*T(a,c)+P(a,c)*T(c,b) otherwise
32  Then exp(p/2^r) ~ T(0,i)/Q(0,i) for i so that (p/2^r)^i/i! is small enough.
33
34  Since P(a,b) = p^(b-a), and we consider only values of b-a of the form 2^j,
35  we don't need to compute P(), we only precompute p^(2^j) in the ptoj[] array
36  below.
37
38  Since Q(a,b) is divisible by 2^(r*(b-a-1)), we don't compute the power of
39  two part.
40 */
41 static void
43  mpz_t *Q, mpfr_prec_t *mult)
44 {
45  mp_bitcnt_t n, h, i, j; /* unsigned type, which is >= unsigned long */
46  mpz_t *S, *ptoj;
47  mpfr_prec_t *log2_nb_terms;
48  mpfr_exp_t diff, expo;
49  mpfr_prec_t precy = MPFR_PREC(y), prec_i_have, prec_ptoj;
50  int k, l;
51
52  MPFR_ASSERTN ((size_t) m < sizeof (long) * CHAR_BIT - 1);
53
54  S = Q + (m+1);
55  ptoj = Q + 2*(m+1); /* ptoj[i] = mantissa^(2^i) */
56  log2_nb_terms = mult + (m+1);
57
58  /* Normalize p */
59  MPFR_ASSERTD (mpz_cmp_ui (p, 0) != 0);
60  n = mpz_scan1 (p, 0); /* number of trailing zeros in p */
61  MPFR_ASSERTN (n <= LONG_MAX); /* This is a limitation. */
62  mpz_tdiv_q_2exp (p, p, n);
63  r -= (long) n; /* since |p/2^r| < 1 and p >= 1, r >= 1 */
64
65  /* Set initial var */
66  mpz_set (ptoj[0], p);
67  for (k = 1; k < m; k++)
68  mpz_mul (ptoj[k], ptoj[k-1], ptoj[k-1]); /* ptoj[k] = p^(2^k) */
69  mpz_set_ui (Q[0], 1);
70  mpz_set_ui (S[0], 1);
71  k = 0;
72  mult[0] = 0; /* the multiplier P[k]/Q[k] for the remaining terms
73  satisfies P[k]/Q[k] <= 2^(-mult[k]) */
74  log2_nb_terms[0] = 0; /* log2(#terms) [exact in 1st loop where 2^k] */
75  prec_i_have = 0;
76
77  /* Main Loop */
78  n = 1UL << m;
79  MPFR_ASSERTN (n != 0); /* no overflow */
80  for (i = 1; (prec_i_have < precy) && (i < n); i++)
81  {
82  /* invariant: Q[0]*Q[1]*...*Q[k] equals i! */
83  k++;
84  log2_nb_terms[k] = 0; /* 1 term */
85  mpz_set_ui (Q[k], i + 1);
86  mpz_set_ui (S[k], i + 1);
87  j = i + 1; /* we have computed j = i+1 terms so far */
88  l = 0;
89  while ((j & 1) == 0) /* combine and reduce */
90  {
91  /* invariant: S[k] corresponds to 2^l consecutive terms */
92  mpz_mul (S[k], S[k], ptoj[l]);
93  mpz_mul (S[k-1], S[k-1], Q[k]);
94  /* Q[k] corresponds to 2^l consecutive terms too.
95  Since it does not contains the factor 2^(r*2^l),
96  when going from l to l+1 we need to multiply
97  by 2^(r*2^(l+1))/2^(r*2^l) = 2^(r*2^l) */
98  mpz_mul_2exp (S[k-1], S[k-1], r << l);
100  mpz_mul (Q[k-1], Q[k-1], Q[k]);
101  log2_nb_terms[k-1] ++; /* number of terms in S[k-1]
102  is a power of 2 by construction */
103  MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[k]);
104  MPFR_MPZ_SIZEINBASE2 (prec_ptoj, ptoj[l]);
105  mult[k-1] += prec_i_have + (r << l) - prec_ptoj - 1;
106  prec_i_have = mult[k] = mult[k-1];
107  /* since mult[k] >= mult[k-1] + nbits(Q[k]),
108  we have Q[0]*...*Q[k] <= 2^mult[k] = 2^prec_i_have */
109  l ++;
110  j >>= 1;
111  k --;
112  }
113  }
114
115  /* accumulate all products in S[0] and Q[0]. Warning: contrary to above,
116  here we do not have log2_nb_terms[k-1] = log2_nb_terms[k]+1. */
117  h = 0; /* number of accumulated terms in the right part S[k]/Q[k] */
118  while (k > 0)
119  {
120  j = log2_nb_terms[k-1];
121  mpz_mul (S[k], S[k], ptoj[j]);
122  mpz_mul (S[k-1], S[k-1], Q[k]);
123  h += (mp_bitcnt_t) 1 << log2_nb_terms[k];
124  mpz_mul_2exp (S[k-1], S[k-1], r * h);
126  mpz_mul (Q[k-1], Q[k-1], Q[k]);
127  k--;
128  }
129
130  /* Q[0] now equals i! */
131  MPFR_MPZ_SIZEINBASE2 (prec_i_have, S[0]);
132  diff = (mpfr_exp_t) prec_i_have - 2 * (mpfr_exp_t) precy;
133  expo = diff;
134  if (diff >= 0)
135  mpz_fdiv_q_2exp (S[0], S[0], diff);
136  else
137  mpz_mul_2exp (S[0], S[0], -diff);
138
139  MPFR_MPZ_SIZEINBASE2 (prec_i_have, Q[0]);
140  diff = (mpfr_exp_t) prec_i_have - (mpfr_prec_t) precy;
141  expo -= diff;
142  if (diff > 0)
143  mpz_fdiv_q_2exp (Q[0], Q[0], diff);
144  else
145  mpz_mul_2exp (Q[0], Q[0], -diff);
146
147  mpz_tdiv_q (S[0], S[0], Q[0]);
148  mpfr_set_z (y, S[0], MPFR_RNDD);
149  /* TODO: Check/prove that the following expression doesn't overflow. */
150  expo = MPFR_GET_EXP (y) + expo - r * (i - 1);
151  MPFR_SET_EXP (y, expo);
152 }
153
154 #define shift (GMP_NUMB_BITS/2)
155
156 int
158 {
159  mpfr_t t, x_copy, tmp;
160  mpz_t uk;
161  mpfr_exp_t ttt, shift_x;
162  unsigned long twopoweri;
163  mpz_t *P;
164  mpfr_prec_t *mult;
165  int i, k, loop;
166  int prec_x;
167  mpfr_prec_t realprec, Prec;
168  int iter;
169  int inexact = 0;
170  MPFR_SAVE_EXPO_DECL (expo);
171  MPFR_ZIV_DECL (ziv_loop);
172
174  (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec(x), mpfr_log_prec, x, rnd_mode),
175  ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec(y), mpfr_log_prec, y,
176  inexact));
177
178  MPFR_SAVE_EXPO_MARK (expo);
179
180  /* decompose x */
181  /* we first write x = 1.xxxxxxxxxxxxx
182  ----- k bits -- */
183  prec_x = MPFR_INT_CEIL_LOG2 (MPFR_PREC (x)) - MPFR_LOG2_GMP_NUMB_BITS;
184  if (prec_x < 0)
185  prec_x = 0;
186
187  ttt = MPFR_GET_EXP (x);
188  mpfr_init2 (x_copy, MPFR_PREC(x));
189  mpfr_set (x_copy, x, MPFR_RNDD);
190
191  /* we shift to get a number less than 1 */
192  if (ttt > 0)
193  {
194  shift_x = ttt;
195  mpfr_div_2ui (x_copy, x, ttt, MPFR_RNDN);
196  ttt = MPFR_GET_EXP (x_copy);
197  }
198  else
199  shift_x = 0;
200  MPFR_ASSERTD (ttt <= 0);
201
202  /* Init prec and vars */
203  realprec = MPFR_PREC (y) + MPFR_INT_CEIL_LOG2 (prec_x + MPFR_PREC (y));
204  Prec = realprec + shift + 2 + shift_x;
205  mpfr_init2 (t, Prec);
206  mpfr_init2 (tmp, Prec);
207  mpz_init (uk);
208
209  /* Main loop */
210  MPFR_ZIV_INIT (ziv_loop, realprec);
211  for (;;)
212  {
213  int scaled = 0;
215
216  k = MPFR_INT_CEIL_LOG2 (Prec) - MPFR_LOG2_GMP_NUMB_BITS;
217
218  /* now we have to extract */
219  twopoweri = GMP_NUMB_BITS;
220
221  /* Allocate tables */
222  P = (mpz_t*) mpfr_allocate_func (3*(k+2)*sizeof(mpz_t));
223  for (i = 0; i < 3*(k+2); i++)
224  mpz_init (P[i]);
225  mult = (mpfr_prec_t*) mpfr_allocate_func (2*(k+2)*sizeof(mpfr_prec_t));
226
227  /* Particular case for i==0 */
228  mpfr_extract (uk, x_copy, 0);
229  MPFR_ASSERTD (mpz_cmp_ui (uk, 0) != 0);
230  mpfr_exp_rational (tmp, uk, shift + twopoweri - ttt, k + 1, P, mult);
231  for (loop = 0; loop < shift; loop++)
232  mpfr_sqr (tmp, tmp, MPFR_RNDD);
233  twopoweri *= 2;
234
235  /* General case */
236  iter = (k <= prec_x) ? k : prec_x;
237  for (i = 1; i <= iter; i++)
238  {
239  mpfr_extract (uk, x_copy, i);
240  if (MPFR_LIKELY (mpz_cmp_ui (uk, 0) != 0))
241  {
242  mpfr_exp_rational (t, uk, twopoweri - ttt, k - i + 1, P, mult);
243  mpfr_mul (tmp, tmp, t, MPFR_RNDD);
244  }
245  MPFR_ASSERTN (twopoweri <= LONG_MAX/2);
246  twopoweri *=2;
247  }
248
249  /* Clear tables */
250  for (i = 0; i < 3*(k+2); i++)
251  mpz_clear (P[i]);
252  mpfr_free_func (P, 3*(k+2)*sizeof(mpz_t));
253  mpfr_free_func (mult, 2*(k+2)*sizeof(mpfr_prec_t));
254
255  if (shift_x > 0)
256  {
257  MPFR_BLOCK (flags, {
258  for (loop = 0; loop < shift_x - 1; loop++)
259  mpfr_sqr (tmp, tmp, MPFR_RNDD);
260  mpfr_sqr (t, tmp, MPFR_RNDD);
261  } );
262
264  {
265  /* tmp <= exact result, so that it is a real overflow. */
266  inexact = mpfr_overflow (y, rnd_mode, 1);
268  break;
269  }
270
272  {
273  /* This may be a spurious underflow. So, let's scale
274  the result. */
275  mpfr_mul_2ui (tmp, tmp, 1, MPFR_RNDD); /* no overflow, exact */
276  mpfr_sqr (t, tmp, MPFR_RNDD);
277  if (MPFR_IS_ZERO (t))
278  {
279  /* approximate result < 2^(emin - 3), thus
280  exact result < 2^(emin - 2). */
281  inexact = mpfr_underflow (y, (rnd_mode == MPFR_RNDN) ?
282  MPFR_RNDZ : rnd_mode, 1);
284  break;
285  }
286  scaled = 1;
287  }
288  }
289
290  if (MPFR_CAN_ROUND (shift_x > 0 ? t : tmp, realprec,
291  MPFR_PREC(y), rnd_mode))
292  {
293  inexact = mpfr_set (y, shift_x > 0 ? t : tmp, rnd_mode);
295  {
296  int inex2;
297  mpfr_exp_t ey;
298
299  /* The result has been scaled and needs to be corrected. */
300  ey = MPFR_GET_EXP (y);
301  inex2 = mpfr_mul_2si (y, y, -2, rnd_mode);
302  if (inex2) /* underflow */
303  {
304  if (rnd_mode == MPFR_RNDN && inexact < 0 &&
305  MPFR_IS_ZERO (y) && ey == __gmpfr_emin + 1)
306  {
307  /* Double rounding case: in MPFR_RNDN, the scaled
308  result has been rounded downward to 2^emin.
309  As the exact result is > 2^(emin - 2), correct
310  rounding must be done upward. */
311  /* TODO: make sure in coverage tests that this line
312  is reached. */
313  inexact = mpfr_underflow (y, MPFR_RNDU, 1);
314  }
315  else
316  {
317  /* No double rounding. */
318  inexact = inex2;
319  }
321  }
322  }
323  break;
324  }
325
326  MPFR_ZIV_NEXT (ziv_loop, realprec);
327  Prec = realprec + shift + 2 + shift_x;
328  mpfr_set_prec (t, Prec);
329  mpfr_set_prec (tmp, Prec);
330  }
331  MPFR_ZIV_FREE (ziv_loop);
332
333  mpz_clear (uk);
334  mpfr_clear (tmp);
335  mpfr_clear (t);
336  mpfr_clear (x_copy);
337  MPFR_SAVE_EXPO_FREE (expo);
338  return inexact;
339 }
#define CHAR_BIT
Definition: ChkTeX.h:91
#define LONG_MAX
Definition: ChkTeX.h:87
#define n
Definition: t4ht.c:1290
int mpfr_div_2ui(mpfr_ptr y, mpfr_srcptr x, unsigned long n, mpfr_rnd_t rnd_mode)
Definition: div_2ui.c:26
int h
Definition: dviconv.c:9
int mpfr_underflow(mpfr_ptr x, mpfr_rnd_t rnd_mode, int sign)
Definition: exceptions.c:378
int mpfr_overflow(mpfr_ptr x, mpfr_rnd_t rnd_mode, int sign)
Definition: exceptions.c:406
mpfr_exp_t __gmpfr_emin
Definition: exceptions.c:26
static void mpfr_exp_rational(mpfr_ptr y, mpz_ptr p, long r, int m, mpz_t *Q, mpfr_prec_t *mult)
Definition: exp3.c:42
#define shift
Definition: exp3.c:154
int mpfr_exp_3(mpfr_ptr y, mpfr_srcptr x, mpfr_rnd_t rnd_mode)
Definition: exp3.c:157
void mpfr_extract(mpz_ptr y, mpfr_srcptr p, unsigned int i)
Definition: extract.c:34
#define t
Definition: afcover.h:96
paragraph P
#define mpz_init
Definition: gmp.h:851
#define mpz_scan1
Definition: gmp.h:1012
#define mpz_tdiv_q
Definition: gmp.h:1076
#define mpz_clear
Definition: gmp.h:679
#define mpz_fdiv_q_2exp
Definition: gmp.h:760
Definition: gmp.h:628
#define mpz_cmp_ui(Z, UI)
Definition: gmp.h:2268
#define mpz_tdiv_q_2exp
Definition: gmp.h:1079
#define mpz_set_ui
Definition: gmp.h:1035
__mpz_struct mpz_t[1]
Definition: gmp.h:164
#define mpz_mul
Definition: gmp.h:930
unsigned long int mp_bitcnt_t
Definition: gmp.h:145
#define mpz_mul_2exp
Definition: gmp.h:933
#define mpz_set
Definition: gmp.h:1015
#define GMP_NUMB_BITS
Definition: gmp.h:46
small capitals from c petite p
Definition: afcover.h:72
small capitals from c petite p scientific i
Definition: afcover.h:80
kerning y
Definition: ttdriver.c:212
#define loop
Definition: tie.c:8
void mpfr_free_func(void *ptr, size_t size)
Definition: mpfr-gmp.c:330
void * mpfr_allocate_func(size_t alloc_size)
Definition: mpfr-gmp.c:308
#define MPFR_SET_EXP(x, e)
Definition: mpfr-impl.h:1059
#define MPFR_UNLIKELY(x)
Definition: mpfr-impl.h:1490
#define MPFR_LIKELY(x)
Definition: mpfr-impl.h:1489
#define MPFR_OVERFLOW(_flags)
Definition: mpfr-impl.h:448
#define MPFR_INT_CEIL_LOG2(x)
Definition: mpfr-impl.h:1558
#define MPFR_IS_PURE_FP(x)
Definition: mpfr-impl.h:1108
#define MPFR_ZIV_INIT(_x, _p)
Definition: mpfr-impl.h:2053
#define MPFR_SAVE_EXPO_UPDATE_FLAGS(x, flags)
Definition: mpfr-impl.h:1740
#define MPFR_SAVE_EXPO_MARK(x)
Definition: mpfr-impl.h:1730
#define MPFR_MPZ_SIZEINBASE2(r, z)
Definition: mpfr-impl.h:1636
#define MPFR_PREC(x)
Definition: mpfr-impl.h:958
#define MPFR_GET_EXP(x)
Definition: mpfr-impl.h:1058
#define MPFR_UNDERFLOW(_flags)
Definition: mpfr-impl.h:447
#define MPFR_SAVE_EXPO_FREE(x)
Definition: mpfr-impl.h:1736
#define MPFR_IS_ZERO(x)
Definition: mpfr-impl.h:1084
#define MPFR_BLOCK_DECL(_flags)
Definition: mpfr-impl.h:427
#define MPFR_ZIV_NEXT(_x, _p)
Definition: mpfr-impl.h:2054
#define MPFR_CAN_ROUND(b, err, prec, rnd)
Definition: mpfr-impl.h:1934
#define MPFR_SAVE_EXPO_DECL(x)
Definition: mpfr-impl.h:1729
#define MPFR_ASSERTN(expr)
Definition: mpfr-impl.h:495
#define MPFR_ZIV_DECL(_x)
Definition: mpfr-impl.h:2052
#define MPFR_ZIV_FREE(x)
Definition: mpfr-impl.h:2055
#define MPFR_LOG_FUNC(x, y)
Definition: mpfr-impl.h:2227
#define MPFR_ASSERTD(expr)
Definition: mpfr-impl.h:516
#define MPFR_BLOCK(_flags, _op)
Definition: mpfr-impl.h:431
void mpfr_clear(mpfr_ptr m)
Definition: clear.c:26
void mpfr_init2(mpfr_ptr x, mpfr_prec_t p)
Definition: init2.c:26
#define MPFR_FLAGS_UNDERFLOW
Definition: mpfr.h:77
int mpfr_sqr(mpfr_ptr, mpfr_srcptr, mpfr_rnd_t)
Definition: sqr.c:508
long mpfr_prec_t
Definition: mpfr.h:168
#define mpfr_set(a, b, r)
Definition: mpfr.h:864
__mpfr_struct mpfr_t[1]
Definition: mpfr.h:250
int mpfr_mul_2si(mpfr_ptr, mpfr_srcptr, long, mpfr_rnd_t)
#define MPFR_FLAGS_OVERFLOW
Definition: mpfr.h:78
#define mpfr_get_prec(_x)
Definition: mpfr.h:848
int mpfr_mul_2ui(mpfr_ptr, mpfr_srcptr, unsigned long, mpfr_rnd_t)
mpfr_rnd_t
Definition: mpfr.h:102
@ MPFR_RNDN
Definition: mpfr.h:103
@ MPFR_RNDU
Definition: mpfr.h:105
@ MPFR_RNDD
Definition: mpfr.h:106
@ MPFR_RNDZ
Definition: mpfr.h:104
long mpfr_exp_t
Definition: mpfr.h:195
int mpfr_set_z(mpfr_ptr, mpz_srcptr, mpfr_rnd_t)
Definition: set_z.c:27
int mpfr_mul(mpfr_ptr, mpfr_srcptr, mpfr_srcptr, mpfr_rnd_t)
Definition: mul.c:732
void mpfr_set_prec(mpfr_ptr, mpfr_prec_t)
Definition: set_prec.c:26
#define Prec
Definition: sqrtrem.c:168
float x
Definition: cordic.py:15
int k
Definition: otp-parser.c:70
integer mult[4800]
Definition: pmxab.c:199
int r
Definition: ppmqvga.c:68
#define flags
Definition: dvips.h:235
int j
Definition: t4ht.c:1589
int diff
Definition: tex4ht.c:3815
m
Definition: tex4ht.c:3990
@ S
Definition: ubidiimp.h:53
long long scaled
Definition: vf_ng.c:93