gsasl  1.10.0
About: GNU SASL is an implementation of the Simple Authentication and Security Layer (SASL). Development version.
  Fossies Dox: gsasl-1.10.0.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

base64.c
Go to the documentation of this file.
1 /* base64.c -- Encode binary data using printable characters.
2  Copyright (C) 1999-2001, 2004-2006, 2009-2021 Free Software Foundation, Inc.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU Lesser General Public License as published by
6  the Free Software Foundation; either version 2.1, or (at your option)
7  any later version.
8 
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU Lesser General Public License for more details.
13 
14  You should have received a copy of the GNU Lesser General Public License
15  along with this program; if not, see <https://www.gnu.org/licenses/>. */
16 
17 /* Written by Simon Josefsson. Partially adapted from GNU MailUtils
18  * (mailbox/filter_trans.c, as of 2004-11-28). Improved by review
19  * from Paul Eggert, Bruno Haible, and Stepan Kasal.
20  *
21  * See also RFC 4648 <https://www.ietf.org/rfc/rfc4648.txt>.
22  *
23  * Be careful with error checking. Here is how you would typically
24  * use these functions:
25  *
26  * bool ok = base64_decode_alloc (in, inlen, &out, &outlen);
27  * if (!ok)
28  * FAIL: input was not valid base64
29  * if (out == NULL)
30  * FAIL: memory allocation error
31  * OK: data in OUT/OUTLEN
32  *
33  * size_t outlen = base64_encode_alloc (in, inlen, &out);
34  * if (out == NULL && outlen == 0 && inlen != 0)
35  * FAIL: input too long
36  * if (out == NULL)
37  * FAIL: memory allocation error
38  * OK: data in OUT/OUTLEN.
39  *
40  */
41 
42 #include <config.h>
43 
44 /* Get prototype. */
45 #include "base64.h"
46 
47 /* Get malloc. */
48 #include <stdlib.h>
49 
50 /* Get UCHAR_MAX. */
51 #include <limits.h>
52 
53 #include <string.h>
54 
/* Convert CH to 'unsigned char' by implicit conversion instead of a
   cast; this form is valid C89.  */
static unsigned char
to_uchar (char ch)
{
  unsigned char byte = ch;
  return byte;
}
61 
/* The Base64 alphabet in value order: b64c[V] is the character that
   encodes the 6-bit value V.  The array holds exactly 64 bytes, so the
   initializer's terminating NUL is not stored.  */
static const char b64c[64] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789"
  "+/";
64 
65 /* Base64 encode IN array of size INLEN into OUT array. OUT needs
66  to be of length >= BASE64_LENGTH(INLEN), and INLEN needs to be
67  a multiple of 3. */
68 static void
69 base64_encode_fast (const char *restrict in, size_t inlen, char *restrict out)
70 {
71  while (inlen)
72  {
73  *out++ = b64c[(to_uchar (in[0]) >> 2) & 0x3f];
74  *out++ = b64c[((to_uchar (in[0]) << 4) + (to_uchar (in[1]) >> 4)) & 0x3f];
75  *out++ = b64c[((to_uchar (in[1]) << 2) + (to_uchar (in[2]) >> 6)) & 0x3f];
76  *out++ = b64c[to_uchar (in[2]) & 0x3f];
77 
78  inlen -= 3;
79  in += 3;
80  }
81 }
82 
83 /* Base64 encode IN array of size INLEN into OUT array of size OUTLEN.
84  If OUTLEN is less than BASE64_LENGTH(INLEN), write as many bytes as
85  possible. If OUTLEN is larger than BASE64_LENGTH(INLEN), also zero
86  terminate the output buffer. */
87 void
88 base64_encode (const char *restrict in, size_t inlen,
89  char *restrict out, size_t outlen)
90 {
91  /* Note this outlen constraint can be enforced at compile time.
92  I.E. that the output buffer is exactly large enough to hold
93  the encoded inlen bytes. The inlen constraints (of corresponding
94  to outlen, and being a multiple of 3) can change at runtime
95  at the end of input. However the common case when reading
96  large inputs is to have both constraints satisfied, so we depend
97  on both in base_encode_fast(). */
98  if (outlen % 4 == 0 && inlen == outlen / 4 * 3)
99  {
100  base64_encode_fast (in, inlen, out);
101  return;
102  }
103 
104  while (inlen && outlen)
105  {
106  *out++ = b64c[(to_uchar (in[0]) >> 2) & 0x3f];
107  if (!--outlen)
108  break;
109  *out++ = b64c[((to_uchar (in[0]) << 4)
110  + (--inlen ? to_uchar (in[1]) >> 4 : 0))
111  & 0x3f];
112  if (!--outlen)
113  break;
114  *out++ =
115  (inlen
116  ? b64c[((to_uchar (in[1]) << 2)
117  + (--inlen ? to_uchar (in[2]) >> 6 : 0))
118  & 0x3f]
119  : '=');
120  if (!--outlen)
121  break;
122  *out++ = inlen ? b64c[to_uchar (in[2]) & 0x3f] : '=';
123  if (!--outlen)
124  break;
125  if (inlen)
126  inlen--;
127  if (inlen)
128  in += 3;
129  }
130 
131  if (outlen)
132  *out = '\0';
133 }
134 
/* Allocate a buffer, store in it the zero terminated base64 encoding
   of the INLEN bytes at IN, and return BASE64_LENGTH (INLEN), i.e. the
   length of the encoded data without the terminating zero.  On return
   *OUT points to the newly allocated memory, which the caller must
   deallocate.

   If the output length would overflow, *OUT is set to NULL and 0 is
   returned.  If memory allocation fails, *OUT is set to NULL and the
   return value is the size of the block that was requested, i.e.
   BASE64_LENGTH (INLEN) + 1.  */
size_t
base64_encode_alloc (const char *in, size_t inlen, char **out)
{
  size_t bufsize = BASE64_LENGTH (inlen) + 1;
  char *encoded;

  /* Overflow detection.  A correctly computed size is never smaller
     than INLEN.  If the size computation wrapped around, at least half
     of the true value was lost, which leaves a result smaller than
     INLEN for any INLEN large enough to overflow.  */
  if (bufsize < inlen)
    {
      *out = NULL;
      return 0;
    }

  encoded = malloc (bufsize);
  *out = encoded;
  if (encoded == NULL)
    return bufsize;

  base64_encode (in, inlen, encoded, bufsize);

  return bufsize - 1;
}
175 
/* B64 (_) yields the 6-bit Base64 value of the character whose code
   is _, or -1 if that character is not in the Base64 alphabet.

   Comparing against character constants, rather than hard-coding
   character codes, keeps this file independent of the execution
   charset (think EBCDIC).  It does assume that the alphabet characters
   (A-Za-z0-9+/) are encoded in 0..255.  POSIX 1003.1-2001 requires char
   and unsigned char to be 8-bit quantities, which takes care of that,
   but it may be a problem on non-POSIX C99 platforms.

   IBM C V6 for AIX mishandles "#define B64(x) ...'x'...", hence the
   formal parameter is named "_" rather than "x".  */
#define B64(_)                                                          \
  ((_) == 'A' ? 0 : (_) == 'B' ? 1 : (_) == 'C' ? 2 : (_) == 'D' ? 3    \
   : (_) == 'E' ? 4 : (_) == 'F' ? 5 : (_) == 'G' ? 6 : (_) == 'H' ? 7  \
   : (_) == 'I' ? 8 : (_) == 'J' ? 9 : (_) == 'K' ? 10 : (_) == 'L' ? 11 \
   : (_) == 'M' ? 12 : (_) == 'N' ? 13 : (_) == 'O' ? 14 : (_) == 'P' ? 15 \
   : (_) == 'Q' ? 16 : (_) == 'R' ? 17 : (_) == 'S' ? 18 : (_) == 'T' ? 19 \
   : (_) == 'U' ? 20 : (_) == 'V' ? 21 : (_) == 'W' ? 22 : (_) == 'X' ? 23 \
   : (_) == 'Y' ? 24 : (_) == 'Z' ? 25 : (_) == 'a' ? 26 : (_) == 'b' ? 27 \
   : (_) == 'c' ? 28 : (_) == 'd' ? 29 : (_) == 'e' ? 30 : (_) == 'f' ? 31 \
   : (_) == 'g' ? 32 : (_) == 'h' ? 33 : (_) == 'i' ? 34 : (_) == 'j' ? 35 \
   : (_) == 'k' ? 36 : (_) == 'l' ? 37 : (_) == 'm' ? 38 : (_) == 'n' ? 39 \
   : (_) == 'o' ? 40 : (_) == 'p' ? 41 : (_) == 'q' ? 42 : (_) == 'r' ? 43 \
   : (_) == 's' ? 44 : (_) == 't' ? 45 : (_) == 'u' ? 46 : (_) == 'v' ? 47 \
   : (_) == 'w' ? 48 : (_) == 'x' ? 49 : (_) == 'y' ? 50 : (_) == 'z' ? 51 \
   : (_) == '0' ? 52 : (_) == '1' ? 53 : (_) == '2' ? 54 : (_) == '3' ? 55 \
   : (_) == '4' ? 56 : (_) == '5' ? 57 : (_) == '6' ? 58 : (_) == '7' ? 59 \
   : (_) == '8' ? 60 : (_) == '9' ? 61 : (_) == '+' ? 62 : (_) == '/' ? 63 \
   : -1)
251 
252 static const signed char b64[0x100] = {
253  B64 (0), B64 (1), B64 (2), B64 (3),
254  B64 (4), B64 (5), B64 (6), B64 (7),
255  B64 (8), B64 (9), B64 (10), B64 (11),
256  B64 (12), B64 (13), B64 (14), B64 (15),
257  B64 (16), B64 (17), B64 (18), B64 (19),
258  B64 (20), B64 (21), B64 (22), B64 (23),
259  B64 (24), B64 (25), B64 (26), B64 (27),
260  B64 (28), B64 (29), B64 (30), B64 (31),
261  B64 (32), B64 (33), B64 (34), B64 (35),
262  B64 (36), B64 (37), B64 (38), B64 (39),
263  B64 (40), B64 (41), B64 (42), B64 (43),
264  B64 (44), B64 (45), B64 (46), B64 (47),
265  B64 (48), B64 (49), B64 (50), B64 (51),
266  B64 (52), B64 (53), B64 (54), B64 (55),
267  B64 (56), B64 (57), B64 (58), B64 (59),
268  B64 (60), B64 (61), B64 (62), B64 (63),
269  B64 (64), B64 (65), B64 (66), B64 (67),
270  B64 (68), B64 (69), B64 (70), B64 (71),
271  B64 (72), B64 (73), B64 (74), B64 (75),
272  B64 (76), B64 (77), B64 (78), B64 (79),
273  B64 (80), B64 (81), B64 (82), B64 (83),
274  B64 (84), B64 (85), B64 (86), B64 (87),
275  B64 (88), B64 (89), B64 (90), B64 (91),
276  B64 (92), B64 (93), B64 (94), B64 (95),
277  B64 (96), B64 (97), B64 (98), B64 (99),
278  B64 (100), B64 (101), B64 (102), B64 (103),
279  B64 (104), B64 (105), B64 (106), B64 (107),
280  B64 (108), B64 (109), B64 (110), B64 (111),
281  B64 (112), B64 (113), B64 (114), B64 (115),
282  B64 (116), B64 (117), B64 (118), B64 (119),
283  B64 (120), B64 (121), B64 (122), B64 (123),
284  B64 (124), B64 (125), B64 (126), B64 (127),
285  B64 (128), B64 (129), B64 (130), B64 (131),
286  B64 (132), B64 (133), B64 (134), B64 (135),
287  B64 (136), B64 (137), B64 (138), B64 (139),
288  B64 (140), B64 (141), B64 (142), B64 (143),
289  B64 (144), B64 (145), B64 (146), B64 (147),
290  B64 (148), B64 (149), B64 (150), B64 (151),
291  B64 (152), B64 (153), B64 (154), B64 (155),
292  B64 (156), B64 (157), B64 (158), B64 (159),
293  B64 (160), B64 (161), B64 (162), B64 (163),
294  B64 (164), B64 (165), B64 (166), B64 (167),
295  B64 (168), B64 (169), B64 (170), B64 (171),
296  B64 (172), B64 (173), B64 (174), B64 (175),
297  B64 (176), B64 (177), B64 (178), B64 (179),
298  B64 (180), B64 (181), B64 (182), B64 (183),
299  B64 (184), B64 (185), B64 (186), B64 (187),
300  B64 (188), B64 (189), B64 (190), B64 (191),
301  B64 (192), B64 (193), B64 (194), B64 (195),
302  B64 (196), B64 (197), B64 (198), B64 (199),
303  B64 (200), B64 (201), B64 (202), B64 (203),
304  B64 (204), B64 (205), B64 (206), B64 (207),
305  B64 (208), B64 (209), B64 (210), B64 (211),
306  B64 (212), B64 (213), B64 (214), B64 (215),
307  B64 (216), B64 (217), B64 (218), B64 (219),
308  B64 (220), B64 (221), B64 (222), B64 (223),
309  B64 (224), B64 (225), B64 (226), B64 (227),
310  B64 (228), B64 (229), B64 (230), B64 (231),
311  B64 (232), B64 (233), B64 (234), B64 (235),
312  B64 (236), B64 (237), B64 (238), B64 (239),
313  B64 (240), B64 (241), B64 (242), B64 (243),
314  B64 (244), B64 (245), B64 (246), B64 (247),
315  B64 (248), B64 (249), B64 (250), B64 (251),
316  B64 (252), B64 (253), B64 (254), B64 (255)
317 };
318 
/* uchar_in_range (C) tells whether the unsigned char value C is at
   most 255, the largest index of the 0x100-entry b64 table.  When
   UCHAR_MAX is 255 that is trivially true.  */
#if UCHAR_MAX != 255
# define uchar_in_range(c) ((c) <= 255)
#else
# define uchar_in_range(c) true
#endif
324 
325 /* Return true if CH is a character from the Base64 alphabet, and
326  false otherwise. Note that '=' is padding and not considered to be
327  part of the alphabet. */
328 bool
329 isbase64 (char ch)
330 {
331  return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)];
332 }
333 
334 /* Initialize decode-context buffer, CTX. */
335 void
337 {
338  ctx->i = 0;
339 }
340 
341 /* If CTX->i is 0 or 4, there are four or more bytes in [*IN..IN_END), and
342  none of those four is a newline, then return *IN. Otherwise, copy up to
343  4 - CTX->i non-newline bytes from that range into CTX->buf, starting at
344  index CTX->i and setting CTX->i to reflect the number of bytes copied,
345  and return CTX->buf. In either case, advance *IN to point to the byte
346  after the last one processed, and set *N_NON_NEWLINE to the number of
347  verified non-newline bytes accessible through the returned pointer. */
348 static char *
350  char const *restrict *in, char const *restrict in_end,
351  size_t *n_non_newline)
352 {
353  if (ctx->i == 4)
354  ctx->i = 0;
355 
356  if (ctx->i == 0)
357  {
358  char const *t = *in;
359  if (4 <= in_end - *in && memchr (t, '\n', 4) == NULL)
360  {
361  /* This is the common case: no newline. */
362  *in += 4;
363  *n_non_newline = 4;
364  return (char *) t;
365  }
366  }
367 
368  {
369  /* Copy non-newline bytes into BUF. */
370  char const *p = *in;
371  while (p < in_end)
372  {
373  char c = *p++;
374  if (c != '\n')
375  {
376  ctx->buf[ctx->i++] = c;
377  if (ctx->i == 4)
378  break;
379  }
380  }
381 
382  *in = p;
383  *n_non_newline = ctx->i;
384  return ctx->buf;
385  }
386 }
387 
/* Statement macro for use where locals OUT and OUTP are in scope (see
   decode_4): store the current output position OUT through OUTP, then
   return false from the enclosing function.  */
#define return_false \
  do { *outp = out; return false; } while (false)
395 
396 /* Decode up to four bytes of base64-encoded data, IN, of length INLEN
397  into the output buffer, *OUT, of size *OUTLEN bytes. Return true if
398  decoding is successful, false otherwise. If *OUTLEN is too small,
399  as many bytes as possible are written to *OUT. On return, advance
400  *OUT to point to the byte after the last one written, and decrement
401  *OUTLEN to reflect the number of bytes remaining in *OUT. */
402 static bool
403 decode_4 (char const *restrict in, size_t inlen,
404  char *restrict *outp, size_t *outleft)
405 {
406  char *out = *outp;
407  if (inlen < 2)
408  return false;
409 
410  if (!isbase64 (in[0]) || !isbase64 (in[1]))
411  return false;
412 
413  if (*outleft)
414  {
415  *out++ = ((b64[to_uchar (in[0])] << 2)
416  | (b64[to_uchar (in[1])] >> 4));
417  --*outleft;
418  }
419 
420  if (inlen == 2)
421  return_false;
422 
423  if (in[2] == '=')
424  {
425  if (inlen != 4)
426  return_false;
427 
428  if (in[3] != '=')
429  return_false;
430  }
431  else
432  {
433  if (!isbase64 (in[2]))
434  return_false;
435 
436  if (*outleft)
437  {
438  *out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0)
439  | (b64[to_uchar (in[2])] >> 2));
440  --*outleft;
441  }
442 
443  if (inlen == 3)
444  return_false;
445 
446  if (in[3] == '=')
447  {
448  if (inlen != 4)
449  return_false;
450  }
451  else
452  {
453  if (!isbase64 (in[3]))
454  return_false;
455 
456  if (*outleft)
457  {
458  *out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0)
459  | b64[to_uchar (in[3])]);
460  --*outleft;
461  }
462  }
463  }
464 
465  *outp = out;
466  return true;
467 }
468 
469 /* Decode base64-encoded input array IN of length INLEN to output array
470  OUT that can hold *OUTLEN bytes. The input data may be interspersed
471  with newlines. Return true if decoding was successful, i.e. if the
472  input was valid base64 data, false otherwise. If *OUTLEN is too
473  small, as many bytes as possible will be written to OUT. On return,
474  *OUTLEN holds the length of decoded bytes in OUT. Note that as soon
475  as any non-alphabet, non-newline character is encountered, decoding
476  is stopped and false is returned. If INLEN is zero, then process
477  only whatever data is stored in CTX.
478 
479  Initially, CTX must have been initialized via base64_decode_ctx_init.
480  Subsequent calls to this function must reuse whatever state is recorded
481  in that buffer. It is necessary for when a quadruple of base64 input
482  bytes spans two input buffers.
483 
484  If CTX is NULL then newlines are treated as garbage and the input
485  buffer is processed as a unit. */
486 
487 bool
489  const char *restrict in, size_t inlen,
490  char *restrict out, size_t *outlen)
491 {
492  size_t outleft = *outlen;
493  bool ignore_newlines = ctx != NULL;
494  bool flush_ctx = false;
495  unsigned int ctx_i = 0;
496 
497  if (ignore_newlines)
498  {
499  ctx_i = ctx->i;
500  flush_ctx = inlen == 0;
501  }
502 
503 
504  while (true)
505  {
506  size_t outleft_save = outleft;
507  if (ctx_i == 0 && !flush_ctx)
508  {
509  while (true)
510  {
511  /* Save a copy of outleft, in case we need to re-parse this
512  block of four bytes. */
513  outleft_save = outleft;
514  if (!decode_4 (in, inlen, &out, &outleft))
515  break;
516 
517  in += 4;
518  inlen -= 4;
519  }
520  }
521 
522  if (inlen == 0 && !flush_ctx)
523  break;
524 
525  /* Handle the common case of 72-byte wrapped lines.
526  This also handles any other multiple-of-4-byte wrapping. */
527  if (inlen && *in == '\n' && ignore_newlines)
528  {
529  ++in;
530  --inlen;
531  continue;
532  }
533 
534  /* Restore OUT and OUTLEFT. */
535  out -= outleft_save - outleft;
536  outleft = outleft_save;
537 
538  {
539  char const *in_end = in + inlen;
540  char const *non_nl;
541 
542  if (ignore_newlines)
543  non_nl = get_4 (ctx, &in, in_end, &inlen);
544  else
545  non_nl = in; /* Might have nl in this case. */
546 
547  /* If the input is empty or consists solely of newlines (0 non-newlines),
548  then we're done. Likewise if there are fewer than 4 bytes when not
549  flushing context and not treating newlines as garbage. */
550  if (inlen == 0 || (inlen < 4 && !flush_ctx && ignore_newlines))
551  {
552  inlen = 0;
553  break;
554  }
555  if (!decode_4 (non_nl, inlen, &out, &outleft))
556  break;
557 
558  inlen = in_end - in;
559  }
560  }
561 
562  *outlen -= outleft;
563 
564  return inlen == 0;
565 }
566 
/* Allocate an output buffer in *OUT, and decode the base64 encoded
   data stored in IN of size INLEN to the *OUT buffer.  On return, the
   size of the decoded data is stored in *OUTLEN.  OUTLEN may be NULL,
   if the caller is not interested in the decoded length.  *OUT may be
   NULL to indicate an out of memory error, in which case *OUTLEN
   contains the size of the memory block needed.  The function returns
   true on successful decoding and memory allocation errors.  (Use the
   *OUT and *OUTLEN parameters to differentiate between successful
   decoding and memory error.)  The function returns false if the
   input was invalid, in which case *OUT is NULL and *OUTLEN is
   undefined.  On success the caller owns *OUT and must free it.  */
bool
base64_decode_alloc_ctx (struct base64_decode_context *ctx,
                         const char *in, size_t inlen, char **out,
                         size_t *outlen)
{
  /* This may allocate a few bytes too many, depending on input,
     but it's not worth the extra CPU time to compute the exact size.
     The exact size is 3 * (inlen + (ctx ? ctx->i : 0)) / 4, minus 1 if the
     input ends with "=" and minus another 1 if the input ends with "==".
     Dividing before multiplying avoids the possibility of overflow.  */
  size_t needlen = 3 * (inlen / 4) + 3;

  *out = malloc (needlen);
  if (!*out)
    {
      /* Out of memory: report the size we failed to allocate, as the
         contract above promises.  */
      if (outlen)
        *outlen = needlen;
      return true;
    }

  /* base64_decode_ctx replaces NEEDLEN with the number of bytes
     actually decoded.  */
  if (!base64_decode_ctx (ctx, in, inlen, *out, &needlen))
    {
      free (*out);
      *out = NULL;
      return false;
    }

  if (outlen)
    *outlen = needlen;

  return true;
}
#define BASE64_LENGTH(inlen)
Definition: base64.h:33
#define restrict
Definition: config.h:69
#define uchar_in_range(c)
Definition: base64.c:322
#define return_false
Definition: base64.c:388
static unsigned char to_uchar(char ch)
Definition: base64.c:57
static char * get_4(struct base64_decode_context *ctx, char const **in, char const *in_end, size_t *n_non_newline)
Definition: base64.c:349
static signed char decode_4(char const *in, size_t inlen, char **outp, size_t *outleft)
Definition: base64.c:403
#define B64(_)
Definition: base64.c:185
signed char base64_decode_ctx(struct base64_decode_context *ctx, const char *in, size_t inlen, char *out, size_t *outlen)
Definition: base64.c:488
signed char isbase64(char ch)
Definition: base64.c:329
static void base64_encode_fast(const char *in, size_t inlen, char *out)
Definition: base64.c:69
size_t base64_encode_alloc(const char *in, size_t inlen, char **out)
Definition: base64.c:145
signed char base64_decode_alloc_ctx(struct base64_decode_context *ctx, const char *in, size_t inlen, char **out, size_t *outlen)
Definition: base64.c:579
void base64_decode_ctx_init(struct base64_decode_context *ctx)
Definition: base64.c:336
void base64_encode(const char *in, size_t inlen, char *out, size_t outlen)
Definition: base64.c:88
static const char b64c[64]
Definition: base64.c:62
static const signed char b64[0x100]
Definition: base64.c:252
void * memchr(void const *s, int c_in, size_t n)
Definition: memchr.c:59
#define NULL
Definition: stddef.in.h:72
unsigned char c
const char * p
Definition: mbrtowc-impl.h:42
unsigned int i
Definition: base64.h:37