gsasl  1.10.0
About: GNU SASL is an implementation of the Simple Authentication and Security Layer (SASL). Development version.
  Fossies Dox: gsasl-1.10.0.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

iconv.c
Go to the documentation of this file.
1 /* Character set conversion.
2  Copyright (C) 1999-2001, 2007, 2009-2021 Free Software Foundation, Inc.
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; either version 3, or (at your option)
7  any later version.
8 
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License along
15  with this program; if not, see <https://www.gnu.org/licenses/>. */
16 
17 #include <config.h>
18 
19 /* Specification. */
20 #include <iconv.h>
21 
22 #include <stddef.h>
23 
24 #if REPLACE_ICONV_UTF
25 # include <errno.h>
26 # include <stdint.h>
27 # include <stdlib.h>
28 # include "unistr.h"
29 #endif
30 
31 #if REPLACE_ICONV_UTF
32 
33 /* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11. */
34 
35 /* Return code if invalid. (xxx_mbtowc) */
36 # define RET_ILSEQ -1
37 /* Return code if the input is too short to decode a character. (xxx_mbtowc) */
38 # define RET_TOOFEW -2
39 
40 /* Return code if invalid. (xxx_wctomb) */
41 # define RET_ILUNI -1
42 /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
43 # define RET_TOOSMALL -2
44 
45 /*
46  * UTF-16BE
47  */
48 
49 /* Specification: RFC 2781 */
50 
51 static int
52 utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
53 {
54  if (n >= 2)
55  {
56  ucs4_t wc = (s[0] << 8) + s[1];
57  if (wc >= 0xd800 && wc < 0xdc00)
58  {
59  if (n >= 4)
60  {
61  ucs4_t wc2 = (s[2] << 8) + s[3];
62  if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
63  return RET_ILSEQ;
64  *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
65  return 4;
66  }
67  }
68  else if (wc >= 0xdc00 && wc < 0xe000)
69  {
70  return RET_ILSEQ;
71  }
72  else
73  {
74  *pwc = wc;
75  return 2;
76  }
77  }
78  return RET_TOOFEW;
79 }
80 
81 static int
82 utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
83 {
84  if (!(wc >= 0xd800 && wc < 0xe000))
85  {
86  if (wc < 0x10000)
87  {
88  if (n >= 2)
89  {
90  r[0] = (unsigned char) (wc >> 8);
91  r[1] = (unsigned char) wc;
92  return 2;
93  }
94  else
95  return RET_TOOSMALL;
96  }
97  else if (wc < 0x110000)
98  {
99  if (n >= 4)
100  {
101  ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
102  ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
103  r[0] = (unsigned char) (wc1 >> 8);
104  r[1] = (unsigned char) wc1;
105  r[2] = (unsigned char) (wc2 >> 8);
106  r[3] = (unsigned char) wc2;
107  return 4;
108  }
109  else
110  return RET_TOOSMALL;
111  }
112  }
113  return RET_ILUNI;
114 }
115 
116 /*
117  * UTF-16LE
118  */
119 
120 /* Specification: RFC 2781 */
121 
122 static int
123 utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
124 {
125  if (n >= 2)
126  {
127  ucs4_t wc = s[0] + (s[1] << 8);
128  if (wc >= 0xd800 && wc < 0xdc00)
129  {
130  if (n >= 4)
131  {
132  ucs4_t wc2 = s[2] + (s[3] << 8);
133  if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
134  return RET_ILSEQ;
135  *pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
136  return 4;
137  }
138  }
139  else if (wc >= 0xdc00 && wc < 0xe000)
140  {
141  return RET_ILSEQ;
142  }
143  else
144  {
145  *pwc = wc;
146  return 2;
147  }
148  }
149  return RET_TOOFEW;
150 }
151 
152 static int
153 utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
154 {
155  if (!(wc >= 0xd800 && wc < 0xe000))
156  {
157  if (wc < 0x10000)
158  {
159  if (n >= 2)
160  {
161  r[0] = (unsigned char) wc;
162  r[1] = (unsigned char) (wc >> 8);
163  return 2;
164  }
165  else
166  return RET_TOOSMALL;
167  }
168  else if (wc < 0x110000)
169  {
170  if (n >= 4)
171  {
172  ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
173  ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
174  r[0] = (unsigned char) wc1;
175  r[1] = (unsigned char) (wc1 >> 8);
176  r[2] = (unsigned char) wc2;
177  r[3] = (unsigned char) (wc2 >> 8);
178  return 4;
179  }
180  else
181  return RET_TOOSMALL;
182  }
183  }
184  return RET_ILUNI;
185 }
186 
187 /*
188  * UTF-32BE
189  */
190 
191 /* Specification: Unicode 3.1 Standard Annex #19 */
192 
193 static int
194 utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
195 {
196  if (n >= 4)
197  {
198  ucs4_t wc = (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3];
199  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
200  {
201  *pwc = wc;
202  return 4;
203  }
204  else
205  return RET_ILSEQ;
206  }
207  return RET_TOOFEW;
208 }
209 
210 static int
211 utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
212 {
213  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
214  {
215  if (n >= 4)
216  {
217  r[0] = 0;
218  r[1] = (unsigned char) (wc >> 16);
219  r[2] = (unsigned char) (wc >> 8);
220  r[3] = (unsigned char) wc;
221  return 4;
222  }
223  else
224  return RET_TOOSMALL;
225  }
226  return RET_ILUNI;
227 }
228 
229 /*
230  * UTF-32LE
231  */
232 
233 /* Specification: Unicode 3.1 Standard Annex #19 */
234 
235 static int
236 utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
237 {
238  if (n >= 4)
239  {
240  ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24);
241  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
242  {
243  *pwc = wc;
244  return 4;
245  }
246  else
247  return RET_ILSEQ;
248  }
249  return RET_TOOFEW;
250 }
251 
252 static int
253 utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
254 {
255  if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
256  {
257  if (n >= 4)
258  {
259  r[0] = (unsigned char) wc;
260  r[1] = (unsigned char) (wc >> 8);
261  r[2] = (unsigned char) (wc >> 16);
262  r[3] = 0;
263  return 4;
264  }
265  else
266  return RET_TOOSMALL;
267  }
268  return RET_ILUNI;
269 }
270 
271 #endif
272 
273 size_t
274 rpl_iconv (iconv_t cd,
275  ICONV_CONST char **inbuf, size_t *inbytesleft,
276  char **outbuf, size_t *outbytesleft)
277 #undef iconv
278 {
279 #if REPLACE_ICONV_UTF
280  switch ((uintptr_t) cd)
281  {
282  {
283  int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t);
284 
285  case (uintptr_t) _ICONV_UTF8_UTF16BE:
286  xxx_wctomb = utf16be_wctomb;
287  goto loop_from_utf8;
288  case (uintptr_t) _ICONV_UTF8_UTF16LE:
289  xxx_wctomb = utf16le_wctomb;
290  goto loop_from_utf8;
291  case (uintptr_t) _ICONV_UTF8_UTF32BE:
292  xxx_wctomb = utf32be_wctomb;
293  goto loop_from_utf8;
294  case (uintptr_t) _ICONV_UTF8_UTF32LE:
295  xxx_wctomb = utf32le_wctomb;
296  goto loop_from_utf8;
297 
298  loop_from_utf8:
299  if (inbuf == NULL || *inbuf == NULL)
300  return 0;
301  {
302  ICONV_CONST char *inptr = *inbuf;
303  size_t inleft = *inbytesleft;
304  char *outptr = *outbuf;
305  size_t outleft = *outbytesleft;
306  size_t res = 0;
307  while (inleft > 0)
308  {
309  ucs4_t uc;
310  int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft);
311  if (m <= 0)
312  {
313  if (m == -1)
314  {
315  errno = EILSEQ;
316  res = (size_t)(-1);
317  break;
318  }
319  if (m == -2)
320  {
321  errno = EINVAL;
322  res = (size_t)(-1);
323  break;
324  }
325  abort ();
326  }
327  else
328  {
329  int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft);
330  if (n < 0)
331  {
332  if (n == RET_ILUNI)
333  {
334  errno = EILSEQ;
335  res = (size_t)(-1);
336  break;
337  }
338  if (n == RET_TOOSMALL)
339  {
340  errno = E2BIG;
341  res = (size_t)(-1);
342  break;
343  }
344  abort ();
345  }
346  else
347  {
348  inptr += m;
349  inleft -= m;
350  outptr += n;
351  outleft -= n;
352  }
353  }
354  }
355  *inbuf = inptr;
356  *inbytesleft = inleft;
357  *outbuf = outptr;
358  *outbytesleft = outleft;
359  return res;
360  }
361  }
362 
363  {
364  int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t);
365 
366  case (uintptr_t) _ICONV_UTF16BE_UTF8:
367  xxx_mbtowc = utf16be_mbtowc;
368  goto loop_to_utf8;
369  case (uintptr_t) _ICONV_UTF16LE_UTF8:
370  xxx_mbtowc = utf16le_mbtowc;
371  goto loop_to_utf8;
372  case (uintptr_t) _ICONV_UTF32BE_UTF8:
373  xxx_mbtowc = utf32be_mbtowc;
374  goto loop_to_utf8;
375  case (uintptr_t) _ICONV_UTF32LE_UTF8:
376  xxx_mbtowc = utf32le_mbtowc;
377  goto loop_to_utf8;
378 
379  loop_to_utf8:
380  if (inbuf == NULL || *inbuf == NULL)
381  return 0;
382  {
383  ICONV_CONST char *inptr = *inbuf;
384  size_t inleft = *inbytesleft;
385  char *outptr = *outbuf;
386  size_t outleft = *outbytesleft;
387  size_t res = 0;
388  while (inleft > 0)
389  {
390  ucs4_t uc;
391  int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft);
392  if (m <= 0)
393  {
394  if (m == RET_ILSEQ)
395  {
396  errno = EILSEQ;
397  res = (size_t)(-1);
398  break;
399  }
400  if (m == RET_TOOFEW)
401  {
402  errno = EINVAL;
403  res = (size_t)(-1);
404  break;
405  }
406  abort ();
407  }
408  else
409  {
410  int n = u8_uctomb ((uint8_t *) outptr, uc, outleft);
411  if (n < 0)
412  {
413  if (n == -1)
414  {
415  errno = EILSEQ;
416  res = (size_t)(-1);
417  break;
418  }
419  if (n == -2)
420  {
421  errno = E2BIG;
422  res = (size_t)(-1);
423  break;
424  }
425  abort ();
426  }
427  else
428  {
429  inptr += m;
430  inleft -= m;
431  outptr += n;
432  outleft -= n;
433  }
434  }
435  }
436  *inbuf = inptr;
437  *inbytesleft = inleft;
438  *outbuf = outptr;
439  *outbytesleft = outleft;
440  return res;
441  }
442  }
443  }
444 #endif
445  return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft);
446 }
#define EILSEQ
Definition: errno.in.h:274
#define NULL
Definition: stddef.in.h:72
#define uint8_t
Definition: stdint.in.h:154
#define uintptr_t
Definition: stdint.in.h:320
size_t rpl_iconv(iconv_t cd, ICONV_CONST char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
Definition: iconv.c:274
size_t m
Definition: mbrtowc-impl.h:43
return() size_t(-2)
int res
Definition: mbrtowc-impl.h:45
int u8_mbtoucr(ucs4_t *puc, const unsigned char *s, size_t n)
Definition: u8-mbtoucr.c:24
int u8_uctomb(unsigned char *s, ucs4_t uc, ptrdiff_t n)
Definition: u8-uctomb.c:33
unsigned int ucs4_t
Definition: unitypes.in.h:24