gsasl  1.10.0
About: GNU SASL is an implementation of the Simple Authentication and Security Layer (SASL). Development version.
  Fossies Dox: gsasl-1.10.0.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

mbrtowc-impl.h
Go to the documentation of this file.
1 /* Convert multibyte character to wide character.
2  Copyright (C) 1999-2002, 2005-2021 Free Software Foundation, Inc.
3 
4  This program is free software: you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; either version 3 of the License, or
7  (at your option) any later version.
8 
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with this program. If not, see <https://www.gnu.org/licenses/>. */
16 
17 /* Written by Bruno Haible <bruno@clisp.org>, 2008. */
18 
19 /* This file contains the body of the mbrtowc and mbrtoc32 functions,
20  when GNULIB_defined_mbstate_t is defined. */
21 
22  char *pstate = (char *)ps;
23 
24  if (s == NULL)
25  {
26  pwc = NULL;
27  s = "";
28  n = 1;
29  }
30 
31  if (n == 0)
32  return (size_t)(-2);
33 
34  /* Here n > 0. */
35 
36  if (pstate == NULL)
37  pstate = internal_state;
38 
39  {
40  size_t nstate = pstate[0];
41  char buf[4];
42  const char *p;
43  size_t m;
44  enc_t enc;
45  int res;
46 
47  switch (nstate)
48  {
49  case 0:
50  p = s;
51  m = n;
52  break;
53  case 3:
54  buf[2] = pstate[3];
56  case 2:
57  buf[1] = pstate[2];
59  case 1:
60  buf[0] = pstate[1];
61  p = buf;
62  m = nstate;
63  buf[m++] = s[0];
64  if (n >= 2 && m < 4)
65  {
66  buf[m++] = s[1];
67  if (n >= 3 && m < 4)
68  buf[m++] = s[2];
69  }
70  break;
71  default:
72  errno = EINVAL;
73  return (size_t)(-1);
74  }
75 
76  /* Here m > 0. */
77 
78  enc = locale_encoding_classification ();
79 
80  if (enc == enc_utf8) /* UTF-8 */
81  {
82  /* Achieve
83  - multi-thread safety and
84  - the ability to produce wide character values > WCHAR_MAX
85  by not calling mbtowc() at all. */
86 #include "mbrtowc-impl-utf8.h"
87  }
88  else
89  {
90  /* The hidden internal state of mbtowc would make this function not
91  multi-thread safe. Achieve multi-thread safety through a lock. */
92  wchar_t wc;
93  res = mbtowc_with_lock (&wc, p, m);
94 
95  if (res >= 0)
96  {
97  if ((wc == 0) != (res == 0))
98  abort ();
99  if (pwc != NULL)
100  *pwc = wc;
101  goto success;
102  }
103 
104  /* mbtowc does not distinguish between invalid and incomplete multibyte
105  sequences. But mbrtowc needs to make this distinction.
106  There are two possible approaches:
107  - Use iconv() and its return value.
108  - Use built-in knowledge about the possible encodings.
109  Given the low quality of implementation of iconv() on the systems
110  that lack mbrtowc(), we use the second approach.
111  The possible encodings are:
112  - 8-bit encodings,
113  - EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS,
114  - UTF-8 (already handled above).
115  Use specialized code for each. */
116  if (m >= 4 || m >= MB_CUR_MAX)
117  goto invalid;
118  /* Here MB_CUR_MAX > 1 and 0 < m < 4. */
119  switch (enc)
120  {
121  /* As a reference for this code, you can use the GNU libiconv
122  implementation. Look for uses of the RET_TOOFEW macro. */
123 
124  case enc_eucjp: /* EUC-JP */
125  {
126  if (m == 1)
127  {
128  unsigned char c = (unsigned char) p[0];
129 
130  if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f)
131  goto incomplete;
132  }
133  if (m == 2)
134  {
135  unsigned char c = (unsigned char) p[0];
136 
137  if (c == 0x8f)
138  {
139  unsigned char c2 = (unsigned char) p[1];
140 
141  if (c2 >= 0xa1 && c2 < 0xff)
142  goto incomplete;
143  }
144  }
145  goto invalid;
146  }
147 
148  case enc_94: /* EUC-KR, GB2312, BIG5 */
149  {
150  if (m == 1)
151  {
152  unsigned char c = (unsigned char) p[0];
153 
154  if (c >= 0xa1 && c < 0xff)
155  goto incomplete;
156  }
157  goto invalid;
158  }
159 
160  case enc_euctw: /* EUC-TW */
161  {
162  if (m == 1)
163  {
164  unsigned char c = (unsigned char) p[0];
165 
166  if ((c >= 0xa1 && c < 0xff) || c == 0x8e)
167  goto incomplete;
168  }
169  else /* m == 2 || m == 3 */
170  {
171  unsigned char c = (unsigned char) p[0];
172 
173  if (c == 0x8e)
174  goto incomplete;
175  }
176  goto invalid;
177  }
178 
179  case enc_gb18030: /* GB18030 */
180  {
181  if (m == 1)
182  {
183  unsigned char c = (unsigned char) p[0];
184 
185  if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe))
186  goto incomplete;
187  }
188  else /* m == 2 || m == 3 */
189  {
190  unsigned char c = (unsigned char) p[0];
191 
192  if (c >= 0x90 && c <= 0xe3)
193  {
194  unsigned char c2 = (unsigned char) p[1];
195 
196  if (c2 >= 0x30 && c2 <= 0x39)
197  {
198  if (m == 2)
199  goto incomplete;
200  else /* m == 3 */
201  {
202  unsigned char c3 = (unsigned char) p[2];
203 
204  if (c3 >= 0x81 && c3 <= 0xfe)
205  goto incomplete;
206  }
207  }
208  }
209  }
210  goto invalid;
211  }
212 
213  case enc_sjis: /* SJIS */
214  {
215  if (m == 1)
216  {
217  unsigned char c = (unsigned char) p[0];
218 
219  if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)
220  || (c >= 0xf0 && c <= 0xf9))
221  goto incomplete;
222  }
223  goto invalid;
224  }
225 
226  default:
227  /* An unknown multibyte encoding. */
228  goto incomplete;
229  }
230  }
231 
232  success:
233  /* res >= 0 is the corrected return value of
234  mbtowc_with_lock (&wc, p, m). */
235  if (nstate >= (res > 0 ? res : 1))
236  abort ();
237  res -= nstate;
238  pstate[0] = 0;
239  return res;
240 
241  incomplete:
242  {
243  size_t k = nstate;
244  /* Here 0 <= k < m < 4. */
245  pstate[++k] = s[0];
246  if (k < m)
247  {
248  pstate[++k] = s[1];
249  if (k < m)
250  pstate[++k] = s[2];
251  }
252  if (k != m)
253  abort ();
254  }
255  pstate[0] = m;
256  return (size_t)(-2);
257 
259  errno = EILSEQ;
260  /* The conversion state is undefined, says POSIX. */
261  return (size_t)(-1);
262  }
#define FALLTHROUGH
Definition: attribute.h:142
#define EILSEQ
Definition: errno.in.h:274
#define NULL
Definition: stddef.in.h:72
unsigned char c
goto invalid
size_t m
Definition: mbrtowc-impl.h:43
char * pstate
Definition: mbrtowc-impl.h:22
int res
Definition: mbrtowc-impl.h:45
const char * p
Definition: mbrtowc-impl.h:42
char buf[4]
Definition: mbrtowc-impl.h:39
enc_t enc
Definition: mbrtowc-impl.h:44