70 #ifndef ERRORS_TO_ISO8859_1 71 # define ERRORS_TO_ISO8859_1 1 80 #ifndef ERRORS_TO_CP1252 81 # define ERRORS_TO_CP1252 1 90 #ifndef STRICT_RFC3629 91 # define STRICT_RFC3629 0 99 0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
100 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
101 0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
102 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
139 unsigned char c = *(
const unsigned char*)
p;
144 }
else if (c < 0xa0) {
148 }
else if (c < 0xc2) {
151 if ( (end &&
p+1 >= end) || (
p[1]&0xc0) != 0x80)
goto FAIL;
155 ((
p[0] & 0x1f) << 6) +
157 }
else if (c == 0xe0) {
158 if (((
const unsigned char*)
p)[1] < 0xa0)
goto FAIL;
161 }
else if (c == 0xed) {
163 if (((
const unsigned char*)
p)[1] >= 0xa0)
goto FAIL;
165 }
else if (c == 0xef) {
167 if (((
const unsigned char*)
p)[1]==0xbf &&
168 ((
const unsigned char*)
p)[2]>=0xbe)
goto FAIL;
171 }
else if (c < 0xf0) {
173 if ( (end &&
p+2 >= end) || (
p[2]&0xc0) != 0x80)
goto FAIL;
176 ((
p[0] & 0x0f) << 12) +
177 ((
p[1] & 0x3f) << 6) +
179 }
else if (c == 0xf0) {
180 if (((
const unsigned char*)
p)[1] < 0x90)
goto FAIL;
182 }
else if (c < 0xf4) {
184 if ( (end &&
p+3 >= end) || (
p[2]&0xc0) != 0x80 || (
p[3]&0xc0) != 0x80)
goto FAIL;
188 if ((
p[1]&0xf)==0xf &&
189 ((
const unsigned char*)
p)[2] == 0xbf &&
190 ((
const unsigned char*)
p)[3] >= 0xbe)
goto FAIL;
193 ((
p[0] & 0x07) << 18) +
194 ((
p[1] & 0x3f) << 12) +
195 ((
p[2] & 0x3f) << 6) +
197 }
else if (c == 0xf4) {
198 if (((
const unsigned char*)
p)[1] > 0x8f)
goto FAIL;
203 #if ERRORS_TO_ISO8859_1 234 if ((*
p&0xc0) != 0x80)
return p;
236 for (a =
p-1; ; --a) {
238 if (!(a[0]&0x80))
return p;
239 if ((a[0]&0x40))
break;
265 if ((*
p&0xc0) != 0x80)
return p;
267 for (a =
p-1; ; --a) {
269 if (!(a[0]&0x80))
return p;
270 if ((a[0]&0x40))
break;
273 if (a+len >
p)
return a;
280 if (ucs < 0x000080U) {
282 }
else if (ucs < 0x000800U) {
284 }
else if (ucs < 0x010000U) {
286 }
else if (ucs <= 0x10ffffU) {
310 if (ucs < 0x000080U) {
313 }
else if (ucs < 0x000800U) {
314 buf[0] = 0xc0 | (ucs >> 6);
315 buf[1] = 0x80 | (ucs & 0x3F);
317 }
else if (ucs < 0x010000U) {
318 buf[0] = 0xe0 | (ucs >> 12);
319 buf[1] = 0x80 | ((ucs >> 6) & 0x3F);
320 buf[2] = 0x80 | (ucs & 0x3F);
322 }
else if (ucs <= 0x0010ffffU) {
323 buf[0] = 0xf0 | (ucs >> 18);
324 buf[1] = 0x80 | ((ucs >> 12) & 0x3F);
325 buf[2] = 0x80 | ((ucs >> 6) & 0x3F);
326 buf[3] = 0x80 | (ucs & 0x3F);
363 unsigned fl_ucs_to_Utf16(
const unsigned ucs,
unsigned short *dst,
const unsigned dstlen)
375 unsigned short u16[4];
378 if((!dstlen) || (!dst)) {
384 if((ucs > 0x0010FFFF) ||
385 ((ucs > 0xD7FF) && (ucs < 0xE000))) {
388 }
else if(ucs < 0x00010000) {
389 out[0] = (
unsigned short)ucs;
391 }
else if(dstlen < 2) {
395 out[0] = (((ucs - 0x00010000) >> 10) & 0x3FF) + 0xD800;
396 out[1] = (ucs & 0x3FF) + 0xDC00;
400 if(count < dstlen) { out[count] = 0; }
433 unsigned short* dst,
unsigned dstlen)
436 const char* e = src+srclen;
438 if (dstlen)
for (;;) {
439 if (
p >= e) {dst[count] = 0;
return count;}
449 if (count+2 >= dstlen) {dst[count] = 0; count += 2;
break;}
450 dst[count] = (((ucs-0x10000u)>>10)&0x3ff) | 0xd800;
451 dst[++count] = (ucs&0x3ff) | 0xdc00;
454 if (++count == dstlen) {dst[count-1] = 0;
break;}
458 if (!(*
p & 0x80))
p++;
462 if (ucs >= 0x10000) ++count;
501 wchar_t* dst,
unsigned dstlen)
503 #if defined(WIN32) || defined(__CYGWIN__) 507 const char* e = src+srclen;
509 if (dstlen)
for (;;) {
519 dst[count] = (wchar_t)ucs;
521 if (++count == dstlen) {dst[count-1] = 0;
break;}
525 if (!(*
p & 0x80))
p++;
557 char* dst,
unsigned dstlen)
560 const char* e = src+srclen;
562 if (dstlen)
for (;;) {
564 if (
p >= e) {dst[count] = 0;
return count;}
565 c = *(
const unsigned char*)
p;
572 if (ucs < 0x100) dst[count] = ucs;
573 else dst[count] =
'?';
575 if (++count >= dstlen) {dst[count-1] = 0;
break;}
579 if (!(*
p & 0x80))
p++;
618 const wchar_t* src,
unsigned srclen) {
621 if (dstlen)
for (;;) {
623 if (i >= srclen) {dst[count] = 0;
return count;}
627 if (count >= dstlen) {dst[count-1] = 0;
break;}
628 }
else if (ucs < 0x800U) {
629 if (count+2 >= dstlen) {dst[count] = 0; count += 2;
break;}
630 dst[count++] = 0xc0 | (ucs >> 6);
631 dst[count++] = 0x80 | (ucs & 0x3F);
632 #if defined(WIN32) || defined(__CYGWIN__) 633 }
else if (ucs >= 0xd800 && ucs <= 0xdbff && i < srclen &&
634 src[i] >= 0xdc00 && src[i] <= 0xdfff) {
636 unsigned ucs2 = src[i++];
637 ucs = 0x10000U + ((ucs&0x3ff)<<10) + (ucs2&0x3ff);
640 }
else if (ucs >= 0x10000) {
641 if (ucs > 0x10ffff) {
646 if (count+4 >= dstlen) {dst[count] = 0; count += 4;
break;}
647 dst[count++] = 0xf0 | (ucs >> 18);
648 dst[count++] = 0x80 | ((ucs >> 12) & 0x3F);
649 dst[count++] = 0x80 | ((ucs >> 6) & 0x3F);
650 dst[count++] = 0x80 | (ucs & 0x3F);
652 #if !(defined(WIN32) || defined(__CYGWIN__)) 656 if (count+3 >= dstlen) {dst[count] = 0; count += 3;
break;}
657 dst[count++] = 0xe0 | (ucs >> 12);
658 dst[count++] = 0x80 | ((ucs >> 6) & 0x3F);
659 dst[count++] = 0x80 | (ucs & 0x3F);
664 unsigned ucs = src[i++];
667 }
else if (ucs < 0x800U) {
669 #if defined(WIN32) || defined(__CYGWIN__) 670 }
else if (ucs >= 0xd800 && ucs <= 0xdbff && i < srclen-1 &&
671 src[i+1] >= 0xdc00 && src[i+1] <= 0xdfff) {
675 }
else if (ucs >= 0x10000 && ucs <= 0x10ffff) {
706 const char* src,
unsigned srclen) {
708 const char* e = src+srclen;
710 if (dstlen)
for (;;) {
712 if (
p >= e) {dst[count] = 0;
return count;}
713 ucs = *(
const unsigned char*)
p++;
716 if (count >= dstlen) {dst[count-1] = 0;
break;}
718 if (count+2 >= dstlen) {dst[count] = 0; count += 2;
break;}
719 dst[count++] = 0xc0 | (ucs >> 6);
720 dst[count++] = 0x80 | (ucs & 0x3F);
725 unsigned char ucs = *(
const unsigned char*)
p++;
736 # include <windows.h> 755 ret = GetACP() == CP_UTF8;
759 if (((s = getenv(
"LC_CTYPE")) && *s) ||
760 ((s = getenv(
"LC_ALL")) && *s) ||
761 ((s = getenv(
"LANG")) && *s)) {
762 ret = (strstr(s,
"utf") || strstr(s,
"UTF"));
785 char* dst,
unsigned dstlen)
802 WideCharToMultiByte(GetACP(), 0,
buf,
length, dst, dstlen, 0, 0);
806 if (dstlen==0 || ret >= dstlen-1)
808 WideCharToMultiByte(GetACP(), 0,
buf,
length, 0, 0, 0, 0);
821 ret = wcstombs(dst,
buf, dstlen);
822 if (ret >= (
int)dstlen-1) ret = wcstombs(0,
buf,0);
824 ret = wcstombs(0,
buf,0);
827 if (ret >= 0)
return (
unsigned)ret;
832 if (srclen < dstlen) {
833 memcpy(dst, src, srclen);
859 const char* src,
unsigned srclen)
867 length = MultiByteToWideChar(GetACP(), 0, src, srclen,
buf, 1024);
868 if ((
length == 0)&&(GetLastError()==ERROR_INSUFFICIENT_BUFFER)) {
869 length = MultiByteToWideChar(GetACP(), 0, src, srclen, 0, 0);
871 MultiByteToWideChar(GetACP(), 0, src, srclen,
buf,
length);
883 length = mbstowcs(0, src, 0)+1;
896 if (srclen < dstlen) {
897 memcpy(dst, src, srclen);
928 const char* e = src+srclen;
932 if (len < 2)
return 0;
933 if (len > ret) ret = len;