// Declaration only; the definition is not part of this listing.
// Takes a wide-character buffer (uptr) with an element count (tlen) and
// yields a size_t. Presumably the number of UTF-8 bytes needed to hold the
// converted text -- confirm against the definition.
// Declared noexcept.
size_t UTF8Length(const wchar_t *uptr, size_t tlen) noexcept;
// Declaration only; the definition is not part of this listing.
// Receives a wide-character source (uptr, tlen elements) and a char
// destination (putf) with its own size (len); returns nothing, so any result
// is delivered through putf. Presumably writes the UTF-8 form of the source
// into putf -- confirm against the definition.
// Declared noexcept.
void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len) noexcept;
// Declaration only; the definition is not part of this listing.
// Takes a char buffer (s) with a byte count (len) and yields a size_t.
// Presumably the number of UTF-16 code units the text would occupy --
// confirm against the definition.
// Declared noexcept.
size_t UTF16Length(const char *s, size_t len) noexcept;
// Declaration only; the definition is not part of this listing.
// Receives a char source (s, len bytes) and a wide-character destination
// (tbuf) with its capacity (tlen); yields a size_t. Presumably the number of
// elements written to tbuf -- confirm against the definition.
// NOTE: unlike the neighbouring length functions this one is NOT declared
// noexcept, so callers must allow for an exception escaping (presumably when
// the destination is too small -- confirm against the definition).
size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen);
// Declaration only; the definition is not part of this listing.
// Takes a char buffer (s) with a byte count (len) and yields a size_t.
// Presumably the number of UTF-32 elements (code points) in the text --
// confirm against the definition.
// Declared noexcept.
size_t UTF32Length(const char *s, size_t len) noexcept;
// Declaration only; the definition is not part of this listing.
// Receives a char source (s, len bytes) and an unsigned int destination
// (tbuf) with its capacity (tlen); yields a size_t. Presumably the number of
// elements written to tbuf -- confirm against the definition.
// NOTE: this declaration is NOT marked noexcept, so callers must allow for an
// exception escaping (presumably when the destination is too small --
// confirm against the definition).
size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen);
// Declaration only; the definition is not part of this listing.
// Takes a char buffer (s) with a byte count (len) and yields a bool.
// Presumably true when the whole buffer is well-formed UTF-8 -- confirm
// against the definition.
// Declared noexcept.
bool UTF8IsValid(const char *s, size_t len) noexcept;
38 return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
40 return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
42 return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
47 return (ch >= 0x80) && (ch < 0xc0);
// Declaration only; the definition is not part of this listing.
// Takes an unsigned byte buffer (us) with a byte count (len) and yields an
// int. The meaning of the result is not visible here; presumably it encodes
// the width and/or validity of the sequence starting at us -- confirm
// against the definition before relying on it.
// Declared noexcept.
int UTF8Classify(const unsigned char *us, size_t len) noexcept;
65 return (us[0] == 0xe2) && (us[1] == 0x80) && ((us[2] == 0xa8) || (us[2] == 0xa9));
// Reports whether the bytes at us start with the pair 0xC2 0x85, which is the
// UTF-8 encoding of U+0085 (NEXT LINE, "NEL").
// us must point at a readable byte; the second byte is inspected only when
// the first one equals 0xC2 (short-circuit evaluation), so at least two
// readable bytes are required in that case.
// Returns true only when both bytes match.
inline bool UTF8IsNEL(const unsigned char *us) noexcept {
	return (us[0] == 0xc2) && (us[1] == 0x85);
}
77 ((ch0 == 0xe2) && (ch1 == 0x80) && ((ch2 == 0xa8) || (ch2 == 0xa9))) ||
78 ((ch1 == 0xc2) && (ch2 == 0x85));
92 return (byteCount < 4) ? 1 : 2;
Styling buffer using one element for each run rather than using a filled buffer.
constexpr unsigned int UTF16LengthFromUTF8ByteCount(unsigned int byteCount) noexcept
size_t UTF32FromUTF8(const char *s, size_t len, unsigned int *tbuf, size_t tlen)
constexpr int UTF8MaxBytes
void UTF8FromUTF32Character(int uch, char *putf) noexcept
@ SUPPLEMENTAL_PLANE_FIRST
size_t UTF16Length(const char *s, size_t len) noexcept
bool UTF8IsValid(const char *s, size_t len) noexcept
constexpr int unicodeReplacementChar
constexpr bool UTF8IsAscii(int ch) noexcept
unsigned int UTF16FromUTF32Character(unsigned int val, wchar_t *tbuf) noexcept
size_t UTF16FromUTF8(const char *s, size_t len, wchar_t *tbuf, size_t tlen)
bool UTF8IsSeparator(const unsigned char *us) noexcept
void UTF8FromUTF16(const wchar_t *uptr, size_t tlen, char *putf, size_t len) noexcept
size_t UTF8Length(const wchar_t *uptr, size_t tlen) noexcept
bool UTF8IsNEL(const unsigned char *us) noexcept
int UTF8DrawBytes(const unsigned char *us, int len) noexcept
size_t UTF32Length(const char *s, size_t len) noexcept
constexpr int UTF8SeparatorLength
constexpr int UTF8NELLength
int UTF8Classify(const unsigned char *us, size_t len) noexcept
int UnicodeFromUTF8(const unsigned char *us) noexcept
constexpr unsigned int UTF16CharLength(wchar_t uch) noexcept
std::wstring WStringFromUTF8(const char *s, size_t len)
std::string FixInvalidUTF8(const std::string &text)
constexpr bool UTF8IsMultibyteLineEnd(unsigned char ch0, unsigned char ch1, unsigned char ch2) noexcept
constexpr bool UTF8IsTrailByte(unsigned char ch) noexcept
const unsigned char UTF8BytesOfLead[256]