28 #ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
29 #define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
58 template<
typename octet_type>
61 return static_cast<uint8_t>(0xff & oc);
63 template<
typename u16_type>
66 return static_cast<uint16_t>(0xffff & oc);
68 template<
typename octet_type>
71 return ((
mask8(oc) >> 6) == 0x2);
74 template <
typename u16>
80 template <
typename u16>
86 template <
typename u16>
92 template <
typename u32>
98 template <
typename octet_iterator>
99 inline typename std::iterator_traits<octet_iterator>::difference_type
105 else if ((lead >> 5) == 0x6)
107 else if ((lead >> 4) == 0xe)
109 else if ((lead >> 3) == 0x1e)
115 template <
typename octet_difference_type>
122 else if (cp < 0x800) {
126 else if (cp < 0x10000) {
138 template <
typename octet_iterator>
143 *code_point =
mask8(*it);
149 template <
typename octet_iterator>
158 cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
174 template <
typename octet_iterator>
183 cp = ((cp << 12) & 0xffff) + ((
mask8(*it) << 6) & 0xfff);
208 template <
typename octet_iterator>
217 cp = ((cp << 18) & 0x1fffff) + ((
mask8(*it) << 12) & 0x3ffff);
220 cp += (
mask8(*it) << 6) & 0xfff;
251 template <
typename octet_iterator>
256 octet_iterator original_it = it;
260 typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
304 template <
typename octet_iterator>
316 template <
typename octet_iterator>
319 octet_iterator result = start;
320 while (result != end) {
328 template <
typename octet_iterator>
329 inline bool is_valid(octet_iterator start, octet_iterator end)
334 template <
typename octet_iterator>
345 template <
typename octet_iterator>
const uint32_t CODE_POINT_MAX
bool is_lead_surrogate(u16 cp)
utf_error get_sequence_4(octet_iterator &it, octet_iterator end, uint32_t *code_point)
bool is_trail_surrogate(u16 cp)
bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
bool is_surrogate(u16 cp)
const uint32_t SURROGATE_OFFSET
const uint16_t TRAIL_SURROGATE_MAX
const uint16_t LEAD_OFFSET
utf_error get_sequence_2(octet_iterator &it, octet_iterator end, uint32_t *code_point)
const uint16_t TRAIL_SURROGATE_MIN
utf_error get_sequence_3(octet_iterator &it, octet_iterator end, uint32_t *code_point)
bool is_code_point_valid(u32 cp)
std::iterator_traits< octet_iterator >::difference_type sequence_length(octet_iterator lead_it)
uint16_t mask16(u16_type oc)
bool is_trail(octet_type oc)
const uint16_t LEAD_SURROGATE_MIN
utf_error validate_next(octet_iterator &it, octet_iterator end, uint32_t *code_point)
uint8_t mask8(octet_type oc)
utf_error get_sequence_1(octet_iterator &it, octet_iterator end, uint32_t *code_point)
get_sequence_x functions decode utf-8 sequences of the length x
const uint16_t LEAD_SURROGATE_MAX
bool starts_with_bom(octet_iterator it, octet_iterator end)
const uint8_t bom[]
The library API - functions intended to be called by the users.
bool is_valid(octet_iterator start, octet_iterator end)
octet_iterator find_invalid(octet_iterator start, octet_iterator end)
bool is_bom(octet_iterator it)
void err(int eval, const char *fmt,...)