"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "jv_unicode.c" between
jq-1.5.tar.gz and jq-1.6.tar.gz

About: jq is a lightweight and flexible command-line JSON processor.

jv_unicode.c  (jq-1.5):jv_unicode.c  (jq-1.6)
#include <stdio.h> #include <stdio.h>
#include <assert.h> #include <assert.h>
#include "jv_unicode.h" #include "jv_unicode.h"
#include "jv_utf8_tables.h" #include "jv_utf8_tables.h"
// jvp_utf8_backtrack returns the beginning of the last codepoint in the
// string, assuming that start is the last byte in the string.
// If the last codepoint is incomplete, returns the number of missing bytes via
// *missing_bytes. If there are no leading bytes or an invalid byte is
// encountered, NULL is returned and *missing_bytes is not altered.
const char* jvp_utf8_backtrack(const char* start, const char* min, int *missing_
bytes) {
assert(min <= start);
if (min == start) {
return min;
}
int length = 0;
int seen = 1;
while (start >= min && (length = utf8_coding_length[(unsigned char)*start]) ==
UTF8_CONTINUATION_BYTE) {
start--;
seen++;
}
if (length == 0 || length == UTF8_CONTINUATION_BYTE || length - seen < 0) {
return NULL;
}
if (missing_bytes) *missing_bytes = length - seen;
return start;
}
const char* jvp_utf8_next(const char* in, const char* end, int* codepoint_ret) { const char* jvp_utf8_next(const char* in, const char* end, int* codepoint_ret) {
assert(in <= end); assert(in <= end);
if (in == end) { if (in == end) {
return 0; return 0;
} }
int codepoint = -1; int codepoint = -1;
unsigned char first = (unsigned char)in[0]; unsigned char first = (unsigned char)in[0];
int length = utf8_coding_length[first]; int length = utf8_coding_length[first];
if ((first & 0x80) == 0) { if ((first & 0x80) == 0) {
/* Fast-path for ASCII */ /* Fast-path for ASCII */
skipping to change at line 62 skipping to change at line 85
} }
int jvp_utf8_is_valid(const char* in, const char* end) { int jvp_utf8_is_valid(const char* in, const char* end) {
int codepoint; int codepoint;
while ((in = jvp_utf8_next(in, end, &codepoint))) { while ((in = jvp_utf8_next(in, end, &codepoint))) {
if (codepoint == -1) return 0; if (codepoint == -1) return 0;
} }
return 1; return 1;
} }
/* Assumes startchar is the first byte of a valid character sequence */
int jvp_utf8_decode_length(char startchar) { int jvp_utf8_decode_length(char startchar) {
if ((startchar & 0x80) == 0) return 1; if ((startchar & 0x80) == 0) return 1; // 0___ ____
else if ((startchar & 0xC0) == 0xC0) return 2; else if ((startchar & 0xE0) == 0xC0) return 2; // 110_ ____
else if ((startchar & 0xE0) == 0xE0) return 3; else if ((startchar & 0xF0) == 0xE0) return 3; // 1110 ____
else return 4; else return 4; // 1111 ____
} }
int jvp_utf8_encode_length(int codepoint) { int jvp_utf8_encode_length(int codepoint) {
if (codepoint <= 0x7F) return 1; if (codepoint <= 0x7F) return 1;
else if (codepoint <= 0x7FF) return 2; else if (codepoint <= 0x7FF) return 2;
else if (codepoint <= 0xFFFF) return 3; else if (codepoint <= 0xFFFF) return 3;
else return 4; else return 4;
} }
int jvp_utf8_encode(int codepoint, char* out) { int jvp_utf8_encode(int codepoint, char* out) {
 End of changes. 3 change blocks. 
4 lines changed or deleted 30 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)