"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "src/unicode.c" between
dwdiff-2.1.1.tar.bz2 and dwdiff-2.1.2.tar.bz2

About: dwdiff is a diff program that operates at the word level instead of the line level.

unicode.c  (dwdiff-2.1.1.tar.bz2):unicode.c  (dwdiff-2.1.2.tar.bz2)
skipping to change at line 18 skipping to change at line 18
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#ifdef USE_UNICODE #ifdef USE_UNICODE
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <unicode/unorm.h> #include <unicode/unorm2.h>
#include <unicode/ustring.h> #include <unicode/ustring.h>
#include "definitions.h" #include "definitions.h"
#include "unicode.h" #include "unicode.h"
#include "static_assert.h" #include "static_assert.h"
#include "option.h" #include "option.h"
/***************************************************************************** /*****************************************************************************
Input and output of UTF-8 streams Input and output of UTF-8 streams
*****************************************************************************/ *****************************************************************************/
skipping to change at line 233 skipping to change at line 233
if (bytes == 0) if (bytes == 0)
return 0; return 0;
return fileWrite(stream->data.file, encoded, bytes) < bytes ? EOF : 0; return fileWrite(stream->data.file, encoded, bytes) < bytes ? EOF : 0;
} }
#define MAX_GCB_CLASS 13 #define MAX_GCB_CLASS 13
/* This will warn us if we compile against a new library with more cluster /* This will warn us if we compile against a new library with more cluster
break classes. */ break classes. */
#ifdef DEBUG #ifdef DEBUG
static_assert(U_GCB_COUNT <= MAX_GCB_CLASS); static_assert(U_GCB_COUNT <= 19);
#endif #endif
/* Latest version of the algorithm, with all its classes, can be found at /* Latest version of the algorithm, with all its classes, can be found at
http://www.unicode.org/reports/tr29/ http://www.unicode.org/reports/tr29/
*/ */
/* The values of the constants _should_ not change from version to version, /* The values of the constants _should_ not change from version to version,
but better safe than sorry. */ but better safe than sorry. */
static_assert(U_GCB_OTHER == 0); static_assert(U_GCB_OTHER == 0);
static_assert(U_GCB_CONTROL == 1); static_assert(U_GCB_CONTROL == 1);
static_assert(U_GCB_CR == 2); static_assert(U_GCB_CR == 2);
static_assert(U_GCB_EXTEND == 3); static_assert(U_GCB_EXTEND == 3);
static_assert(U_GCB_L == 4); static_assert(U_GCB_L == 4);
static_assert(U_GCB_LF == 5); static_assert(U_GCB_LF == 5);
static_assert(U_GCB_LV == 6); static_assert(U_GCB_LV == 6);
static_assert(U_GCB_LVT == 7); static_assert(U_GCB_LVT == 7);
static_assert(U_GCB_T == 8); static_assert(U_GCB_T == 8);
static_assert(U_GCB_V == 9); static_assert(U_GCB_V == 9);
#if ICU_VERSION_MAJOR_NUM > 3 #if ICU_VERSION_MAJOR_NUM > 3 || defined(U_ICU_VERSION_MAJOR_NUM)
static_assert(U_GCB_SPACING_MARK == 10); static_assert(U_GCB_SPACING_MARK == 10);
static_assert(U_GCB_PREPEND == 11); static_assert(U_GCB_PREPEND == 11);
#if ICU_VERSION_MAJOR_NUM > 4 #if ICU_VERSION_MAJOR_NUM > 4 || defined(U_ICU_VERSION_MAJOR_NUM)
static_assert(U_GCB_REGIONAL_INDICATOR == 12); static_assert(U_GCB_REGIONAL_INDICATOR == 12);
#if U_ICU_VERSION_MAJOR_NUM > 57
static_assert(U_GCB_E_BASE == 13);
static_assert(U_GCB_E_BASE_GAZ == 14);
static_assert(U_GCB_E_MODIFIER == 15);
static_assert(U_GCB_GLUE_AFTER_ZWJ == 16);
static_assert(U_GCB_ZWJ == 17);
static_assert(U_GCB_COUNT == 18);
#endif
#endif #endif
#endif #endif
/** Table used for determining wheter a grapheme break exists between two characters. */ /** Table used for determining wheter a grapheme break exists between two characters. */
static char clusterContinuationTable[MAX_GCB_CLASS][MAX_GCB_CLASS] = { static char clusterContinuationTable[MAX_GCB_CLASS][MAX_GCB_CLASS] = {
{0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0}, {0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, CRLF_GRAPHEME_CLUSTER_BREAK, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, CRLF_GRAPHEME_CLUSTER_BREAK, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0}, {0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0},
{0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0}, {0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0},
skipping to change at line 389 skipping to change at line 397
VECTOR_APPEND(*buffer, (UChar) c); VECTOR_APPEND(*buffer, (UChar) c);
} }
} }
/** Decompose a Grapheme Cluster according to the standard decomposition. /** Decompose a Grapheme Cluster according to the standard decomposition.
@param c The Grapheme Cluster to decompose. @param c The Grapheme Cluster to decompose.
*/ */
void decomposeChar(CharData *c) { void decomposeChar(CharData *c) {
UErrorCode error = U_ZERO_ERROR; UErrorCode error = U_ZERO_ERROR;
size_t requiredLength; size_t requiredLength;
requiredLength = unorm_normalize(c->UTF8Char.original.data, c->UTF8Char.original.used, option.decomposition, 0, requiredLength = unorm2_normalize(option.decomposition, c->UTF8Char.original.data, c->UTF8Char.original.used,
c->UTF8Char.converted.data, c->UTF8Char.converted.allocated, &error); c->UTF8Char.converted.data, c->UTF8Char.converted.allocated, &error);
if (requiredLength > c->UTF8Char.converted.allocated) { if (requiredLength > c->UTF8Char.converted.allocated) {
ASSERT(error == U_BUFFER_OVERFLOW_ERROR); ASSERT(error == U_BUFFER_OVERFLOW_ERROR);
error = U_ZERO_ERROR; error = U_ZERO_ERROR;
VECTOR_ALLOCATE(c->UTF8Char.converted, requiredLength * sizeof(UChar)); VECTOR_ALLOCATE(c->UTF8Char.converted, requiredLength * sizeof(UChar));
requiredLength = unorm_normalize(c->UTF8Char.original.data, c->UTF8Char.original.used, option.decomposition, 0, requiredLength = unorm2_normalize(option.decomposition, c->UTF8Char.original.data, c->UTF8Char.original.used,
c->UTF8Char.converted.data, c->UTF8Char.converted.allocated, &error); c->UTF8Char.converted.data, c->UTF8Char.converted.allocated, &error);
} }
ASSERT(U_SUCCESS(error)); ASSERT(U_SUCCESS(error));
c->UTF8Char.converted.used = requiredLength; c->UTF8Char.converted.used = requiredLength;
} }
/** Fold the case of a Grapheme Cluster. /** Fold the case of a Grapheme Cluster.
@param c The Grapheme Cluster to case-fold. @param c The Grapheme Cluster to case-fold.
*/ */
void casefoldChar(CharData *c) { void casefoldChar(CharData *c) {
 End of changes. 7 change blocks. 
6 lines changed or deleted 14 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)