"Fossies" - the Fresh Open Source Software Archive

Member "ntfsprogs-1.12.1/libntfs/unistr.c" (28 Sep 2005, 19765 Bytes) of package /linux/misc/old/ntfsprogs-1.12.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "unistr.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * unistr.c - Unicode string handling. Part of the Linux-NTFS project.
    3  *
    4  * Copyright (c) 2000-2004 Anton Altaparmakov
    5  *
    6  * This program/include file is free software; you can redistribute it and/or
    7  * modify it under the terms of the GNU General Public License as published
    8  * by the Free Software Foundation; either version 2 of the License, or
    9  * (at your option) any later version.
   10  *
   11  * This program/include file is distributed in the hope that it will be
   12  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
   13  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   14  * GNU General Public License for more details.
   15  *
   16  * You should have received a copy of the GNU General Public License
   17  * along with this program (in the main directory of the Linux-NTFS
   18  * distribution in the file COPYING); if not, write to the Free Software
   19  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   20  */
   21 
   22 #include "config.h"
   23 
   24 #ifdef HAVE_STDIO_H
   25 #include <stdio.h>
   26 #endif
   27 #ifdef HAVE_STDLIB_H
   28 #include <stdlib.h>
   29 #endif
   30 #ifdef HAVE_WCHAR_H
   31 #include <wchar.h>
   32 #endif
   33 #ifdef HAVE_STRING_H
   34 #include <string.h>
   35 #endif
   36 #ifdef HAVE_ERRNO_H
   37 #include <errno.h>
   38 #endif
   39 
   40 #include "types.h"
   41 #include "unistr.h"
   42 #include "debug.h"
   43 
   44 /*
   45  * IMPORTANT
   46  * =========
   47  *
   48  * All these routines assume that the Unicode characters are in little endian
   49  * encoding inside the strings!!!
   50  */
   51 
   52 /*
   53  * This is used by the name collation functions to quickly determine what
   54  * characters are (in)valid.
   55  */
   56 const u8 legal_ansi_char_array[0x40] = {
   57     0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
   58     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
   59 
   60     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
   61     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
   62 
   63     0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
   64     0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
   65 
   66     0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
   67     0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
   68 };
   69 
   70 /**
   71  * ntfs_names_are_equal - compare two Unicode names for equality
   72  * @s1:         name to compare to @s2
   73  * @s1_len:     length in Unicode characters of @s1
   74  * @s2:         name to compare to @s1
   75  * @s2_len:     length in Unicode characters of @s2
   76  * @ic:         ignore case bool
   77  * @upcase:     upcase table (only if @ic == IGNORE_CASE)
   78  * @upcase_size:    length in Unicode characters of @upcase (if present)
   79  *
   80  * Compare the names @s1 and @s2 and return TRUE (1) if the names are
   81  * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE,
   82  * the @upcase table is used to perform a case insensitive comparison.
   83  */
   84 BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len,
   85         const ntfschar *s2, size_t s2_len,
   86         const IGNORE_CASE_BOOL ic,
   87         const ntfschar *upcase, const u32 upcase_size)
   88 {
   89     if (s1_len != s2_len)
   90         return FALSE;
   91     if (!s1_len)
   92         return TRUE;
   93     if (ic == CASE_SENSITIVE)
   94         return ntfs_ucsncmp(s1, s2, s1_len) ? FALSE: TRUE;
   95     return ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? FALSE:
   96                                        TRUE;
   97 }
   98 
   99 /**
  100  * ntfs_names_collate - collate two Unicode names
  101  * @name1:  first Unicode name to compare
  102  * @name1_len:  length of first Unicode name to compare
  103  * @name2:  second Unicode name to compare
  104  * @name2_len:  length of second Unicode name to compare
  105  * @err_val:    if @name1 contains an invalid character return this value
  106  * @ic:     either CASE_SENSITIVE or IGNORE_CASE
  107  * @upcase: upcase table (ignored if @ic is CASE_SENSITIVE)
  108  * @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE)
  109  *
  110  * ntfs_names_collate() collates two Unicode names and returns:
  111  *
  112  *  -1 if the first name collates before the second one,
  113  *   0 if the names match,
  114  *   1 if the second name collates before the first one, or
  115  * @err_val if an invalid character is found in @name1 during the comparison.
  116  *
  117  * The following characters are considered invalid: '"', '*', '<', '>' and '?'.
  118  */
  119 int ntfs_names_collate(const ntfschar *name1, const u32 name1_len,
  120         const ntfschar *name2, const u32 name2_len,
  121         const int err_val, const IGNORE_CASE_BOOL ic,
  122         const ntfschar *upcase, const u32 upcase_len)
  123 {
  124     u32 cnt;
  125     ntfschar c1, c2;
  126 
  127 #ifdef DEBUG
  128     if (!name1 || !name2 || (ic && (!upcase || !upcase_len))) {
  129         Dputs("ntfs_names_collate received NULL pointer!");
  130         exit(1);
  131     }
  132 #endif
  133     for (cnt = 0; cnt < min(name1_len, name2_len); ++cnt)
  134     {
  135         c1 = le16_to_cpu(*name1);
  136         name1++;
  137         c2 = le16_to_cpu(*name2);
  138         name2++;
  139         if (ic) {
  140             if (c1 < upcase_len)
  141                 c1 = le16_to_cpu(upcase[c1]);
  142             if (c2 < upcase_len)
  143                 c2 = le16_to_cpu(upcase[c2]);
  144         }
  145         if (c1 < 64 && legal_ansi_char_array[c1] & 8)
  146             return err_val;
  147         if (c1 < c2)
  148             return -1;
  149         if (c1 > c2)
  150             return 1;
  151     }
  152     if (name1_len < name2_len)
  153         return -1;
  154     if (name1_len == name2_len)
  155         return 0;
  156     /* name1_len > name2_len */
  157     c1 = le16_to_cpu(*name1);
  158     if (c1 < 64 && legal_ansi_char_array[c1] & 8)
  159         return err_val;
  160     return 1;
  161 }
  162 
  163 /**
  164  * ntfs_ucsncmp - compare two little endian Unicode strings
  165  * @s1:     first string
  166  * @s2:     second string
  167  * @n:      maximum unicode characters to compare
  168  *
  169  * Compare the first @n characters of the Unicode strings @s1 and @s2,
  170  * The strings in little endian format and appropriate le16_to_cpu()
  171  * conversion is performed on non-little endian machines.
  172  *
  173  * The function returns an integer less than, equal to, or greater than zero
  174  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
  175  * to be less than, to match, or be greater than @s2.
  176  */
  177 int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n)
  178 {
  179     ntfschar c1, c2;
  180     size_t i;
  181 
  182 #ifdef DEBUG
  183     if (!s1 || !s2) {
  184         Dputs("ntfs_wcsncmp() received NULL pointer!");
  185         exit(1);
  186     }
  187 #endif
  188     for (i = 0; i < n; ++i) {
  189         c1 = le16_to_cpu(s1[i]);
  190         c2 = le16_to_cpu(s2[i]);
  191         if (c1 < c2)
  192             return -1;
  193         if (c1 > c2)
  194             return 1;
  195         if (!c1)
  196             break;
  197     }
  198     return 0;
  199 }
  200 
  201 /**
  202  * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
  203  * @s1:         first string
  204  * @s2:         second string
  205  * @n:          maximum unicode characters to compare
  206  * @upcase:     upcase table
  207  * @upcase_size:    upcase table size in Unicode characters
  208  *
  209  * Compare the first @n characters of the Unicode strings @s1 and @s2,
  210  * ignoring case. The strings in little endian format and appropriate
  211  * le16_to_cpu() conversion is performed on non-little endian machines.
  212  *
  213  * Each character is uppercased using the @upcase table before the comparison.
  214  *
  215  * The function returns an integer less than, equal to, or greater than zero
  216  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
  217  * to be less than, to match, or be greater than @s2.
  218  */
  219 int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,
  220         const ntfschar *upcase, const u32 upcase_size)
  221 {
  222     ntfschar c1, c2;
  223     size_t i;
  224 
  225 #ifdef DEBUG
  226     if (!s1 || !s2 || !upcase) {
  227         Dputs("ntfs_wcsncasecmp() received NULL pointer!");
  228         exit(1);
  229     }
  230 #endif
  231     for (i = 0; i < n; ++i) {
  232         if ((c1 = le16_to_cpu(s1[i])) < upcase_size)
  233             c1 = le16_to_cpu(upcase[c1]);
  234         if ((c2 = le16_to_cpu(s2[i])) < upcase_size)
  235             c2 = le16_to_cpu(upcase[c2]);
  236         if (c1 < c2)
  237             return -1;
  238         if (c1 > c2)
  239             return 1;
  240         if (!c1)
  241             break;
  242     }
  243     return 0;
  244 }
  245 
  246 /**
  247  * ntfs_ucsnlen - determine the length of a little endian Unicode string
  248  * @s:      pointer to Unicode string
  249  * @maxlen: maximum length of string @s
  250  *
  251  * Return the number of Unicode characters in the little endian Unicode
  252  * string @s up to a maximum of maxlen Unicode characters, not including
  253  * the terminating (ntfschar)'\0'. If there is no (ntfschar)'\0' between @s
  254  * and @s + @maxlen, @maxlen is returned.
  255  *
  256  * This function never looks beyond @s + @maxlen.
  257  */
  258 u32 ntfs_ucsnlen(const ntfschar *s, u32 maxlen)
  259 {
  260     u32 i;
  261 
  262     for (i = 0; i < maxlen; i++) {
  263         if (!le16_to_cpu(s[i]))
  264             break;
  265     }
  266     return i;
  267 }
  268 
  269 /**
  270  * ntfs_ucsndup - duplicate little endian Unicode string
  271  * @s:      pointer to Unicode string
  272  * @maxlen: maximum length of string @s
  273  *
  274  * Return a pointer to a new little endian Unicode string which is a duplicate
  275  * of the string s.  Memory for the new string is obtained with malloc(3), and
  276  * can be freed with free(3).
  277  *
  278  * A maximum of @maxlen Unicode characters are copied and a terminating
  279  * (ntfschar)'\0' little endian Unicode character is added.
  280  *
  281  * This function never looks beyond @s + @maxlen.
  282  *
  283  * Return a pointer to the new little endian Unicode string on success and NULL
  284  * on failure with errno set to the error code.
  285  */
  286 ntfschar *ntfs_ucsndup(const ntfschar *s, u32 maxlen)
  287 {
  288     ntfschar *dst;
  289     u32 len;
  290 
  291     len = ntfs_ucsnlen(s, maxlen);
  292     dst = malloc((len + 1) * sizeof(ntfschar));
  293     if (dst) {
  294         memcpy(dst, s, len * sizeof(ntfschar));
  295         dst[len] = cpu_to_le16(L'\0');
  296     }
  297     return dst;
  298 }
  299 
  300 /**
  301  * ntfs_name_upcase
  302  */
  303 void ntfs_name_upcase(ntfschar *name, u32 name_len, const ntfschar *upcase,
  304         const u32 upcase_len)
  305 {
  306     u32 i;
  307     ntfschar u;
  308 
  309     for (i = 0; i < name_len; i++)
  310         if ((u = le16_to_cpu(name[i])) < upcase_len)
  311             name[i] = upcase[u];
  312 }
  313 
  314 /**
  315  * ntfs_file_value_upcase
  316  */
  317 void ntfs_file_value_upcase(FILE_NAME_ATTR *file_name_attr,
  318         const ntfschar *upcase, const u32 upcase_len)
  319 {
  320     ntfs_name_upcase((ntfschar*)&file_name_attr->file_name,
  321             file_name_attr->file_name_length, upcase, upcase_len);
  322 }
  323 
  324 /**
  325  * ntfs_file_values_compare
  326  */
  327 int ntfs_file_values_compare(const FILE_NAME_ATTR *file_name_attr1,
  328         const FILE_NAME_ATTR *file_name_attr2,
  329         const int err_val, const IGNORE_CASE_BOOL ic,
  330         const ntfschar *upcase, const u32 upcase_len)
  331 {
  332     return ntfs_names_collate((ntfschar*)&file_name_attr1->file_name,
  333             file_name_attr1->file_name_length,
  334             (ntfschar*)&file_name_attr2->file_name,
  335             file_name_attr2->file_name_length,
  336             err_val, ic, upcase, upcase_len);
  337 }
  338 
  339 /**
  340  * ntfs_ucstombs - convert a little endian Unicode string to a multibyte string
  341  * @ins:    input Unicode string buffer
  342  * @ins_len:    length of input string in Unicode characters
  343  * @outs:   on return contains the (allocated) output multibyte string
  344  * @outs_len:   length of output buffer in bytes
  345  *
  346  * Convert the input little endian, 2-byte Unicode string @ins, of length
  347  * @ins_len into the multibyte string format dictated by the current locale.
  348  *
  349  * If *@outs is NULL, the function allocates the string and the caller is
  350  * responsible for calling free(*@outs); when finished with it.
  351  *
  352  * On success the function returns the number of bytes written to the output
  353  * string *@outs (>= 0), not counting the terminating NULL byte. If the output
  354  * string buffer was allocated, *@outs is set to it.
  355  *
  356  * On error, -1 is returned, and errno is set to the error code. The following
  357  * error codes can be expected:
  358  *  EINVAL      Invalid arguments (e.g. @ins or @outs is NULL).
  359  *  EILSEQ      The input string cannot be represented as a multibyte
  360  *          sequence according to the current locale.
  361  *  ENAMETOOLONG    Destination buffer is too small for input string.
  362  *  ENOMEM      Not enough memory to allocate destination buffer.
  363  */
  364 int ntfs_ucstombs(const ntfschar *ins, const int ins_len, char **outs,
  365         int outs_len)
  366 {
  367     char *mbs;
  368     wchar_t wc;
  369     int i, o, mbs_len;
  370     int cnt = 0;
  371 #ifdef HAVE_MBSINIT
  372     mbstate_t mbstate;
  373 #endif
  374 
  375     if (!ins || !outs) {
  376         errno = EINVAL;
  377         return -1;
  378     }
  379     mbs = *outs;
  380     mbs_len = outs_len;
  381     if (mbs && !mbs_len) {
  382         errno = ENAMETOOLONG;
  383         return -1;
  384     }
  385     if (!mbs) {
  386         mbs_len = (ins_len + 1) * MB_CUR_MAX;
  387         mbs = (char*)malloc(mbs_len);
  388         if (!mbs)
  389             return -1;
  390     }
  391 #ifdef HAVE_MBSINIT
  392     memset(&mbstate, 0, sizeof(mbstate));
  393 #else
  394     wctomb(NULL, 0);
  395 #endif
  396     for (i = o = 0; i < ins_len; i++) {
  397         /* Reallocate memory if necessary or abort. */
  398         if ((int)(o + MB_CUR_MAX) > mbs_len) {
  399             char *tc;
  400             if (mbs == *outs) {
  401                 errno = ENAMETOOLONG;
  402                 return -1;
  403             }
  404             tc = (char*)malloc((mbs_len + 64) & ~63);
  405             if (!tc)
  406                 goto err_out;
  407             memcpy(tc, mbs, mbs_len);
  408             mbs_len = (mbs_len + 64) & ~63;
  409             free(mbs);
  410             mbs = tc;
  411         }
  412         /* Convert the LE Unicode character to a CPU wide character. */
  413         wc = (wchar_t)le16_to_cpu(ins[i]);
  414         if (!wc)
  415             break;
  416         /* Convert the CPU endian wide character to multibyte. */
  417 #ifdef HAVE_MBSINIT
  418         cnt = wcrtomb(mbs + o, wc, &mbstate);
  419 #else
  420         cnt = wctomb(mbs + o, wc);
  421 #endif
  422         if (cnt == -1)
  423             goto err_out;
  424         if (cnt <= 0) {
  425             Dprintf("Eeek. cnt <= 0, cnt = %i\n", cnt);
  426             errno = EINVAL;
  427             goto err_out;
  428         }
  429         o += cnt;
  430     }
  431 #ifdef HAVE_MBSINIT
  432     /* Make sure we are back in the initial state. */
  433     if (!mbsinit(&mbstate)) {
  434         Dputs("Eeek. mbstate not in initial state!");
  435         errno = EILSEQ;
  436         goto err_out;
  437     }
  438 #endif
  439     /* Now write the NULL character. */
  440     mbs[o] = '\0';
  441     if (*outs != mbs)
  442         *outs = mbs;
  443     return o;
  444 err_out:
  445     if (mbs != *outs) {
  446         int eo = errno;
  447         free(mbs);
  448         errno = eo;
  449     }
  450     return -1;
  451 }
  452 
  453 /**
  454  * ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string
  455  * @ins:    input multibyte string buffer
  456  * @outs:   on return contains the (allocated) output Unicode string
  457  * @outs_len:   length of output buffer in Unicode characters
  458  *
  459  * Convert the input multibyte string @ins, from the current locale into the
  460  * corresponding little endian, 2-byte Unicode string.
  461  *
  462  * If *@outs is NULL, the function allocates the string and the caller is
  463  * responsible for calling free(*@outs); when finished with it.
  464  *
  465  * On success the function returns the number of Unicode characters written to
  466  * the output string *@outs (>= 0), not counting the terminating Unicode NULL
  467  * character. If the output string buffer was allocated, *@outs is set to it.
  468  *
  469  * On error, -1 is returned, and errno is set to the error code. The following
  470  * error codes can be expected:
  471  *  EINVAL      Invalid arguments (e.g. @ins or @outs is NULL).
  472  *  EILSEQ      The input string cannot be represented as a Unicode
  473  *          string according to the current locale.
  474  *  ENAMETOOLONG    Destination buffer is too small for input string.
  475  *  ENOMEM      Not enough memory to allocate destination buffer.
  476  */
  477 int ntfs_mbstoucs(const char *ins, ntfschar **outs, int outs_len)
  478 {
  479     ntfschar *ucs;
  480     const char *s;
  481     wchar_t wc;
  482     int i, o, cnt, ins_len, ucs_len, ins_size;
  483 #ifdef HAVE_MBSINIT
  484     mbstate_t mbstate;
  485 #endif
  486 
  487     if (!ins || !outs) {
  488         errno = EINVAL;
  489         return -1;
  490     }
  491     ucs = *outs;
  492     ucs_len = outs_len;
  493     if (ucs && !ucs_len) {
  494         errno = ENAMETOOLONG;
  495         return -1;
  496     }
  497     /* Determine the size of the multi-byte string in bytes. */
  498     ins_size = strlen(ins);
  499     /* Determine the length of the multi-byte string. */
  500     s = ins;
  501 #if defined(HAVE_MBSINIT)
  502     memset(&mbstate, 0, sizeof(mbstate));
  503     ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate);
  504 #ifdef __CYGWIN32__
  505     if (!ins_len && *ins) {
  506         /* Older Cygwin had broken mbsrtowcs() implementation. */
  507         ins_len = strlen(ins);
  508     }
  509 #endif
  510 #elif !defined(DJGPP)
  511     ins_len = mbstowcs(NULL, s, 0);
  512 #else
  513     /* Eeek!!! DJGPP has broken mbstowcs() implementation!!! */
  514     ins_len = strlen(ins);
  515 #endif
  516     if (ins_len == -1)
  517         return ins_len;
  518 #ifdef HAVE_MBSINIT
  519     if ((s != ins) || !mbsinit(&mbstate)) {
  520 #else
  521     if (s != ins) {
  522 #endif
  523         errno = EILSEQ;
  524         return -1;
  525     }
  526     /* Add the NULL terminator. */
  527     ins_len++;
  528     if (!ucs) {
  529         ucs_len = ins_len;
  530         ucs = (ntfschar*)malloc(ucs_len * sizeof(ntfschar));
  531         if (!ucs)
  532             return -1;
  533     }
  534 #ifdef HAVE_MBSINIT
  535     memset(&mbstate, 0, sizeof(mbstate));
  536 #else
  537     mbtowc(NULL, NULL, 0);
  538 #endif
  539     for (i = o = cnt = 0; i < ins_size; i += cnt, o++) {
  540         /* Reallocate memory if necessary or abort. */
  541         if (o >= ucs_len) {
  542             ntfschar *tc;
  543             if (ucs == *outs) {
  544                 errno = ENAMETOOLONG;
  545                 return -1;
  546             }
  547             /*
  548              * We will never get here but hey, it's only a bit of
  549              * extra code...
  550              */
  551             ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63;
  552             tc = (ntfschar*)realloc(ucs, ucs_len);
  553             if (!tc)
  554                 goto err_out;
  555             ucs = tc;
  556             ucs_len /= sizeof(ntfschar);
  557         }
  558         /* Convert the multibyte character to a wide character. */
  559 #ifdef HAVE_MBSINIT
  560         cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate);
  561 #else
  562         cnt = mbtowc(&wc, ins + i, ins_size - i);
  563 #endif
  564         if (!cnt)
  565             break;
  566         if (cnt == -1)
  567             goto err_out;
  568         if (cnt < -1) {
  569             Dprintf("%s(): Eeek. cnt = %i\n", __FUNCTION__, cnt);
  570             errno = EINVAL;
  571             goto err_out;
  572         }
  573         /* Make sure we are not overflowing the NTFS Unicode set. */
  574         if ((unsigned long)wc >= (unsigned long)(1 <<
  575                 (8 * sizeof(ntfschar)))) {
  576             errno = EILSEQ;
  577             goto err_out;
  578         }
  579         /* Convert the CPU wide character to a LE Unicode character. */
  580         ucs[o] = cpu_to_le16(wc);
  581     }
  582 #ifdef HAVE_MBSINIT
  583     /* Make sure we are back in the initial state. */
  584     if (!mbsinit(&mbstate)) {
  585         Dprintf("%s(): Eeek. mbstate not in initial state!\n",
  586                 __FUNCTION__);
  587         errno = EILSEQ;
  588         goto err_out;
  589     }
  590 #endif
  591     /* Now write the NULL character. */
  592     ucs[o] = cpu_to_le16(L'\0');
  593     if (*outs != ucs)
  594         *outs = ucs;
  595     return o;
  596 err_out:
  597     if (ucs != *outs) {
  598         int eo = errno;
  599         free(ucs);
  600         errno = eo;
  601     }
  602     return -1;
  603 }
  604 
  605 /**
  606  * ntfs_upcase_table_build - build the default upcase table for NTFS
  607  * @uc:     destination buffer where to store the built table
  608  * @uc_len: size of destination buffer in bytes
  609  *
  610  * ntfs_upcase_table_build() builds the default upcase table for NTFS and
  611  * stores it in the caller supplied buffer @uc of size @uc_len.
  612  *
  613  * Note, @uc_len must be at least 128kiB in size or bad things will happen!
  614  */
  615 void ntfs_upcase_table_build(ntfschar *uc, u32 uc_len)
  616 {
  617     static int uc_run_table[][3] = { /* Start, End, Add */
  618     {0x0061, 0x007B,  -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72,  74},
  619     {0x00E0, 0x00F7,  -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76,  86},
  620     {0x00F8, 0x00FF,  -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100},
  621     {0x0256, 0x0258, -205}, {0x1F00, 0x1F08,   8}, {0x1F78, 0x1F7A, 128},
  622     {0x028A, 0x028C, -217}, {0x1F10, 0x1F16,   8}, {0x1F7A, 0x1F7C, 112},
  623     {0x03AC, 0x03AD,  -38}, {0x1F20, 0x1F28,   8}, {0x1F7C, 0x1F7E, 126},
  624     {0x03AD, 0x03B0,  -37}, {0x1F30, 0x1F38,   8}, {0x1FB0, 0x1FB2,   8},
  625     {0x03B1, 0x03C2,  -32}, {0x1F40, 0x1F46,   8}, {0x1FD0, 0x1FD2,   8},
  626     {0x03C2, 0x03C3,  -31}, {0x1F51, 0x1F52,   8}, {0x1FE0, 0x1FE2,   8},
  627     {0x03C3, 0x03CC,  -32}, {0x1F53, 0x1F54,   8}, {0x1FE5, 0x1FE6,   7},
  628     {0x03CC, 0x03CD,  -64}, {0x1F55, 0x1F56,   8}, {0x2170, 0x2180, -16},
  629     {0x03CD, 0x03CF,  -63}, {0x1F57, 0x1F58,   8}, {0x24D0, 0x24EA, -26},
  630     {0x0430, 0x0450,  -32}, {0x1F60, 0x1F68,   8}, {0xFF41, 0xFF5B, -32},
  631     {0}
  632     };
  633     static int uc_dup_table[][2] = { /* Start, End */
  634     {0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC},
  635     {0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB},
  636     {0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5},
  637     {0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9},
  638     {0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95},
  639     {0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9},
  640     {0}
  641     };
  642     static int uc_byte_table[][2] = { /* Offset, Value */
  643     {0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196},
  644     {0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C},
  645     {0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D},
  646     {0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F},
  647     {0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9},
  648     {0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE},
  649     {0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7},
  650     {0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197},
  651     {0}
  652     };
  653     int i, r;
  654 
  655     memset((char*)uc, 0, uc_len);
  656     uc_len >>= 1;
  657     if (uc_len > 65536)
  658         uc_len = 65536;
  659     for (i = 0; (u32)i < uc_len; i++)
  660         uc[i] = i;
  661     for (r = 0; uc_run_table[r][0]; r++)
  662         for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
  663             uc[i] += uc_run_table[r][2];
  664     for (r = 0; uc_dup_table[r][0]; r++)
  665         for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
  666             uc[i + 1]--;
  667     for (r = 0; uc_byte_table[r][0]; r++)
  668         uc[uc_byte_table[r][0]] = uc_byte_table[r][1];
  669 }