"Fossies" - the Fresh Open Source Software Archive

Member "ntfs-3g_ntfsprogs-2017.3.23/libntfs-3g/unistr.c" (23 Mar 2017, 48777 Bytes) of package /linux/misc/ntfs-3g_ntfsprogs-2017.3.23.tgz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "unistr.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3g_ntfsprogs-2016.2.22_vs_3g_ntfsprogs-2017.3.23.

    1 /**
    2  * unistr.c - Unicode string handling. Originated from the Linux-NTFS project.
    3  *
    4  * Copyright (c) 2000-2004 Anton Altaparmakov
    5  * Copyright (c) 2002-2009 Szabolcs Szakacsits
    6  * Copyright (c) 2008-2015 Jean-Pierre Andre
    7  * Copyright (c) 2008      Bernhard Kaindl
    8  *
    9  * This program/include file is free software; you can redistribute it and/or
   10  * modify it under the terms of the GNU General Public License as published
   11  * by the Free Software Foundation; either version 2 of the License, or
   12  * (at your option) any later version.
   13  *
   14  * This program/include file is distributed in the hope that it will be
   15  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
   16  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   17  * GNU General Public License for more details.
   18  *
   19  * You should have received a copy of the GNU General Public License
   20  * along with this program (in the main directory of the NTFS-3G
   21  * distribution in the file COPYING); if not, write to the Free Software
   22  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   23  */
   24 
   25 #ifdef HAVE_CONFIG_H
   26 #include "config.h"
   27 #endif
   28 
   29 #ifdef HAVE_STDIO_H
   30 #include <stdio.h>
   31 #endif
   32 #ifdef HAVE_STDLIB_H
   33 #include <stdlib.h>
   34 #endif
   35 #ifdef HAVE_WCHAR_H
   36 #include <wchar.h>
   37 #endif
   38 #ifdef HAVE_STRING_H
   39 #include <string.h>
   40 #endif
   41 #ifdef HAVE_ERRNO_H
   42 #include <errno.h>
   43 #endif
   44 #ifdef HAVE_LOCALE_H
   45 #include <locale.h>
   46 #endif
   47 
   48 #if defined(__APPLE__) || defined(__DARWIN__)
   49 #ifdef ENABLE_NFCONV
   50 #include <CoreFoundation/CoreFoundation.h>
   51 #endif /* ENABLE_NFCONV */
   52 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
   53 
   54 #include "compat.h"
   55 #include "attrib.h"
   56 #include "types.h"
   57 #include "unistr.h"
   58 #include "debug.h"
   59 #include "logging.h"
   60 #include "misc.h"
   61 
   62 #ifndef ALLOW_BROKEN_UNICODE
   63 /* Erik allowing broken UTF-16 surrogate pairs and U+FFFE and U+FFFF by default,
   64  * open to debate. */
   65 #define ALLOW_BROKEN_UNICODE 1
   66 #endif /* !defined(ALLOW_BROKEN_UNICODE) */
   67 
   68 /*
   69  * IMPORTANT
   70  * =========
   71  *
   72  * All these routines assume that the Unicode characters are in little endian
   73  * encoding inside the strings!!!
   74  */
   75 
   76 static int use_utf8 = 1; /* use UTF-8 encoding for file names */
   77 
   78 #if defined(__APPLE__) || defined(__DARWIN__)
   79 #ifdef ENABLE_NFCONV
   80 /**
   81  * This variable controls whether or not automatic normalization form conversion
   82  * should be performed when translating NTFS unicode file names to UTF-8.
   83  * Defaults to on, but can be controlled from the outside using the function
   84  *   int ntfs_macosx_normalize_filenames(int normalize);
   85  */
   86 static int nfconvert_utf8 = 1;
   87 #endif /* ENABLE_NFCONV */
   88 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
   89 
   90 /*
   91  * This is used by the name collation functions to quickly determine what
   92  * characters are (in)valid.
   93  */
   94 #if 0
   95 static const u8 legal_ansi_char_array[0x40] = {
   96     0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
   97     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
   98 
   99     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
  100     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
  101 
  102     0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
  103     0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
  104 
  105     0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
  106     0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
  107 };
  108 #endif
  109 
  110 /**
  111  * ntfs_names_are_equal - compare two Unicode names for equality
  112  * @s1:         name to compare to @s2
  113  * @s1_len:     length in Unicode characters of @s1
  114  * @s2:         name to compare to @s1
  115  * @s2_len:     length in Unicode characters of @s2
  116  * @ic:         ignore case bool
  117  * @upcase:     upcase table (only if @ic == IGNORE_CASE)
  118  * @upcase_size:    length in Unicode characters of @upcase (if present)
  119  *
  120  * Compare the names @s1 and @s2 and return TRUE (1) if the names are
  121  * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE,
  122  * the @upcase table is used to perform a case insensitive comparison.
  123  */
  124 BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len,
  125         const ntfschar *s2, size_t s2_len,
  126         const IGNORE_CASE_BOOL ic,
  127         const ntfschar *upcase, const u32 upcase_size)
  128 {
  129     if (s1_len != s2_len)
  130         return FALSE;
  131     if (!s1_len)
  132         return TRUE;
  133     if (ic == CASE_SENSITIVE)
  134         return ntfs_ucsncmp(s1, s2, s1_len) ? FALSE: TRUE;
  135     return ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? FALSE:
  136                                        TRUE;
  137 }
  138 
  139 /*
  140  * ntfs_names_full_collate() fully collate two Unicode names
  141  *
  142  * @name1:  first Unicode name to compare
  143  * @name1_len:  length of first Unicode name to compare
  144  * @name2:  second Unicode name to compare
  145  * @name2_len:  length of second Unicode name to compare
  146  * @ic:     either CASE_SENSITIVE or IGNORE_CASE (see below)
  147  * @upcase: upcase table
  148  * @upcase_len: upcase table size
  149  *
  150  * If @ic is CASE_SENSITIVE, then the names are compared primarily ignoring
  151  * case, but if the names are equal ignoring case, then they are compared
  152  * case-sensitively.  As an example, "abc" would collate before "BCD" (since
  153  * "abc" and "BCD" differ ignoring case and 'A' < 'B') but after "ABC" (since
  154  * "ABC" and "abc" are equal ignoring case and 'A' < 'a').  This matches the
  155  * collation order of filenames as indexed in NTFS directories.
  156  *
  157  * If @ic is IGNORE_CASE, then the names are only compared case-insensitively
  158  * and are considered to match if and only if they are equal ignoring case.
  159  *
  160  * Returns:
  161  *  -1 if the first name collates before the second one,
  162  *   0 if the names match, or
  163  *   1 if the second name collates before the first one
  164  */
  165 int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len,
  166         const ntfschar *name2, const u32 name2_len,
  167         const IGNORE_CASE_BOOL ic, const ntfschar *upcase,
  168         const u32 upcase_len)
  169 {
  170     u32 cnt;
  171     u16 c1, c2;
  172     u16 u1, u2;
  173 
  174 #ifdef DEBUG
  175     if (!name1 || !name2 || !upcase || !upcase_len) {
  176         ntfs_log_debug("ntfs_names_collate received NULL pointer!\n");
  177         exit(1);
  178     }
  179 #endif
  180     cnt = min(name1_len, name2_len);
  181     if (cnt > 0) {
  182         if (ic == CASE_SENSITIVE) {
  183             while (--cnt && (*name1 == *name2)) {
  184                 name1++;
  185                 name2++;
  186             }
  187             u1 = c1 = le16_to_cpu(*name1);
  188             u2 = c2 = le16_to_cpu(*name2);
  189             if (u1 < upcase_len)
  190                 u1 = le16_to_cpu(upcase[u1]);
  191             if (u2 < upcase_len)
  192                 u2 = le16_to_cpu(upcase[u2]);
  193             if ((u1 == u2) && cnt)
  194                 do {
  195                     name1++;
  196                     u1 = le16_to_cpu(*name1);
  197                     name2++;
  198                     u2 = le16_to_cpu(*name2);
  199                     if (u1 < upcase_len)
  200                         u1 = le16_to_cpu(upcase[u1]);
  201                     if (u2 < upcase_len)
  202                         u2 = le16_to_cpu(upcase[u2]);
  203                 } while ((u1 == u2) && --cnt);
  204             if (u1 < u2)
  205                 return -1;
  206             if (u1 > u2)
  207                 return 1;
  208             if (name1_len < name2_len)
  209                 return -1;
  210             if (name1_len > name2_len)
  211                 return 1;
  212             if (c1 < c2)
  213                 return -1;
  214             if (c1 > c2)
  215                 return 1;
  216         } else {
  217             do {
  218                 u1 = le16_to_cpu(*name1);
  219                 name1++;
  220                 u2 = le16_to_cpu(*name2);
  221                 name2++;
  222                 if (u1 < upcase_len)
  223                     u1 = le16_to_cpu(upcase[u1]);
  224                 if (u2 < upcase_len)
  225                     u2 = le16_to_cpu(upcase[u2]);
  226             } while ((u1 == u2) && --cnt);
  227             if (u1 < u2)
  228                 return -1;
  229             if (u1 > u2)
  230                 return 1;
  231             if (name1_len < name2_len)
  232                 return -1;
  233             if (name1_len > name2_len)
  234                 return 1;
  235         }
  236     } else {
  237         if (name1_len < name2_len)
  238             return -1;
  239         if (name1_len > name2_len)
  240             return 1;
  241     }
  242     return 0;
  243 }
  244 
  245 /**
  246  * ntfs_ucsncmp - compare two little endian Unicode strings
  247  * @s1:     first string
  248  * @s2:     second string
  249  * @n:      maximum unicode characters to compare
  250  *
  251  * Compare the first @n characters of the Unicode strings @s1 and @s2,
  252  * The strings in little endian format and appropriate le16_to_cpu()
  253  * conversion is performed on non-little endian machines.
  254  *
  255  * The function returns an integer less than, equal to, or greater than zero
  256  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
  257  * to be less than, to match, or be greater than @s2.
  258  */
  259 int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n)
  260 {
  261     u16 c1, c2;
  262     size_t i;
  263 
  264 #ifdef DEBUG
  265     if (!s1 || !s2) {
  266         ntfs_log_debug("ntfs_wcsncmp() received NULL pointer!\n");
  267         exit(1);
  268     }
  269 #endif
  270     for (i = 0; i < n; ++i) {
  271         c1 = le16_to_cpu(s1[i]);
  272         c2 = le16_to_cpu(s2[i]);
  273         if (c1 < c2)
  274             return -1;
  275         if (c1 > c2)
  276             return 1;
  277         if (!c1)
  278             break;
  279     }
  280     return 0;
  281 }
  282 
  283 /**
  284  * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
  285  * @s1:         first string
  286  * @s2:         second string
  287  * @n:          maximum unicode characters to compare
  288  * @upcase:     upcase table
  289  * @upcase_size:    upcase table size in Unicode characters
  290  *
  291  * Compare the first @n characters of the Unicode strings @s1 and @s2,
  292  * ignoring case. The strings in little endian format and appropriate
  293  * le16_to_cpu() conversion is performed on non-little endian machines.
  294  *
  295  * Each character is uppercased using the @upcase table before the comparison.
  296  *
  297  * The function returns an integer less than, equal to, or greater than zero
  298  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
  299  * to be less than, to match, or be greater than @s2.
  300  */
  301 int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,
  302         const ntfschar *upcase, const u32 upcase_size)
  303 {
  304     u16 c1, c2;
  305     size_t i;
  306 
  307 #ifdef DEBUG
  308     if (!s1 || !s2 || !upcase) {
  309         ntfs_log_debug("ntfs_wcsncasecmp() received NULL pointer!\n");
  310         exit(1);
  311     }
  312 #endif
  313     for (i = 0; i < n; ++i) {
  314         if ((c1 = le16_to_cpu(s1[i])) < upcase_size)
  315             c1 = le16_to_cpu(upcase[c1]);
  316         if ((c2 = le16_to_cpu(s2[i])) < upcase_size)
  317             c2 = le16_to_cpu(upcase[c2]);
  318         if (c1 < c2)
  319             return -1;
  320         if (c1 > c2)
  321             return 1;
  322         if (!c1)
  323             break;
  324     }
  325     return 0;
  326 }
  327 
  328 /**
  329  * ntfs_ucsnlen - determine the length of a little endian Unicode string
  330  * @s:      pointer to Unicode string
  331  * @maxlen: maximum length of string @s
  332  *
  333  * Return the number of Unicode characters in the little endian Unicode
  334  * string @s up to a maximum of maxlen Unicode characters, not including
  335  * the terminating (ntfschar)'\0'. If there is no (ntfschar)'\0' between @s
  336  * and @s + @maxlen, @maxlen is returned.
  337  *
  338  * This function never looks beyond @s + @maxlen.
  339  */
  340 u32 ntfs_ucsnlen(const ntfschar *s, u32 maxlen)
  341 {
  342     u32 i;
  343 
  344     for (i = 0; i < maxlen; i++) {
  345         if (!le16_to_cpu(s[i]))
  346             break;
  347     }
  348     return i;
  349 }
  350 
  351 /**
  352  * ntfs_ucsndup - duplicate little endian Unicode string
  353  * @s:      pointer to Unicode string
  354  * @maxlen: maximum length of string @s
  355  *
  356  * Return a pointer to a new little endian Unicode string which is a duplicate
  357  * of the string s.  Memory for the new string is obtained with ntfs_malloc(3),
  358  * and can be freed with free(3).
  359  *
  360  * A maximum of @maxlen Unicode characters are copied and a terminating
  361  * (ntfschar)'\0' little endian Unicode character is added.
  362  *
  363  * This function never looks beyond @s + @maxlen.
  364  *
  365  * Return a pointer to the new little endian Unicode string on success and NULL
  366  * on failure with errno set to the error code.
  367  */
  368 ntfschar *ntfs_ucsndup(const ntfschar *s, u32 maxlen)
  369 {
  370     ntfschar *dst;
  371     u32 len;
  372 
  373     len = ntfs_ucsnlen(s, maxlen);
  374     dst = ntfs_malloc((len + 1) * sizeof(ntfschar));
  375     if (dst) {
  376         memcpy(dst, s, len * sizeof(ntfschar));
  377         dst[len] = const_cpu_to_le16(L'\0');
  378     }
  379     return dst;
  380 }
  381 
  382 /**
  383  * ntfs_name_upcase - Map an Unicode name to its uppercase equivalent
  384  * @name:
  385  * @name_len:
  386  * @upcase:
  387  * @upcase_len:
  388  *
  389  * Description...
  390  *
  391  * Returns:
  392  */
  393 void ntfs_name_upcase(ntfschar *name, u32 name_len, const ntfschar *upcase,
  394         const u32 upcase_len)
  395 {
  396     u32 i;
  397     u16 u;
  398 
  399     for (i = 0; i < name_len; i++)
  400         if ((u = le16_to_cpu(name[i])) < upcase_len)
  401             name[i] = upcase[u];
  402 }
  403 
  404 /**
  405  * ntfs_name_locase - Map a Unicode name to its lowercase equivalent
  406  */
  407 void ntfs_name_locase(ntfschar *name, u32 name_len, const ntfschar *locase,
  408         const u32 locase_len)
  409 {
  410     u32 i;
  411     u16 u;
  412 
  413     if (locase)
  414         for (i = 0; i < name_len; i++)
  415             if ((u = le16_to_cpu(name[i])) < locase_len)
  416                 name[i] = locase[u];
  417 }
  418 
  419 /**
  420  * ntfs_file_value_upcase - Convert a filename to upper case
  421  * @file_name_attr:
  422  * @upcase:
  423  * @upcase_len:
  424  *
  425  * Description...
  426  *
  427  * Returns:
  428  */
  429 void ntfs_file_value_upcase(FILE_NAME_ATTR *file_name_attr,
  430         const ntfschar *upcase, const u32 upcase_len)
  431 {
  432     ntfs_name_upcase((ntfschar*)&file_name_attr->file_name,
  433             file_name_attr->file_name_length, upcase, upcase_len);
  434 }
  435 
/*
   NTFS uses Unicode (UTF-16LE [NTFS-3G uses UCS-2LE, which is enough
   for now]) for path names, but the Unicode code points need to be
   converted before a path can be accessed under NTFS. For 7 bit ASCII/ANSI,
   glibc does this even without a locale in a hard-coded fashion, as that
   appears to be easy because the low 7-bit ASCII range appears to be
   available in all charsets. However, it does not convert anything if
   there was some error with the locale setup, or if no locale is set up,
   as when mount is called during early boot, where (by policy) locales
   are not used (and may not be available if /usr is not yet mounted).
   This patch fixes the resulting issues for systems which use UTF-8;
   for others, specifying the locale in fstab brings them the encoding
   which they want.

   If no locale is defined or there was a problem with setting one
   up, and whenever nl_langinfo(CODESET) returns a string starting with
   "ANSI", use an internal UCS-2LE <-> UTF-8 codeset converter to fix
   the bug where NTFS-3G does not show any path names which include
   international characters (and also fails on creating them).

   Author: Bernhard Kaindl <bk@suse.de>
   Jean-Pierre Andre made it compliant with RFC3629/RFC2781.
*/
  459  
  460 /* 
  461  * Return the number of bytes in UTF-8 needed (without the terminating null) to
  462  * store the given UTF-16LE string.
  463  *
  464  * On error, -1 is returned, and errno is set to the error code. The following
  465  * error codes can be expected:
  466  *  EILSEQ      The input string is not valid UTF-16LE (only possible
  467  *          if compiled without ALLOW_BROKEN_UNICODE).
  468  *  ENAMETOOLONG    The length of the UTF-8 string in bytes (without the
  469  *          terminating null) would exceed @outs_len.
  470  */
  471 static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_len)
  472 {
  473     int i, ret = -1;
  474     int count = 0;
  475     BOOL surrog;
  476 
  477     surrog = FALSE;
  478     for (i = 0; i < ins_len && ins[i] && count <= outs_len; i++) {
  479         unsigned short c = le16_to_cpu(ins[i]);
  480         if (surrog) {
  481             if ((c >= 0xdc00) && (c < 0xe000)) {
  482                 surrog = FALSE;
  483                 count += 4;
  484             } else {
  485 #if ALLOW_BROKEN_UNICODE
  486                 /* The first UTF-16 unit of a surrogate pair has
  487                  * a value between 0xd800 and 0xdc00. It can be
  488                  * encoded as an individual UTF-8 sequence if we
  489                  * cannot combine it with the next UTF-16 unit
  490                  * unit as a surrogate pair. */
  491                 surrog = FALSE;
  492                 count += 3;
  493 
  494                 --i;
  495                 continue;
  496 #else
  497                 goto fail;
  498 #endif /* ALLOW_BROKEN_UNICODE */
  499             }
  500         } else
  501             if (c < 0x80)
  502                 count++;
  503             else if (c < 0x800)
  504                 count += 2;
  505             else if (c < 0xd800)
  506                 count += 3;
  507             else if (c < 0xdc00)
  508                 surrog = TRUE;
  509 #if ALLOW_BROKEN_UNICODE
  510             else if (c < 0xe000)
  511                 count += 3;
  512             else if (c >= 0xe000)
  513 #else
  514             else if ((c >= 0xe000) && (c < 0xfffe))
  515 #endif /* ALLOW_BROKEN_UNICODE */
  516                 count += 3;
  517             else 
  518                 goto fail;
  519     }
  520 
  521     if (surrog && count <= outs_len) {
  522 #if ALLOW_BROKEN_UNICODE
  523         count += 3; /* ending with a single surrogate */
  524 #else
  525         goto fail;
  526 #endif /* ALLOW_BROKEN_UNICODE */
  527     }
  528 
  529     if (count > outs_len) {
  530         errno = ENAMETOOLONG;
  531         goto out;
  532     }
  533 
  534     ret = count;
  535 out:
  536     return ret;
  537 fail:
  538     errno = EILSEQ;
  539     goto out;
  540 }
  541 
  542 /*
  543  * ntfs_utf16_to_utf8 - convert a little endian UTF16LE string to an UTF-8 string
  544  * @ins:    input utf16 string buffer
  545  * @ins_len:    length of input string in utf16 characters
  546  * @outs:   on return contains the (allocated) output multibyte string
  547  * @outs_len:   length of output buffer in bytes (ignored if *@outs is NULL)
  548  *
  549  * Return -1 with errno set if string has invalid byte sequence or too long.
  550  */
  551 static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
  552                   char **outs, int outs_len)
  553 {
  554 #if defined(__APPLE__) || defined(__DARWIN__)
  555 #ifdef ENABLE_NFCONV
  556     char *original_outs_value = *outs;
  557     int original_outs_len = outs_len;
  558 #endif /* ENABLE_NFCONV */
  559 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
  560 
  561     char *t;
  562     int i, size, ret = -1;
  563     int halfpair;
  564 
  565     halfpair = 0;
  566     if (!*outs) {
  567         /* If no output buffer was provided, we will allocate one and
  568          * limit its length to PATH_MAX.  Note: we follow the standard
  569          * convention of PATH_MAX including the terminating null. */
  570         outs_len = PATH_MAX;
  571     }
  572 
  573     /* The size *with* the terminating null is limited to @outs_len,
  574      * so the size *without* the terminating null is limited to one less. */
  575     size = utf16_to_utf8_size(ins, ins_len, outs_len - 1);
  576 
  577     if (size < 0)
  578         goto out;
  579 
  580     if (!*outs) {
  581         outs_len = size + 1;
  582         *outs = ntfs_malloc(outs_len);
  583         if (!*outs)
  584             goto out;
  585     }
  586 
  587     t = *outs;
  588 
  589     for (i = 0; i < ins_len && ins[i]; i++) {
  590         unsigned short c = le16_to_cpu(ins[i]);
  591             /* size not double-checked */
  592         if (halfpair) {
  593             if ((c >= 0xdc00) && (c < 0xe000)) {
  594                 *t++ = 0xf0 + (((halfpair + 64) >> 8) & 7);
  595                 *t++ = 0x80 + (((halfpair + 64) >> 2) & 63);
  596                 *t++ = 0x80 + ((c >> 6) & 15) + ((halfpair & 3) << 4);
  597                 *t++ = 0x80 + (c & 63);
  598                 halfpair = 0;
  599             } else {
  600 #if ALLOW_BROKEN_UNICODE
  601                 /* The first UTF-16 unit of a surrogate pair has
  602                  * a value between 0xd800 and 0xdc00. It can be
  603                  * encoded as an individual UTF-8 sequence if we
  604                  * cannot combine it with the next UTF-16 unit
  605                  * unit as a surrogate pair. */
  606                 *t++ = 0xe0 | (halfpair >> 12);
  607                 *t++ = 0x80 | ((halfpair >> 6) & 0x3f);
  608                 *t++ = 0x80 | (halfpair & 0x3f);
  609                 halfpair = 0;
  610 
  611                 --i;
  612                 continue;
  613 #else
  614                 goto fail;
  615 #endif /* ALLOW_BROKEN_UNICODE */
  616             }
  617         } else if (c < 0x80) {
  618             *t++ = c;
  619             } else {
  620             if (c < 0x800) {
  621                 *t++ = (0xc0 | ((c >> 6) & 0x3f));
  622                     *t++ = 0x80 | (c & 0x3f);
  623             } else if (c < 0xd800) {
  624                 *t++ = 0xe0 | (c >> 12);
  625                 *t++ = 0x80 | ((c >> 6) & 0x3f);
  626                     *t++ = 0x80 | (c & 0x3f);
  627             } else if (c < 0xdc00)
  628                 halfpair = c;
  629 #if ALLOW_BROKEN_UNICODE
  630             else if (c < 0xe000) {
  631                 *t++ = 0xe0 | (c >> 12);
  632                 *t++ = 0x80 | ((c >> 6) & 0x3f);
  633                 *t++ = 0x80 | (c & 0x3f);
  634             }
  635 #endif /* ALLOW_BROKEN_UNICODE */
  636             else if (c >= 0xe000) {
  637                 *t++ = 0xe0 | (c >> 12);
  638                 *t++ = 0x80 | ((c >> 6) & 0x3f);
  639                     *t++ = 0x80 | (c & 0x3f);
  640             } else 
  641                 goto fail;
  642             }
  643     }
  644 #if ALLOW_BROKEN_UNICODE
  645     if (halfpair) { /* ending with a single surrogate */
  646         *t++ = 0xe0 | (halfpair >> 12);
  647         *t++ = 0x80 | ((halfpair >> 6) & 0x3f);
  648         *t++ = 0x80 | (halfpair & 0x3f);
  649     }
  650 #endif /* ALLOW_BROKEN_UNICODE */
  651     *t = '\0';
  652     
  653 #if defined(__APPLE__) || defined(__DARWIN__)
  654 #ifdef ENABLE_NFCONV
  655     if(nfconvert_utf8 && (t - *outs) > 0) {
  656         char *new_outs = NULL;
  657         int new_outs_len = ntfs_macosx_normalize_utf8(*outs, &new_outs, 0); // Normalize to decomposed form
  658         if(new_outs_len >= 0 && new_outs != NULL) {
  659             if(original_outs_value != *outs) {
  660                 // We have allocated outs ourselves.
  661                 free(*outs);
  662                 *outs = new_outs;
  663                 t = *outs + new_outs_len;
  664             }
  665             else {
  666                 // We need to copy new_outs into the fixed outs buffer.
  667                 memset(*outs, 0, original_outs_len);
  668                 strncpy(*outs, new_outs, original_outs_len-1);
  669                 t = *outs + original_outs_len;
  670                 free(new_outs);
  671             }
  672         }
  673         else {
  674             ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFD: %s\n", *outs);
  675             ntfs_log_error("  new_outs=0x%p\n", new_outs);
  676             ntfs_log_error("  new_outs_len=%d\n", new_outs_len);
  677         }
  678     }
  679 #endif /* ENABLE_NFCONV */
  680 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
  681     
  682     ret = t - *outs;
  683 out:
  684     return ret;
  685 fail:
  686     errno = EILSEQ;
  687     goto out;
  688 }
  689 
  690 /* 
  691  * Return the amount of 16-bit elements in UTF-16LE needed 
  692  * (without the terminating null) to store given UTF-8 string.
  693  *
  694  * Return -1 with errno set if it's longer than PATH_MAX or string is invalid.
  695  *
  696  * Note: This does not check whether the input sequence is a valid utf8 string,
  697  *   and should be used only in context where such check is made!
  698  */
  699 static int utf8_to_utf16_size(const char *s)
  700 {
  701     int ret = -1;
  702     unsigned int byte;
  703     size_t count = 0;
  704 
  705     while ((byte = *((const unsigned char *)s++))) {
  706         if (++count >= PATH_MAX) 
  707             goto fail;
  708         if (byte >= 0xc0) {
  709             if (byte >= 0xF5) {
  710                 errno = EILSEQ;
  711                 goto out;
  712             }
  713             if (!*s) 
  714                 break;
  715             if (byte >= 0xC0) 
  716                 s++;
  717             if (!*s) 
  718                 break;
  719             if (byte >= 0xE0) 
  720                 s++;
  721             if (!*s) 
  722                 break;
  723             if (byte >= 0xF0) {
  724                 s++;
  725                 if (++count >= PATH_MAX)
  726                     goto fail;
  727             }
  728         }
  729     }
  730     ret = count;
  731 out:
  732     return ret;
  733 fail:
  734     errno = ENAMETOOLONG;
  735     goto out;
  736 }
  737 /* 
  738  * This converts one UTF-8 sequence to cpu-endian Unicode value
  739  * within range U+0 .. U+10ffff and excluding U+D800 .. U+DFFF
  740  *
  741  * Return the number of used utf8 bytes or -1 with errno set 
  742  * if sequence is invalid.
  743  */
  744 static int utf8_to_unicode(u32 *wc, const char *s)
  745 {
  746         unsigned int byte = *((const unsigned char *)s);
  747 
  748                     /* single byte */
  749     if (byte == 0) {
  750         *wc = (u32) 0;
  751         return 0;
  752     } else if (byte < 0x80) {
  753         *wc = (u32) byte;
  754         return 1;
  755                     /* double byte */
  756     } else if (byte < 0xc2) {
  757         goto fail;
  758     } else if (byte < 0xE0) {
  759         if ((s[1] & 0xC0) == 0x80) {
  760             *wc = ((u32)(byte & 0x1F) << 6)
  761                 | ((u32)(s[1] & 0x3F));
  762             return 2;
  763         } else
  764             goto fail;
  765                     /* three-byte */
  766     } else if (byte < 0xF0) {
  767         if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)) {
  768             *wc = ((u32)(byte & 0x0F) << 12)
  769                 | ((u32)(s[1] & 0x3F) << 6)
  770                 | ((u32)(s[2] & 0x3F));
  771             /* Check valid ranges */
  772 #if ALLOW_BROKEN_UNICODE
  773             if (((*wc >= 0x800) && (*wc <= 0xD7FF))
  774               || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
  775               || ((*wc >= 0xe000) && (*wc <= 0xFFFF)))
  776                 return 3;
  777 #else
  778             if (((*wc >= 0x800) && (*wc <= 0xD7FF))
  779               || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
  780                 return 3;
  781 #endif /* ALLOW_BROKEN_UNICODE */
  782         }
  783         goto fail;
  784                     /* four-byte */
  785     } else if (byte < 0xF5) {
  786         if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)
  787           && ((s[3] & 0xC0) == 0x80)) {
  788             *wc = ((u32)(byte & 0x07) << 18)
  789                 | ((u32)(s[1] & 0x3F) << 12)
  790                 | ((u32)(s[2] & 0x3F) << 6)
  791                 | ((u32)(s[3] & 0x3F));
  792             /* Check valid ranges */
  793             if ((*wc <= 0x10ffff) && (*wc >= 0x10000))
  794                 return 4;
  795         }
  796         goto fail;
  797     }
  798 fail:
  799     errno = EILSEQ;
  800     return -1;
  801 }
  802 
  803 /**
  804  * ntfs_utf8_to_utf16 - convert a UTF-8 string to a UTF-16LE string
  805  * @ins:    input multibyte string buffer
  806  * @outs:   on return contains the (allocated) output utf16 string
  807  * @outs_len:   length of output buffer in utf16 characters
  808  * 
  809  * Return -1 with errno set.
  810  */
  811 static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
  812 {
  813 #if defined(__APPLE__) || defined(__DARWIN__)
  814 #ifdef ENABLE_NFCONV
  815     char *new_ins = NULL;
  816     if(nfconvert_utf8) {
  817         int new_ins_len;
  818         new_ins_len = ntfs_macosx_normalize_utf8(ins, &new_ins, 1); // Normalize to composed form
  819         if(new_ins_len >= 0)
  820             ins = new_ins;
  821         else
  822             ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFC: %s\n", ins);
  823     }
  824 #endif /* ENABLE_NFCONV */
  825 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
  826     const char *t = ins;
  827     u32 wc;
  828     BOOL allocated;
  829     ntfschar *outpos;
  830     int shorts, ret = -1;
  831 
  832     shorts = utf8_to_utf16_size(ins);
  833     if (shorts < 0)
  834         goto fail;
  835 
  836     allocated = FALSE;
  837     if (!*outs) {
  838         *outs = ntfs_malloc((shorts + 1) * sizeof(ntfschar));
  839         if (!*outs)
  840             goto fail;
  841         allocated = TRUE;
  842     }
  843 
  844     outpos = *outs;
  845 
  846     while(1) {
  847         int m  = utf8_to_unicode(&wc, t);
  848         if (m <= 0) {
  849             if (m < 0) {
  850                 /* do not leave space allocated if failed */
  851                 if (allocated) {
  852                     free(*outs);
  853                     *outs = (ntfschar*)NULL;
  854                 }
  855                 goto fail;
  856             }
  857             *outpos++ = const_cpu_to_le16(0);
  858             break;
  859         }
  860         if (wc < 0x10000)
  861             *outpos++ = cpu_to_le16(wc);
  862         else {
  863             wc -= 0x10000;
  864             *outpos++ = cpu_to_le16((wc >> 10) + 0xd800);
  865             *outpos++ = cpu_to_le16((wc & 0x3ff) + 0xdc00);
  866         }
  867         t += m;
  868     }
  869     
  870     ret = --outpos - *outs;
  871 fail:
  872 #if defined(__APPLE__) || defined(__DARWIN__)
  873 #ifdef ENABLE_NFCONV
  874     if(new_ins != NULL)
  875         free(new_ins);
  876 #endif /* ENABLE_NFCONV */
  877 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
  878     return ret;
  879 }
  880 
  881 /**
  882  * ntfs_ucstombs - convert a little endian Unicode string to a multibyte string
  883  * @ins:    input Unicode string buffer
  884  * @ins_len:    length of input string in Unicode characters
  885  * @outs:   on return contains the (allocated) output multibyte string
  886  * @outs_len:   length of output buffer in bytes (ignored if *@outs is NULL)
  887  *
  888  * Convert the input little endian, 2-byte Unicode string @ins, of length
  889  * @ins_len into the multibyte string format dictated by the current locale.
  890  *
  891  * If *@outs is NULL, the function allocates the string and the caller is
  892  * responsible for calling free(*@outs); when finished with it.
  893  *
  894  * On success the function returns the number of bytes written to the output
  895  * string *@outs (>= 0), not counting the terminating NULL byte. If the output
  896  * string buffer was allocated, *@outs is set to it.
  897  *
  898  * On error, -1 is returned, and errno is set to the error code. The following
  899  * error codes can be expected:
  900  *  EINVAL      Invalid arguments (e.g. @ins or @outs is NULL).
  901  *  EILSEQ      The input string cannot be represented as a multibyte
  902  *          sequence according to the current locale.
  903  *  ENAMETOOLONG    Destination buffer is too small for input string.
  904  *  ENOMEM      Not enough memory to allocate destination buffer.
  905  */
  906 int ntfs_ucstombs(const ntfschar *ins, const int ins_len, char **outs,
  907         int outs_len)
  908 {
  909     char *mbs;
  910     int mbs_len;
  911 #ifdef MB_CUR_MAX
  912     wchar_t wc;
  913     int i, o;
  914     int cnt = 0;
  915 #ifdef HAVE_MBSINIT
  916     mbstate_t mbstate;
  917 #endif
  918 #endif /* MB_CUR_MAX */
  919 
  920     if (!ins || !outs) {
  921         errno = EINVAL;
  922         return -1;
  923     }
  924     mbs = *outs;
  925     mbs_len = outs_len;
  926     if (mbs && !mbs_len) {
  927         errno = ENAMETOOLONG;
  928         return -1;
  929     }
  930     if (use_utf8)
  931         return ntfs_utf16_to_utf8(ins, ins_len, outs, outs_len);
  932 #ifdef MB_CUR_MAX
  933     if (!mbs) {
  934         mbs_len = (ins_len + 1) * MB_CUR_MAX;
  935         mbs = ntfs_malloc(mbs_len);
  936         if (!mbs)
  937             return -1;
  938     }
  939 #ifdef HAVE_MBSINIT
  940     memset(&mbstate, 0, sizeof(mbstate));
  941 #else
  942     wctomb(NULL, 0);
  943 #endif
  944     for (i = o = 0; i < ins_len; i++) {
  945         /* Reallocate memory if necessary or abort. */
  946         if ((int)(o + MB_CUR_MAX) > mbs_len) {
  947             char *tc;
  948             if (mbs == *outs) {
  949                 errno = ENAMETOOLONG;
  950                 return -1;
  951             }
  952             tc = ntfs_malloc((mbs_len + 64) & ~63);
  953             if (!tc)
  954                 goto err_out;
  955             memcpy(tc, mbs, mbs_len);
  956             mbs_len = (mbs_len + 64) & ~63;
  957             free(mbs);
  958             mbs = tc;
  959         }
  960         /* Convert the LE Unicode character to a CPU wide character. */
  961         wc = (wchar_t)le16_to_cpu(ins[i]);
  962         if (!wc)
  963             break;
  964         /* Convert the CPU endian wide character to multibyte. */
  965 #ifdef HAVE_MBSINIT
  966         cnt = wcrtomb(mbs + o, wc, &mbstate);
  967 #else
  968         cnt = wctomb(mbs + o, wc);
  969 #endif
  970         if (cnt == -1)
  971             goto err_out;
  972         if (cnt <= 0) {
  973             ntfs_log_debug("Eeek. cnt <= 0, cnt = %i\n", cnt);
  974             errno = EINVAL;
  975             goto err_out;
  976         }
  977         o += cnt;
  978     }
  979 #ifdef HAVE_MBSINIT
  980     /* Make sure we are back in the initial state. */
  981     if (!mbsinit(&mbstate)) {
  982         ntfs_log_debug("Eeek. mbstate not in initial state!\n");
  983         errno = EILSEQ;
  984         goto err_out;
  985     }
  986 #endif
  987     /* Now write the NULL character. */
  988     mbs[o] = '\0';
  989     if (*outs != mbs)
  990         *outs = mbs;
  991     return o;
  992 err_out:
  993     if (mbs != *outs) {
  994         int eo = errno;
  995         free(mbs);
  996         errno = eo;
  997     }
  998 #else /* MB_CUR_MAX */
  999     errno = EILSEQ;
 1000 #endif /* MB_CUR_MAX */
 1001     return -1;
 1002 }
 1003 
 1004 /**
 1005  * ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string
 1006  * @ins:    input multibyte string buffer
 1007  * @outs:   on return contains the (allocated) output Unicode string
 1008  *
 1009  * Convert the input multibyte string @ins, from the current locale into the
 1010  * corresponding little endian, 2-byte Unicode string.
 1011  *
 1012  * The function allocates the string and the caller is responsible for calling 
 1013  * free(*@outs); when finished with it.
 1014  *
 1015  * On success the function returns the number of Unicode characters written to
 1016  * the output string *@outs (>= 0), not counting the terminating Unicode NULL
 1017  * character.
 1018  *
 1019  * On error, -1 is returned, and errno is set to the error code. The following
 1020  * error codes can be expected:
 1021  *  EINVAL      Invalid arguments (e.g. @ins or @outs is NULL).
 1022  *  EILSEQ      The input string cannot be represented as a Unicode
 1023  *          string according to the current locale.
 1024  *  ENAMETOOLONG    Destination buffer is too small for input string.
 1025  *  ENOMEM      Not enough memory to allocate destination buffer.
 1026  */
 1027 int ntfs_mbstoucs(const char *ins, ntfschar **outs)
 1028 {
 1029 #ifdef MB_CUR_MAX
 1030     ntfschar *ucs;
 1031     const char *s;
 1032     wchar_t wc;
 1033     int i, o, cnt, ins_len, ucs_len, ins_size;
 1034 #ifdef HAVE_MBSINIT
 1035     mbstate_t mbstate;
 1036 #endif
 1037 #endif /* MB_CUR_MAX */
 1038 
 1039     if (!ins || !outs) {
 1040         errno = EINVAL;
 1041         return -1;
 1042     }
 1043     
 1044     if (use_utf8)
 1045         return ntfs_utf8_to_utf16(ins, outs);
 1046 
 1047 #ifdef MB_CUR_MAX
 1048     /* Determine the size of the multi-byte string in bytes. */
 1049     ins_size = strlen(ins);
 1050     /* Determine the length of the multi-byte string. */
 1051     s = ins;
 1052 #if defined(HAVE_MBSINIT)
 1053     memset(&mbstate, 0, sizeof(mbstate));
 1054     ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate);
 1055 #ifdef __CYGWIN32__
 1056     if (!ins_len && *ins) {
 1057         /* Older Cygwin had broken mbsrtowcs() implementation. */
 1058         ins_len = strlen(ins);
 1059     }
 1060 #endif
 1061 #elif !defined(DJGPP)
 1062     ins_len = mbstowcs(NULL, s, 0);
 1063 #else
 1064     /* Eeek!!! DJGPP has broken mbstowcs() implementation!!! */
 1065     ins_len = strlen(ins);
 1066 #endif
 1067     if (ins_len == -1)
 1068         return ins_len;
 1069 #ifdef HAVE_MBSINIT
 1070     if ((s != ins) || !mbsinit(&mbstate)) {
 1071 #else
 1072     if (s != ins) {
 1073 #endif
 1074         errno = EILSEQ;
 1075         return -1;
 1076     }
 1077     /* Add the NULL terminator. */
 1078     ins_len++;
 1079     ucs_len = ins_len;
 1080     ucs = ntfs_malloc(ucs_len * sizeof(ntfschar));
 1081     if (!ucs)
 1082         return -1;
 1083 #ifdef HAVE_MBSINIT
 1084     memset(&mbstate, 0, sizeof(mbstate));
 1085 #else
 1086     mbtowc(NULL, NULL, 0);
 1087 #endif
 1088     for (i = o = cnt = 0; i < ins_size; i += cnt, o++) {
 1089         /* Reallocate memory if necessary. */
 1090         if (o >= ucs_len) {
 1091             ntfschar *tc;
 1092             ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63;
 1093             tc = realloc(ucs, ucs_len);
 1094             if (!tc)
 1095                 goto err_out;
 1096             ucs = tc;
 1097             ucs_len /= sizeof(ntfschar);
 1098         }
 1099         /* Convert the multibyte character to a wide character. */
 1100 #ifdef HAVE_MBSINIT
 1101         cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate);
 1102 #else
 1103         cnt = mbtowc(&wc, ins + i, ins_size - i);
 1104 #endif
 1105         if (!cnt)
 1106             break;
 1107         if (cnt == -1)
 1108             goto err_out;
 1109         if (cnt < -1) {
 1110             ntfs_log_trace("Eeek. cnt = %i\n", cnt);
 1111             errno = EINVAL;
 1112             goto err_out;
 1113         }
 1114         /* Make sure we are not overflowing the NTFS Unicode set. */
 1115         if ((unsigned long)wc >= (unsigned long)(1 <<
 1116                 (8 * sizeof(ntfschar)))) {
 1117             errno = EILSEQ;
 1118             goto err_out;
 1119         }
 1120         /* Convert the CPU wide character to a LE Unicode character. */
 1121         ucs[o] = cpu_to_le16(wc);
 1122     }
 1123 #ifdef HAVE_MBSINIT
 1124     /* Make sure we are back in the initial state. */
 1125     if (!mbsinit(&mbstate)) {
 1126         ntfs_log_trace("Eeek. mbstate not in initial state!\n");
 1127         errno = EILSEQ;
 1128         goto err_out;
 1129     }
 1130 #endif
 1131     /* Now write the NULL character. */
 1132     ucs[o] = const_cpu_to_le16(L'\0');
 1133     *outs = ucs;
 1134     return o;
 1135 err_out:
 1136     free(ucs);
 1137 #else /* MB_CUR_MAX */
 1138     errno = EILSEQ;
 1139 #endif /* MB_CUR_MAX */
 1140     return -1;
 1141 }
 1142 
 1143 /*
 1144  *      Turn a UTF8 name uppercase
 1145  *
 1146  *  Returns an allocated uppercase name which has to be freed by caller
 1147  *  or NULL if there is an error (described by errno)
 1148  */
 1149 
 1150 char *ntfs_uppercase_mbs(const char *low,
 1151             const ntfschar *upcase, u32 upcase_size)
 1152 {
 1153     int size;
 1154     char *upp;
 1155     u32 wc;
 1156     int n;
 1157     const char *s;
 1158     char *t;
 1159 
 1160     size = strlen(low);
 1161     upp = (char*)ntfs_malloc(3*size + 1);
 1162     if (upp) {
 1163         s = low;
 1164         t = upp;
 1165         do {
 1166             n = utf8_to_unicode(&wc, s);
 1167             if (n > 0) {
 1168                 if (wc < upcase_size)
 1169                     wc = le16_to_cpu(upcase[wc]);
 1170                 if (wc < 0x80)
 1171                     *t++ = wc;
 1172                 else if (wc < 0x800) {
 1173                     *t++ = (0xc0 | ((wc >> 6) & 0x3f));
 1174                     *t++ = 0x80 | (wc & 0x3f);
 1175                 } else if (wc < 0x10000) {
 1176                     *t++ = 0xe0 | (wc >> 12);
 1177                     *t++ = 0x80 | ((wc >> 6) & 0x3f);
 1178                     *t++ = 0x80 | (wc & 0x3f);
 1179                 } else {
 1180                     *t++ = 0xf0 | ((wc >> 18) & 7);
 1181                     *t++ = 0x80 | ((wc >> 12) & 63);
 1182                     *t++ = 0x80 | ((wc >> 6) & 0x3f);
 1183                     *t++ = 0x80 | (wc & 0x3f);
 1184                 }
 1185             s += n;
 1186             }
 1187         } while (n > 0);
 1188         if (n < 0) {
 1189             free(upp);
 1190             upp = (char*)NULL;
 1191             errno = EILSEQ;
 1192         }
 1193         *t = 0;
 1194     }
 1195     return (upp);
 1196 }
 1197 
 1198 /**
 1199  * ntfs_upcase_table_build - build the default upcase table for NTFS
 1200  * @uc:     destination buffer where to store the built table
 1201  * @uc_len: size of destination buffer in bytes
 1202  *
 1203  * ntfs_upcase_table_build() builds the default upcase table for NTFS and
 1204  * stores it in the caller supplied buffer @uc of size @uc_len.
 1205  *
 1206  * Note, @uc_len must be at least 128kiB in size or bad things will happen!
 1207  */
 1208 void ntfs_upcase_table_build(ntfschar *uc, u32 uc_len)
 1209 {
 1210     struct NEWUPPERCASE {
 1211         unsigned short first;
 1212         unsigned short last;
 1213         short diff;
 1214         unsigned char step;
 1215         unsigned char osmajor;
 1216         unsigned char osminor;
 1217     } ;
 1218 
 1219     /*
 1220      *  This is the table as defined by Windows XP
 1221      */
 1222     static int uc_run_table[][3] = { /* Start, End, Add */
 1223     {0x0061, 0x007B,  -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72,  74},
 1224     {0x00E0, 0x00F7,  -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76,  86},
 1225     {0x00F8, 0x00FF,  -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100},
 1226     {0x0256, 0x0258, -205}, {0x1F00, 0x1F08,   8}, {0x1F78, 0x1F7A, 128},
 1227     {0x028A, 0x028C, -217}, {0x1F10, 0x1F16,   8}, {0x1F7A, 0x1F7C, 112},
 1228     {0x03AC, 0x03AD,  -38}, {0x1F20, 0x1F28,   8}, {0x1F7C, 0x1F7E, 126},
 1229     {0x03AD, 0x03B0,  -37}, {0x1F30, 0x1F38,   8}, {0x1FB0, 0x1FB2,   8},
 1230     {0x03B1, 0x03C2,  -32}, {0x1F40, 0x1F46,   8}, {0x1FD0, 0x1FD2,   8},
 1231     {0x03C2, 0x03C3,  -31}, {0x1F51, 0x1F52,   8}, {0x1FE0, 0x1FE2,   8},
 1232     {0x03C3, 0x03CC,  -32}, {0x1F53, 0x1F54,   8}, {0x1FE5, 0x1FE6,   7},
 1233     {0x03CC, 0x03CD,  -64}, {0x1F55, 0x1F56,   8}, {0x2170, 0x2180, -16},
 1234     {0x03CD, 0x03CF,  -63}, {0x1F57, 0x1F58,   8}, {0x24D0, 0x24EA, -26},
 1235     {0x0430, 0x0450,  -32}, {0x1F60, 0x1F68,   8}, {0xFF41, 0xFF5B, -32},
 1236     {0}
 1237     };
 1238     static int uc_dup_table[][2] = { /* Start, End */
 1239     {0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC},
 1240     {0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB},
 1241     {0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5},
 1242     {0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9},
 1243     {0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95},
 1244     {0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9},
 1245     {0}
 1246     };
 1247     static int uc_byte_table[][2] = { /* Offset, Value */
 1248     {0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196},
 1249     {0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C},
 1250     {0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D},
 1251     {0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F},
 1252     {0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9},
 1253     {0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE},
 1254     {0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7},
 1255     {0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197},
 1256     {0}
 1257     };
 1258 
 1259 /*
 1260  *      Changes which were applied to later Windows versions
 1261  *
 1262  *   md5 for $UpCase from Winxp : 6fa3db2468275286210751e869d36373
 1263  *                        Vista : 2f03b5a69d486ff3864cecbd07f24440
 1264  *                        Win8 :  7ff498a44e45e77374cc7c962b1b92f2
 1265  */
 1266     static const struct NEWUPPERCASE newuppercase[] = {
 1267                         /* from Windows 6.0 (Vista) */
 1268         { 0x37b, 0x37d, 0x82, 1, 6, 0 },
 1269         { 0x1f80, 0x1f87, 0x8, 1, 6, 0 },
 1270         { 0x1f90, 0x1f97, 0x8, 1, 6, 0 },
 1271         { 0x1fa0, 0x1fa7, 0x8, 1, 6, 0 },
 1272         { 0x2c30, 0x2c5e, -0x30, 1, 6, 0 },
 1273         { 0x2d00, 0x2d25, -0x1c60, 1, 6, 0 },
 1274         { 0x2c68, 0x2c6c, -0x1, 2, 6, 0 },
 1275         { 0x219, 0x21f, -0x1, 2, 6, 0 },
 1276         { 0x223, 0x233, -0x1, 2, 6, 0 },
 1277         { 0x247, 0x24f, -0x1, 2, 6, 0 },
 1278         { 0x3d9, 0x3e1, -0x1, 2, 6, 0 },
 1279         { 0x48b, 0x48f, -0x1, 2, 6, 0 },
 1280         { 0x4fb, 0x513, -0x1, 2, 6, 0 },
 1281         { 0x2c81, 0x2ce3, -0x1, 2, 6, 0 },
 1282         { 0x3f8, 0x3fb, -0x1, 3, 6, 0 },
 1283         { 0x4c6, 0x4ce, -0x1, 4, 6, 0 },
 1284         { 0x23c, 0x242, -0x1, 6, 6, 0 },
 1285         { 0x4ed, 0x4f7, -0x1, 10, 6, 0 },
 1286         { 0x450, 0x45d, -0x50, 13, 6, 0 },
 1287         { 0x2c61, 0x2c76, -0x1, 21, 6, 0 },
 1288         { 0x1fcc, 0x1ffc, -0x9, 48, 6, 0 },
 1289         { 0x180, 0x180, 0xc3, 1, 6, 0 },
 1290         { 0x195, 0x195, 0x61, 1, 6, 0 },
 1291         { 0x19a, 0x19a, 0xa3, 1, 6, 0 },
 1292         { 0x19e, 0x19e, 0x82, 1, 6, 0 },
 1293         { 0x1bf, 0x1bf, 0x38, 1, 6, 0 },
 1294         { 0x1f9, 0x1f9, -0x1, 1, 6, 0 },
 1295         { 0x23a, 0x23a, 0x2a2b, 1, 6, 0 },
 1296         { 0x23e, 0x23e, 0x2a28, 1, 6, 0 },
 1297         { 0x26b, 0x26b, 0x29f7, 1, 6, 0 },
 1298         { 0x27d, 0x27d, 0x29e7, 1, 6, 0 },
 1299         { 0x280, 0x280, -0xda, 1, 6, 0 },
 1300         { 0x289, 0x289, -0x45, 1, 6, 0 },
 1301         { 0x28c, 0x28c, -0x47, 1, 6, 0 },
 1302         { 0x3f2, 0x3f2, 0x7, 1, 6, 0 },
 1303         { 0x4cf, 0x4cf, -0xf, 1, 6, 0 },
 1304         { 0x1d7d, 0x1d7d, 0xee6, 1, 6, 0 },
 1305         { 0x1fb3, 0x1fb3, 0x9, 1, 6, 0 },
 1306         { 0x214e, 0x214e, -0x1c, 1, 6, 0 },
 1307         { 0x2184, 0x2184, -0x1, 1, 6, 0 },
 1308                         /* from Windows 6.1 (Win7) */
 1309         { 0x23a, 0x23e,  0x0, 4, 6, 1 },
 1310         { 0x250, 0x250,  0x2a1f, 2, 6, 1 },
 1311         { 0x251, 0x251,  0x2a1c, 2, 6, 1 },
 1312         { 0x271, 0x271,  0x29fd, 2, 6, 1 },
 1313         { 0x371, 0x373, -0x1, 2, 6, 1 },
 1314         { 0x377, 0x377, -0x1, 2, 6, 1 },
 1315         { 0x3c2, 0x3c2,  0x0, 2, 6, 1 },
 1316         { 0x3d7, 0x3d7, -0x8, 2, 6, 1 },
 1317         { 0x515, 0x523, -0x1, 2, 6, 1 },
 1318             /* below, -0x75fc stands for 0x8a04 and truncation */
 1319         { 0x1d79, 0x1d79, -0x75fc, 2, 6, 1 },
 1320         { 0x1efb, 0x1eff, -0x1, 2, 6, 1 },
 1321         { 0x1fc3, 0x1ff3,  0x9, 48, 6, 1 },
 1322         { 0x1fcc, 0x1ffc,  0x0, 48, 6, 1 },
 1323         { 0x2c65, 0x2c65, -0x2a2b, 2, 6, 1 },
 1324         { 0x2c66, 0x2c66, -0x2a28, 2, 6, 1 },
 1325         { 0x2c73, 0x2c73, -0x1, 2, 6, 1 },
 1326         { 0xa641, 0xa65f, -0x1, 2, 6, 1 },
 1327         { 0xa663, 0xa66d, -0x1, 2, 6, 1 },
 1328         { 0xa681, 0xa697, -0x1, 2, 6, 1 },
 1329         { 0xa723, 0xa72f, -0x1, 2, 6, 1 },
 1330         { 0xa733, 0xa76f, -0x1, 2, 6, 1 },
 1331         { 0xa77a, 0xa77c, -0x1, 2, 6, 1 },
 1332         { 0xa77f, 0xa787, -0x1, 2, 6, 1 },
 1333         { 0xa78c, 0xa78c, -0x1, 2, 6, 1 },
 1334                             /* end mark */
 1335         { 0 }
 1336     } ;
 1337 
 1338     int i, r;
 1339     int k, off;
 1340     const struct NEWUPPERCASE *puc;
 1341 
 1342     memset((char*)uc, 0, uc_len);
 1343     uc_len >>= 1;
 1344     if (uc_len > 65536)
 1345         uc_len = 65536;
 1346     for (i = 0; (u32)i < uc_len; i++)
 1347         uc[i] = cpu_to_le16(i);
 1348     for (r = 0; uc_run_table[r][0]; r++) {
 1349         off = uc_run_table[r][2];
 1350         for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
 1351             uc[i] = cpu_to_le16(i + off);
 1352     }
 1353     for (r = 0; uc_dup_table[r][0]; r++)
 1354         for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
 1355             uc[i + 1] = cpu_to_le16(i);
 1356     for (r = 0; uc_byte_table[r][0]; r++) {
 1357         k = uc_byte_table[r][1];
 1358         uc[uc_byte_table[r][0]] = cpu_to_le16(k);
 1359     }
 1360     for (r=0; newuppercase[r].first; r++) {
 1361         puc = &newuppercase[r];
 1362         if ((puc->osmajor < UPCASE_MAJOR)
 1363           || ((puc->osmajor == UPCASE_MAJOR)
 1364              && (puc->osminor <= UPCASE_MINOR))) {
 1365             off = puc->diff;
 1366             for (i = puc->first; i <= puc->last; i += puc->step)
 1367                 uc[i] = cpu_to_le16(i + off);
 1368         }
 1369     }
 1370 }
 1371 
 1372 /*
 1373  *      Allocate and build the default upcase table
 1374  *
 1375  *  Returns the number of entries
 1376  *      0 if failed
 1377  */
 1378 
 1379 #define UPCASE_LEN 65536 /* default number of entries in upcase */
 1380 
 1381 u32 ntfs_upcase_build_default(ntfschar **upcase)
 1382 {
 1383     u32 upcase_len = 0;
 1384 
 1385     *upcase = (ntfschar*)ntfs_malloc(UPCASE_LEN*2);
 1386     if (*upcase) {
 1387         ntfs_upcase_table_build(*upcase, UPCASE_LEN*2);
 1388         upcase_len = UPCASE_LEN;
 1389     }
 1390     return (upcase_len);
 1391 }
 1392 
 1393 /*
 1394  *      Build a table for converting to lower case
 1395  *
 1396  *  This is only meaningful when there is a single lower case
 1397  *  character leading to an upper case one, and currently the
 1398  *  only exception is the greek letter sigma which has a single
 1399  *  upper case glyph (code U+03A3), but two lower case glyphs
 1400  *  (code U+03C3 and U+03C2, the latter to be used at the end
 1401  *  of a word). In the following implementation the upper case
 1402  *  sigma will be lowercased as U+03C3.
 1403  */
 1404 
 1405 ntfschar *ntfs_locase_table_build(const ntfschar *uc, u32 uc_cnt)
 1406 {
 1407     ntfschar *lc;
 1408     u32 upp;
 1409     u32 i;
 1410 
 1411     lc = (ntfschar*)ntfs_malloc(uc_cnt*sizeof(ntfschar));
 1412     if (lc) {
 1413         for (i=0; i<uc_cnt; i++)
 1414             lc[i] = cpu_to_le16(i);
 1415         for (i=0; i<uc_cnt; i++) {
 1416             upp = le16_to_cpu(uc[i]);
 1417             if ((upp != i) && (upp < uc_cnt))
 1418                 lc[upp] = cpu_to_le16(i);
 1419         }
 1420     } else
 1421         ntfs_log_error("Could not build the locase table\n");
 1422     return (lc);
 1423 }
 1424 
 1425 /**
 1426  * ntfs_str2ucs - convert a string to a valid NTFS file name
 1427  * @s:      input string
 1428  * @len:    length of output buffer in Unicode characters
 1429  *
 1430  * Convert the input @s string into the corresponding little endian,
 1431  * 2-byte Unicode string. The length of the converted string is less 
 1432  * or equal to the maximum length allowed by the NTFS format (255).
 1433  *
 1434  * If @s is NULL then return AT_UNNAMED.
 1435  *
 1436  * On success the function returns the Unicode string in an allocated 
 1437  * buffer and the caller is responsible to free it when it's not needed
 1438  * anymore.
 1439  *
 1440  * On error NULL is returned and errno is set to the error code.
 1441  */
 1442 ntfschar *ntfs_str2ucs(const char *s, int *len)
 1443 {
 1444     ntfschar *ucs = NULL;
 1445 
 1446     if (s && ((*len = ntfs_mbstoucs(s, &ucs)) == -1)) {
 1447         ntfs_log_perror("Couldn't convert '%s' to Unicode", s);
 1448         return NULL;
 1449     }
 1450     if (*len > NTFS_MAX_NAME_LEN) {
 1451         free(ucs);
 1452         errno = ENAMETOOLONG;
 1453         return NULL;
 1454     }
 1455     if (!ucs || !*len) {
 1456         ucs  = AT_UNNAMED;
 1457         *len = 0;
 1458     }
 1459     return ucs;
 1460 }
 1461 
 1462 /**
 1463  * ntfs_ucsfree - free memory allocated by ntfs_str2ucs()
 1464  * @ucs     input string to be freed
 1465  *
 1466  * Free memory at @ucs and which was allocated by ntfs_str2ucs.
 1467  *
 1468  * Return value: none.
 1469  */
 1470 void ntfs_ucsfree(ntfschar *ucs)
 1471 {
 1472     if (ucs && (ucs != AT_UNNAMED))
 1473         free(ucs);
 1474 }
 1475 
 1476 /*
 1477  *      Check whether a name contains no chars forbidden
 1478  *  for DOS or Win32 use
 1479  *
 1480  *  If @strict is TRUE, then trailing dots and spaces are forbidden.
 1481  *  These names are technically allowed in the Win32 namespace, but
 1482  *  they can be problematic.  See comment for FILE_NAME_WIN32.
 1483  *
 1484  *  If there is a bad char, errno is set to EINVAL
 1485  */
 1486 
 1487 BOOL ntfs_forbidden_chars(const ntfschar *name, int len, BOOL strict)
 1488 {
 1489     BOOL forbidden;
 1490     int ch;
 1491     int i;
 1492     static const u32 mainset = (1L << ('\"' - 0x20))
 1493             | (1L << ('*' - 0x20))
 1494             | (1L << ('/' - 0x20))
 1495             | (1L << (':' - 0x20))
 1496             | (1L << ('<' - 0x20))
 1497             | (1L << ('>' - 0x20))
 1498             | (1L << ('?' - 0x20));
 1499 
 1500     forbidden = (len == 0) ||
 1501             (strict && (name[len-1] == const_cpu_to_le16(' ') ||
 1502                 name[len-1] == const_cpu_to_le16('.')));
 1503     for (i=0; i<len; i++) {
 1504         ch = le16_to_cpu(name[i]);
 1505         if ((ch < 0x20)
 1506             || ((ch < 0x40)
 1507             && ((1L << (ch - 0x20)) & mainset))
 1508             || (ch == '\\')
 1509             || (ch == '|'))
 1510             forbidden = TRUE;
 1511     }
 1512     if (forbidden)
 1513         errno = EINVAL;
 1514     return (forbidden);
 1515 }
 1516 
 1517 /*
 1518  *      Check whether a name contains no forbidden chars and
 1519  *  is not a reserved name for DOS or Win32 use
 1520  *
 1521  *  The reserved names are CON, PRN, AUX, NUL, COM1..COM9, LPT1..LPT9
 1522  *  with no suffix or any suffix.
 1523  *
 1524  *  If @strict is TRUE, then trailing dots and spaces are forbidden.
 1525  *  These names are technically allowed in the Win32 namespace, but
 1526  *  they can be problematic.  See comment for FILE_NAME_WIN32.
 1527  *
 1528  *  If the name is forbidden, errno is set to EINVAL
 1529  */
 1530 
 1531 BOOL ntfs_forbidden_names(ntfs_volume *vol, const ntfschar *name, int len,
 1532               BOOL strict)
 1533 {
 1534     BOOL forbidden;
 1535     int h;
 1536     static const ntfschar dot = const_cpu_to_le16('.');
 1537     static const ntfschar con[] = { const_cpu_to_le16('c'),
 1538             const_cpu_to_le16('o'), const_cpu_to_le16('n') };
 1539     static const ntfschar prn[] = { const_cpu_to_le16('p'),
 1540             const_cpu_to_le16('r'), const_cpu_to_le16('n') };
 1541     static const ntfschar aux[] = { const_cpu_to_le16('a'),
 1542             const_cpu_to_le16('u'), const_cpu_to_le16('x') };
 1543     static const ntfschar nul[] = { const_cpu_to_le16('n'),
 1544             const_cpu_to_le16('u'), const_cpu_to_le16('l') };
 1545     static const ntfschar com[] = { const_cpu_to_le16('c'),
 1546             const_cpu_to_le16('o'), const_cpu_to_le16('m') };
 1547     static const ntfschar lpt[] = { const_cpu_to_le16('l'),
 1548             const_cpu_to_le16('p'), const_cpu_to_le16('t') };
 1549 
 1550     forbidden = ntfs_forbidden_chars(name, len, strict);
 1551     if (!forbidden && (len >= 3)) {
 1552         /*
 1553          * Rough hash check to tell whether the first couple of chars
 1554          * may be one of CO PR AU NU LP or lowercase variants.
 1555          */
 1556         h = ((le16_to_cpu(name[0]) & 31)*48)
 1557                 ^ ((le16_to_cpu(name[1]) & 31)*165);
 1558         if ((h % 23) == 17) {
 1559             /* do a full check, depending on the third char */
 1560             switch (le16_to_cpu(name[2]) & ~0x20) {
 1561             case 'N' :
 1562                 if (((len == 3) || (name[3] == dot))
 1563                     && (!ntfs_ucsncasecmp(name, con, 3,
 1564                         vol->upcase, vol->upcase_len)
 1565                     || !ntfs_ucsncasecmp(name, prn, 3,
 1566                         vol->upcase, vol->upcase_len)))
 1567                     forbidden = TRUE;
 1568                 break;
 1569             case 'X' :
 1570                 if (((len == 3) || (name[3] == dot))
 1571                     && !ntfs_ucsncasecmp(name, aux, 3,
 1572                         vol->upcase, vol->upcase_len))
 1573                     forbidden = TRUE;
 1574                 break;
 1575             case 'L' :
 1576                 if (((len == 3) || (name[3] == dot))
 1577                     && !ntfs_ucsncasecmp(name, nul, 3,
 1578                         vol->upcase, vol->upcase_len))
 1579                     forbidden = TRUE;
 1580                 break;
 1581             case 'M' :
 1582                 if ((len > 3)
 1583                     && (le16_to_cpu(name[3]) >= '1')
 1584                     && (le16_to_cpu(name[3]) <= '9')
 1585                     && ((len == 4) || (name[4] == dot))
 1586                     && !ntfs_ucsncasecmp(name, com, 3,
 1587                         vol->upcase, vol->upcase_len))
 1588                     forbidden = TRUE;
 1589                 break;
 1590             case 'T' :
 1591                 if ((len > 3)
 1592                     && (le16_to_cpu(name[3]) >= '1')
 1593                     && (le16_to_cpu(name[3]) <= '9')
 1594                     && ((len == 4) || (name[4] == dot))
 1595                     && !ntfs_ucsncasecmp(name, lpt, 3,
 1596                         vol->upcase, vol->upcase_len))
 1597                     forbidden = TRUE;
 1598                 break;
 1599             }
 1600         }
 1601     }
 1602 
 1603     if (forbidden)
 1604         errno = EINVAL;
 1605     return (forbidden);
 1606 }
 1607 
 1608 /*
 1609  *      Check whether the same name can be used as a DOS and
 1610  *  a Win32 name
 1611  *
 1612  *  The names must be the same, or the short name the uppercase
 1613  *  variant of the long name
 1614  */
 1615 
 1616 BOOL ntfs_collapsible_chars(ntfs_volume *vol,
 1617             const ntfschar *shortname, int shortlen,
 1618             const ntfschar *longname, int longlen)
 1619 {
 1620     BOOL collapsible;
 1621     unsigned int ch;
 1622     unsigned int cs;
 1623     int i;
 1624 
 1625     collapsible = shortlen == longlen;
 1626     for (i=0; collapsible && (i<shortlen); i++) {
 1627         ch = le16_to_cpu(longname[i]);
 1628         cs = le16_to_cpu(shortname[i]);
 1629         if ((cs != ch)
 1630             && ((ch >= vol->upcase_len)
 1631             || (cs >= vol->upcase_len)
 1632             || (vol->upcase[cs] != vol->upcase[ch])))
 1633                 collapsible = FALSE;
 1634     }
 1635     return (collapsible);
 1636 }
 1637 
 1638 /*
 1639  * Define the character encoding to be used.
 1640  * Use UTF-8 unless specified otherwise.
 1641  */
 1642 
 1643 int ntfs_set_char_encoding(const char *locale)
 1644 {
 1645     use_utf8 = 0;
 1646     if (!locale || strstr(locale,"utf8") || strstr(locale,"UTF8")
 1647         || strstr(locale,"utf-8") || strstr(locale,"UTF-8"))
 1648         use_utf8 = 1;
 1649     else
 1650         if (setlocale(LC_ALL, locale))
 1651             use_utf8 = 0;
 1652         else {
 1653             ntfs_log_error("Invalid locale, encoding to UTF-8\n");
 1654             use_utf8 = 1;
 1655         }
 1656     return 0; /* always successful */
 1657 }
 1658 
 1659 #if defined(__APPLE__) || defined(__DARWIN__)
 1660 
 1661 int ntfs_macosx_normalize_filenames(int normalize) {
 1662 #ifdef ENABLE_NFCONV
 1663     if (normalize == 0 || normalize == 1) {
 1664         nfconvert_utf8 = normalize;
 1665         return 0;
 1666     }
 1667     else {
 1668         return -1;
 1669     }
 1670 #else
 1671     return -1;
 1672 #endif /* ENABLE_NFCONV */
 1673 } 
 1674 
 1675 int ntfs_macosx_normalize_utf8(const char *utf8_string, char **target,
 1676         int composed)
 1677 {
 1678 #ifdef ENABLE_NFCONV
 1679     /* For this code to compile, the CoreFoundation framework must be fed to
 1680      * the linker. */
 1681     CFStringRef cfSourceString;
 1682     CFMutableStringRef cfMutableString;
 1683     CFRange rangeToProcess;
 1684     CFIndex requiredBufferLength;
 1685     char *result = NULL;
 1686     int resultLength = -1;
 1687     
 1688     /* Convert the UTF-8 string to a CFString. */
 1689     cfSourceString = CFStringCreateWithCString(kCFAllocatorDefault,
 1690         utf8_string, kCFStringEncodingUTF8);
 1691     if (cfSourceString == NULL) {
 1692         ntfs_log_error("CFStringCreateWithCString failed!\n");
 1693         return -2;
 1694     }
 1695 
 1696     /* Create a mutable string from cfSourceString that we are free to
 1697      * modify. */
 1698     cfMutableString = CFStringCreateMutableCopy(kCFAllocatorDefault, 0,
 1699         cfSourceString);
 1700     CFRelease(cfSourceString); /* End-of-life. */
 1701     if (cfMutableString == NULL) {
 1702         ntfs_log_error("CFStringCreateMutableCopy failed!\n");
 1703         return -3;
 1704     }
 1705 
 1706     /* Normalize the mutable string to the desired normalization form. */
 1707     CFStringNormalize(cfMutableString, (composed != 0 ?
 1708         kCFStringNormalizationFormC : kCFStringNormalizationFormD));
 1709 
 1710     /* Store the resulting string in a '\0'-terminated UTF-8 encoded char*
 1711      * buffer. */
 1712     rangeToProcess = CFRangeMake(0, CFStringGetLength(cfMutableString));
 1713     if (CFStringGetBytes(cfMutableString, rangeToProcess,
 1714         kCFStringEncodingUTF8, 0, false, NULL, 0,
 1715         &requiredBufferLength) > 0)
 1716     {
 1717         resultLength = sizeof(char) * (requiredBufferLength + 1);
 1718         result = ntfs_calloc(resultLength);
 1719 
 1720         if (result != NULL) {
 1721             if (CFStringGetBytes(cfMutableString, rangeToProcess,
 1722                 kCFStringEncodingUTF8, 0, false,
 1723                 (UInt8*) result, resultLength - 1,
 1724                 &requiredBufferLength) <= 0)
 1725             {
 1726                 ntfs_log_error("Could not perform UTF-8 "
 1727                     "conversion of normalized "
 1728                     "CFMutableString.\n");
 1729                 free(result);
 1730                 result = NULL;
 1731             }
 1732         }
 1733         else {
 1734             ntfs_log_error("Could not perform a ntfs_calloc of %d "
 1735                 "bytes for char *result.\n", resultLength);
 1736         }
 1737     }
 1738     else {
 1739         ntfs_log_error("Could not perform check for required length of "
 1740             "UTF-8 conversion of normalized CFMutableString.\n");
 1741     }
 1742 
 1743     CFRelease(cfMutableString);
 1744 
 1745     if (result != NULL) {
 1746         *target = result;
 1747         return resultLength - 1;
 1748     }
 1749     else {
 1750         return -1;
 1751     }
 1752 #else
 1753     return -1;
 1754 #endif /* ENABLE_NFCONV */
 1755 }
 1756 #endif /* defined(__APPLE__) || defined(__DARWIN__) */