"Fossies" - the Fresh Open Source Software Archive

Member "xxHash-0.8.0/tests/generate_unicode_test.c" (27 Jul 2020, 6245 Bytes) of package /linux/misc/xxHash-0.8.0.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 /*
    2  * Generates a Unicode test for xxhsum without using Unicode in the source files.
    3  *
    4  * Copyright (C) 2020 Devin Hussey (easyaspi314)
    5  *
    6  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions are
   10  * met:
   11  *
   12  *     * Redistributions of source code must retain the above copyright
   13  * notice, this list of conditions and the following disclaimer.
   14  *     * Redistributions in binary form must reproduce the above
   15  * copyright notice, this list of conditions and the following disclaimer
   16  * in the documentation and/or other materials provided with the
   17  * distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   23  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 /*
   33  * Certain terminals don't properly handle UTF-8 (i.e. rxvt and command prompt
   34  * in the default codepage), and that can cause issues when editing text.
   35  *
   36  * We use this C file to generate a file with a Unicode filename, a file with
   37  * a checksum of said file, and both a Windows batch script and a Unix shell
   38  * script to test the file.
   39  */
   40 
   41 #define _CRT_SECURE_NO_WARNINGS /* Silence warnings on MSVC */
   42 #include <stdio.h>
   43 
   44 /* Use a Japanese filename, something that can't be cheated with ANSI.
   45  * yuniko-do.unicode (literally unicode.unicode) */
   46 
   47 /* Use raw hex values to ensure that the output is well-formed UTF-8. It is also more C90 compliant. */
   48 static const char FILENAME[] = {
   49     (char)0xe3, (char)0x83, (char)0xa6,  /* U+30e6: Katakana letter yu */
   50     (char)0xe3, (char)0x83, (char)0x8b,  /* U+30cb: Katakana letter ni */
   51     (char)0xe3, (char)0x82, (char)0xb3,  /* U+30b3: Katakana letter ko */
   52     (char)0xe3, (char)0x83, (char)0xbc,  /* U+30fc: Katakana-Hiragana prolonged sound mark (dash) */
   53     (char)0xe3, (char)0x83, (char)0x89,  /* U+30c9: Katakana letter do */
   54     '.','u','n','i','c','o','d','e','\0' /* ".unicode" (so we can glob in make clean and .gitignore) */
   55 };
   56 
   57 #ifdef _WIN32
   58 /* The same text as above, but encoded in Windows UTF-16. */
   59 static const wchar_t WFILENAME[] = { 0x30e6, 0x30cb, 0x30b3, 0x30fc, 0x30c9, L'.', L'u', L'n', L'i', L'c', L'o', L'd', L'e', L'\0' };
   60 #endif
   61 
   62 int main(void)
   63 {
   64     FILE *f, *script, *checksum;
   65 
   66     /* Create our Unicode file. Use _wfopen on Windows as fopen doesn't support Unicode filenames. */
   67 #ifdef _WIN32
   68     if (!(f = _wfopen(WFILENAME, L"wb"))) return 1;
   69 #else
   70     if (!(f = fopen(FILENAME, "wb"))) return 1;
   71 #endif
   72     fprintf(f, "test\n");
   73     fclose(f);
   74 
   75     /* XXH64 checksum file with the precalculated checksum for said file. */
   76     if (!(checksum = fopen("unicode_test.xxh64", "wb")))
   77         return 1;
   78     fprintf(checksum, "2d7f1808da1fa63c  %s\n", FILENAME);
   79     fclose(checksum);
   80 
   81 
   82     /* Create two scripts for both Windows and Unix. */
   83 
   84     /* Generate a Windows batch script. Always insert CRLF manually. */
   85     if (!(script = fopen("unicode_test.bat", "wb")))
   86         return 1;
   87 
   88     /* Disable echoing the commands. We do that ourselves the naive way. */
   89     fprintf(script, "@echo off\r\n");
   90 
   91     /* Change to codepage 65001 to enable UTF-8 support. */
   92     fprintf(script, "chcp 65001 >NUL 2>&1\r\n");
   93 
   94     /* First test a Unicode filename */
   95     fprintf(script, "echo Testing filename provided on command line...\r\n");
   96     fprintf(script, "echo xxhsum.exe \"%s\"\r\n", FILENAME);
   97     fprintf(script, "xxhsum.exe \"%s\"\r\n", FILENAME);
   98 
   99     /* Bail on error */
  100     fprintf(script, "if %%ERRORLEVEL%% neq 0 (\r\n");
  101     fprintf(script, "    exit /B %%ERRORLEVEL%%\r\n");
  102     fprintf(script, ")\r\n");
  103 
  104     /* Then test a checksum file. */
  105     fprintf(script, "echo Testing a checksum file...\r\n");
  106     fprintf(script, "echo xxhsum.exe -c unicode_test.xxh64\r\n");
  107     fprintf(script, "xxhsum.exe -c unicode_test.xxh64\r\n");
  108 
  109     fprintf(script, "exit /B %%ERRORLEVEL%%\r\n");
  110 
  111     fclose(script);
  112 
  113     /* Generate a Unix shell script */
  114     if (!(script = fopen("unicode_test.sh", "wb")))
  115         return 1;
  116 
  117     fprintf(script, "#!/bin/sh\n");
  118     /*
  119      * Some versions of MSYS, MinGW and Cygwin do not support UTF-8, and the ones that
  120      * don't may error with something like this:
  121      *
  122      *    Error: Could not open '<mojibake>.unicode': No such file or directory.
  123      *
  124      * which is an internal error that happens when it tries to convert MinGW/Cygwin
  125      * paths to Windows paths.
  126      *
  127      * In that case, we bail to cmd.exe and the batch script, which supports UTF-8
  128      * on Windows 7 and later.
  129      */
  130     fprintf(script, "case $(uname) in\n");
  131     /* MinGW/MSYS converts /c to C:\ unless you have a double slash,
  132      * Cygwin does not. */
  133     fprintf(script, "    *CYGWIN*)\n");
  134     fprintf(script, "        exec cmd.exe /c unicode_test.bat\n");
  135     fprintf(script, "        ;;\n");
  136     fprintf(script, "    *MINGW*|*MSYS*)\n");
  137     fprintf(script, "        exec cmd.exe //c unicode_test.bat\n");
  138     fprintf(script, "        ;;\n");
  139     fprintf(script, "esac\n");
  140 
  141     /* First test a Unicode filename */
  142     fprintf(script, "echo Testing filename provided on command line...\n");
  143     fprintf(script, "echo './xxhsum \"%s\" || exit $?'\n", FILENAME);
  144     fprintf(script, "./xxhsum \"%s\" || exit $?\n", FILENAME);
  145 
  146     /* Then test a checksum file. */
  147     fprintf(script, "echo Testing a checksum file...\n");
  148     fprintf(script, "echo './xxhsum -c unicode_test.xxh64 || exit $?'\n");
  149     fprintf(script, "./xxhsum -c unicode_test.xxh64 || exit $?\n");
  150 
  151     fclose(script);
  152 
  153     return 0;
  154 }