"Fossies" - the Fresh Open Source Software Archive

Member "xxHash-0.8.0/xxh_x86dispatch.c" (27 Jul 2020, 27347 Bytes) of package /linux/misc/xxHash-0.8.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "xxh_x86dispatch.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.7.4_vs_0.8.0.

    1 /*
    2  * xxHash - Extremely Fast Hash algorithm
    3  * Copyright (C) 2020 Yann Collet
    4  *
    5  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions are
    9  * met:
   10  *
   11  *    * Redistributions of source code must retain the above copyright
   12  *      notice, this list of conditions and the following disclaimer.
   13  *    * Redistributions in binary form must reproduce the above
   14  *      copyright notice, this list of conditions and the following disclaimer
   15  *      in the documentation and/or other materials provided with the
   16  *      distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   29  *
   30  * You can contact the author at:
   31  *   - xxHash homepage: https://www.xxhash.com
   32  *   - xxHash source repository: https://github.com/Cyan4973/xxHash
   33  */
   34 
   35 
   36 #if defined (__cplusplus)
   37 extern "C" {
   38 #endif
   39 
   40 /*
   41  * Dispatcher code for XXH3 on x86-based targets.
   42  */
   43 #if !(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64))
   44 #  error "Dispatching is currently only supported on x86 and x86_64."
   45 #endif
   46 
   47 #ifndef __GNUC__
   48 #  error "Dispatching requires __attribute__((__target__)) capability"
   49 #endif
   50 
   51 #define XXH_DISPATCH_AVX2    /* enable dispatch towards AVX2 */
   52 #define XXH_DISPATCH_AVX512  /* enable dispatch towards AVX512 */
   53 
   54 #ifdef XXH_DISPATCH_DEBUG
   55 /* debug logging */
   56 #  include <stdio.h>
   57 #  define XXH_debugPrint(str) { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", str); fflush(NULL); }
   58 #else
   59 #  define XXH_debugPrint(str) ((void)0)
   60 #  undef NDEBUG /* avoid redefinition */
   61 #  define NDEBUG
   62 #endif
   63 #include <assert.h>
   64 
/* Intrinsics headers for the vector code paths built in this file. */
#if defined(__GNUC__)
#  include <immintrin.h> /* AVX2, AVX512 */
#  include <emmintrin.h> /* SSE2 */
#elif defined(_MSC_VER)
#  include <intrin.h>
#endif
   71 
/*
 * Configuration macros defined before including xxhash.h.
 * XXH_INLINE_ALL and XXH_X86DISPATCH are not used directly in this file;
 * they are presumably consumed by xxhash.h - confirm there.
 * The XXH_TARGET_* macros expand to GCC-style __target__ attributes and are
 * placed on the per-instruction-set functions defined further down, so each
 * of them can be compiled for its own instruction set.
 */
#define XXH_INLINE_ALL
#define XXH_X86DISPATCH
#define XXH_TARGET_AVX512 __attribute__((__target__("avx512f")))
#define XXH_TARGET_AVX2 __attribute__((__target__("avx2")))
#define XXH_TARGET_SSE2 __attribute__((__target__("sse2")))
#include "xxhash.h"
   78 
   79 /*
   80  * Modified version of Intel's guide
   81  * https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
   82  */
   83 #if defined(_MSC_VER)
   84 # include <intrin.h>
   85 #endif
   86 
   87 /*
   88  * Support both AT&T and Intel dialects
   89  *
   90  * GCC doesn't convert AT&T syntax to Intel syntax, and will error out if
   91  * compiled with -masm=intel. Instead, it supports dialect switching with
   92  * curly braces: { AT&T syntax | Intel syntax }
   93  *
   94  * Clang's integrated assembler automatically converts AT&T syntax to Intel if
   95  * needed, making the dialect switching useless (it isn't even supported).
   96  *
   97  * Note: Comments are written in the inline assembly itself.
   98  */
/*
 * I_ATT(intel, att): one line of inline assembly.
 * `intel` and `att` are string literals holding the same instruction in
 * Intel and AT&T syntax. The expansion is a string literal terminated by
 * a newline and a tab.
 */
#ifdef __clang__
/* clang: only the AT&T text is emitted, the Intel text is dropped. */
#  define I_ATT(intel, att) att "\n\t"
#else
/* otherwise: emit both, using the { att | intel } dialect alternatives. */
#  define I_ATT(intel, att) "{" att "|" intel "}\n\t"
#endif
  104 
  105 
/*
 * Executes the CPUID instruction.
 *
 * eax:  value placed in EAX before the instruction (the CPUID leaf).
 * ecx:  value placed in ECX before the instruction (the sub-leaf).
 * abcd: output array of 4 values. In the inline-assembly path it receives
 *       EAX, EBX, ECX and EDX, in that order, at indices 0..3.
 */
static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd)
{
#if defined(_MSC_VER)
    /* MSVC path: delegate to the __cpuidex intrinsic. */
    __cpuidex(abcd, eax, ecx);
#else
    xxh_u32 ebx, edx;
    /*
     * Two alternative openings of the same asm statement follow; the operand
     * list after the #endif closes whichever one was selected.
     */
# if defined(__i386__) && defined(__PIC__)
    __asm__(
        "# Call CPUID\n\t"
        "#\n\t"
        "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t"
        "# EBX, so we use EDI instead.\n\t"
        I_ATT("mov     edi, ebx",   "movl    %%ebx, %%edi")
        I_ATT("cpuid",              "cpuid"               )
        I_ATT("xchg    edi, ebx",   "xchgl   %%ebx, %%edi")
        /* The EBX result is handed back through EDI. */
        : "=D" (ebx),
# else
    __asm__(
        "# Call CPUID\n\t"
        I_ATT("cpuid",              "cpuid")
        : "=b" (ebx),
# endif
              /* EAX and ECX are both inputs and outputs; EDX is output only. */
              "+a" (eax), "+c" (ecx), "=d" (edx));
    abcd[0] = eax;
    abcd[1] = ebx;
    abcd[2] = ecx;
    abcd[3] = edx;
#endif
}
  135 
  136 #if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512)
/*
 * While the CPU may support AVX2, the operating system might not properly save
 * the full YMM/ZMM registers.
 *
 * xgetbv is used for detecting this: Any compliant operating system will define
 * a set of flags in the xcr0 register indicating how it saves the AVX registers.
 *
 * You can manually disable this flag on Windows by running, as admin:
 *
 *   bcdedit.exe /set xsavedisable 1
 *
 * and rebooting. Run the same command with 0 to re-enable it.
 *
 * Returns the 64-bit value read by XGETBV with index 0: the low 32 bits come
 * from EAX and the high 32 bits from EDX.
 */
static xxh_u64 XXH_xgetbv(void)
{
#if defined(_MSC_VER)
    return _xgetbv(0);  /* min VS2010 SP1 compiler is required */
#else
    xxh_u32 xcr0_lo, xcr0_hi;
    /* ECX = 0 is the only input; the result is read back from EAX and EDX. */
    __asm__(
        "# Call XGETBV\n\t"
        "#\n\t"
        "# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t"
        "# the XGETBV opcode, so we encode it by hand instead.\n\t"
        "# See <https://github.com/asmjit/asmjit/issues/78> for details.\n\t"
        ".byte   0x0f, 0x01, 0xd0\n\t"
       : "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
    /* Widen the high half before shifting so that no bit is lost. */
    return xcr0_lo | ((xxh_u64)xcr0_hi << 32);
#endif
}
  167 #endif
  168 
/*
 * Bit masks used by XXH_featureTest().
 * *_CPUID_MASK values are tested against a register returned by XXH_cpuid();
 * *_XGETBV_MASK values are tested against the result of XXH_xgetbv().
 * Every test requires ALL bits of the mask to be set.
 */
#define SSE2_CPUID_MASK (1 << 26)                   /* CPUID leaf 1, EDX */
#define OSXSAVE_CPUID_MASK ((1 << 26) | (1 << 27))  /* CPUID leaf 1, ECX */
#define AVX2_CPUID_MASK (1 << 5)                    /* CPUID leaf 7, EBX */
#define AVX2_XGETBV_MASK ((1 << 2) | (1 << 1))      /* OS state bits needed for AVX2 */
#define AVX512F_CPUID_MASK (1 << 16)                /* CPUID leaf 7, EBX */
#define AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1)) /* OS state bits needed for AVX512F */
  175 
/*
 * Returns the best XXH3 implementation usable on the running machine,
 * as one of XXH_SCALAR, XXH_SSE2, XXH_AVX2 or XXH_AVX512.
 *
 * The checks go from the weakest to the strongest instruction set, and the
 * function returns the best level validated so far as soon as one check fails:
 *  - XXH_SCALAR when CPUID is unavailable (i386 only), reports 0 leaves,
 *    or does not report SSE2;
 *  - XXH_SSE2 when leaf 7 is missing, the leaf 1 ECX bits of
 *    OSXSAVE_CPUID_MASK are not all set, or AVX2 is not usable;
 *  - XXH_AVX2 / XXH_AVX512 only when both the CPU (CPUID leaf 7) and the OS
 *    (XGETBV) report support.
 * When XXH_DISPATCH_AVX2 is defined, a failed AVX2 check returns before
 * AVX512F is examined.
 */
static int XXH_featureTest(void)
{
    xxh_u32 abcd[4];      /* EAX, EBX, ECX, EDX of the latest XXH_cpuid() call */
    xxh_u32 max_leaves;
    int best = XXH_SCALAR;
#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512)
    xxh_u64 xgetbv_val;
#endif
#if defined(__GNUC__) && defined(__i386__)
    xxh_u32 cpuid_supported;
    __asm__(
        "# For the sake of ruthless backwards compatibility, check if CPUID\n\t"
        "# is supported in the EFLAGS on i386.\n\t"
        "# This is not necessary on x86_64 - CPUID is mandatory.\n\t"
        "#   The ID flag (bit 21) in the EFLAGS register indicates support\n\t"
        "#   for the CPUID instruction. If a software procedure can set and\n\t"
        "#   clear this flag, the processor executing the procedure supports\n\t"
        "#   the CPUID instruction.\n\t"
        "#   <https://c9x.me/x86/html/file_module_x86_id_45.html>\n\t"
        "#\n\t"
        "# Routine is from <https://wiki.osdev.org/CPUID>.\n\t"

        "# Save EFLAGS\n\t"
        I_ATT("pushfd",                           "pushfl"                    )
        "# Store EFLAGS\n\t"
        I_ATT("pushfd",                           "pushfl"                    )
        "# Invert the ID bit in stored EFLAGS\n\t"
        I_ATT("xor     dword ptr[esp], 0x200000", "xorl    $0x200000, (%%esp)")
        "# Load stored EFLAGS (with ID bit inverted)\n\t"
        I_ATT("popfd",                            "popfl"                     )
        "# Store EFLAGS again (ID bit may or not be inverted)\n\t"
        I_ATT("pushfd",                           "pushfl"                    )
        "# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t"
        I_ATT("pop     eax",                      "popl    %%eax"             )
        "# eax = whichever bits were changed\n\t"
        I_ATT("xor     eax, dword ptr[esp]",      "xorl    (%%esp), %%eax"    )
        "# Restore original EFLAGS\n\t"
        I_ATT("popfd",                            "popfl"                     )
        "# eax = zero if ID bit can't be changed, else non-zero\n\t"
        I_ATT("and     eax, 0x200000",            "andl    $0x200000, %%eax"  )
        : "=a" (cpuid_supported) :: "cc");

    if (XXH_unlikely(!cpuid_supported)) {
        XXH_debugPrint("CPUID support is not detected!");
        return best;
    }

#endif
    /* Check how many CPUID pages we have */
    XXH_cpuid(0, 0, abcd);
    max_leaves = abcd[0];

    /* Shouldn't happen on hardware, but happens on some QEMU configs. */
    if (XXH_unlikely(max_leaves == 0)) {
        XXH_debugPrint("Max CPUID leaves == 0!");
        return best;
    }

    /* Check for SSE2, OSXSAVE and xgetbv */
    XXH_cpuid(1, 0, abcd);

    /*
     * Test for SSE2. The check is redundant on x86_64, but it doesn't hurt.
     */
    if (XXH_unlikely((abcd[3] & SSE2_CPUID_MASK) != SSE2_CPUID_MASK))
        return best;

    XXH_debugPrint("SSE2 support detected.");

    best = XXH_SSE2;
#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512)
    /* Make sure we have enough leaves */
    if (XXH_unlikely(max_leaves < 7))
        return best;

    /* Test for OSXSAVE and XGETBV */
    /* abcd still holds leaf 1 here: index 2 is ECX. */
    if ((abcd[2] & OSXSAVE_CPUID_MASK) != OSXSAVE_CPUID_MASK)
        return best;

    /* CPUID check for AVX features */
    XXH_cpuid(7, 0, abcd);

    /* XGETBV is only executed after the OSXSAVE test above has passed. */
    xgetbv_val = XXH_xgetbv();
#if defined(XXH_DISPATCH_AVX2)
    /* Validate that AVX2 is supported by the CPU */
    if ((abcd[1] & AVX2_CPUID_MASK) != AVX2_CPUID_MASK)
        return best;

    /* Validate that the OS supports YMM registers */
    if ((xgetbv_val & AVX2_XGETBV_MASK) != AVX2_XGETBV_MASK) {
        XXH_debugPrint("AVX2 supported by the CPU, but not the OS.");
        return best;
    }

    /* AVX2 supported */
    XXH_debugPrint("AVX2 support detected.");
    best = XXH_AVX2;
#endif
#if defined(XXH_DISPATCH_AVX512)
    /* Check if AVX512F is supported by the CPU */
    if ((abcd[1] & AVX512F_CPUID_MASK) != AVX512F_CPUID_MASK) {
        XXH_debugPrint("AVX512F not supported by CPU");
        return best;
    }

    /* Validate that the OS supports ZMM registers */
    if ((xgetbv_val & AVX512F_XGETBV_MASK) != AVX512F_XGETBV_MASK) {
        XXH_debugPrint("AVX512F supported by the CPU, but not the OS.");
        return best;
    }

    /* AVX512F supported */
    XXH_debugPrint("AVX512F support detected.");
    best = XXH_AVX512;
#endif
#endif
    return best;
}
  295 
  296 
  297 /* ===   Vector implementations   === */
  298 
  299 /* ===   XXH3, default variants   === */
  300 
  301 XXH_NO_INLINE XXH64_hash_t
  302 XXHL64_default_scalar(const void* XXH_RESTRICT input, size_t len)
  303 {
  304     return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar);
  305 }
  306 
  307 XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t
  308 XXHL64_default_sse2(const void* XXH_RESTRICT input, size_t len)
  309 {
  310     return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2);
  311 }
  312 
  313 #ifdef XXH_DISPATCH_AVX2
  314 XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t
  315 XXHL64_default_avx2(const void* XXH_RESTRICT input, size_t len)
  316 {
  317     return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2);
  318 }
  319 #endif
  320 
  321 #ifdef XXH_DISPATCH_AVX512
  322 XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t
  323 XXHL64_default_avx512(const void* XXH_RESTRICT input, size_t len)
  324 {
  325     return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512);
  326 }
  327 #endif
  328 
  329 /* ===   XXH3, Seeded variants   === */
  330 
  331 XXH_NO_INLINE XXH64_hash_t
  332 XXHL64_seed_scalar(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
  333 {
  334     return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
  335                     XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar, XXH3_initCustomSecret_scalar);
  336 }
  337 
  338 XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t
  339 XXHL64_seed_sse2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
  340 {
  341     return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
  342                     XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2, XXH3_initCustomSecret_sse2);
  343 }
  344 
  345 #ifdef XXH_DISPATCH_AVX2
  346 XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t
  347 XXHL64_seed_avx2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
  348 {
  349     return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
  350                     XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2, XXH3_initCustomSecret_avx2);
  351 }
  352 #endif
  353 
  354 #ifdef XXH_DISPATCH_AVX512
  355 XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t
  356 XXHL64_seed_avx512(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
  357 {
  358     return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
  359                     XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512, XXH3_initCustomSecret_avx512);
  360 }
  361 #endif
  362 
  363 /* ===   XXH3, Secret variants   === */
  364 
  365 XXH_NO_INLINE XXH64_hash_t
  366 XXHL64_secret_scalar(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen)
  367 {
  368     return XXH3_hashLong_64b_internal(input, len, secret, secretLen,
  369                     XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar);
  370 }
  371 
  372 XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t
  373 XXHL64_secret_sse2(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen)
  374 {
  375     return XXH3_hashLong_64b_internal(input, len, secret, secretLen,
  376                     XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2);
  377 }
  378 
  379 #ifdef XXH_DISPATCH_AVX2
  380 XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t
  381 XXHL64_secret_avx2(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen)
  382 {
  383     return XXH3_hashLong_64b_internal(input, len, secret, secretLen,
  384                     XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2);
  385 }
  386 #endif
  387 
  388 #ifdef XXH_DISPATCH_AVX512
  389 XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t
  390 XXHL64_secret_avx512(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen)
  391 {
  392     return XXH3_hashLong_64b_internal(input, len, secret, secretLen,
  393                     XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512);
  394 }
  395 #endif
  396 
  397 /* ===   XXH3 update variants   === */
  398 
  399 XXH_NO_INLINE XXH_errorcode
  400 XXH3_64bits_update_scalar(XXH3_state_t* state, const void* input, size_t len)
  401 {
  402     return XXH3_update(state, (const xxh_u8*)input, len,
  403                        XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar);
  404 }
  405 
  406 XXH_NO_INLINE XXH_TARGET_SSE2 XXH_errorcode
  407 XXH3_64bits_update_sse2(XXH3_state_t* state, const void* input, size_t len)
  408 {
  409     return XXH3_update(state, (const xxh_u8*)input, len,
  410                        XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2);
  411 }
  412 
  413 #ifdef XXH_DISPATCH_AVX2
  414 XXH_NO_INLINE XXH_TARGET_AVX2 XXH_errorcode
  415 XXH3_64bits_update_avx2(XXH3_state_t* state, const void* input, size_t len)
  416 {
  417     return XXH3_update(state, (const xxh_u8*)input, len,
  418                        XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2);
  419 }
  420 #endif
  421 
  422 #ifdef XXH_DISPATCH_AVX512
  423 XXH_NO_INLINE XXH_TARGET_AVX512 XXH_errorcode
  424 XXH3_64bits_update_avx512(XXH3_state_t* state, const void* input, size_t len)
  425 {
  426     return XXH3_update(state, (const xxh_u8*)input, len,
  427                        XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512);
  428 }
  429 #endif
  430 
  431 /* ===   XXH128 default variants   === */
  432 
  433 XXH_NO_INLINE XXH128_hash_t
  434 XXHL128_default_scalar(const void* XXH_RESTRICT input, size_t len)
  435 {
  436     return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar);
  437 }
  438 
  439 XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t
  440 XXHL128_default_sse2(const void* XXH_RESTRICT input, size_t len)
  441 {
  442     return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2);
  443 }
  444 
  445 #ifdef XXH_DISPATCH_AVX2
  446 XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t
  447 XXHL128_default_avx2(const void* XXH_RESTRICT input, size_t len)
  448 {
  449     return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2);
  450 }
  451 #endif
  452 
  453 #ifdef XXH_DISPATCH_AVX512
  454 XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t
  455 XXHL128_default_avx512(const void* XXH_RESTRICT input, size_t len)
  456 {
  457     return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512);
  458 }
  459 #endif
  460 
  461 /* ===   XXH128 Secret variants   === */
  462 
  463 XXH_NO_INLINE XXH128_hash_t
  464 XXHL128_secret_scalar(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen)
  465 {
  466     return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
  467                     XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar);
  468 }
  469 
  470 XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t
  471 XXHL128_secret_sse2(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen)
  472 {
  473     return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
  474                     XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2);
  475 }
  476 
  477 #ifdef XXH_DISPATCH_AVX2
  478 XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t
  479 XXHL128_secret_avx2(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen)
  480 {
  481     return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
  482                     XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2);
  483 }
  484 #endif
  485 
  486 #ifdef XXH_DISPATCH_AVX512
  487 XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t
  488 XXHL128_secret_avx512(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen)
  489 {
  490     return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
  491                     XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512);
  492 }
  493 #endif
  494 
  495 /* ===   XXH128 Seeded variants   === */
  496 
  497 XXH_NO_INLINE XXH128_hash_t
  498 XXHL128_seed_scalar(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
  499 {
  500     return XXH3_hashLong_128b_withSeed_internal(input, len, seed,
  501                     XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar, XXH3_initCustomSecret_scalar);
  502 }
  503 
  504 XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t
  505 XXHL128_seed_sse2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
  506 {
  507     return XXH3_hashLong_128b_withSeed_internal(input, len, seed,
  508                     XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2, XXH3_initCustomSecret_sse2);
  509 }
  510 
  511 #ifdef XXH_DISPATCH_AVX2
  512 XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t
  513 XXHL128_seed_avx2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
  514 {
  515     return XXH3_hashLong_128b_withSeed_internal(input, len, seed,
  516                     XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2, XXH3_initCustomSecret_avx2);
  517 }
  518 #endif
  519 
  520 #ifdef XXH_DISPATCH_AVX512
  521 XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t
  522 XXHL128_seed_avx512(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
  523 {
  524     return XXH3_hashLong_128b_withSeed_internal(input, len, seed,
  525                     XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512, XXH3_initCustomSecret_avx512);
  526 }
  527 #endif
  528 
  529 /* ===   XXH128 update variants   === */
  530 
  531 XXH_NO_INLINE XXH_errorcode
  532 XXH3_128bits_update_scalar(XXH3_state_t* state, const void* input, size_t len)
  533 {
  534     return XXH3_update(state, (const xxh_u8*)input, len,
  535                        XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar);
  536 }
  537 
  538 XXH_NO_INLINE XXH_TARGET_SSE2 XXH_errorcode
  539 XXH3_128bits_update_sse2(XXH3_state_t* state, const void* input, size_t len)
  540 {
  541     return XXH3_update(state, (const xxh_u8*)input, len,
  542                        XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2);
  543 }
  544 
  545 #ifdef XXH_DISPATCH_AVX2
  546 XXH_NO_INLINE XXH_TARGET_AVX2 XXH_errorcode
  547 XXH3_128bits_update_avx2(XXH3_state_t* state, const void* input, size_t len)
  548 {
  549     return XXH3_update(state, (const xxh_u8*)input, len,
  550                        XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2);
  551 }
  552 #endif
  553 
  554 #ifdef XXH_DISPATCH_AVX512
  555 XXH_NO_INLINE XXH_TARGET_AVX512 XXH_errorcode
  556 XXH3_128bits_update_avx512(XXH3_state_t* state, const void* input, size_t len)
  557 {
  558     return XXH3_update(state, (const xxh_u8*)input, len,
  559                        XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512);
  560 }
  561 #endif
  562 
  563 /* ====    Dispatchers    ==== */
  564 
/* Pointer to a 64-bit default-secret variant: (input, len). */
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(const void* XXH_RESTRICT, size_t);

/* Pointer to a 64-bit seeded variant: (input, len, seed). */
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);

/* Pointer to a 64-bit custom-secret variant: (input, len, secret, secretLen). */
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);

/* Pointer to a streaming update variant: (state, input, len). Used by both the 64-bit and the 128-bit tables. */
typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH3_state_t*, const void*, size_t);
  572 
/*
 * One row of the 64-bit dispatch table: the four entry points of a single
 * implementation. Field order matters, rows are filled positionally.
 */
typedef struct {
    XXH3_dispatchx86_hashLong64_default    hashLong64_default;
    XXH3_dispatchx86_hashLong64_withSeed   hashLong64_seed;
    XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret;
    XXH3_dispatchx86_update                update;
} dispatchFunctions_s;

/*
 * Currently selected 64-bit implementation.
 * All NULL until setDispatch() has run; the dispatchers test a member
 * against NULL to decide whether setDispatch() must be called.
 */
static dispatchFunctions_s g_dispatch = { NULL, NULL, NULL, NULL};
  581 
/* Number of rows in each dispatch table; setDispatch() statically checks it against XXH_AVX512. */
#define NB_DISPATCHES 4
/*
 * 64-bit implementations, indexed by the value returned by XXH_featureTest().
 * Rows are listed as scalar, sse2, avx2, avx512; this is expected to match the
 * numeric order of XXH_SCALAR, XXH_SSE2, XXH_AVX2, XXH_AVX512 (defined
 * outside this file).
 * A row is all NULL when its XXH_DISPATCH_* macro is not defined.
 */
static const dispatchFunctions_s k_dispatch[NB_DISPATCHES] = {
        /* scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_64bits_update_scalar },
        /* sse2   */ { XXHL64_default_sse2,   XXHL64_seed_sse2,   XXHL64_secret_sse2,   XXH3_64bits_update_sse2 },
#ifdef XXH_DISPATCH_AVX2
        /* avx2   */ { XXHL64_default_avx2,   XXHL64_seed_avx2,   XXHL64_secret_avx2,   XXH3_64bits_update_avx2 },
#else
        /* avx2 */ { NULL, NULL, NULL, NULL },
#endif
#ifdef XXH_DISPATCH_AVX512
        /* avx512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_64bits_update_avx512 }
#else
        /* avx512 */ { NULL, NULL, NULL, NULL }
#endif
};
  597 
/* Pointer to a 128-bit default-secret variant: (input, len). */
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(const void* XXH_RESTRICT, size_t);

/* Pointer to a 128-bit seeded variant: (input, len, seed). */
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);

/* Pointer to a 128-bit custom-secret variant: (input, len, secret, secretLen). */
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);
  603 
/*
 * One row of the 128-bit dispatch table: the four entry points of a single
 * implementation. Field order matters, rows are filled positionally.
 */
typedef struct {
    XXH3_dispatchx86_hashLong128_default    hashLong128_default;
    XXH3_dispatchx86_hashLong128_withSeed   hashLong128_seed;
    XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret;
    XXH3_dispatchx86_update                 update;
} dispatch128Functions_s;

/*
 * Currently selected 128-bit implementation.
 * All NULL until setDispatch() has run; the dispatchers test a member
 * against NULL to decide whether setDispatch() must be called.
 */
static dispatch128Functions_s g_dispatch128 = { NULL, NULL, NULL, NULL };
  612 
/*
 * 128-bit implementations, indexed by the value returned by XXH_featureTest().
 * Rows are listed as scalar, sse2, avx2, avx512; this is expected to match the
 * numeric order of XXH_SCALAR, XXH_SSE2, XXH_AVX2, XXH_AVX512 (defined
 * outside this file).
 * A row is all NULL when its XXH_DISPATCH_* macro is not defined.
 */
static const dispatch128Functions_s k_dispatch128[NB_DISPATCHES] = {
        /* scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_128bits_update_scalar },
        /* sse2   */ { XXHL128_default_sse2,   XXHL128_seed_sse2,   XXHL128_secret_sse2,   XXH3_128bits_update_sse2 },
#ifdef XXH_DISPATCH_AVX2
        /* avx2   */ { XXHL128_default_avx2,   XXHL128_seed_avx2,   XXHL128_secret_avx2,   XXH3_128bits_update_avx2 },
#else
        /* avx2 */ { NULL, NULL, NULL, NULL },
#endif
#ifdef XXH_DISPATCH_AVX512
        /* avx512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_128bits_update_avx512 }
#else
        /* avx512 */ { NULL, NULL, NULL, NULL }
#endif
};
  627 
  628 static void setDispatch(void)
  629 {
  630     int vecID = XXH_featureTest();
  631     XXH_STATIC_ASSERT(XXH_AVX512 == NB_DISPATCHES-1);
  632     assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512);
  633 #ifndef XXH_DISPATCH_AVX512
  634     assert(vecID != XXH_AVX512);
  635 #endif
  636 #ifndef XXH_DISPATCH_AVX2
  637     assert(vecID != XXH_AVX2);
  638 #endif
  639     g_dispatch = k_dispatch[vecID];
  640     g_dispatch128 = k_dispatch128[vecID];
  641 }
  642 
  643 
  644 /* ====    XXH3 public functions    ==== */
  645 
  646 static XXH64_hash_t
  647 XXH3_hashLong_64b_defaultSecret_selection(const void* input, size_t len,
  648                                           XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
  649 {
  650     (void)seed64; (void)secret; (void)secretLen;
  651     if (g_dispatch.hashLong64_default == NULL) setDispatch();
  652     return g_dispatch.hashLong64_default(input, len);
  653 }
  654 
  655 XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len)
  656 {
  657     return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection);
  658 }
  659 
  660 static XXH64_hash_t
  661 XXH3_hashLong_64b_withSeed_selection(const void* input, size_t len,
  662                                      XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
  663 {
  664     (void)secret; (void)secretLen;
  665     if (g_dispatch.hashLong64_seed == NULL) setDispatch();
  666     return g_dispatch.hashLong64_seed(input, len, seed64);
  667 }
  668 
  669 XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
  670 {
  671     return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed_selection);
  672 }
  673 
  674 static XXH64_hash_t
  675 XXH3_hashLong_64b_withSecret_selection(const void* input, size_t len,
  676                                        XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
  677 {
  678     (void)seed64;
  679     if (g_dispatch.hashLong64_secret == NULL) setDispatch();
  680     return g_dispatch.hashLong64_secret(input, len, secret, secretLen);
  681 }
  682 
  683 XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen)
  684 {
  685     return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection);
  686 }
  687 
  688 XXH_errorcode
  689 XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len)
  690 {
  691     if (g_dispatch.update == NULL) setDispatch();
  692     return g_dispatch.update(state, (const xxh_u8*)input, len);
  693 }
  694 
  695 
  696 /* ====    XXH128 public functions    ==== */
  697 
  698 static XXH128_hash_t
  699 XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len,
  700                                            XXH64_hash_t seed64, const void* secret, size_t secretLen)
  701 {
  702     (void)seed64; (void)secret; (void)secretLen;
  703     if (g_dispatch128.hashLong128_default == NULL) setDispatch();
  704     return g_dispatch128.hashLong128_default(input, len);
  705 }
  706 
  707 XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len)
  708 {
  709     return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection);
  710 }
  711 
  712 static XXH128_hash_t
  713 XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len,
  714                                      XXH64_hash_t seed64, const void* secret, size_t secretLen)
  715 {
  716     (void)secret; (void)secretLen;
  717     if (g_dispatch128.hashLong128_seed == NULL) setDispatch();
  718     return g_dispatch128.hashLong128_seed(input, len, seed64);
  719 }
  720 
  721 XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
  722 {
  723     return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_withSeed_selection);
  724 }
  725 
  726 static XXH128_hash_t
  727 XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len,
  728                                         XXH64_hash_t seed64, const void* secret, size_t secretLen)
  729 {
  730     (void)seed64;
  731     if (g_dispatch128.hashLong128_secret == NULL) setDispatch();
  732     return g_dispatch128.hashLong128_secret(input, len, secret, secretLen);
  733 }
  734 
  735 XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen)
  736 {
  737     return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection);
  738 }
  739 
  740 XXH_errorcode
  741 XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len)
  742 {
  743     if (g_dispatch128.update == NULL) setDispatch();
  744     return g_dispatch128.update(state, (const xxh_u8*)input, len);
  745 }
  746 
  747 #if defined (__cplusplus)
  748 }
  749 #endif