"Fossies" - the Fresh Open Source Software Archive

Member "jansson-2.14/src/lookup3.h" (19 Nov 2020, 13856 Bytes) of package /linux/www/jansson-2.14.tar.bz2:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "lookup3.h" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.13.1_vs_2.14.

    1 // clang-format off
    2 /*
    3 -------------------------------------------------------------------------------
    4 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
    5 
    6 These are functions for producing 32-bit hashes for hash table lookup.
    7 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() 
    8 are externally useful functions.  Routines to test the hash are included 
    9 if SELF_TEST is defined.  You can use this free for any purpose.  It's in
   10 the public domain.  It has no warranty.
   11 
   12 You probably want to use hashlittle().  hashlittle() and hashbig()
   13 hash byte arrays.  hashlittle() is is faster than hashbig() on
   14 little-endian machines.  Intel and AMD are little-endian machines.
   15 On second thought, you probably want hashlittle2(), which is identical to
   16 hashlittle() except it returns two 32-bit hashes for the price of one.  
   17 You could implement hashbig2() if you wanted but I haven't bothered here.
   18 
   19 If you want to find a hash of, say, exactly 7 integers, do
   20   a = i1;  b = i2;  c = i3;
   21   mix(a,b,c);
   22   a += i4; b += i5; c += i6;
   23   mix(a,b,c);
   24   a += i7;
   25   final(a,b,c);
   26 then use c as the hash value.  If you have a variable length array of
   27 4-byte integers to hash, use hashword().  If you have a byte array (like
   28 a character string), use hashlittle().  If you have several byte arrays, or
   29 a mix of things, see the comments above hashlittle().  
   30 
   31 Why is this so big?  I read 12 bytes at a time into 3 4-byte integers, 
   32 then mix those integers.  This is fast (you can do a lot more thorough
   33 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
   34 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
   35 -------------------------------------------------------------------------------
   36 */
   37 
   38 #include <stdlib.h>
   39 
   40 #ifdef HAVE_CONFIG_H
   41 #include <jansson_private_config.h>
   42 #endif
   43 
   44 #ifdef HAVE_STDINT_H
   45 #include <stdint.h>     /* defines uint32_t etc */
   46 #endif
   47 
   48 #ifdef HAVE_SYS_PARAM_H
   49 #include <sys/param.h>  /* attempt to define endianness */
   50 #endif
   51 
   52 #ifdef HAVE_ENDIAN_H
   53 # include <endian.h>    /* attempt to define endianness */
   54 #endif
   55 
   56 /*
   57  * My best guess at if you are big-endian or little-endian.  This may
   58  * need adjustment.
   59  */
   60 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
   61      __BYTE_ORDER == __LITTLE_ENDIAN) || \
   62     (defined(i386) || defined(__i386__) || defined(__i486__) || \
   63      defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
   64 # define HASH_LITTLE_ENDIAN 1
   65 # define HASH_BIG_ENDIAN 0
   66 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
   67        __BYTE_ORDER == __BIG_ENDIAN) || \
   68       (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
   69 # define HASH_LITTLE_ENDIAN 0
   70 # define HASH_BIG_ENDIAN 1
   71 #else
   72 # define HASH_LITTLE_ENDIAN 0
   73 # define HASH_BIG_ENDIAN 0
   74 #endif
   75 
   76 #define hashsize(n) ((size_t)1<<(n))
   77 #define hashmask(n) (hashsize(n)-1)
   78 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
   79 
   80 /*
   81 -------------------------------------------------------------------------------
   82 mix -- mix 3 32-bit values reversibly.
   83 
   84 This is reversible, so any information in (a,b,c) before mix() is
   85 still in (a,b,c) after mix().
   86 
   87 If four pairs of (a,b,c) inputs are run through mix(), or through
   88 mix() in reverse, there are at least 32 bits of the output that
   89 are sometimes the same for one pair and different for another pair.
   90 This was tested for:
   91 * pairs that differed by one bit, by two bits, in any combination
   92   of top bits of (a,b,c), or in any combination of bottom bits of
   93   (a,b,c).
   94 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
   95   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
   96   is commonly produced by subtraction) look like a single 1-bit
   97   difference.
   98 * the base values were pseudorandom, all zero but one bit set, or 
   99   all zero plus a counter that starts at zero.
  100 
  101 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
  102 satisfy this are
  103     4  6  8 16 19  4
  104     9 15  3 18 27 15
  105    14  9  3  7 17  3
  106 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
  107 for "differ" defined as + with a one-bit base and a two-bit delta.  I
  108 used http://burtleburtle.net/bob/hash/avalanche.html to choose 
  109 the operations, constants, and arrangements of the variables.
  110 
  111 This does not achieve avalanche.  There are input bits of (a,b,c)
  112 that fail to affect some output bits of (a,b,c), especially of a.  The
  113 most thoroughly mixed value is c, but it doesn't really even achieve
  114 avalanche in c.
  115 
  116 This allows some parallelism.  Read-after-writes are good at doubling
  117 the number of bits affected, so the goal of mixing pulls in the opposite
  118 direction as the goal of parallelism.  I did what I could.  Rotates
  119 seem to cost as much as shifts on every machine I could lay my hands
  120 on, and rotates are much kinder to the top and bottom bits, so I used
  121 rotates.
  122 -------------------------------------------------------------------------------
  123 */
  124 #define mix(a,b,c) \
  125 { \
  126   a -= c;  a ^= rot(c, 4);  c += b; \
  127   b -= a;  b ^= rot(a, 6);  a += c; \
  128   c -= b;  c ^= rot(b, 8);  b += a; \
  129   a -= c;  a ^= rot(c,16);  c += b; \
  130   b -= a;  b ^= rot(a,19);  a += c; \
  131   c -= b;  c ^= rot(b, 4);  b += a; \
  132 }
  133 
  134 /*
  135 -------------------------------------------------------------------------------
  136 final -- final mixing of 3 32-bit values (a,b,c) into c
  137 
  138 Pairs of (a,b,c) values differing in only a few bits will usually
  139 produce values of c that look totally different.  This was tested for
  140 * pairs that differed by one bit, by two bits, in any combination
  141   of top bits of (a,b,c), or in any combination of bottom bits of
  142   (a,b,c).
  143 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
  144   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  145   is commonly produced by subtraction) look like a single 1-bit
  146   difference.
  147 * the base values were pseudorandom, all zero but one bit set, or 
  148   all zero plus a counter that starts at zero.
  149 
  150 These constants passed:
  151  14 11 25 16 4 14 24
  152  12 14 25 16 4 14 24
  153 and these came close:
  154   4  8 15 26 3 22 24
  155  10  8 15 26 3 22 24
  156  11  8 15 26 3 22 24
  157 -------------------------------------------------------------------------------
  158 */
  159 #define final(a,b,c) \
  160 { \
  161   c ^= b; c -= rot(b,14); \
  162   a ^= c; a -= rot(c,11); \
  163   b ^= a; b -= rot(a,25); \
  164   c ^= b; c -= rot(b,16); \
  165   a ^= c; a -= rot(c,4);  \
  166   b ^= a; b -= rot(a,14); \
  167   c ^= b; c -= rot(b,24); \
  168 }
  169 
  170 /*
  171 -------------------------------------------------------------------------------
  172 hashlittle() -- hash a variable-length key into a 32-bit value
  173   k       : the key (the unaligned variable-length array of bytes)
  174   length  : the length of the key, counting by bytes
  175   initval : can be any 4-byte value
  176 Returns a 32-bit value.  Every bit of the key affects every bit of
  177 the return value.  Two keys differing by one or two bits will have
  178 totally different hash values.
  179 
  180 The best hash table sizes are powers of 2.  There is no need to do
  181 mod a prime (mod is sooo slow!).  If you need less than 32 bits,
  182 use a bitmask.  For example, if you need only 10 bits, do
  183   h = (h & hashmask(10));
  184 In which case, the hash table should have hashsize(10) elements.
  185 
  186 If you are hashing n strings (uint8_t **)k, do it like this:
  187   for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
  188 
  189 By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
  190 code any way you wish, private, educational, or commercial.  It's free.
  191 
  192 Use for hash table lookup, or anything where one collision in 2^^32 is
  193 acceptable.  Do NOT use for cryptographic purposes.
  194 -------------------------------------------------------------------------------
  195 */
  196 
  197 static uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
  198 {
  199   uint32_t a,b,c;                                          /* internal state */
  200   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
  201 
  202   /* Set up the internal state */
  203   a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
  204 
  205   u.ptr = key;
  206   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
  207     const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
  208 
  209 /* Detect Valgrind or AddressSanitizer */
  210 #ifdef VALGRIND
  211 # define NO_MASKING_TRICK 1
  212 #else
  213 # if defined(__has_feature)  /* Clang */
  214 #  if __has_feature(address_sanitizer)  /* is ASAN enabled? */
  215 #   define NO_MASKING_TRICK 1
  216 #  endif
  217 # else
  218 #  if defined(__SANITIZE_ADDRESS__)  /* GCC 4.8.x, is ASAN enabled? */
  219 #   define NO_MASKING_TRICK 1
  220 #  endif
  221 # endif
  222 #endif
  223 
  224 #ifdef NO_MASKING_TRICK
  225     const uint8_t  *k8;
  226 #endif
  227 
  228     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
  229     while (length > 12)
  230     {
  231       a += k[0];
  232       b += k[1];
  233       c += k[2];
  234       mix(a,b,c);
  235       length -= 12;
  236       k += 3;
  237     }
  238 
  239     /*----------------------------- handle the last (probably partial) block */
  240     /* 
  241      * "k[2]&0xffffff" actually reads beyond the end of the string, but
  242      * then masks off the part it's not allowed to read.  Because the
  243      * string is aligned, the masked-off tail is in the same word as the
  244      * rest of the string.  Every machine with memory protection I've seen
  245      * does it on word boundaries, so is OK with this.  But VALGRIND will
  246      * still catch it and complain.  The masking trick does make the hash
  247      * noticeably faster for short strings (like English words).
  248      */
  249 #ifndef NO_MASKING_TRICK
  250 
  251     switch(length)
  252     {
  253     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
  254     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
  255     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
  256     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
  257     case 8 : b+=k[1]; a+=k[0]; break;
  258     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
  259     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
  260     case 5 : b+=k[1]&0xff; a+=k[0]; break;
  261     case 4 : a+=k[0]; break;
  262     case 3 : a+=k[0]&0xffffff; break;
  263     case 2 : a+=k[0]&0xffff; break;
  264     case 1 : a+=k[0]&0xff; break;
  265     case 0 : return c;              /* zero length strings require no mixing */
  266     }
  267 
  268 #else /* make valgrind happy */
  269 
  270     k8 = (const uint8_t *)k;
  271     switch(length)
  272     {
  273     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
  274     case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
  275     case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
  276     case 9 : c+=k8[8];                   /* fall through */
  277     case 8 : b+=k[1]; a+=k[0]; break;
  278     case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
  279     case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
  280     case 5 : b+=k8[4];                   /* fall through */
  281     case 4 : a+=k[0]; break;
  282     case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
  283     case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
  284     case 1 : a+=k8[0]; break;
  285     case 0 : return c;
  286     }
  287 
  288 #endif /* !valgrind */
  289 
  290   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
  291     const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
  292     const uint8_t  *k8;
  293 
  294     /*--------------- all but last block: aligned reads and different mixing */
  295     while (length > 12)
  296     {
  297       a += k[0] + (((uint32_t)k[1])<<16);
  298       b += k[2] + (((uint32_t)k[3])<<16);
  299       c += k[4] + (((uint32_t)k[5])<<16);
  300       mix(a,b,c);
  301       length -= 12;
  302       k += 6;
  303     }
  304 
  305     /*----------------------------- handle the last (probably partial) block */
  306     k8 = (const uint8_t *)k;
  307     switch(length)
  308     {
  309     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
  310              b+=k[2]+(((uint32_t)k[3])<<16);
  311              a+=k[0]+(((uint32_t)k[1])<<16);
  312              break;
  313     case 11: c+=((uint32_t)k8[10])<<16;     /* fall through */
  314     case 10: c+=k[4];
  315              b+=k[2]+(((uint32_t)k[3])<<16);
  316              a+=k[0]+(((uint32_t)k[1])<<16);
  317              break;
  318     case 9 : c+=k8[8];                      /* fall through */
  319     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
  320              a+=k[0]+(((uint32_t)k[1])<<16);
  321              break;
  322     case 7 : b+=((uint32_t)k8[6])<<16;      /* fall through */
  323     case 6 : b+=k[2];
  324              a+=k[0]+(((uint32_t)k[1])<<16);
  325              break;
  326     case 5 : b+=k8[4];                      /* fall through */
  327     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
  328              break;
  329     case 3 : a+=((uint32_t)k8[2])<<16;      /* fall through */
  330     case 2 : a+=k[0];
  331              break;
  332     case 1 : a+=k8[0];
  333              break;
  334     case 0 : return c;                     /* zero length requires no mixing */
  335     }
  336 
  337   } else {                        /* need to read the key one byte at a time */
  338     const uint8_t *k = (const uint8_t *)key;
  339 
  340     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
  341     while (length > 12)
  342     {
  343       a += k[0];
  344       a += ((uint32_t)k[1])<<8;
  345       a += ((uint32_t)k[2])<<16;
  346       a += ((uint32_t)k[3])<<24;
  347       b += k[4];
  348       b += ((uint32_t)k[5])<<8;
  349       b += ((uint32_t)k[6])<<16;
  350       b += ((uint32_t)k[7])<<24;
  351       c += k[8];
  352       c += ((uint32_t)k[9])<<8;
  353       c += ((uint32_t)k[10])<<16;
  354       c += ((uint32_t)k[11])<<24;
  355       mix(a,b,c);
  356       length -= 12;
  357       k += 12;
  358     }
  359 
  360     /*-------------------------------- last block: affect all 32 bits of (c) */
  361     switch(length)                   /* all the case statements fall through */
  362     {
  363     case 12: c+=((uint32_t)k[11])<<24; /* fall through */
  364     case 11: c+=((uint32_t)k[10])<<16; /* fall through */
  365     case 10: c+=((uint32_t)k[9])<<8; /* fall through */
  366     case 9 : c+=k[8]; /* fall through */
  367     case 8 : b+=((uint32_t)k[7])<<24; /* fall through */
  368     case 7 : b+=((uint32_t)k[6])<<16; /* fall through */
  369     case 6 : b+=((uint32_t)k[5])<<8; /* fall through */
  370     case 5 : b+=k[4]; /* fall through */
  371     case 4 : a+=((uint32_t)k[3])<<24; /* fall through */
  372     case 3 : a+=((uint32_t)k[2])<<16; /* fall through */
  373     case 2 : a+=((uint32_t)k[1])<<8; /* fall through */
  374     case 1 : a+=k[0];
  375              break;
  376     case 0 : return c;
  377     }
  378   }
  379 
  380   final(a,b,c);
  381   return c;
  382 }