"Fossies" - the Fresh Open Source Software Archive

Member "memcached-1.6.15/jenkins_hash.c" (16 Jul 2020, 14709 Bytes) of package /linux/www/memcached-1.6.15.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "jenkins_hash.c" see the Fossies "Dox" file reference documentation.

    1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
    2 /*
    3  * Hash table
    4  *
    5  * The hash function used here is by Bob Jenkins, 1996:
    6  *    <http://burtleburtle.net/bob/hash/doobs.html>
    7  *       "By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.
    8  *       You may use this code any way you wish, private, educational,
    9  *       or commercial.  It's free."
   10  *
   11  */
   12 #include "memcached.h"
   13 #include "jenkins_hash.h"
   14 
   15 /*
   16  * Since the hash function does bit manipulation, it needs to know
   17  * whether it's big or little-endian. ENDIAN_LITTLE and ENDIAN_BIG
   18  * are set in the configure script.
   19  */
   20 #if ENDIAN_BIG == 1
   21 # define HASH_LITTLE_ENDIAN 0
   22 # define HASH_BIG_ENDIAN 1
   23 #else
   24 # if ENDIAN_LITTLE == 1
   25 #  define HASH_LITTLE_ENDIAN 1
   26 #  define HASH_BIG_ENDIAN 0
   27 # else
   28 #  define HASH_LITTLE_ENDIAN 0
   29 #  define HASH_BIG_ENDIAN 0
   30 # endif
   31 #endif
   32 
   33 #define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))
   34 
   35 /*
   36 -------------------------------------------------------------------------------
   37 mix -- mix 3 32-bit values reversibly.
   38 
   39 This is reversible, so any information in (a,b,c) before mix() is
   40 still in (a,b,c) after mix().
   41 
   42 If four pairs of (a,b,c) inputs are run through mix(), or through
   43 mix() in reverse, there are at least 32 bits of the output that
   44 are sometimes the same for one pair and different for another pair.
   45 This was tested for:
   46 * pairs that differed by one bit, by two bits, in any combination
   47   of top bits of (a,b,c), or in any combination of bottom bits of
   48   (a,b,c).
   49 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
   50   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
   51   is commonly produced by subtraction) look like a single 1-bit
   52   difference.
   53 * the base values were pseudorandom, all zero but one bit set, or
   54   all zero plus a counter that starts at zero.
   55 
   56 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
   57 satisfy this are
   58     4  6  8 16 19  4
   59     9 15  3 18 27 15
   60    14  9  3  7 17  3
   61 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
   62 for "differ" defined as + with a one-bit base and a two-bit delta.  I
   63 used http://burtleburtle.net/bob/hash/avalanche.html to choose
   64 the operations, constants, and arrangements of the variables.
   65 
   66 This does not achieve avalanche.  There are input bits of (a,b,c)
   67 that fail to affect some output bits of (a,b,c), especially of a.  The
   68 most thoroughly mixed value is c, but it doesn't really even achieve
   69 avalanche in c.
   70 
   71 This allows some parallelism.  Read-after-writes are good at doubling
   72 the number of bits affected, so the goal of mixing pulls in the opposite
   73 direction as the goal of parallelism.  I did what I could.  Rotates
   74 seem to cost as much as shifts on every machine I could lay my hands
   75 on, and rotates are much kinder to the top and bottom bits, so I used
   76 rotates.
   77 -------------------------------------------------------------------------------
   78 */
   79 #define mix(a,b,c) \
   80 { \
   81   a -= c;  a ^= rot(c, 4);  c += b; \
   82   b -= a;  b ^= rot(a, 6);  a += c; \
   83   c -= b;  c ^= rot(b, 8);  b += a; \
   84   a -= c;  a ^= rot(c,16);  c += b; \
   85   b -= a;  b ^= rot(a,19);  a += c; \
   86   c -= b;  c ^= rot(b, 4);  b += a; \
   87 }
   88 
   89 /*
   90 -------------------------------------------------------------------------------
   91 final -- final mixing of 3 32-bit values (a,b,c) into c
   92 
   93 Pairs of (a,b,c) values differing in only a few bits will usually
   94 produce values of c that look totally different.  This was tested for
   95 * pairs that differed by one bit, by two bits, in any combination
   96   of top bits of (a,b,c), or in any combination of bottom bits of
   97   (a,b,c).
   98 * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
   99   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
  100   is commonly produced by subtraction) look like a single 1-bit
  101   difference.
  102 * the base values were pseudorandom, all zero but one bit set, or
  103   all zero plus a counter that starts at zero.
  104 
  105 These constants passed:
  106  14 11 25 16 4 14 24
  107  12 14 25 16 4 14 24
  108 and these came close:
  109   4  8 15 26 3 22 24
  110  10  8 15 26 3 22 24
  111  11  8 15 26 3 22 24
  112 -------------------------------------------------------------------------------
  113 */
  114 #define final(a,b,c) \
  115 { \
  116   c ^= b; c -= rot(b,14); \
  117   a ^= c; a -= rot(c,11); \
  118   b ^= a; b -= rot(a,25); \
  119   c ^= b; c -= rot(b,16); \
  120   a ^= c; a -= rot(c,4);  \
  121   b ^= a; b -= rot(a,14); \
  122   c ^= b; c -= rot(b,24); \
  123 }
  124 
  125 #if HASH_LITTLE_ENDIAN == 1
  126 uint32_t jenkins_hash(
  127   const void *key,       /* the key to hash */
  128   size_t      length)    /* length of the key */
  129 {
  130   uint32_t a,b,c;                                          /* internal state */
  131   union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */
  132 
  133   /* Set up the internal state */
  134   a = b = c = 0xdeadbeef + ((uint32_t)length) + 0;
  135 
  136   u.ptr = key;
  137   if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
  138     const uint32_t *k = key;                           /* read 32-bit chunks */
  139 #ifdef VALGRIND
  140     const uint8_t  *k8;
  141 #endif /* ifdef VALGRIND */
  142 
  143     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
  144     while (length > 12)
  145     {
  146       a += k[0];
  147       b += k[1];
  148       c += k[2];
  149       mix(a,b,c);
  150       length -= 12;
  151       k += 3;
  152     }
  153 
  154     /*----------------------------- handle the last (probably partial) block */
  155     /*
  156      * "k[2]&0xffffff" actually reads beyond the end of the string, but
  157      * then masks off the part it's not allowed to read.  Because the
  158      * string is aligned, the masked-off tail is in the same word as the
  159      * rest of the string.  Every machine with memory protection I've seen
  160      * does it on word boundaries, so is OK with this.  But VALGRIND will
  161      * still catch it and complain.  The masking trick does make the hash
  162      * noticeably faster for short strings (like English words).
  163      */
  164 #ifndef VALGRIND
  165 
  166     switch(length)
  167     {
  168     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
  169     case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
  170     case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
  171     case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
  172     case 8 : b+=k[1]; a+=k[0]; break;
  173     case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
  174     case 6 : b+=k[1]&0xffff; a+=k[0]; break;
  175     case 5 : b+=k[1]&0xff; a+=k[0]; break;
  176     case 4 : a+=k[0]; break;
  177     case 3 : a+=k[0]&0xffffff; break;
  178     case 2 : a+=k[0]&0xffff; break;
  179     case 1 : a+=k[0]&0xff; break;
  180     case 0 : return c;  /* zero length strings require no mixing */
  181     }
  182 
  183 #else /* make valgrind happy */
  184 
  185     k8 = (const uint8_t *)k;
  186     switch(length)
  187     {
  188     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
  189     case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
  190     case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
  191     case 9 : c+=k8[8];                   /* fall through */
  192     case 8 : b+=k[1]; a+=k[0]; break;
  193     case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
  194     case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
  195     case 5 : b+=k8[4];                   /* fall through */
  196     case 4 : a+=k[0]; break;
  197     case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
  198     case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
  199     case 1 : a+=k8[0]; break;
  200     case 0 : return c;  /* zero length strings require no mixing */
  201     }
  202 
  203 #endif /* !valgrind */
  204 
  205   } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
  206     const uint16_t *k = key;                           /* read 16-bit chunks */
  207     const uint8_t  *k8;
  208 
  209     /*--------------- all but last block: aligned reads and different mixing */
  210     while (length > 12)
  211     {
  212       a += k[0] + (((uint32_t)k[1])<<16);
  213       b += k[2] + (((uint32_t)k[3])<<16);
  214       c += k[4] + (((uint32_t)k[5])<<16);
  215       mix(a,b,c);
  216       length -= 12;
  217       k += 6;
  218     }
  219 
  220     /*----------------------------- handle the last (probably partial) block */
  221     k8 = (const uint8_t *)k;
  222     switch(length)
  223     {
  224     case 12: c+=k[4]+(((uint32_t)k[5])<<16);
  225              b+=k[2]+(((uint32_t)k[3])<<16);
  226              a+=k[0]+(((uint32_t)k[1])<<16);
  227              break;
  228     case 11: c+=((uint32_t)k8[10])<<16;     /* @fallthrough */
  229     case 10: c+=k[4];                       /* @fallthrough@ */
  230              b+=k[2]+(((uint32_t)k[3])<<16);
  231              a+=k[0]+(((uint32_t)k[1])<<16);
  232              break;
  233     case 9 : c+=k8[8];                      /* @fallthrough */
  234     case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
  235              a+=k[0]+(((uint32_t)k[1])<<16);
  236              break;
  237     case 7 : b+=((uint32_t)k8[6])<<16;      /* @fallthrough */
  238     case 6 : b+=k[2];
  239              a+=k[0]+(((uint32_t)k[1])<<16);
  240              break;
  241     case 5 : b+=k8[4];                      /* @fallthrough */
  242     case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
  243              break;
  244     case 3 : a+=((uint32_t)k8[2])<<16;      /* @fallthrough */
  245     case 2 : a+=k[0];
  246              break;
  247     case 1 : a+=k8[0];
  248              break;
  249     case 0 : return c;  /* zero length strings require no mixing */
  250     }
  251 
  252   } else {                        /* need to read the key one byte at a time */
  253     const uint8_t *k = key;
  254 
  255     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
  256     while (length > 12)
  257     {
  258       a += k[0];
  259       a += ((uint32_t)k[1])<<8;
  260       a += ((uint32_t)k[2])<<16;
  261       a += ((uint32_t)k[3])<<24;
  262       b += k[4];
  263       b += ((uint32_t)k[5])<<8;
  264       b += ((uint32_t)k[6])<<16;
  265       b += ((uint32_t)k[7])<<24;
  266       c += k[8];
  267       c += ((uint32_t)k[9])<<8;
  268       c += ((uint32_t)k[10])<<16;
  269       c += ((uint32_t)k[11])<<24;
  270       mix(a,b,c);
  271       length -= 12;
  272       k += 12;
  273     }
  274 
  275     /*-------------------------------- last block: affect all 32 bits of (c) */
  276     switch(length)                   /* all the case statements fall through */
  277     {
  278     case 12: c+=((uint32_t)k[11])<<24;
  279     case 11: c+=((uint32_t)k[10])<<16;
  280     case 10: c+=((uint32_t)k[9])<<8;
  281     case 9 : c+=k[8];
  282     case 8 : b+=((uint32_t)k[7])<<24;
  283     case 7 : b+=((uint32_t)k[6])<<16;
  284     case 6 : b+=((uint32_t)k[5])<<8;
  285     case 5 : b+=k[4];
  286     case 4 : a+=((uint32_t)k[3])<<24;
  287     case 3 : a+=((uint32_t)k[2])<<16;
  288     case 2 : a+=((uint32_t)k[1])<<8;
  289     case 1 : a+=k[0];
  290              break;
  291     case 0 : return c;  /* zero length strings require no mixing */
  292     }
  293   }
  294 
  295   final(a,b,c);
  296   return c;             /* zero length strings require no mixing */
  297 }
  298 
  299 #elif HASH_BIG_ENDIAN == 1
  300 /*
  301  * hashbig():
  302  * This is the same as hashword() on big-endian machines.  It is different
  303  * from hashlittle() on all machines.  hashbig() takes advantage of
  304  * big-endian byte ordering.
  305  */
  306 uint32_t jenkins_hash( const void *key, size_t length)
  307 {
  308   uint32_t a,b,c;
  309   union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
  310 
  311   /* Set up the internal state */
  312   a = b = c = 0xdeadbeef + ((uint32_t)length) + 0;
  313 
  314   u.ptr = key;
  315   if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
  316     const uint32_t *k = key;                           /* read 32-bit chunks */
  317 #ifdef VALGRIND
  318     const uint8_t  *k8;
  319 #endif /* ifdef VALGRIND */
  320 
  321     /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
  322     while (length > 12)
  323     {
  324       a += k[0];
  325       b += k[1];
  326       c += k[2];
  327       mix(a,b,c);
  328       length -= 12;
  329       k += 3;
  330     }
  331 
  332     /*----------------------------- handle the last (probably partial) block */
  333     /*
  334      * "k[2]<<8" actually reads beyond the end of the string, but
  335      * then shifts out the part it's not allowed to read.  Because the
  336      * string is aligned, the illegal read is in the same word as the
  337      * rest of the string.  Every machine with memory protection I've seen
  338      * does it on word boundaries, so is OK with this.  But VALGRIND will
  339      * still catch it and complain.  The masking trick does make the hash
  340      * noticeably faster for short strings (like English words).
  341      */
  342 #ifndef VALGRIND
  343 
  344     switch(length)
  345     {
  346     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
  347     case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
  348     case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
  349     case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
  350     case 8 : b+=k[1]; a+=k[0]; break;
  351     case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
  352     case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
  353     case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
  354     case 4 : a+=k[0]; break;
  355     case 3 : a+=k[0]&0xffffff00; break;
  356     case 2 : a+=k[0]&0xffff0000; break;
  357     case 1 : a+=k[0]&0xff000000; break;
  358     case 0 : return c;              /* zero length strings require no mixing */
  359     }
  360 
  361 #else  /* make valgrind happy */
  362 
  363     k8 = (const uint8_t *)k;
  364     switch(length)                   /* all the case statements fall through */
  365     {
  366     case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
  367     case 11: c+=((uint32_t)k8[10])<<8;  /* fall through */
  368     case 10: c+=((uint32_t)k8[9])<<16;  /* fall through */
  369     case 9 : c+=((uint32_t)k8[8])<<24;  /* fall through */
  370     case 8 : b+=k[1]; a+=k[0]; break;
  371     case 7 : b+=((uint32_t)k8[6])<<8;   /* fall through */
  372     case 6 : b+=((uint32_t)k8[5])<<16;  /* fall through */
  373     case 5 : b+=((uint32_t)k8[4])<<24;  /* fall through */
  374     case 4 : a+=k[0]; break;
  375     case 3 : a+=((uint32_t)k8[2])<<8;   /* fall through */
  376     case 2 : a+=((uint32_t)k8[1])<<16;  /* fall through */
  377     case 1 : a+=((uint32_t)k8[0])<<24; break;
  378     case 0 : return c;
  379     }
  380 
  381 #endif /* !VALGRIND */
  382 
  383   } else {                        /* need to read the key one byte at a time */
  384     const uint8_t *k = key;
  385 
  386     /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
  387     while (length > 12)
  388     {
  389       a += ((uint32_t)k[0])<<24;
  390       a += ((uint32_t)k[1])<<16;
  391       a += ((uint32_t)k[2])<<8;
  392       a += ((uint32_t)k[3]);
  393       b += ((uint32_t)k[4])<<24;
  394       b += ((uint32_t)k[5])<<16;
  395       b += ((uint32_t)k[6])<<8;
  396       b += ((uint32_t)k[7]);
  397       c += ((uint32_t)k[8])<<24;
  398       c += ((uint32_t)k[9])<<16;
  399       c += ((uint32_t)k[10])<<8;
  400       c += ((uint32_t)k[11]);
  401       mix(a,b,c);
  402       length -= 12;
  403       k += 12;
  404     }
  405 
  406     /*-------------------------------- last block: affect all 32 bits of (c) */
  407     switch(length)                   /* all the case statements fall through */
  408     {
  409     case 12: c+=k[11];
  410     case 11: c+=((uint32_t)k[10])<<8;
  411     case 10: c+=((uint32_t)k[9])<<16;
  412     case 9 : c+=((uint32_t)k[8])<<24;
  413     case 8 : b+=k[7];
  414     case 7 : b+=((uint32_t)k[6])<<8;
  415     case 6 : b+=((uint32_t)k[5])<<16;
  416     case 5 : b+=((uint32_t)k[4])<<24;
  417     case 4 : a+=k[3];
  418     case 3 : a+=((uint32_t)k[2])<<8;
  419     case 2 : a+=((uint32_t)k[1])<<16;
  420     case 1 : a+=((uint32_t)k[0])<<24;
  421              break;
  422     case 0 : return c;
  423     }
  424   }
  425 
  426   final(a,b,c);
  427   return c;
  428 }
  429 #else /* HASH_XXX_ENDIAN == 1 */
  430 #error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN
  431 #endif /* HASH_XXX_ENDIAN == 1 */