"Fossies" - the Fresh Open Source Software Archive

Member "memcached-1.6.9/items.c" (21 Nov 2020, 63824 Bytes) of package /linux/www/memcached-1.6.9.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "items.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.6.8_vs_1.6.9.

    1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
    2 #include "memcached.h"
    3 #include "bipbuffer.h"
    4 #include "slab_automove.h"
    5 #include "storage.h"
    6 #ifdef EXTSTORE
    7 #include "slab_automove_extstore.h"
    8 #endif
    9 #include <sys/stat.h>
   10 #include <sys/socket.h>
   11 #include <sys/resource.h>
   12 #include <fcntl.h>
   13 #include <netinet/in.h>
   14 #include <errno.h>
   15 #include <stdlib.h>
   16 #include <stdio.h>
   17 #include <signal.h>
   18 #include <string.h>
   19 #include <time.h>
   20 #include <assert.h>
   21 #include <unistd.h>
   22 #include <poll.h>
   23 
   24 /* Forward Declarations */
   25 static void item_link_q(item *it);
   26 static void item_unlink_q(item *it);
   27 
   28 static unsigned int lru_type_map[4] = {HOT_LRU, WARM_LRU, COLD_LRU, TEMP_LRU};
   29 
   30 #define LARGEST_ID POWER_LARGEST
   31 typedef struct {
   32     uint64_t evicted;
   33     uint64_t evicted_nonzero;
   34     uint64_t reclaimed;
   35     uint64_t outofmemory;
   36     uint64_t tailrepairs;
   37     uint64_t expired_unfetched; /* items reclaimed but never touched */
   38     uint64_t evicted_unfetched; /* items evicted but never touched */
   39     uint64_t evicted_active; /* items evicted that should have been shuffled */
   40     uint64_t crawler_reclaimed;
   41     uint64_t crawler_items_checked;
   42     uint64_t lrutail_reflocked;
   43     uint64_t moves_to_cold;
   44     uint64_t moves_to_warm;
   45     uint64_t moves_within_lru;
   46     uint64_t direct_reclaims;
   47     uint64_t hits_to_hot;
   48     uint64_t hits_to_warm;
   49     uint64_t hits_to_cold;
   50     uint64_t hits_to_temp;
   51     uint64_t mem_requested;
   52     rel_time_t evicted_time;
   53 } itemstats_t;
   54 
   55 static item *heads[LARGEST_ID];
   56 static item *tails[LARGEST_ID];
   57 static itemstats_t itemstats[LARGEST_ID];
   58 static unsigned int sizes[LARGEST_ID];
   59 static uint64_t sizes_bytes[LARGEST_ID];
   60 static unsigned int *stats_sizes_hist = NULL;
   61 static uint64_t stats_sizes_cas_min = 0;
   62 static int stats_sizes_buckets = 0;
   63 static uint64_t cas_id = 0;
   64 
   65 static volatile int do_run_lru_maintainer_thread = 0;
   66 static int lru_maintainer_initialized = 0;
   67 static pthread_mutex_t lru_maintainer_lock = PTHREAD_MUTEX_INITIALIZER;
   68 static pthread_mutex_t cas_id_lock = PTHREAD_MUTEX_INITIALIZER;
   69 static pthread_mutex_t stats_sizes_lock = PTHREAD_MUTEX_INITIALIZER;
   70 
   71 void item_stats_reset(void) {
   72     int i;
   73     for (i = 0; i < LARGEST_ID; i++) {
   74         pthread_mutex_lock(&lru_locks[i]);
   75         memset(&itemstats[i], 0, sizeof(itemstats_t));
   76         pthread_mutex_unlock(&lru_locks[i]);
   77     }
   78 }
   79 
   80 /* called with class lru lock held */
   81 void do_item_stats_add_crawl(const int i, const uint64_t reclaimed,
   82         const uint64_t unfetched, const uint64_t checked) {
   83     itemstats[i].crawler_reclaimed += reclaimed;
   84     itemstats[i].expired_unfetched += unfetched;
   85     itemstats[i].crawler_items_checked += checked;
   86 }
   87 
   88 typedef struct _lru_bump_buf {
   89     struct _lru_bump_buf *prev;
   90     struct _lru_bump_buf *next;
   91     pthread_mutex_t mutex;
   92     bipbuf_t *buf;
   93     uint64_t dropped;
   94 } lru_bump_buf;
   95 
   96 typedef struct {
   97     item *it;
   98     uint32_t hv;
   99 } lru_bump_entry;
  100 
  101 static lru_bump_buf *bump_buf_head = NULL;
  102 static lru_bump_buf *bump_buf_tail = NULL;
  103 static pthread_mutex_t bump_buf_lock = PTHREAD_MUTEX_INITIALIZER;
  104 /* TODO: tunable? Need bench results */
  105 #define LRU_BUMP_BUF_SIZE 8192
  106 
  107 static bool lru_bump_async(lru_bump_buf *b, item *it, uint32_t hv);
  108 static uint64_t lru_total_bumps_dropped(void);
  109 
  110 /* Get the next CAS id for a new item. */
  111 /* TODO: refactor to use atomics for this. */
  112 uint64_t get_cas_id(void) {
  113     pthread_mutex_lock(&cas_id_lock);
  114     uint64_t next_id = ++cas_id;
  115     pthread_mutex_unlock(&cas_id_lock);
  116     return next_id;
  117 }
  118 
  119 void set_cas_id(uint64_t new_cas) {
  120     pthread_mutex_lock(&cas_id_lock);
  121     cas_id = new_cas;
  122     pthread_mutex_unlock(&cas_id_lock);
  123 }
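
/* Illustrative sketch of the TODO above, not part of items.c: with C11
 * <stdatomic.h> the cas_id mutex could be dropped. Names here are
 * hypothetical.
 */
#include <stdatomic.h>
static atomic_uint_fast64_t cas_id_atomic;
static inline uint64_t get_cas_id_atomic(void) {
    /* relaxed ordering suffices: callers only need unique, increasing IDs */
    return atomic_fetch_add_explicit(&cas_id_atomic, 1, memory_order_relaxed) + 1;
}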
  124 
  125 int item_is_flushed(item *it) {
  126     rel_time_t oldest_live = settings.oldest_live;
  127     uint64_t cas = ITEM_get_cas(it);
  128     uint64_t oldest_cas = settings.oldest_cas;
  129     if (oldest_live == 0 || oldest_live > current_time)
  130         return 0;
  131     if ((it->time <= oldest_live)
  132             || (oldest_cas != 0 && cas != 0 && cas < oldest_cas)) {
  133         return 1;
  134     }
  135     return 0;
  136 }
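
/* Illustrative note: "flush_all" records its effective moment (minus one
 * second) in settings.oldest_live, so any item whose it->time (time of last
 * link) falls at or before that point is discarded lazily on access.
 * settings.oldest_cas refines this for items relinked within the same
 * one-second window when CAS is enabled.
 */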
  137 
  138 /* must be locked before call */
  139 unsigned int do_get_lru_size(uint32_t id) {
  140     return sizes[id];
  141 }
  142 
  143 /* Enable this for reference-count debugging. */
  144 #if 0
  145 # define DEBUG_REFCNT(it,op) \
  146                 fprintf(stderr, "item %p refcnt(%c) %d %c%c\n", \
  147                         (void *)it, op, it->refcount, \
  148                         (it->it_flags & ITEM_LINKED) ? 'L' : ' ', \
  149                         (it->it_flags & ITEM_SLABBED) ? 'S' : ' ')
  150 #else
  151 # define DEBUG_REFCNT(it,op) while(0)
  152 #endif
  153 
  154 /**
  155  * Generates the variable-sized part of the header for an object.
  156  *
  157  * nkey    - The length of the key
  158  * flags   - key flags
  159  * nbytes  - Number of bytes to hold value and additional CRLF terminator
  160  * suffix  - Buffer for the "VALUE" line suffix (flags, size).
  161  * nsuffix - The length of the suffix is stored here.
  162  *
  163  * Returns the total size of the header.
  164  */
  165 static size_t item_make_header(const uint8_t nkey, const unsigned int flags, const int nbytes,
  166                      char *suffix, uint8_t *nsuffix) {
  167     if (flags == 0) {
  168         *nsuffix = 0;
  169     } else {
  170         *nsuffix = sizeof(flags);
  171     }
  172     return sizeof(item) + nkey + *nsuffix + nbytes;
  173 }
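
/* Worked example, illustrative: storing key "foo" (callers pass nkey + 1 to
 * cover a terminator byte), nonzero client flags, and the value "hello" plus
 * its CRLF terminator (nbytes = 7):
 *   ntotal = sizeof(item) + 4 + sizeof(unsigned int) + 7
 * do_item_alloc() below adds sizeof(uint64_t) on top when CAS is enabled.
 */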
  174 
  175 item *do_item_alloc_pull(const size_t ntotal, const unsigned int id) {
  176     item *it = NULL;
  177     int i;
  178     /* If no memory is available, attempt a direct LRU juggle/eviction */
  179     /* This is racy in order to simplify lru_pull_tail; in cases where
  180      * locked items are on the tail, we want them to fall out and cause
  181      * occasional OOMs rather than internally working around them.
  182      * This also gives one fewer code path for slab alloc/free.
  183      */
  184     for (i = 0; i < 10; i++) {
  185         /* Try to reclaim memory first */
  186         if (!settings.lru_segmented) {
  187             lru_pull_tail(id, COLD_LRU, 0, 0, 0, NULL);
  188         }
  189         it = slabs_alloc(ntotal, id, 0);
  190 
  191         if (it == NULL) {
  192             // We send '0' in for "total_bytes" as this routine is always
  193             // pulling to evict, or forcing HOT -> COLD migration.
  194             // As of this writing, total_bytes isn't at all used with COLD_LRU.
  195             if (lru_pull_tail(id, COLD_LRU, 0, LRU_PULL_EVICT, 0, NULL) <= 0) {
  196                 if (settings.lru_segmented) {
  197                     lru_pull_tail(id, HOT_LRU, 0, 0, 0, NULL);
  198                 } else {
  199                     break;
  200                 }
  201             }
  202         } else {
  203             break;
  204         }
  205     }
  206 
  207     if (i > 0) {
  208         pthread_mutex_lock(&lru_locks[id]);
  209         itemstats[id].direct_reclaims += i;
  210         pthread_mutex_unlock(&lru_locks[id]);
  211     }
  212 
  213     return it;
  214 }
  215 
  216 /* Chain another chunk onto this chunk. */
  217 /* slab mover: if it finds a chunk without the ITEM_CHUNK flag and no
  218  * ITEM_LINKED flag, it counts as busy and skips it.
  219  * I think it might still not be safe to do linking outside of the slab lock.
  220  */
  221 item_chunk *do_item_alloc_chunk(item_chunk *ch, const size_t bytes_remain) {
  222     // TODO: Should be a cleaner way of finding real size with slabber calls
  223     size_t size = bytes_remain + sizeof(item_chunk);
  224     if (size > settings.slab_chunk_size_max)
  225         size = settings.slab_chunk_size_max;
  226     unsigned int id = slabs_clsid(size);
  227 
  228     item_chunk *nch = (item_chunk *) do_item_alloc_pull(size, id);
  229     if (nch == NULL)
  230         return NULL;
  231 
  232     // link in.
  233     // ITEM_CHUNK[ED] bits need to be protected by the slabs lock.
  234     slabs_mlock();
  235     nch->head = ch->head;
  236     ch->next = nch;
  237     nch->prev = ch;
  238     nch->next = 0;
  239     nch->used = 0;
  240     nch->slabs_clsid = id;
  241     nch->size = size - sizeof(item_chunk);
  242     nch->it_flags |= ITEM_CHUNK;
  243     slabs_munlock();
  244     return nch;
  245 }
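
/* Illustrative sketch (hypothetical helper, not part of items.c): streaming
 * upload bytes into a chunked item by walking the chain and growing it with
 * do_item_alloc_chunk() whenever the current chunk fills. Locking and
 * cleanup on failure are elided.
 */
static int example_append_to_chunks(item_chunk *ch, const char *buf, int len) {
    while (len > 0) {
        int avail = ch->size - ch->used;
        if (avail == 0) {
            item_chunk *nch = do_item_alloc_chunk(ch, len);
            if (nch == NULL)
                return -1; /* out of memory; caller unwinds the item */
            ch = nch;
            avail = ch->size;
        }
        int copy = len < avail ? len : avail;
        memcpy(ch->data + ch->used, buf, copy);
        ch->used += copy;
        buf += copy;
        len -= copy;
    }
    return 0;
}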
  246 
  247 item *do_item_alloc(char *key, const size_t nkey, const unsigned int flags,
  248                     const rel_time_t exptime, const int nbytes) {
  249     uint8_t nsuffix;
  250     item *it = NULL;
  251     char suffix[40];
  252     // Avoid potential underflows.
  253     if (nbytes < 2)
  254         return 0;
  255 
  256     size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
  257     if (settings.use_cas) {
  258         ntotal += sizeof(uint64_t);
  259     }
  260 
  261     unsigned int id = slabs_clsid(ntotal);
  262     unsigned int hdr_id = 0;
  263     if (id == 0)
  264         return 0;
  265 
  266     /* This is a large item. Allocate a header object now, lazily allocate
  267      *  chunks while reading the upload.
  268      */
  269     if (ntotal > settings.slab_chunk_size_max) {
  270         /* We still link this item into the LRU for the larger slab class, but
  271          * we're pulling a header from an entirely different slab class. The
  272          * free routines handle large items specifically.
  273          */
  274         int htotal = nkey + 1 + nsuffix + sizeof(item) + sizeof(item_chunk);
  275         if (settings.use_cas) {
  276             htotal += sizeof(uint64_t);
  277         }
  278 #ifdef NEED_ALIGN
  279         // header chunk needs to be padded on some systems
  280         int remain = htotal % 8;
  281         if (remain != 0) {
  282             htotal += 8 - remain;
  283         }
  284 #endif
  285         hdr_id = slabs_clsid(htotal);
  286         it = do_item_alloc_pull(htotal, hdr_id);
  287         /* setting ITEM_CHUNKED is fine here because we aren't LINKED yet. */
  288         if (it != NULL)
  289             it->it_flags |= ITEM_CHUNKED;
  290     } else {
  291         it = do_item_alloc_pull(ntotal, id);
  292     }
  293 
  294     if (it == NULL) {
  295         pthread_mutex_lock(&lru_locks[id]);
  296         itemstats[id].outofmemory++;
  297         pthread_mutex_unlock(&lru_locks[id]);
  298         return NULL;
  299     }
  300 
  301     assert(it->it_flags == 0 || it->it_flags == ITEM_CHUNKED);
  302     //assert(it != heads[id]);
  303 
  304     /* Refcount is seeded to 1 by slabs_alloc() */
  305     it->next = it->prev = 0;
  306 
  307     /* Items are initially loaded into the HOT_LRU. This is '0' but I want at
  308      * least a note here. Compiler (hopefully?) optimizes this out.
  309      */
  310     if (settings.temp_lru &&
  311             exptime - current_time <= settings.temporary_ttl) {
  312         id |= TEMP_LRU;
  313     } else if (settings.lru_segmented) {
  314         id |= HOT_LRU;
  315     } else {
  316         /* There is only COLD in compat-mode */
  317         id |= COLD_LRU;
  318     }
  319     it->slabs_clsid = id;
  320 
  321     DEBUG_REFCNT(it, '*');
  322     it->it_flags |= settings.use_cas ? ITEM_CAS : 0;
  323     it->it_flags |= nsuffix != 0 ? ITEM_CFLAGS : 0;
  324     it->nkey = nkey;
  325     it->nbytes = nbytes;
  326     memcpy(ITEM_key(it), key, nkey);
  327     it->exptime = exptime;
  328     if (nsuffix > 0) {
  329         memcpy(ITEM_suffix(it), &flags, sizeof(flags));
  330     }
  331 
  332     /* Initialize internal chunk. */
  333     if (it->it_flags & ITEM_CHUNKED) {
  334         item_chunk *chunk = (item_chunk *) ITEM_schunk(it);
  335 
  336         chunk->next = 0;
  337         chunk->prev = 0;
  338         chunk->used = 0;
  339         chunk->size = 0;
  340         chunk->head = it;
  341         chunk->orig_clsid = hdr_id;
  342     }
  343     it->h_next = 0;
  344 
  345     return it;
  346 }
  347 
  348 void item_free(item *it) {
  349     size_t ntotal = ITEM_ntotal(it);
  350     unsigned int clsid;
  351     assert((it->it_flags & ITEM_LINKED) == 0);
  352     assert(it != heads[it->slabs_clsid]);
  353     assert(it != tails[it->slabs_clsid]);
  354     assert(it->refcount == 0);
  355 
  356     /* so slab size changer can tell later if item is already free or not */
  357     clsid = ITEM_clsid(it);
  358     DEBUG_REFCNT(it, 'F');
  359     slabs_free(it, ntotal, clsid);
  360 }
  361 
  362 /**
  363  * Returns true if an item will fit in the cache (its size does not exceed
  364  * the maximum for a cache entry.)
  365  */
  366 bool item_size_ok(const size_t nkey, const int flags, const int nbytes) {
  367     char prefix[40];
  368     uint8_t nsuffix;
  369     if (nbytes < 2)
  370         return false;
  371 
  372     size_t ntotal = item_make_header(nkey + 1, flags, nbytes,
  373                                      prefix, &nsuffix);
  374     if (settings.use_cas) {
  375         ntotal += sizeof(uint64_t);
  376     }
  377 
  378     return slabs_clsid(ntotal) != 0;
  379 }
  380 
  381 /* fixing stats/references during warm start */
  382 void do_item_link_fixup(item *it) {
  383     item **head, **tail;
  384     int ntotal = ITEM_ntotal(it);
  385     uint32_t hv = hash(ITEM_key(it), it->nkey);
  386     assoc_insert(it, hv);
  387 
  388     head = &heads[it->slabs_clsid];
  389     tail = &tails[it->slabs_clsid];
  390     if (it->prev == 0 && *head == 0) *head = it;
  391     if (it->next == 0 && *tail == 0) *tail = it;
  392     sizes[it->slabs_clsid]++;
  393     sizes_bytes[it->slabs_clsid] += ntotal;
  394 
  395     STATS_LOCK();
  396     stats_state.curr_bytes += ntotal;
  397     stats_state.curr_items += 1;
  398     stats.total_items += 1;
  399     STATS_UNLOCK();
  400 
  401     item_stats_sizes_add(it);
  402 
  403     return;
  404 }
  405 
  406 static void do_item_link_q(item *it) { /* item is the new head */
  407     item **head, **tail;
  408     assert((it->it_flags & ITEM_SLABBED) == 0);
  409 
  410     head = &heads[it->slabs_clsid];
  411     tail = &tails[it->slabs_clsid];
  412     assert(it != *head);
  413     assert((*head && *tail) || (*head == 0 && *tail == 0));
  414     it->prev = 0;
  415     it->next = *head;
  416     if (it->next) it->next->prev = it;
  417     *head = it;
  418     if (*tail == 0) *tail = it;
  419     sizes[it->slabs_clsid]++;
  420 #ifdef EXTSTORE
  421     if (it->it_flags & ITEM_HDR) {
  422         sizes_bytes[it->slabs_clsid] += (ITEM_ntotal(it) - it->nbytes) + sizeof(item_hdr);
  423     } else {
  424         sizes_bytes[it->slabs_clsid] += ITEM_ntotal(it);
  425     }
  426 #else
  427     sizes_bytes[it->slabs_clsid] += ITEM_ntotal(it);
  428 #endif
  429 
  430     return;
  431 }
  432 
  433 static void item_link_q(item *it) {
  434     pthread_mutex_lock(&lru_locks[it->slabs_clsid]);
  435     do_item_link_q(it);
  436     pthread_mutex_unlock(&lru_locks[it->slabs_clsid]);
  437 }
  438 
  439 static void item_link_q_warm(item *it) {
  440     pthread_mutex_lock(&lru_locks[it->slabs_clsid]);
  441     do_item_link_q(it);
  442     itemstats[it->slabs_clsid].moves_to_warm++;
  443     pthread_mutex_unlock(&lru_locks[it->slabs_clsid]);
  444 }
  445 
  446 static void do_item_unlink_q(item *it) {
  447     item **head, **tail;
  448     head = &heads[it->slabs_clsid];
  449     tail = &tails[it->slabs_clsid];
  450 
  451     if (*head == it) {
  452         assert(it->prev == 0);
  453         *head = it->next;
  454     }
  455     if (*tail == it) {
  456         assert(it->next == 0);
  457         *tail = it->prev;
  458     }
  459     assert(it->next != it);
  460     assert(it->prev != it);
  461 
  462     if (it->next) it->next->prev = it->prev;
  463     if (it->prev) it->prev->next = it->next;
  464     sizes[it->slabs_clsid]--;
  465 #ifdef EXTSTORE
  466     if (it->it_flags & ITEM_HDR) {
  467         sizes_bytes[it->slabs_clsid] -= (ITEM_ntotal(it) - it->nbytes) + sizeof(item_hdr);
  468     } else {
  469         sizes_bytes[it->slabs_clsid] -= ITEM_ntotal(it);
  470     }
  471 #else
  472     sizes_bytes[it->slabs_clsid] -= ITEM_ntotal(it);
  473 #endif
  474 
  475     return;
  476 }
  477 
  478 static void item_unlink_q(item *it) {
  479     pthread_mutex_lock(&lru_locks[it->slabs_clsid]);
  480     do_item_unlink_q(it);
  481     pthread_mutex_unlock(&lru_locks[it->slabs_clsid]);
  482 }
  483 
  484 int do_item_link(item *it, const uint32_t hv) {
  485     MEMCACHED_ITEM_LINK(ITEM_key(it), it->nkey, it->nbytes);
  486     assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0);
  487     it->it_flags |= ITEM_LINKED;
  488     it->time = current_time;
  489 
  490     STATS_LOCK();
  491     stats_state.curr_bytes += ITEM_ntotal(it);
  492     stats_state.curr_items += 1;
  493     stats.total_items += 1;
  494     STATS_UNLOCK();
  495 
  496     /* Allocate a new CAS ID on link. */
  497     ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);
  498     assoc_insert(it, hv);
  499     item_link_q(it);
  500     refcount_incr(it);
  501     item_stats_sizes_add(it);
  502 
  503     return 1;
  504 }
  505 
  506 void do_item_unlink(item *it, const uint32_t hv) {
  507     MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
  508     if ((it->it_flags & ITEM_LINKED) != 0) {
  509         it->it_flags &= ~ITEM_LINKED;
  510         STATS_LOCK();
  511         stats_state.curr_bytes -= ITEM_ntotal(it);
  512         stats_state.curr_items -= 1;
  513         STATS_UNLOCK();
  514         item_stats_sizes_remove(it);
  515         assoc_delete(ITEM_key(it), it->nkey, hv);
  516         item_unlink_q(it);
  517         do_item_remove(it);
  518     }
  519 }
  520 
  521 /* FIXME: Is it necessary to keep this copy/pasted code? */
  522 void do_item_unlink_nolock(item *it, const uint32_t hv) {
  523     MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
  524     if ((it->it_flags & ITEM_LINKED) != 0) {
  525         it->it_flags &= ~ITEM_LINKED;
  526         STATS_LOCK();
  527         stats_state.curr_bytes -= ITEM_ntotal(it);
  528         stats_state.curr_items -= 1;
  529         STATS_UNLOCK();
  530         item_stats_sizes_remove(it);
  531         assoc_delete(ITEM_key(it), it->nkey, hv);
  532         do_item_unlink_q(it);
  533         do_item_remove(it);
  534     }
  535 }
  536 
  537 void do_item_remove(item *it) {
  538     MEMCACHED_ITEM_REMOVE(ITEM_key(it), it->nkey, it->nbytes);
  539     assert((it->it_flags & ITEM_SLABBED) == 0);
  540     assert(it->refcount > 0);
  541 
  542     if (refcount_decr(it) == 0) {
  543         item_free(it);
  544     }
  545 }
  546 
  547 /* Bump the last accessed time, or relink if we're in compat mode */
  548 void do_item_update(item *it) {
  549     MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);
  550 
  551     /* Hits to COLD_LRU immediately move to WARM. */
  552     if (settings.lru_segmented) {
  553         assert((it->it_flags & ITEM_SLABBED) == 0);
  554         if ((it->it_flags & ITEM_LINKED) != 0) {
  555             if (ITEM_lruid(it) == COLD_LRU && (it->it_flags & ITEM_ACTIVE)) {
  556                 it->time = current_time;
  557                 item_unlink_q(it);
  558                 it->slabs_clsid = ITEM_clsid(it);
  559                 it->slabs_clsid |= WARM_LRU;
  560                 it->it_flags &= ~ITEM_ACTIVE;
  561                 item_link_q_warm(it);
  562             } else {
  563                 it->time = current_time;
  564             }
  565         }
  566     } else if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
  567         assert((it->it_flags & ITEM_SLABBED) == 0);
  568 
  569         if ((it->it_flags & ITEM_LINKED) != 0) {
  570             it->time = current_time;
  571             item_unlink_q(it);
  572             item_link_q(it);
  573         }
  574     }
  575 }
  576 
  577 int do_item_replace(item *it, item *new_it, const uint32_t hv) {
  578     MEMCACHED_ITEM_REPLACE(ITEM_key(it), it->nkey, it->nbytes,
  579                            ITEM_key(new_it), new_it->nkey, new_it->nbytes);
  580     assert((it->it_flags & ITEM_SLABBED) == 0);
  581 
  582     do_item_unlink(it, hv);
  583     return do_item_link(new_it, hv);
  584 }
  585 
  586 /*@null@*/
  587 /* This is walking the line of violating lock order, but I think it's safe.
  588  * If the LRU lock is held, an item in the LRU cannot be wiped and freed.
  589  * The data could possibly be overwritten, but this is only accessing the
  590  * headers.
  591  * It may not be the best idea to leave it like this, but for now it's safe.
  592  */
  593 char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes) {
  594     unsigned int memlimit = 2 * 1024 * 1024;   /* 2MB max response size */
  595     char *buffer;
  596     unsigned int bufcurr;
  597     item *it;
  598     unsigned int len;
  599     unsigned int shown = 0;
  600     char key_temp[KEY_MAX_LENGTH + 1];
  601     char temp[512];
  602     unsigned int id = slabs_clsid;
  603     id |= COLD_LRU;
  604 
  605     pthread_mutex_lock(&lru_locks[id]);
  606     it = heads[id];
  607 
  608     buffer = malloc((size_t)memlimit);
  609     if (buffer == 0) {
  610         pthread_mutex_unlock(&lru_locks[id]);
  611         return NULL;
  612     }
  613     bufcurr = 0;
  614 
  615     while (it != NULL && (limit == 0 || shown < limit)) {
  616         assert(it->nkey <= KEY_MAX_LENGTH);
  617         if (it->nbytes == 0 && it->nkey == 0) {
  618             it = it->next;
  619             continue;
  620         }
  621         /* Copy the key since it may not be null-terminated in the struct */
  622         strncpy(key_temp, ITEM_key(it), it->nkey);
  623         key_temp[it->nkey] = 0x00; /* terminate */
  624         len = snprintf(temp, sizeof(temp), "ITEM %s [%d b; %llu s]\r\n",
  625                        key_temp, it->nbytes - 2,
  626                        it->exptime == 0 ? 0 :
  627                        (unsigned long long)it->exptime + process_started);
  628         if (bufcurr + len + 6 > memlimit)  /* 6 is END\r\n\0 */
  629             break;
  630         memcpy(buffer + bufcurr, temp, len);
  631         bufcurr += len;
  632         shown++;
  633         it = it->next;
  634     }
  635 
  636     memcpy(buffer + bufcurr, "END\r\n", 6);
  637     bufcurr += 5;
  638 
  639     *bytes = bufcurr;
  640     pthread_mutex_unlock(&lru_locks[id]);
  641     return buffer;
  642 }
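
/* Example response, illustrative, for "stats cachedump <class> <limit>":
 *   ITEM foo [5 b; 1605912345 s]
 *   ITEM bar [12 b; 0 s]
 *   END
 * The byte count excludes the trailing CRLF (it->nbytes - 2); the time is an
 * absolute unix timestamp (exptime + process_started), or 0 for no expiry.
 */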
  643 
  644 /* With refactoring of the various stats code the automover won't need a
  645  * custom function here.
  646  */
  647 void fill_item_stats_automove(item_stats_automove *am) {
  648     int n;
  649     for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
  650         item_stats_automove *cur = &am[n];
  651 
  652         // outofmemory records into HOT
  653         int i = n | HOT_LRU;
  654         pthread_mutex_lock(&lru_locks[i]);
  655         cur->outofmemory = itemstats[i].outofmemory;
  656         pthread_mutex_unlock(&lru_locks[i]);
  657 
  658         // evictions and tail age are from COLD
  659         i = n | COLD_LRU;
  660         pthread_mutex_lock(&lru_locks[i]);
  661         cur->evicted = itemstats[i].evicted;
  662         if (tails[i]) {
  663             cur->age = current_time - tails[i]->time;
  664         } else {
  665             cur->age = 0;
  666         }
  667         pthread_mutex_unlock(&lru_locks[i]);
  668      }
  669 }
  670 
  671 void item_stats_totals(ADD_STAT add_stats, void *c) {
  672     itemstats_t totals;
  673     memset(&totals, 0, sizeof(itemstats_t));
  674     int n;
  675     for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
  676         int x;
  677         int i;
  678         for (x = 0; x < 4; x++) {
  679             i = n | lru_type_map[x];
  680             pthread_mutex_lock(&lru_locks[i]);
  681             totals.expired_unfetched += itemstats[i].expired_unfetched;
  682             totals.evicted_unfetched += itemstats[i].evicted_unfetched;
  683             totals.evicted_active += itemstats[i].evicted_active;
  684             totals.evicted += itemstats[i].evicted;
  685             totals.reclaimed += itemstats[i].reclaimed;
  686             totals.crawler_reclaimed += itemstats[i].crawler_reclaimed;
  687             totals.crawler_items_checked += itemstats[i].crawler_items_checked;
  688             totals.lrutail_reflocked += itemstats[i].lrutail_reflocked;
  689             totals.moves_to_cold += itemstats[i].moves_to_cold;
  690             totals.moves_to_warm += itemstats[i].moves_to_warm;
  691             totals.moves_within_lru += itemstats[i].moves_within_lru;
  692             totals.direct_reclaims += itemstats[i].direct_reclaims;
  693             pthread_mutex_unlock(&lru_locks[i]);
  694         }
  695     }
  696     APPEND_STAT("expired_unfetched", "%llu",
  697                 (unsigned long long)totals.expired_unfetched);
  698     APPEND_STAT("evicted_unfetched", "%llu",
  699                 (unsigned long long)totals.evicted_unfetched);
  700     if (settings.lru_maintainer_thread) {
  701         APPEND_STAT("evicted_active", "%llu",
  702                     (unsigned long long)totals.evicted_active);
  703     }
  704     APPEND_STAT("evictions", "%llu",
  705                 (unsigned long long)totals.evicted);
  706     APPEND_STAT("reclaimed", "%llu",
  707                 (unsigned long long)totals.reclaimed);
  708     APPEND_STAT("crawler_reclaimed", "%llu",
  709                 (unsigned long long)totals.crawler_reclaimed);
  710     APPEND_STAT("crawler_items_checked", "%llu",
  711                 (unsigned long long)totals.crawler_items_checked);
  712     APPEND_STAT("lrutail_reflocked", "%llu",
  713                 (unsigned long long)totals.lrutail_reflocked);
  714     if (settings.lru_maintainer_thread) {
  715         APPEND_STAT("moves_to_cold", "%llu",
  716                     (unsigned long long)totals.moves_to_cold);
  717         APPEND_STAT("moves_to_warm", "%llu",
  718                     (unsigned long long)totals.moves_to_warm);
  719         APPEND_STAT("moves_within_lru", "%llu",
  720                     (unsigned long long)totals.moves_within_lru);
  721         APPEND_STAT("direct_reclaims", "%llu",
  722                     (unsigned long long)totals.direct_reclaims);
  723         APPEND_STAT("lru_bumps_dropped", "%llu",
  724                     (unsigned long long)lru_total_bumps_dropped());
  725     }
  726 }
  727 
  728 void item_stats(ADD_STAT add_stats, void *c) {
  729     struct thread_stats thread_stats;
  730     threadlocal_stats_aggregate(&thread_stats);
  731     itemstats_t totals;
  732     int n;
  733     for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
  734         memset(&totals, 0, sizeof(itemstats_t));
  735         int x;
  736         int i;
  737         unsigned int size = 0;
  738         unsigned int age  = 0;
  739         unsigned int age_hot = 0;
  740         unsigned int age_warm = 0;
  741         unsigned int lru_size_map[4];
  742         const char *fmt = "items:%d:%s";
  743         char key_str[STAT_KEY_LEN];
  744         char val_str[STAT_VAL_LEN];
  745         int klen = 0, vlen = 0;
  746         for (x = 0; x < 4; x++) {
  747             i = n | lru_type_map[x];
  748             pthread_mutex_lock(&lru_locks[i]);
  749             totals.evicted += itemstats[i].evicted;
  750             totals.evicted_nonzero += itemstats[i].evicted_nonzero;
  751             totals.outofmemory += itemstats[i].outofmemory;
  752             totals.tailrepairs += itemstats[i].tailrepairs;
  753             totals.reclaimed += itemstats[i].reclaimed;
  754             totals.expired_unfetched += itemstats[i].expired_unfetched;
  755             totals.evicted_unfetched += itemstats[i].evicted_unfetched;
  756             totals.evicted_active += itemstats[i].evicted_active;
  757             totals.crawler_reclaimed += itemstats[i].crawler_reclaimed;
  758             totals.crawler_items_checked += itemstats[i].crawler_items_checked;
  759             totals.lrutail_reflocked += itemstats[i].lrutail_reflocked;
  760             totals.moves_to_cold += itemstats[i].moves_to_cold;
  761             totals.moves_to_warm += itemstats[i].moves_to_warm;
  762             totals.moves_within_lru += itemstats[i].moves_within_lru;
  763             totals.direct_reclaims += itemstats[i].direct_reclaims;
  764             totals.mem_requested += sizes_bytes[i];
  765             size += sizes[i];
  766             lru_size_map[x] = sizes[i];
  767             if (lru_type_map[x] == COLD_LRU && tails[i] != NULL) {
  768                 age = current_time - tails[i]->time;
  769             } else if (lru_type_map[x] == HOT_LRU && tails[i] != NULL) {
  770                 age_hot = current_time - tails[i]->time;
  771             } else if (lru_type_map[x] == WARM_LRU && tails[i] != NULL) {
  772                 age_warm = current_time - tails[i]->time;
  773             }
  774             if (lru_type_map[x] == COLD_LRU)
  775                 totals.evicted_time = itemstats[i].evicted_time;
  776             switch (lru_type_map[x]) {
  777                 case HOT_LRU:
  778                     totals.hits_to_hot = thread_stats.lru_hits[i];
  779                     break;
  780                 case WARM_LRU:
  781                     totals.hits_to_warm = thread_stats.lru_hits[i];
  782                     break;
  783                 case COLD_LRU:
  784                     totals.hits_to_cold = thread_stats.lru_hits[i];
  785                     break;
  786                 case TEMP_LRU:
  787                     totals.hits_to_temp = thread_stats.lru_hits[i];
  788                     break;
  789             }
  790             pthread_mutex_unlock(&lru_locks[i]);
  791         }
  792         if (size == 0)
  793             continue;
  794         APPEND_NUM_FMT_STAT(fmt, n, "number", "%u", size);
  795         if (settings.lru_maintainer_thread) {
  796             APPEND_NUM_FMT_STAT(fmt, n, "number_hot", "%u", lru_size_map[0]);
  797             APPEND_NUM_FMT_STAT(fmt, n, "number_warm", "%u", lru_size_map[1]);
  798             APPEND_NUM_FMT_STAT(fmt, n, "number_cold", "%u", lru_size_map[2]);
  799             if (settings.temp_lru) {
  800                 APPEND_NUM_FMT_STAT(fmt, n, "number_temp", "%u", lru_size_map[3]);
  801             }
  802             APPEND_NUM_FMT_STAT(fmt, n, "age_hot", "%u", age_hot);
  803             APPEND_NUM_FMT_STAT(fmt, n, "age_warm", "%u", age_warm);
  804         }
  805         APPEND_NUM_FMT_STAT(fmt, n, "age", "%u", age);
  806         APPEND_NUM_FMT_STAT(fmt, n, "mem_requested", "%llu", (unsigned long long)totals.mem_requested);
  807         APPEND_NUM_FMT_STAT(fmt, n, "evicted",
  808                             "%llu", (unsigned long long)totals.evicted);
  809         APPEND_NUM_FMT_STAT(fmt, n, "evicted_nonzero",
  810                             "%llu", (unsigned long long)totals.evicted_nonzero);
  811         APPEND_NUM_FMT_STAT(fmt, n, "evicted_time",
  812                             "%u", totals.evicted_time);
  813         APPEND_NUM_FMT_STAT(fmt, n, "outofmemory",
  814                             "%llu", (unsigned long long)totals.outofmemory);
  815         APPEND_NUM_FMT_STAT(fmt, n, "tailrepairs",
  816                             "%llu", (unsigned long long)totals.tailrepairs);
  817         APPEND_NUM_FMT_STAT(fmt, n, "reclaimed",
  818                             "%llu", (unsigned long long)totals.reclaimed);
  819         APPEND_NUM_FMT_STAT(fmt, n, "expired_unfetched",
  820                             "%llu", (unsigned long long)totals.expired_unfetched);
  821         APPEND_NUM_FMT_STAT(fmt, n, "evicted_unfetched",
  822                             "%llu", (unsigned long long)totals.evicted_unfetched);
  823         if (settings.lru_maintainer_thread) {
  824             APPEND_NUM_FMT_STAT(fmt, n, "evicted_active",
  825                                 "%llu", (unsigned long long)totals.evicted_active);
  826         }
  827         APPEND_NUM_FMT_STAT(fmt, n, "crawler_reclaimed",
  828                             "%llu", (unsigned long long)totals.crawler_reclaimed);
  829         APPEND_NUM_FMT_STAT(fmt, n, "crawler_items_checked",
  830                             "%llu", (unsigned long long)totals.crawler_items_checked);
  831         APPEND_NUM_FMT_STAT(fmt, n, "lrutail_reflocked",
  832                             "%llu", (unsigned long long)totals.lrutail_reflocked);
  833         if (settings.lru_maintainer_thread) {
  834             APPEND_NUM_FMT_STAT(fmt, n, "moves_to_cold",
  835                                 "%llu", (unsigned long long)totals.moves_to_cold);
  836             APPEND_NUM_FMT_STAT(fmt, n, "moves_to_warm",
  837                                 "%llu", (unsigned long long)totals.moves_to_warm);
  838             APPEND_NUM_FMT_STAT(fmt, n, "moves_within_lru",
  839                                 "%llu", (unsigned long long)totals.moves_within_lru);
  840             APPEND_NUM_FMT_STAT(fmt, n, "direct_reclaims",
  841                                 "%llu", (unsigned long long)totals.direct_reclaims);
  842             APPEND_NUM_FMT_STAT(fmt, n, "hits_to_hot",
  843                                 "%llu", (unsigned long long)totals.hits_to_hot);
  844 
  845             APPEND_NUM_FMT_STAT(fmt, n, "hits_to_warm",
  846                                 "%llu", (unsigned long long)totals.hits_to_warm);
  847 
  848             APPEND_NUM_FMT_STAT(fmt, n, "hits_to_cold",
  849                                 "%llu", (unsigned long long)totals.hits_to_cold);
  850 
  851             APPEND_NUM_FMT_STAT(fmt, n, "hits_to_temp",
  852                                 "%llu", (unsigned long long)totals.hits_to_temp);
  853 
  854         }
  855     }
  856 
  857     /* getting here means both ascii and binary terminators fit */
  858     add_stats(NULL, 0, NULL, 0, c);
  859 }
  860 
  861 bool item_stats_sizes_status(void) {
  862     bool ret = false;
  863     mutex_lock(&stats_sizes_lock);
  864     if (stats_sizes_hist != NULL)
  865         ret = true;
  866     mutex_unlock(&stats_sizes_lock);
  867     return ret;
  868 }
  869 
  870 void item_stats_sizes_init(void) {
  871     if (stats_sizes_hist != NULL)
  872         return;
  873     stats_sizes_buckets = settings.item_size_max / 32 + 1;
  874     stats_sizes_hist = calloc(stats_sizes_buckets, sizeof(int));
  875     stats_sizes_cas_min = (settings.use_cas) ? get_cas_id() : 0;
  876 }
  877 
  878 void item_stats_sizes_enable(ADD_STAT add_stats, void *c) {
  879     mutex_lock(&stats_sizes_lock);
  880     if (!settings.use_cas) {
  881         APPEND_STAT("sizes_status", "error", "");
  882         APPEND_STAT("sizes_error", "cas_support_disabled", "");
  883     } else if (stats_sizes_hist == NULL) {
  884         item_stats_sizes_init();
  885         if (stats_sizes_hist != NULL) {
  886             APPEND_STAT("sizes_status", "enabled", "");
  887         } else {
  888             APPEND_STAT("sizes_status", "error", "");
  889             APPEND_STAT("sizes_error", "no_memory", "");
  890         }
  891     } else {
  892         APPEND_STAT("sizes_status", "enabled", "");
  893     }
  894     mutex_unlock(&stats_sizes_lock);
  895 }
  896 
  897 void item_stats_sizes_disable(ADD_STAT add_stats, void *c) {
  898     mutex_lock(&stats_sizes_lock);
  899     if (stats_sizes_hist != NULL) {
  900         free(stats_sizes_hist);
  901         stats_sizes_hist = NULL;
  902     }
  903     APPEND_STAT("sizes_status", "disabled", "");
  904     mutex_unlock(&stats_sizes_lock);
  905 }
  906 
  907 void item_stats_sizes_add(item *it) {
  908     if (stats_sizes_hist == NULL || stats_sizes_cas_min > ITEM_get_cas(it))
  909         return;
  910     int ntotal = ITEM_ntotal(it);
  911     int bucket = ntotal / 32;
  912     if ((ntotal % 32) != 0) bucket++;
  913     if (bucket < stats_sizes_buckets) stats_sizes_hist[bucket]++;
  914 }
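
/* Worked example: an item with ntotal = 100 maps to bucket 100/32 = 3, plus
 * one for the 4-byte remainder, i.e. bucket 4, reported under the key "128"
 * by item_stats_sizes() below. An exact multiple such as ntotal = 96 stays
 * in bucket 3 ("96").
 */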
  915 
  916 /* I think there's no way for this to be accurate without using the CAS value,
  917  * since items getting their time value bumped would otherwise pass this validation.
  918  */
  919 void item_stats_sizes_remove(item *it) {
  920     if (stats_sizes_hist == NULL || stats_sizes_cas_min > ITEM_get_cas(it))
  921         return;
  922     int ntotal = ITEM_ntotal(it);
  923     int bucket = ntotal / 32;
  924     if ((ntotal % 32) != 0) bucket++;
  925     if (bucket < stats_sizes_buckets) stats_sizes_hist[bucket]--;
  926 }
  927 
  928 /** dumps out a list of objects of each size, with granularity of 32 bytes */
  929 /*@null@*/
  930 /* Locks are correct based on a technicality. Holds LRU lock while doing the
  931  * work, so items can't go invalid, and it's only looking at header sizes
  932  * which don't change.
  933  */
  934 void item_stats_sizes(ADD_STAT add_stats, void *c) {
  935     mutex_lock(&stats_sizes_lock);
  936 
  937     if (stats_sizes_hist != NULL) {
  938         int i;
  939         for (i = 0; i < stats_sizes_buckets; i++) {
  940             if (stats_sizes_hist[i] != 0) {
  941                 char key[12];
  942                 snprintf(key, sizeof(key), "%d", i * 32);
  943                 APPEND_STAT(key, "%u", stats_sizes_hist[i]);
  944             }
  945         }
  946     } else {
  947         APPEND_STAT("sizes_status", "disabled", "");
  948     }
  949 
  950     add_stats(NULL, 0, NULL, 0, c);
  951     mutex_unlock(&stats_sizes_lock);
  952 }
  953 
  954 /** wrapper around assoc_find which does the lazy expiration logic */
  955 item *do_item_get(const char *key, const size_t nkey, const uint32_t hv, conn *c, const bool do_update) {
  956     item *it = assoc_find(key, nkey, hv);
  957     if (it != NULL) {
  958         refcount_incr(it);
  959         /* Optimization for slab reassignment. prevents popular items from
  960          * jamming in busy wait. Can only do this here to satisfy lock order
  961          * of item_lock, slabs_lock. */
  962         /* This was made unsafe by removal of the cache_lock:
  963          * slab_rebalance_signal and slab_rebal.* are modified in a separate
  964          * thread under slabs_lock. If slab_rebalance_signal = 1, slab_start =
  965          * NULL (0), but slab_end is still equal to some value, this would end
  966          * up unlinking every item fetched.
  967          * This is either an acceptable loss, or if slab_rebalance_signal is
  968          * true, slab_start/slab_end should be put behind the slabs_lock.
  969          * Which would cause a huge potential slowdown.
  970          * Could also use a specific lock for slab_rebal.* and
  971          * slab_rebalance_signal (shorter lock?)
  972          */
  973         /*if (slab_rebalance_signal &&
  974             ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
  975             do_item_unlink(it, hv);
  976             do_item_remove(it);
  977             it = NULL;
  978         }*/
  979     }
  980     int was_found = 0;
  981 
  982     if (settings.verbose > 2) {
  983         int ii;
  984         if (it == NULL) {
  985             fprintf(stderr, "> NOT FOUND ");
  986         } else {
  987             fprintf(stderr, "> FOUND KEY ");
  988         }
  989         for (ii = 0; ii < nkey; ++ii) {
  990             fprintf(stderr, "%c", key[ii]);
  991         }
  992     }
  993 
  994     if (it != NULL) {
  995         was_found = 1;
  996         if (item_is_flushed(it)) {
  997             do_item_unlink(it, hv);
  998             STORAGE_delete(c->thread->storage, it);
  999             do_item_remove(it);
 1000             it = NULL;
 1001             pthread_mutex_lock(&c->thread->stats.mutex);
 1002             c->thread->stats.get_flushed++;
 1003             pthread_mutex_unlock(&c->thread->stats.mutex);
 1004             if (settings.verbose > 2) {
 1005                 fprintf(stderr, " -nuked by flush");
 1006             }
 1007             was_found = 2;
 1008         } else if (it->exptime != 0 && it->exptime <= current_time) {
 1009             do_item_unlink(it, hv);
 1010             STORAGE_delete(c->thread->storage, it);
 1011             do_item_remove(it);
 1012             it = NULL;
 1013             pthread_mutex_lock(&c->thread->stats.mutex);
 1014             c->thread->stats.get_expired++;
 1015             pthread_mutex_unlock(&c->thread->stats.mutex);
 1016             if (settings.verbose > 2) {
 1017                 fprintf(stderr, " -nuked by expire");
 1018             }
 1019             was_found = 3;
 1020         } else {
 1021             if (do_update) {
 1022                 do_item_bump(c, it, hv);
 1023             }
 1024             DEBUG_REFCNT(it, '+');
 1025         }
 1026     }
 1027 
 1028     if (settings.verbose > 2)
 1029         fprintf(stderr, "\n");
 1030     /* For now this is in addition to the above verbose logging. */
 1031     LOGGER_LOG(c->thread->l, LOG_FETCHERS, LOGGER_ITEM_GET, NULL, was_found, key, nkey,
 1032                (it) ? ITEM_clsid(it) : 0, c->sfd);
 1033 
 1034     return it;
 1035 }
 1036 
 1037 // Requires lock held for item.
 1038 // Split out of do_item_get() to allow mget functions to look through header
 1039 // data before losing state modified via the bump function.
 1040 void do_item_bump(conn *c, item *it, const uint32_t hv) {
 1041     /* We update the hit markers only during fetches.
 1042      * An item needs to be hit twice overall to be considered
 1043      * ACTIVE, but only needs a single hit to maintain activity
 1044      * afterward.
 1045      * FETCHED tells if an item has ever been active.
 1046      */
 1047     if (settings.lru_segmented) {
 1048         if ((it->it_flags & ITEM_ACTIVE) == 0) {
 1049             if ((it->it_flags & ITEM_FETCHED) == 0) {
 1050                 it->it_flags |= ITEM_FETCHED;
 1051             } else {
 1052                 it->it_flags |= ITEM_ACTIVE;
 1053                 if (ITEM_lruid(it) != COLD_LRU) {
 1054                     it->time = current_time; // only need to bump time.
 1055                 } else if (!lru_bump_async(c->thread->lru_bump_buf, it, hv)) {
 1056                     // add flag before async bump to avoid race.
 1057                     it->it_flags &= ~ITEM_ACTIVE;
 1058                 }
 1059             }
 1060         }
 1061     } else {
 1062         it->it_flags |= ITEM_FETCHED;
 1063         do_item_update(it);
 1064     }
 1065 }
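
/* Illustrative timeline for a key in segmented mode, derived from the flag
 * handling above:
 *   1st fetch: ITEM_FETCHED is set; the item is not yet ACTIVE.
 *   2nd fetch: ITEM_ACTIVE is set; outside COLD only it->time is bumped,
 *              while in COLD the bump is queued through lru_bump_async().
 *   later:     the LRU maintainer may strip ITEM_ACTIVE while juggling, but
 *              ITEM_FETCHED persists, so a single further hit re-activates.
 */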
 1066 
 1067 item *do_item_touch(const char *key, size_t nkey, uint32_t exptime,
 1068                     const uint32_t hv, conn *c) {
 1069     item *it = do_item_get(key, nkey, hv, c, DO_UPDATE);
 1070     if (it != NULL) {
 1071         it->exptime = exptime;
 1072     }
 1073     return it;
 1074 }
 1075 
 1076 /*** LRU MAINTENANCE THREAD ***/
 1077 
 1078 /* Returns the number of items removed, expired, or evicted.
 1079  * Callable from worker threads or the LRU maintainer thread. */
 1080 int lru_pull_tail(const int orig_id, const int cur_lru,
 1081         const uint64_t total_bytes, const uint8_t flags, const rel_time_t max_age,
 1082         struct lru_pull_tail_return *ret_it) {
 1083     item *it = NULL;
 1084     int id = orig_id;
 1085     int removed = 0;
 1086     if (id == 0)
 1087         return 0;
 1088 
 1089     int tries = 5;
 1090     item *search;
 1091     item *next_it;
 1092     void *hold_lock = NULL;
 1093     unsigned int move_to_lru = 0;
 1094     uint64_t limit = 0;
 1095 
 1096     id |= cur_lru;
 1097     pthread_mutex_lock(&lru_locks[id]);
 1098     search = tails[id];
 1099     /* We walk up *only* for locked items, or if the bottom item is expired. */
 1100     for (; tries > 0 && search != NULL; tries--, search=next_it) {
 1101         /* we might relink search mid-loop, so search->prev isn't reliable */
 1102         next_it = search->prev;
 1103         if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
 1104             /* We are a crawler, ignore it. */
 1105             if (flags & LRU_PULL_CRAWL_BLOCKS) {
 1106                 pthread_mutex_unlock(&lru_locks[id]);
 1107                 return 0;
 1108             }
 1109             tries++;
 1110             continue;
 1111         }
 1112         uint32_t hv = hash(ITEM_key(search), search->nkey);
 1113         /* Attempt to hash item lock the "search" item. If locked, no
 1114          * other callers can incr the refcount. Also skip ourselves. */
 1115         if ((hold_lock = item_trylock(hv)) == NULL)
 1116             continue;
 1117         /* Now see if the item is refcount locked */
 1118         if (refcount_incr(search) != 2) {
 1119             /* Note pathological case with ref'ed items in tail.
 1120              * Can still unlink the item, but it won't be reusable yet */
 1121             itemstats[id].lrutail_reflocked++;
 1122             /* In case of refcount leaks, enable for quick workaround. */
 1123             /* WARNING: This can cause terrible corruption */
 1124             if (settings.tail_repair_time &&
 1125                     search->time + settings.tail_repair_time < current_time) {
 1126                 itemstats[id].tailrepairs++;
 1127                 search->refcount = 1;
 1128                 /* This will call item_remove -> item_free since refcnt is 1 */
 1129                 STORAGE_delete(ext_storage, search);
 1130                 do_item_unlink_nolock(search, hv);
 1131                 item_trylock_unlock(hold_lock);
 1132                 continue;
 1133             }
 1134         }
 1135 
 1136         /* Expired or flushed */
 1137         if ((search->exptime != 0 && search->exptime < current_time)
 1138             || item_is_flushed(search)) {
 1139             itemstats[id].reclaimed++;
 1140             if ((search->it_flags & ITEM_FETCHED) == 0) {
 1141                 itemstats[id].expired_unfetched++;
 1142             }
 1143             /* refcnt 2 -> 1 */
 1144             do_item_unlink_nolock(search, hv);
 1145             STORAGE_delete(ext_storage, search);
 1146             /* refcnt 1 -> 0 -> item_free */
 1147             do_item_remove(search);
 1148             item_trylock_unlock(hold_lock);
 1149             removed++;
 1150 
 1151             /* If all we're finding are expired, can keep going */
 1152             continue;
 1153         }
 1154 
 1155         /* If we're HOT_LRU or WARM_LRU and over size limit, send to COLD_LRU.
 1156          * If we're COLD_LRU, send to WARM_LRU unless we need to evict
 1157          */
 1158         switch (cur_lru) {
 1159             case HOT_LRU:
 1160                 limit = total_bytes * settings.hot_lru_pct / 100; /* FALLTHRU into WARM_LRU */
 1161             case WARM_LRU:
 1162                 if (limit == 0)
 1163                     limit = total_bytes * settings.warm_lru_pct / 100;
 1164                 /* Rescue ACTIVE items aggressively */
 1165                 if ((search->it_flags & ITEM_ACTIVE) != 0) {
 1166                     search->it_flags &= ~ITEM_ACTIVE;
 1167                     removed++;
 1168                     if (cur_lru == WARM_LRU) {
 1169                         itemstats[id].moves_within_lru++;
 1170                         do_item_unlink_q(search);
 1171                         do_item_link_q(search);
 1172                         do_item_remove(search);
 1173                         item_trylock_unlock(hold_lock);
 1174                     } else {
 1175                         /* Active HOT_LRU items flow to WARM */
 1176                         itemstats[id].moves_to_warm++;
 1177                         move_to_lru = WARM_LRU;
 1178                         do_item_unlink_q(search);
 1179                         it = search;
 1180                     }
 1181                 } else if (sizes_bytes[id] > limit ||
 1182                            current_time - search->time > max_age) {
 1183                     itemstats[id].moves_to_cold++;
 1184                     move_to_lru = COLD_LRU;
 1185                     do_item_unlink_q(search);
 1186                     it = search;
 1187                     removed++;
 1188                     break;
 1189                 } else {
 1190                     /* Don't want to move to COLD, not active, bail out */
 1191                     it = search;
 1192                 }
 1193                 break;
 1194             case COLD_LRU:
 1195                 it = search; /* No matter what, we're stopping */
 1196                 if (flags & LRU_PULL_EVICT) {
 1197                     if (settings.evict_to_free == 0) {
 1198                         /* Don't think we need a counter for this. It'll OOM.  */
 1199                         break;
 1200                     }
 1201                     itemstats[id].evicted++;
 1202                     itemstats[id].evicted_time = current_time - search->time;
 1203                     if (search->exptime != 0)
 1204                         itemstats[id].evicted_nonzero++;
 1205                     if ((search->it_flags & ITEM_FETCHED) == 0) {
 1206                         itemstats[id].evicted_unfetched++;
 1207                     }
 1208                     if ((search->it_flags & ITEM_ACTIVE)) {
 1209                         itemstats[id].evicted_active++;
 1210                     }
 1211                     LOGGER_LOG(NULL, LOG_EVICTIONS, LOGGER_EVICTION, search);
 1212                     STORAGE_delete(ext_storage, search);
 1213                     do_item_unlink_nolock(search, hv);
 1214                     removed++;
 1215                     if (settings.slab_automove == 2) {
 1216                         slabs_reassign(-1, orig_id);
 1217                     }
 1218                 } else if (flags & LRU_PULL_RETURN_ITEM) {
 1219                     /* Keep a reference to this item and return it. */
 1220                     ret_it->it = it;
 1221                     ret_it->hv = hv;
 1222                 } else if ((search->it_flags & ITEM_ACTIVE) != 0
 1223                         && settings.lru_segmented) {
 1224                     itemstats[id].moves_to_warm++;
 1225                     search->it_flags &= ~ITEM_ACTIVE;
 1226                     move_to_lru = WARM_LRU;
 1227                     do_item_unlink_q(search);
 1228                     removed++;
 1229                 }
 1230                 break;
 1231             case TEMP_LRU:
 1232                 it = search; /* Kill the loop. Parent only interested in reclaims */
 1233                 break;
 1234         }
 1235         if (it != NULL)
 1236             break;
 1237     }
 1238 
 1239     pthread_mutex_unlock(&lru_locks[id]);
 1240 
 1241     if (it != NULL) {
 1242         if (move_to_lru) {
 1243             it->slabs_clsid = ITEM_clsid(it);
 1244             it->slabs_clsid |= move_to_lru;
 1245             item_link_q(it);
 1246         }
 1247         if ((flags & LRU_PULL_RETURN_ITEM) == 0) {
 1248             do_item_remove(it);
 1249             item_trylock_unlock(hold_lock);
 1250         }
 1251     }
 1252 
 1253     return removed;
 1254 }
 1255 
 1256 
 1257 /* TODO: Third place this code needs to be deduped */
 1258 static void lru_bump_buf_link_q(lru_bump_buf *b) {
 1259     pthread_mutex_lock(&bump_buf_lock);
 1260     assert(b != bump_buf_head);
 1261 
 1262     b->prev = 0;
 1263     b->next = bump_buf_head;
 1264     if (b->next) b->next->prev = b;
 1265     bump_buf_head = b;
 1266     if (bump_buf_tail == 0) bump_buf_tail = b;
 1267     pthread_mutex_unlock(&bump_buf_lock);
 1268     return;
 1269 }
 1270 
 1271 void *item_lru_bump_buf_create(void) {
 1272     lru_bump_buf *b = calloc(1, sizeof(lru_bump_buf));
 1273     if (b == NULL) {
 1274         return NULL;
 1275     }
 1276 
 1277     b->buf = bipbuf_new(sizeof(lru_bump_entry) * LRU_BUMP_BUF_SIZE);
 1278     if (b->buf == NULL) {
 1279         free(b);
 1280         return NULL;
 1281     }
 1282 
 1283     pthread_mutex_init(&b->mutex, NULL);
 1284 
 1285     lru_bump_buf_link_q(b);
 1286     return b;
 1287 }
 1288 
 1289 static bool lru_bump_async(lru_bump_buf *b, item *it, uint32_t hv) {
 1290     bool ret = true;
 1291     refcount_incr(it);
 1292     pthread_mutex_lock(&b->mutex);
 1293     lru_bump_entry *be = (lru_bump_entry *) bipbuf_request(b->buf, sizeof(lru_bump_entry));
 1294     if (be != NULL) {
 1295         be->it = it;
 1296         be->hv = hv;
 1297         if (bipbuf_push(b->buf, sizeof(lru_bump_entry)) == 0) {
 1298             ret = false;
 1299             b->dropped++;
 1300         }
 1301     } else {
 1302         ret = false;
 1303         b->dropped++;
 1304     }
 1305     if (!ret) {
 1306         refcount_decr(it);
 1307     }
 1308     pthread_mutex_unlock(&b->mutex);
 1309     return ret;
 1310 }
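
/* The bipbuffer protocol as used above and in lru_maintainer_bumps() below,
 * summarized for reference:
 *   producer: be = bipbuf_request(buf, size);   // reserve contiguous space
 *             ...fill *be...
 *             bipbuf_push(buf, size);           // commit the reservation
 *   consumer: be = bipbuf_peek_all(buf, &size); // view committed region
 *             ...process entries...
 *             bipbuf_poll(buf, size);           // release the region
 */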
 1311 
 1312 /* TODO: Might be worth a micro-optimization of having bump buffers link
 1313  * themselves back into the central queue when queue goes from zero to
 1314  * non-zero, then remove from list if zero more than N times.
 1315  * If very few hits on cold this would avoid extra memory barriers from LRU
 1316  * maintainer thread. If many hits, they'll just stay in the list.
 1317  */
 1318 static bool lru_maintainer_bumps(void) {
 1319     lru_bump_buf *b;
 1320     lru_bump_entry *be;
 1321     unsigned int size;
 1322     unsigned int todo;
 1323     bool bumped = false;
 1324     pthread_mutex_lock(&bump_buf_lock);
 1325     for (b = bump_buf_head; b != NULL; b=b->next) {
 1326         pthread_mutex_lock(&b->mutex);
 1327         be = (lru_bump_entry *) bipbuf_peek_all(b->buf, &size);
 1328         pthread_mutex_unlock(&b->mutex);
 1329 
 1330         if (be == NULL) {
 1331             continue;
 1332         }
 1333         todo = size;
 1334         bumped = true;
 1335 
 1336         while (todo) {
 1337             item_lock(be->hv);
 1338             do_item_update(be->it);
 1339             do_item_remove(be->it);
 1340             item_unlock(be->hv);
 1341             be++;
 1342             todo -= sizeof(lru_bump_entry);
 1343         }
 1344 
 1345         pthread_mutex_lock(&b->mutex);
 1346         be = (lru_bump_entry *) bipbuf_poll(b->buf, size);
 1347         pthread_mutex_unlock(&b->mutex);
 1348     }
 1349     pthread_mutex_unlock(&bump_buf_lock);
 1350     return bumped;
 1351 }
 1352 
 1353 static uint64_t lru_total_bumps_dropped(void) {
 1354     uint64_t total = 0;
 1355     lru_bump_buf *b;
 1356     pthread_mutex_lock(&bump_buf_lock);
 1357     for (b = bump_buf_head; b != NULL; b=b->next) {
 1358         pthread_mutex_lock(&b->mutex);
 1359         total += b->dropped;
 1360         pthread_mutex_unlock(&b->mutex);
 1361     }
 1362     pthread_mutex_unlock(&bump_buf_lock);
 1363     return total;
 1364 }
 1365 
/* Loop up to N times:
 * If too many items are in HOT_LRU, push them to COLD_LRU.
 * If too many items are in WARM_LRU, push them to COLD_LRU.
 * If too many items are in COLD_LRU, poke the COLD_LRU tail.
 * 1000 loops with a 1ms minimum sleep gives us under 1M items shifted/sec.
 * The locks can't handle much more than that. Leaving a TODO for how to
 * auto-adjust in the future.
 */
 1374 static int lru_maintainer_juggle(const int slabs_clsid) {
 1375     int i;
 1376     int did_moves = 0;
 1377     uint64_t total_bytes = 0;
 1378     unsigned int chunks_perslab = 0;
 1379     //unsigned int chunks_free = 0;
 1380     /* TODO: if free_chunks below high watermark, increase aggressiveness */
 1381     slabs_available_chunks(slabs_clsid, NULL,
 1382             &chunks_perslab);
 1383     if (settings.temp_lru) {
 1384         /* Only looking for reclaims. Run before we size the LRU. */
 1385         for (i = 0; i < 500; i++) {
 1386             if (lru_pull_tail(slabs_clsid, TEMP_LRU, 0, 0, 0, NULL) <= 0) {
 1387                 break;
 1388             } else {
 1389                 did_moves++;
 1390             }
 1391         }
 1392     }
 1393 
 1394     rel_time_t cold_age = 0;
 1395     rel_time_t hot_age = 0;
 1396     rel_time_t warm_age = 0;
 1397     /* If LRU is in flat mode, force items to drain into COLD via max age of 0 */
 1398     if (settings.lru_segmented) {
 1399         pthread_mutex_lock(&lru_locks[slabs_clsid|COLD_LRU]);
 1400         if (tails[slabs_clsid|COLD_LRU]) {
 1401             cold_age = current_time - tails[slabs_clsid|COLD_LRU]->time;
 1402         }
 1403         // Also build up total_bytes for the classes.
 1404         total_bytes += sizes_bytes[slabs_clsid|COLD_LRU];
 1405         pthread_mutex_unlock(&lru_locks[slabs_clsid|COLD_LRU]);
 1406 
 1407         hot_age = cold_age * settings.hot_max_factor;
 1408         warm_age = cold_age * settings.warm_max_factor;
 1409 
 1410         // total_bytes doesn't have to be exact. cache it for the juggles.
 1411         pthread_mutex_lock(&lru_locks[slabs_clsid|HOT_LRU]);
 1412         total_bytes += sizes_bytes[slabs_clsid|HOT_LRU];
 1413         pthread_mutex_unlock(&lru_locks[slabs_clsid|HOT_LRU]);
 1414 
 1415         pthread_mutex_lock(&lru_locks[slabs_clsid|WARM_LRU]);
 1416         total_bytes += sizes_bytes[slabs_clsid|WARM_LRU];
 1417         pthread_mutex_unlock(&lru_locks[slabs_clsid|WARM_LRU]);
 1418     }
 1419 
 1420     /* Juggle HOT/WARM up to N times */
 1421     for (i = 0; i < 500; i++) {
 1422         int do_more = 0;
 1423         if (lru_pull_tail(slabs_clsid, HOT_LRU, total_bytes, LRU_PULL_CRAWL_BLOCKS, hot_age, NULL) ||
 1424             lru_pull_tail(slabs_clsid, WARM_LRU, total_bytes, LRU_PULL_CRAWL_BLOCKS, warm_age, NULL)) {
 1425             do_more++;
 1426         }
 1427         if (settings.lru_segmented) {
 1428             do_more += lru_pull_tail(slabs_clsid, COLD_LRU, total_bytes, LRU_PULL_CRAWL_BLOCKS, 0, NULL);
 1429         }
 1430         if (do_more == 0)
 1431             break;
 1432         did_moves++;
 1433     }
 1434     return did_moves;
 1435 }
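
/* Worked example of the age limits computed above, using illustrative
 * numbers (the factor values here are assumptions, not read from a real
 * config): with a COLD tail last touched 1000 seconds ago,
 * hot_max_factor = 0.2 yields hot_age = 200s and warm_max_factor = 2.0
 * yields warm_age = 2000s, so HOT drains far sooner than WARM relative
 * to COLD's tail age. Disabled sketch: */
#if 0
static void example_age_limits(void) {
    rel_time_t cold_age = 1000;     /* age of the COLD LRU's tail item */
    double hot_max_factor = 0.2;    /* assumed value */
    double warm_max_factor = 2.0;   /* assumed value */
    rel_time_t hot_age = cold_age * hot_max_factor;    /* 200 */
    rel_time_t warm_age = cold_age * warm_max_factor;  /* 2000 */
    (void)hot_age;
    (void)warm_age;
}
#endif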
 1436 
/* Will crawl all slab classes a minimum of once per hour */
#define MAX_MAINTCRAWL_WAIT (60 * 60)
 1439 
/* Hoping user input will improve this function. This is all a wild guess.
 * Operation: Kicks the crawler for each slab id. Crawlers gather statistics
 * on items with nonzero expirations, then bucket how many items will
 * expire per minute for the next hour.
 * This function checks the results of a run, and if it thinks more than 1%
 * of expirable objects are ready to go, kicks the crawler again to reap.
 * It will also kick the crawler once per minute regardless, waiting a
 * minute longer each time it has no work to do, up to an hour of wait
 * time. The hour cap keeps newly started daemons from waiting too long
 * before retrying a crawl.
 */
 1451 static void lru_maintainer_crawler_check(struct crawler_expired_data *cdata, logger *l) {
 1452     int i;
 1453     static rel_time_t next_crawls[POWER_LARGEST];
 1454     static rel_time_t next_crawl_wait[POWER_LARGEST];
 1455     uint8_t todo[POWER_LARGEST];
 1456     memset(todo, 0, sizeof(uint8_t) * POWER_LARGEST);
 1457     bool do_run = false;
 1458     unsigned int tocrawl_limit = 0;
 1459 
 1460     // TODO: If not segmented LRU, skip non-cold
 1461     for (i = POWER_SMALLEST; i < POWER_LARGEST; i++) {
 1462         crawlerstats_t *s = &cdata->crawlerstats[i];
        /* Nothing to examine until a crawl run has completed. */
 1464         if (s->run_complete) {
 1465             char *lru_name = "na";
 1466             pthread_mutex_lock(&cdata->lock);
 1467             int x;
 1468             /* Should we crawl again? */
 1469             uint64_t possible_reclaims = s->seen - s->noexp;
 1470             uint64_t available_reclaims = 0;
 1471             /* Need to think we can free at least 1% of the items before
 1472              * crawling. */
 1473             /* FIXME: Configurable? */
 1474             uint64_t low_watermark = (possible_reclaims / 100) + 1;
 1475             rel_time_t since_run = current_time - s->end_time;
 1476             /* Don't bother if the payoff is too low. */
 1477             for (x = 0; x < 60; x++) {
 1478                 available_reclaims += s->histo[x];
 1479                 if (available_reclaims > low_watermark) {
 1480                     if (next_crawl_wait[i] < (x * 60)) {
 1481                         next_crawl_wait[i] += 60;
 1482                     } else if (next_crawl_wait[i] >= 60) {
 1483                         next_crawl_wait[i] -= 60;
 1484                     }
 1485                     break;
 1486                 }
 1487             }
 1488 
 1489             if (available_reclaims == 0) {
 1490                 next_crawl_wait[i] += 60;
 1491             }
 1492 
 1493             if (next_crawl_wait[i] > MAX_MAINTCRAWL_WAIT) {
 1494                 next_crawl_wait[i] = MAX_MAINTCRAWL_WAIT;
 1495             }
 1496 
 1497             next_crawls[i] = current_time + next_crawl_wait[i] + 5;
 1498             switch (GET_LRU(i)) {
 1499                 case HOT_LRU:
 1500                     lru_name = "hot";
 1501                     break;
 1502                 case WARM_LRU:
 1503                     lru_name = "warm";
 1504                     break;
 1505                 case COLD_LRU:
 1506                     lru_name = "cold";
 1507                     break;
 1508                 case TEMP_LRU:
 1509                     lru_name = "temp";
 1510                     break;
 1511             }
 1512             LOGGER_LOG(l, LOG_SYSEVENTS, LOGGER_CRAWLER_STATUS, NULL,
 1513                     CLEAR_LRU(i),
 1514                     lru_name,
 1515                     (unsigned long long)low_watermark,
 1516                     (unsigned long long)available_reclaims,
 1517                     (unsigned int)since_run,
 1518                     next_crawls[i] - current_time,
 1519                     s->end_time - s->start_time,
 1520                     s->seen,
 1521                     s->reclaimed);
            // Got our numbers; don't reuse these stats until the next run completes.
 1523             s->run_complete = false;
 1524             pthread_mutex_unlock(&cdata->lock);
 1525         }
 1526         if (current_time > next_crawls[i]) {
 1527             pthread_mutex_lock(&lru_locks[i]);
 1528             if (sizes[i] > tocrawl_limit) {
 1529                 tocrawl_limit = sizes[i];
 1530             }
 1531             pthread_mutex_unlock(&lru_locks[i]);
 1532             todo[i] = 1;
 1533             do_run = true;
 1534             next_crawls[i] = current_time + 5; // minimum retry wait.
 1535         }
 1536     }
 1537     if (do_run) {
 1538         if (settings.lru_crawler_tocrawl && settings.lru_crawler_tocrawl < tocrawl_limit) {
 1539             tocrawl_limit = settings.lru_crawler_tocrawl;
 1540         }
 1541         lru_crawler_start(todo, tocrawl_limit, CRAWLER_AUTOEXPIRE, cdata, NULL, 0);
 1542     }
 1543 }
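
/* The scheduling heuristic above in miniature: the crawler's histogram
 * holds one bucket per minute for the next hour, and a re-crawl is only
 * worthwhile once the leading buckets exceed roughly 1% of the expirable
 * items. Disabled sketch with made-up numbers: */
#if 0
static void example_crawl_decision(void) {
    uint64_t seen = 100000, noexp = 20000;
    uint64_t possible_reclaims = seen - noexp;              /* 80000 */
    uint64_t low_watermark = (possible_reclaims / 100) + 1; /* 801, ~1% */
    uint64_t histo[60] = { 500, 400 };  /* expirations per upcoming minute */
    uint64_t available = 0;
    int x;
    for (x = 0; x < 60; x++) {
        available += histo[x];
        if (available > low_watermark) {
            /* 500 + 400 = 900 > 801 within two minutes: crawl soon */
            break;
        }
    }
}
#endif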
 1544 
 1545 slab_automove_reg_t slab_automove_default = {
 1546     .init = slab_automove_init,
 1547     .free = slab_automove_free,
 1548     .run = slab_automove_run
 1549 };
 1550 #ifdef EXTSTORE
 1551 slab_automove_reg_t slab_automove_extstore = {
 1552     .init = slab_automove_extstore_init,
 1553     .free = slab_automove_extstore_free,
 1554     .run = slab_automove_extstore_run
 1555 };
 1556 #endif
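
/* The two registrations above form a tiny vtable: the maintainer picks one
 * at startup and only ever calls init/run/free through it. A hypothetical
 * third strategy would slot in the same way, assuming the function-pointer
 * signatures implied by the calls in lru_maintainer_thread(). Disabled
 * sketch: */
#if 0
static void *noop_automove_init(struct settings *s) { (void)s; return NULL; }
static void noop_automove_free(void *arg) { (void)arg; }
static void noop_automove_run(void *arg, int *src, int *dst) {
    (void)arg;
    *src = -1;  /* -1/-1 means "no page move recommended" */
    *dst = -1;
}
slab_automove_reg_t slab_automove_noop = {
    .init = noop_automove_init,
    .free = noop_automove_free,
    .run = noop_automove_run
};
#endif
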
 1557 static pthread_t lru_maintainer_tid;
 1558 
 1559 #define MAX_LRU_MAINTAINER_SLEEP 1000000
 1560 #define MIN_LRU_MAINTAINER_SLEEP 1000
 1561 
/* Background thread: juggles each slab class's LRUs with adaptive
 * per-class backoff, drains async bump buffers, and periodically kicks
 * the crawler and the slab page automover. */
static void *lru_maintainer_thread(void *arg) {
 1563     slab_automove_reg_t *sam = &slab_automove_default;
 1564 #ifdef EXTSTORE
 1565     void *storage = arg;
 1566     if (storage != NULL)
 1567         sam = &slab_automove_extstore;
 1568 #endif
 1569     int i;
 1570     useconds_t to_sleep = MIN_LRU_MAINTAINER_SLEEP;
 1571     useconds_t last_sleep = MIN_LRU_MAINTAINER_SLEEP;
 1572     rel_time_t last_crawler_check = 0;
 1573     rel_time_t last_automove_check = 0;
 1574     useconds_t next_juggles[MAX_NUMBER_OF_SLAB_CLASSES] = {0};
 1575     useconds_t backoff_juggles[MAX_NUMBER_OF_SLAB_CLASSES] = {0};
 1576     struct crawler_expired_data *cdata =
 1577         calloc(1, sizeof(struct crawler_expired_data));
 1578     if (cdata == NULL) {
 1579         fprintf(stderr, "Failed to allocate crawler data for LRU maintainer thread\n");
 1580         abort();
 1581     }
 1582     pthread_mutex_init(&cdata->lock, NULL);
 1583     cdata->crawl_complete = true; // kick off the crawler.
 1584     logger *l = logger_create();
 1585     if (l == NULL) {
 1586         fprintf(stderr, "Failed to allocate logger for LRU maintainer thread\n");
 1587         abort();
 1588     }
 1589 
 1590     double last_ratio = settings.slab_automove_ratio;
 1591     void *am = sam->init(&settings);
 1592 
 1593     pthread_mutex_lock(&lru_maintainer_lock);
 1594     if (settings.verbose > 2)
 1595         fprintf(stderr, "Starting LRU maintainer background thread\n");
 1596     while (do_run_lru_maintainer_thread) {
 1597         pthread_mutex_unlock(&lru_maintainer_lock);
 1598         if (to_sleep)
 1599             usleep(to_sleep);
 1600         pthread_mutex_lock(&lru_maintainer_lock);
 1601         /* A sleep of zero counts as a minimum of a 1ms wait */
 1602         last_sleep = to_sleep > 1000 ? to_sleep : 1000;
 1603         to_sleep = MAX_LRU_MAINTAINER_SLEEP;
 1604 
 1605         STATS_LOCK();
 1606         stats.lru_maintainer_juggles++;
 1607         STATS_UNLOCK();
 1608 
 1609         /* Each slab class gets its own sleep to avoid hammering locks */
 1610         for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
 1611             next_juggles[i] = next_juggles[i] > last_sleep ? next_juggles[i] - last_sleep : 0;
 1612 
 1613             if (next_juggles[i] > 0) {
 1614                 // Sleep the thread just for the minimum amount (or not at all)
 1615                 if (next_juggles[i] < to_sleep)
 1616                     to_sleep = next_juggles[i];
 1617                 continue;
 1618             }
 1619 
 1620             int did_moves = lru_maintainer_juggle(i);
 1621             if (did_moves == 0) {
 1622                 if (backoff_juggles[i] != 0) {
 1623                     backoff_juggles[i] += backoff_juggles[i] / 8;
 1624                 } else {
 1625                     backoff_juggles[i] = MIN_LRU_MAINTAINER_SLEEP;
 1626                 }
 1627                 if (backoff_juggles[i] > MAX_LRU_MAINTAINER_SLEEP)
 1628                     backoff_juggles[i] = MAX_LRU_MAINTAINER_SLEEP;
 1629             } else if (backoff_juggles[i] > 0) {
 1630                 backoff_juggles[i] /= 2;
 1631                 if (backoff_juggles[i] < MIN_LRU_MAINTAINER_SLEEP) {
 1632                     backoff_juggles[i] = 0;
 1633                 }
 1634             }
 1635             next_juggles[i] = backoff_juggles[i];
 1636             if (next_juggles[i] < to_sleep)
 1637                 to_sleep = next_juggles[i];
 1638         }
 1639 
 1640         /* Minimize the sleep if we had async LRU bumps to process */
 1641         if (settings.lru_segmented && lru_maintainer_bumps() && to_sleep > 1000) {
 1642             to_sleep = 1000;
 1643         }
 1644 
 1645         /* Once per second at most */
 1646         if (settings.lru_crawler && last_crawler_check != current_time) {
 1647             lru_maintainer_crawler_check(cdata, l);
 1648             last_crawler_check = current_time;
 1649         }
 1650 
 1651         if (settings.slab_automove == 1 && last_automove_check != current_time) {
 1652             if (last_ratio != settings.slab_automove_ratio) {
 1653                 sam->free(am);
 1654                 am = sam->init(&settings);
 1655                 last_ratio = settings.slab_automove_ratio;
 1656             }
 1657             int src, dst;
 1658             sam->run(am, &src, &dst);
 1659             if (src != -1 && dst != -1) {
 1660                 slabs_reassign(src, dst);
 1661                 LOGGER_LOG(l, LOG_SYSEVENTS, LOGGER_SLAB_MOVE, NULL,
 1662                         src, dst);
 1663             }
            // dst == 0 means reclaim to global pool, be more aggressive
            if (dst != 0) {
                last_automove_check = current_time;
            } else {
                // also ensure we minimize the thread sleep
                to_sleep = 1000;
            }
 1671         }
 1672     }
 1673     pthread_mutex_unlock(&lru_maintainer_lock);
 1674     sam->free(am);
    // The LRU crawler *must* already be stopped before cdata is freed.
 1676     free(cdata);
 1677     if (settings.verbose > 2)
 1678         fprintf(stderr, "LRU maintainer thread stopping\n");
 1679 
 1680     return NULL;
 1681 }
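
/* The per-class backoff used above grows by one eighth per idle pass and
 * halves whenever a pass does work, clamped between the minimum and
 * maximum sleep. Factored into a disabled sketch (the helper name is
 * hypothetical): */
#if 0
static useconds_t example_backoff_step(useconds_t backoff, int did_moves) {
    if (did_moves == 0) {
        /* idle: 1000 -> 1125 -> 1265 -> ... capped at 1000000 (1 second) */
        backoff = backoff ? backoff + backoff / 8 : MIN_LRU_MAINTAINER_SLEEP;
        if (backoff > MAX_LRU_MAINTAINER_SLEEP)
            backoff = MAX_LRU_MAINTAINER_SLEEP;
    } else if (backoff > 0) {
        /* busy: halve, and drop to zero (no sleep) below the minimum */
        backoff /= 2;
        if (backoff < MIN_LRU_MAINTAINER_SLEEP)
            backoff = 0;
    }
    return backoff;
}
#endif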
 1682 
 1683 int stop_lru_maintainer_thread(void) {
 1684     int ret;
 1685     pthread_mutex_lock(&lru_maintainer_lock);
 1686     /* LRU thread is a sleep loop, will die on its own */
 1687     do_run_lru_maintainer_thread = 0;
 1688     pthread_mutex_unlock(&lru_maintainer_lock);
 1689     if ((ret = pthread_join(lru_maintainer_tid, NULL)) != 0) {
 1690         fprintf(stderr, "Failed to stop LRU maintainer thread: %s\n", strerror(ret));
 1691         return -1;
 1692     }
 1693     settings.lru_maintainer_thread = false;
 1694     return 0;
 1695 }
 1696 
 1697 int start_lru_maintainer_thread(void *arg) {
 1698     int ret;
 1699 
 1700     pthread_mutex_lock(&lru_maintainer_lock);
 1701     do_run_lru_maintainer_thread = 1;
 1702     settings.lru_maintainer_thread = true;
 1703     if ((ret = pthread_create(&lru_maintainer_tid, NULL,
 1704         lru_maintainer_thread, arg)) != 0) {
 1705         fprintf(stderr, "Can't create LRU maintainer thread: %s\n",
 1706             strerror(ret));
 1707         pthread_mutex_unlock(&lru_maintainer_lock);
 1708         return -1;
 1709     }
 1710     pthread_mutex_unlock(&lru_maintainer_lock);
 1711 
 1712     return 0;
 1713 }
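
/* Lifecycle sketch (hypothetical caller): the pause/resume pair below
 * simply holds lru_maintainer_lock, which the thread re-acquires at the
 * top of every iteration, so a paused maintainer parks between passes.
 * Disabled example: */
#if 0
static void example_lifecycle(void *storage) {
    if (start_lru_maintainer_thread(storage) != 0)
        return;                     /* thread failed to start */
    lru_maintainer_pause();         /* e.g. around a critical maintenance window */
    lru_maintainer_resume();
    stop_lru_maintainer_thread();   /* flags the loop to exit, then joins */
}
#endif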
 1714 
/* While we hold this lock, the LRU maintainer can't wake up or move items */
 1716 void lru_maintainer_pause(void) {
 1717     pthread_mutex_lock(&lru_maintainer_lock);
 1718 }
 1719 
 1720 void lru_maintainer_resume(void) {
 1721     pthread_mutex_unlock(&lru_maintainer_lock);
 1722 }
 1723 
 1724 int init_lru_maintainer(void) {
 1725     lru_maintainer_initialized = 1;
 1726     return 0;
 1727 }
 1728 
/* Tail linkers and the queue-walk step for the LRU crawler's pseudo-items. */
 1730 void do_item_linktail_q(item *it) { /* item is the new tail */
 1731     item **head, **tail;
 1732     assert(it->it_flags == 1);
 1733     assert(it->nbytes == 0);
 1734 
 1735     head = &heads[it->slabs_clsid];
 1736     tail = &tails[it->slabs_clsid];
 1737     //assert(*tail != 0);
 1738     assert(it != *tail);
 1739     assert((*head && *tail) || (*head == 0 && *tail == 0));
 1740     it->prev = *tail;
 1741     it->next = 0;
 1742     if (it->prev) {
 1743         assert(it->prev->next == 0);
 1744         it->prev->next = it;
 1745     }
 1746     *tail = it;
 1747     if (*head == 0) *head = it;
 1748     return;
 1749 }
 1750 
 1751 void do_item_unlinktail_q(item *it) {
 1752     item **head, **tail;
 1753     head = &heads[it->slabs_clsid];
 1754     tail = &tails[it->slabs_clsid];
 1755 
 1756     if (*head == it) {
 1757         assert(it->prev == 0);
 1758         *head = it->next;
 1759     }
 1760     if (*tail == it) {
 1761         assert(it->next == 0);
 1762         *tail = it->prev;
 1763     }
 1764     assert(it->next != it);
 1765     assert(it->prev != it);
 1766 
 1767     if (it->next) it->next->prev = it->prev;
 1768     if (it->prev) it->prev->next = it->next;
 1769     return;
 1770 }
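
/* The same tail splice as do_item_linktail_q(), on a stripped-down node
 * type so the pointer discipline is visible without the item bookkeeping.
 * Disabled sketch; the node type and helper are hypothetical: */
#if 0
typedef struct enode { struct enode *next, *prev; } enode;

static void example_linktail(enode **head, enode **tail, enode *it) {
    it->prev = *tail;
    it->next = NULL;
    if (it->prev)
        it->prev->next = it;    /* old tail now points at us */
    *tail = it;
    if (*head == NULL)
        *head = it;             /* empty list: we're head and tail too */
}
#endif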
 1771 
 1772 /* This is too convoluted, but it's a difficult shuffle. Try to rewrite it
 1773  * more clearly. */
 1774 item *do_item_crawl_q(item *it) {
 1775     item **head, **tail;
 1776     assert(it->it_flags == 1);
 1777     assert(it->nbytes == 0);
 1778     head = &heads[it->slabs_clsid];
 1779     tail = &tails[it->slabs_clsid];
 1780 
 1781     /* We've hit the head, pop off */
 1782     if (it->prev == 0) {
 1783         assert(*head == it);
 1784         if (it->next) {
 1785             *head = it->next;
 1786             assert(it->next->prev == it);
 1787             it->next->prev = 0;
 1788         }
 1789         return NULL; /* Done */
 1790     }
 1791 
 1792     /* Swing ourselves in front of the next item */
 1793     /* NB: If there is a prev, we can't be the head */
 1794     assert(it->prev != it);
 1795     if (it->prev) {
 1796         if (*head == it->prev) {
 1797             /* Prev was the head, now we're the head */
 1798             *head = it;
 1799         }
 1800         if (*tail == it) {
 1801             /* We are the tail, now they are the tail */
 1802             *tail = it->prev;
 1803         }
 1804         assert(it->next != it);
 1805         if (it->next) {
 1806             assert(it->prev->next == it);
 1807             it->prev->next = it->next;
 1808             it->next->prev = it->prev;
 1809         } else {
 1810             /* Tail. Move this above? */
 1811             it->prev->next = 0;
 1812         }
 1813         /* prev->prev's next is it->prev */
 1814         it->next = it->prev;
 1815         it->prev = it->next->prev;
 1816         it->next->prev = it;
 1817         /* New it->prev now, if we're not at the head. */
 1818         if (it->prev) {
 1819             it->prev->next = it;
 1820         }
 1821     }
 1822     assert(it->next != it);
 1823     assert(it->prev != it);
 1824 
 1825     return it->next; /* success */
 1826 }
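
/* One do_item_crawl_q() step swaps the crawler with its prev, so repeated
 * calls walk the pseudo-item from tail toward head, each call returning
 * the item just stepped past (NULL once it pops off the head). The same
 * shuffle on a stripped-down node type, as a disabled sketch with
 * hypothetical names: */
#if 0
typedef struct cnode { struct cnode *next, *prev; } cnode;

static cnode *example_crawl_step(cnode **head, cnode **tail, cnode *it) {
    if (it->prev == NULL) {             /* hit the head: pop off */
        if (it->next) {
            *head = it->next;
            it->next->prev = NULL;
        }
        return NULL;
    }
    if (*head == it->prev)
        *head = it;                     /* prev was the head; now we are */
    if (*tail == it)
        *tail = it->prev;               /* we were the tail; now prev is */
    if (it->next) {
        it->prev->next = it->next;
        it->next->prev = it->prev;
    } else {
        it->prev->next = NULL;          /* we were the tail */
    }
    it->next = it->prev;                /* swing in front of prev */
    it->prev = it->next->prev;
    it->next->prev = it;
    if (it->prev)
        it->prev->next = it;
    return it->next;                    /* the item to examine next */
}
#endif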