"Fossies" - the Fresh Open Source Software Archive

Member "ponyc-0.33.2/src/libponyrt/mem/pool.c" (3 Feb 2020, 26657 Bytes) of package /linux/misc/ponyc-0.33.2.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "pool.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.33.1_vs_0.33.2.

    1 #define PONY_WANT_ATOMIC_DEFS
    2 
    3 #include "pool.h"
    4 #include "alloc.h"
    5 #include "../ds/fun.h"
    6 #include "../sched/cpu.h"
    7 #include "ponyassert.h"
    8 #include <stdint.h>
    9 #include <stdbool.h>
   10 #include <stdlib.h>
   11 #include <stdio.h>
   12 #include <string.h>
   13 
   14 #include <platform.h>
   15 #include <pony/detail/atomics.h>
   16 
   17 #ifdef USE_VALGRIND
   18 #include <valgrind/valgrind.h>
   19 #include <valgrind/helgrind.h>
   20 #endif
   21 
   22 /// Allocations this size and above are aligned on this size. This is needed
   23 /// so that the pagemap for the heap is aligned.
   24 #define POOL_ALIGN_INDEX (POOL_ALIGN_BITS - POOL_MIN_BITS)
   25 #define POOL_ALIGN_MASK (POOL_ALIGN - 1)
   26 
   27 /// When we mmap, pull at least this many bytes.
   28 #ifdef PLATFORM_IS_ILP32
   29 #  define POOL_MMAP (16 * 1024 * 1024) // 16 MB
   30 #else
   31 #  ifdef PLATFORM_IS_WINDOWS
   32 #    define POOL_MMAP (16 * 1024 * 1024) // 16 MB
   33 #  else
   34 #    define POOL_MMAP (128 * 1024 * 1024) // 128 MB
   35 #  endif
   36 #endif
   37 
   38 /// An item on a per-size thread-local free list.
   39 typedef struct pool_item_t
   40 {
   41   struct pool_item_t* next;
   42 } pool_item_t;
   43 
   44 /// A per-size thread-local free list header.
   45 typedef struct pool_local_t
   46 {
   47   pool_item_t* pool;
   48   size_t length;
   49   char* start;
   50   char* end;
   51 } pool_local_t;
   52 
   53 /// A free list on a per-size global list of free lists.
   54 typedef struct pool_central_t
   55 {
   56   pool_item_t* next;
   57   uintptr_t length;
   58   struct pool_central_t* central;
   59 } pool_central_t;
   60 
   61 PONY_ABA_PROTECTED_PTR_DECLARE(pool_central_t)
   62 
   63 /// The header of a per-size global list of free lists.
   64 typedef struct pool_global_t
   65 {
   66   size_t size;
   67   size_t count;
   68 #ifdef PLATFORM_IS_X86
   69   PONY_ATOMIC_ABA_PROTECTED_PTR(pool_central_t) central;
   70 #else
   71   PONY_ATOMIC(pool_central_t*) central;
   72 #endif
   73 } pool_global_t;
   74 
   75 /// An item on either a thread-local or a global list of free blocks.
   76 typedef struct pool_block_t
   77 {
   78   struct pool_block_t* prev;
   79   union
   80   {
   81     struct pool_block_t* next;
   82     PONY_ATOMIC(struct pool_block_t*) global;
   83   };
   84   size_t size;
   85   PONY_ATOMIC(bool) acquired;
   86 } pool_block_t;
   87 
   88 /// The header of a thread-local list of free blocks.
   89 typedef struct pool_block_header_t
   90 {
   91   pool_block_t* head;
   92   size_t total_size;
   93   size_t largest_size;
   94 } pool_block_header_t;
   95 
   96 #ifdef PLATFORM_IS_X86
   97 #  define POOL_CENTRAL_INIT {{NULL, 0}}
   98 #else
   99 #  define POOL_CENTRAL_INIT NULL
  100 #endif
  101 
  102 static pool_global_t pool_global[POOL_COUNT] =
  103 {
  104   {POOL_MIN << 0, POOL_MAX / (POOL_MIN << 0), POOL_CENTRAL_INIT},
  105   {POOL_MIN << 1, POOL_MAX / (POOL_MIN << 1), POOL_CENTRAL_INIT},
  106   {POOL_MIN << 2, POOL_MAX / (POOL_MIN << 2), POOL_CENTRAL_INIT},
  107   {POOL_MIN << 3, POOL_MAX / (POOL_MIN << 3), POOL_CENTRAL_INIT},
  108   {POOL_MIN << 4, POOL_MAX / (POOL_MIN << 4), POOL_CENTRAL_INIT},
  109   {POOL_MIN << 5, POOL_MAX / (POOL_MIN << 5), POOL_CENTRAL_INIT},
  110   {POOL_MIN << 6, POOL_MAX / (POOL_MIN << 6), POOL_CENTRAL_INIT},
  111   {POOL_MIN << 7, POOL_MAX / (POOL_MIN << 7), POOL_CENTRAL_INIT},
  112   {POOL_MIN << 8, POOL_MAX / (POOL_MIN << 8), POOL_CENTRAL_INIT},
  113   {POOL_MIN << 9, POOL_MAX / (POOL_MIN << 9), POOL_CENTRAL_INIT},
  114   {POOL_MIN << 10, POOL_MAX / (POOL_MIN << 10), POOL_CENTRAL_INIT},
  115   {POOL_MIN << 11, POOL_MAX / (POOL_MIN << 11), POOL_CENTRAL_INIT},
  116   {POOL_MIN << 12, POOL_MAX / (POOL_MIN << 12), POOL_CENTRAL_INIT},
  117   {POOL_MIN << 13, POOL_MAX / (POOL_MIN << 13), POOL_CENTRAL_INIT},
  118   {POOL_MIN << 14, POOL_MAX / (POOL_MIN << 14), POOL_CENTRAL_INIT},
  119   {POOL_MIN << 15, POOL_MAX / (POOL_MIN << 15), POOL_CENTRAL_INIT},
  120 };
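      // Assuming the defaults in pool.h (POOL_MIN_BITS = 5, POOL_MAX_BITS = 20), this
      // table describes 16 size classes of 32, 64, 128, ... bytes up to 1 MB. `count`
      // is how many items a thread-local free list may hold before it is pushed to
      // the global list (see ponyint_pool_free below).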
  121 
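      // The global list of free blocks shared by all threads, plus a count of threads
      // currently pushing to or pulling from it. pool_block_pull waits for this count
      // to drop to zero before modifying a pulled block, so no other thread can still
      // be traversing it.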
  122 static pool_block_t pool_block_global;
  123 static PONY_ATOMIC(size_t) in_pool_block_global;
  124 
  125 static __pony_thread_local pool_local_t pool_local[POOL_COUNT];
  126 static __pony_thread_local pool_block_header_t pool_block_header;
  127 
  128 #ifdef USE_POOLTRACK
  129 #include "../ds/stack.h"
  130 #include "../sched/cpu.h"
  131 
  132 #define POOL_TRACK_FREE ((void*)0)
  133 #define POOL_TRACK_ALLOC ((void*)1)
  134 #define POOL_TRACK_PUSH ((void*)2)
  135 #define POOL_TRACK_PULL ((void*)3)
  136 #define POOL_TRACK_PUSH_LIST ((void*)4)
  137 #define POOL_TRACK_PULL_LIST ((void*)5)
  138 #define POOL_TRACK_MAX_THREADS 64
  139 
  140 DECLARE_STACK(pool_track, pool_track_t, void);
  141 DEFINE_STACK(pool_track, pool_track_t, void);
  142 
  143 typedef struct
  144 {
  145   bool init;
  146   bool internal;
  147   int thread_id;
  148   pool_track_t* stack;
  149 } pool_track_info_t;
  150 
  151 static __pony_thread_local pool_track_info_t track;
  152 static PONY_ATOMIC(int) track_global_thread_id;
  153 static pool_track_info_t* track_global_info[POOL_TRACK_MAX_THREADS];
  154 
  155 static void pool_event_print(int thread, void* op, size_t event, size_t tsc,
  156   void* addr, size_t size)
  157 {
  158   if(op == POOL_TRACK_ALLOC)
  159     printf("%d ALLOC "__zu" ("__zu"): %p, "__zu"\n",
  160       thread, event, tsc, addr, size);
  161   else if(op == POOL_TRACK_FREE)
  162     printf("%d FREE "__zu" ("__zu"): %p, "__zu"\n",
  163       thread, event, tsc, addr, size);
  164   else if(op == POOL_TRACK_PUSH)
  165     printf("%d PUSH "__zu" ("__zu"): %p, "__zu"\n",
  166       thread, event, tsc, addr, size);
  167   else if(op == POOL_TRACK_PULL)
  168     printf("%d PULL "__zu" ("__zu"): %p, "__zu"\n",
  169       thread, event, tsc, addr, size);
  170   else if(op == POOL_TRACK_PUSH_LIST)
  171     printf("%d PUSH LIST "__zu" ("__zu"): "__zu", "__zu"\n",
  172       thread, event, tsc, (size_t)addr, size);
  173   else if(op == POOL_TRACK_PULL_LIST)
  174     printf("%d PULL LIST "__zu" ("__zu"): "__zu", "__zu"\n",
  175       thread, event, tsc, (size_t)addr, size);
  176 }
  177 
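      // Debug helper: walk every thread's tracking stack and print the recorded
      // events, newest first, optionally filtered by thread, address, operation or
      // event number. Each record is four stack entries (op, address or list length,
      // size, cpu tick), read back here in reverse order. It is referenced from
      // track_init mainly so the symbol stays linked, presumably for use from a
      // debugger.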
  178 static void pool_track(int thread_filter, void* addr_filter, int op_filter,
  179   size_t event_filter)
  180 {
  181   for(int i = 0; i < POOL_TRACK_MAX_THREADS; i++)
  182   {
  183     if((thread_filter != -1) && (thread_filter != i))
  184       continue;
  185 
  186     pool_track_info_t* track = track_global_info[i];
  187 
  188     if(track == NULL)
  189       continue;
  190 
  191     Stack* t = (Stack*)track->stack;
  192     size_t event = 0;
  193 
  194     int state = 0;
  195     void* op;
  196     void* addr;
  197     size_t size;
  198     size_t tsc;
  199 
  200     while(t != NULL)
  201     {
  202       for(int j = t->index - 1; j >= 0; j--)
  203       {
  204         switch(state)
  205         {
  206           case 0:
  207             tsc = (size_t)t->data[j];
  208             state = 1;
  209             break;
  210 
  211           case 1:
  212             size = (size_t)t->data[j];
  213             state = 2;
  214             break;
  215 
  216           case 2:
  217             addr = t->data[j];
  218             state = 3;
  219             break;
  220 
  221           case 3:
  222           {
  223             bool print = true;
  224             op = t->data[j];
  225             state = 0;
  226 
  227             if((op_filter != -1) && (op_filter != (int)op))
  228               print = false;
  229 
  230             if((event_filter != (size_t)-1) && (event_filter != event))
  231               print = false;
  232 
  233             if((addr_filter != NULL) &&
  234               ((addr > addr_filter) || ((addr + size) <= addr_filter)))
  235             {
  236               print = false;
  237             }
  238 
  239             if(print)
  240             {
  241               pool_event_print(i, op, event, tsc, addr, size);
  242 
  243               if(event_filter != (size_t)-1)
  244                 return;
  245             }
  246 
  247             event++;
  248             break;
  249           }
  250 
  251           default: {}
  252         }
  253       }
  254 
  255       t = t->prev;
  256     }
  257   }
  258 }
  259 
  260 static void track_init()
  261 {
  262   if(track.init)
  263     return;
  264 
  265   track.init = true;
  266   track.thread_id = atomic_fetch_add_explicit(&track_global_thread_id, 1,
  267     memory_order_relaxed);
  268   track_global_info[track.thread_id] = &track;
  269 
  270   // Force the symbol to be linked.
  271   pool_track(track.thread_id, NULL, -1, 0);
  272 }
  273 
  274 static void track_alloc(void* p, size_t size)
  275 {
  276   track_init();
  277 
  278   if(track.internal)
  279     return;
  280 
  281   track.internal = true;
  282 
  283   track.stack = pool_track_push(track.stack, POOL_TRACK_ALLOC);
  284   track.stack = pool_track_push(track.stack, p);
  285   track.stack = pool_track_push(track.stack, (void*)size);
  286   track.stack = pool_track_push(track.stack, (void*)ponyint_cpu_tick());
  287 
  288   track.internal = false;
  289 }
  290 
  291 static void track_free(void* p, size_t size)
  292 {
  293   track_init();
  294   pony_assert(!track.internal);
  295 
  296   track.internal = true;
  297 
  298   track.stack = pool_track_push(track.stack, POOL_TRACK_FREE);
  299   track.stack = pool_track_push(track.stack, p);
  300   track.stack = pool_track_push(track.stack, (void*)size);
  301   track.stack = pool_track_push(track.stack, (void*)ponyint_cpu_tick());
  302 
  303   track.internal = false;
  304 }
  305 
  306 static void track_push(pool_item_t* p, size_t length, size_t size)
  307 {
  308   track_init();
  309   pony_assert(!track.internal);
  310 
  311   track.internal = true;
  312   uint64_t tsc = ponyint_cpu_tick();
  313 
  314   track.stack = pool_track_push(track.stack, POOL_TRACK_PUSH_LIST);
  315   track.stack = pool_track_push(track.stack, (void*)length);
  316   track.stack = pool_track_push(track.stack, (void*)size);
  317   track.stack = pool_track_push(track.stack, (void*)tsc);
  318 
  319   while(p != NULL)
  320   {
  321     track.stack = pool_track_push(track.stack, POOL_TRACK_PUSH);
  322     track.stack = pool_track_push(track.stack, p);
  323     track.stack = pool_track_push(track.stack, (void*)size);
  324     track.stack = pool_track_push(track.stack, (void*)tsc);
  325     p = p->next;
  326   }
  327 
  328   track.internal = false;
  329 }
  330 
  331 static void track_pull(pool_item_t* p, size_t length, size_t size)
  332 {
  333   track_init();
  334   pony_assert(!track.internal);
  335 
  336   track.internal = true;
  337   uint64_t tsc = ponyint_cpu_tick();
  338 
  339   track.stack = pool_track_push(track.stack, POOL_TRACK_PULL_LIST);
  340   track.stack = pool_track_push(track.stack, (void*)length);
  341   track.stack = pool_track_push(track.stack, (void*)size);
  342   track.stack = pool_track_push(track.stack, (void*)tsc);
  343 
  344   while(p != NULL)
  345   {
  346     track.stack = pool_track_push(track.stack, POOL_TRACK_PULL);
  347     track.stack = pool_track_push(track.stack, p);
  348     track.stack = pool_track_push(track.stack, (void*)size);
  349     track.stack = pool_track_push(track.stack, (void*)tsc);
  350     p = p->next;
  351   }
  352 
  353   track.internal = false;
  354 }
  355 
  356 #define TRACK_ALLOC(PTR, SIZE) track_alloc(PTR, SIZE)
  357 #define TRACK_FREE(PTR, SIZE) track_free(PTR, SIZE)
  358 #define TRACK_PUSH(PTR, LEN, SIZE) track_push(PTR, LEN, SIZE)
  359 #define TRACK_PULL(PTR, LEN, SIZE) track_pull(PTR, LEN, SIZE)
  360 #define TRACK_EXTERNAL() (!track.internal)
  361 
  362 #else
  363 
  364 #define TRACK_ALLOC(PTR, SIZE)
  365 #define TRACK_FREE(PTR, SIZE)
  366 #define TRACK_PUSH(PTR, LEN, SIZE)
  367 #define TRACK_PULL(PTR, LEN, SIZE)
  368 #define TRACK_EXTERNAL() (true)
  369 
  370 #endif
  371 
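      // Unlink a block from the thread-local free block list.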
  372 static void pool_block_remove(pool_block_t* block)
  373 {
  374   pool_block_t* prev = block->prev;
  375   pool_block_t* next = block->next;
  376 
  377   if(prev != NULL)
  378     prev->next = next;
  379   else
  380     pool_block_header.head = next;
  381 
  382   if(next != NULL)
  383     next->prev = prev;
  384 }
  385 
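      // Insert a block into the thread-local free block list, keeping the list sorted
      // by ascending block size.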
  386 static void pool_block_insert(pool_block_t* block)
  387 {
  388   pool_block_t* next = pool_block_header.head;
  389   pool_block_t* prev = NULL;
  390 
  391   while(next != NULL)
  392   {
  393     if(block->size <= next->size)
  394       break;
  395 
  396     prev = next;
  397     next = next->next;
  398   }
  399 
  400   block->prev = prev;
  401   block->next = next;
  402 
  403   if(prev != NULL)
  404     prev->next = block;
  405   else
  406     pool_block_header.head = block;
  407 
  408   if(next != NULL)
  409     next->prev = block;
  410 }
  411 
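      // Insert a block into the global free block list, keeping it sorted by ascending
      // size. The list is lock-free: a node's `acquired` flag must be claimed with an
      // exchange before its `global` link may be rewritten, and the operation restarts
      // if the link changed while the flag was being claimed.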
  412 static void pool_block_push(pool_block_t* block)
  413 {
  414   atomic_store_explicit(&block->acquired, false, memory_order_relaxed);
  415   atomic_fetch_add_explicit(&in_pool_block_global, 1, memory_order_acquire);
  416 #ifdef USE_VALGRIND
  417   ANNOTATE_HAPPENS_AFTER(&in_pool_block_global);
  418 #endif
  419 
  420   while(true)
  421   {
  422     pool_block_t* pos = atomic_load_explicit(&pool_block_global.global,
  423       memory_order_acquire);
  424 #ifdef USE_VALGRIND
  425     ANNOTATE_HAPPENS_AFTER(&pool_block_global.global);
  426 #endif
  427 
  428     // Find an insertion position. The list is sorted and stays sorted after an
  429     // insertion.
  430     pool_block_t* prev = &pool_block_global;
  431     while((pos != NULL) && (block->size > pos->size))
  432     {
  433       prev = pos;
  434       pos = atomic_load_explicit(&pos->global, memory_order_acquire);
  435 #ifdef USE_VALGRIND
  436       ANNOTATE_HAPPENS_AFTER(&pos->global);
  437 #endif
  438     }
  439 
  440     if(atomic_exchange_explicit(&prev->acquired, true, memory_order_acquire))
  441       continue;
  442 
  443 #ifdef USE_VALGRIND
  444     ANNOTATE_HAPPENS_AFTER(&prev->acquired);
  445 #endif
  446 
  447     pool_block_t* check_pos = atomic_load_explicit(&prev->global,
  448       memory_order_relaxed);
  449 
  450     if(pos != check_pos)
  451     {
  452       atomic_store_explicit(&prev->acquired, false, memory_order_relaxed);
  453       continue;
  454     }
  455 
  456     atomic_store_explicit(&block->global, pos, memory_order_relaxed);
  457 #ifdef USE_VALGRIND
  458     ANNOTATE_HAPPENS_BEFORE(&prev->global);
  459 #endif
  460     atomic_store_explicit(&prev->global, block, memory_order_release);
  461 #ifdef USE_VALGRIND
  462     ANNOTATE_HAPPENS_BEFORE(&prev->acquired);
  463 #endif
  464     atomic_store_explicit(&prev->acquired, false, memory_order_release);
  465 
  466     break;
  467   }
  468 
  469 #ifdef USE_VALGRIND
  470   ANNOTATE_HAPPENS_BEFORE(&in_pool_block_global);
  471 #endif
  472   atomic_fetch_sub_explicit(&in_pool_block_global, 1, memory_order_release);
  473 }
  474 
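      // Remove and return the first global free block of at least `size` bytes, or
      // NULL if there is none. After unlinking, wait until no other thread is pushing
      // or pulling before handing the block back, since another thread may still be
      // reading its links.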
  475 static pool_block_t* pool_block_pull(size_t size)
  476 {
  477   pool_block_t* block = atomic_load_explicit(&pool_block_global.global,
  478     memory_order_relaxed);
  479 
  480   if(block == NULL)
  481     return NULL;
  482 
  483   atomic_fetch_add_explicit(&in_pool_block_global, 1, memory_order_acquire);
  484 #ifdef USE_VALGRIND
  485   ANNOTATE_HAPPENS_AFTER(&in_pool_block_global);
  486 #endif
  487 
  488   while(true)
  489   {
  490     block = atomic_load_explicit(&pool_block_global.global,
  491       memory_order_relaxed);
  492 
  493     if(block == NULL)
  494     {
  495       atomic_fetch_sub_explicit(&in_pool_block_global, 1, memory_order_relaxed);
  496       return NULL;
  497     }
  498 
  499     atomic_thread_fence(memory_order_acquire);
  500 #ifdef USE_VALGRIND
  501     ANNOTATE_HAPPENS_AFTER(&pool_block_global.global);
  502 #endif
  503 
  504     pool_block_t* prev = &pool_block_global;
  505 
  506     // Find a big enough block. The list is sorted.
  507     while((block != NULL) && (size > block->size))
  508     {
  509       prev = block;
  510       block = atomic_load_explicit(&block->global, memory_order_acquire);
  511 #ifdef USE_VALGRIND
  512       ANNOTATE_HAPPENS_AFTER(&block->global);
  513 #endif
  514     }
  515 
  516     // No suitable block.
  517     if(block == NULL)
  518     {
  519       atomic_fetch_sub_explicit(&in_pool_block_global, 1, memory_order_relaxed);
  520       return NULL;
  521     }
  522 
  523     if(atomic_exchange_explicit(&prev->acquired, true, memory_order_acquire))
  524       continue;
  525 
  526 #ifdef USE_VALGRIND
  527     ANNOTATE_HAPPENS_AFTER(&prev->acquired);
  528 #endif
  529 
  530     pool_block_t* check_block = atomic_load_explicit(&prev->global,
  531       memory_order_relaxed);
  532 
  533     if((block != check_block) ||
  534       atomic_exchange_explicit(&block->acquired, true, memory_order_relaxed))
  535     {
  536       atomic_store_explicit(&prev->acquired, false, memory_order_relaxed);
  537       continue;
  538     }
  539 
  540     atomic_thread_fence(memory_order_acquire);
  541 #ifdef USE_VALGRIND
  542     ANNOTATE_HAPPENS_AFTER(&block->acquired);
  543 #endif
  544 
  545     pool_block_t* next = atomic_load_explicit(&block->global,
  546       memory_order_relaxed);
  547     atomic_store_explicit(&prev->global, next, memory_order_relaxed);
  548 
  549     // Don't release block.
  550 #ifdef USE_VALGRIND
  551     ANNOTATE_HAPPENS_BEFORE(&prev->acquired);
  552 #endif
  553     atomic_store_explicit(&prev->acquired, false, memory_order_release);
  554 
  555     break;
  556   }
  557 
  558 #ifdef USE_VALGRIND
  559   ANNOTATE_HAPPENS_BEFORE(&in_pool_block_global);
  560 #endif
  561   atomic_fetch_sub_explicit(&in_pool_block_global, 1, memory_order_release);
  562 
  563   // We can't modify block until we're sure no other thread will try to read
  564   // from it (e.g. to check if it can be popped from the global list). To do
  565   // this, we wait until nobody is trying to either push or pull.
  566   while(atomic_load_explicit(&in_pool_block_global, memory_order_relaxed) != 0)
  567     ponyint_cpu_relax();
  568 
  569   atomic_thread_fence(memory_order_acquire);
  570 #ifdef USE_VALGRIND
  571   ANNOTATE_HAPPENS_AFTER(&in_pool_block_global);
  572 #endif
  573 
  574   pony_assert(size <= block->size);
  575 
  576 #ifdef USE_VALGRIND
  577   ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&block->global);
  578   ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&block->acquired);
  579 #endif
  580 
  581   return block;
  582 }
  583 
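      // Satisfy a request for `size` bytes from the thread-local free block list,
      // carving the tail off a larger block where possible, and fall back to pulling
      // a block from the global list if the local list has nothing big enough.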
  584 static void* pool_block_get(size_t size)
  585 {
  586   if(pool_block_header.largest_size >= size)
  587   {
  588     pool_block_t* block = pool_block_header.head;
  589 
  590     while(block != NULL)
  591     {
  592       if(block->size > size)
  593       {
  594         // Use size bytes from the end of the block. This allows us to keep the
  595         // block info inside the block instead of using another data structure.
  596         size_t rem = block->size - size;
  597         block->size = rem;
  598         pool_block_header.total_size -= size;
  599 
  600         if((block->prev != NULL) && (block->prev->size > block->size))
  601         {
  602           // If we are now smaller than the previous block, move us forward in
  603           // the list.
  604           if(block->next == NULL)
  605             pool_block_header.largest_size = block->prev->size;
  606 
  607           pool_block_remove(block);
  608           pool_block_insert(block);
  609         } else if(block->next == NULL) {
  610           pool_block_header.largest_size = rem;
  611         }
  612 
  613         return (char*)block + rem;
  614       } else if(block->size == size) {
  615         if(block->next == NULL)
  616         {
  617           pool_block_header.largest_size =
  618             (block->prev == NULL) ? 0 : block->prev->size;
  619         }
  620 
  621         // Remove the block from the list.
  622         pool_block_remove(block);
  623 
  624         // Return the block pointer itself.
  625         pool_block_header.total_size -= size;
  626         return block;
  627       }
  628 
  629       block = block->next;
  630     }
  631 
  632     // If we didn't find any suitable block, something has gone really wrong.
  633     pony_assert(false);
  634   }
  635 
  636   pool_block_t* block = pool_block_pull(size);
  637 
  638   if(block == NULL)
  639     return NULL;
  640 
  641   if(size == block->size)
  642     return block;
  643 
  644   size_t rem = block->size - size;
  645   block->size = rem;
  646   pool_block_insert(block);
  647   pool_block_header.total_size += rem;
  648 
  649   if(pool_block_header.largest_size < rem)
  650     pool_block_header.largest_size = rem;
  651 
  652   return (char*)block + rem;
  653 }
  654 
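      // Allocate `size` bytes of page-backed memory: reuse an existing free block if
      // possible, mmap the request directly if it is at least POOL_MMAP bytes, and
      // otherwise mmap a new POOL_MMAP region, keeping the unused remainder as a
      // thread-local free block.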
  655 static void* pool_alloc_pages(size_t size)
  656 {
  657   void* p = pool_block_get(size);
  658 
  659   if(p != NULL)
  660     return p;
  661 
  662   // We have no free blocks big enough.
  663   if(size >= POOL_MMAP)
  664     return ponyint_virt_alloc(size);
  665 
  666   pool_block_t* block = (pool_block_t*)ponyint_virt_alloc(POOL_MMAP);
  667   size_t rem = POOL_MMAP - size;
  668 
  669   block->size = rem;
  670   block->next = NULL;
  671   block->prev = NULL;
  672   pool_block_insert(block);
  673   pool_block_header.total_size += rem;
  674   if(pool_block_header.largest_size < rem)
  675     pool_block_header.largest_size = rem;
  676 
  677   return (char*)block + rem;
  678 }
  679 
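      // Return `size` bytes at `p` to the thread-local free block list. Nothing is
      // unmapped here; the memory simply becomes available for later allocations on
      // this thread (or globally, after ponyint_pool_thread_cleanup).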
  680 static void pool_free_pages(void* p, size_t size)
  681 {
  682   if(pool_block_header.total_size >= POOL_MMAP)
  683   {
  684     // TODO: ???
  685   }
  686 
  687   pool_block_t* block = (pool_block_t*)p;
  688   block->prev = NULL;
  689   block->next = NULL;
  690   block->size = size;
  691 
  692   pool_block_insert(block);
  693   pool_block_header.total_size += size;
  694   if(pool_block_header.largest_size < size)
  695     pool_block_header.largest_size = size;
  696 }
  697 
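      // Push the entire thread-local free list for this size class onto the global
      // central list as a single node. On x86 the head pointer carries a counter so
      // the compare-and-swap is protected against ABA.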
  698 static void pool_push(pool_local_t* thread, pool_global_t* global)
  699 {
  700   pool_central_t* p = (pool_central_t*)thread->pool;
  701   p->length = thread->length;
  702 
  703   thread->pool = NULL;
  704   thread->length = 0;
  705 
  706   pony_assert((p->length > 0) && (p->length <= global->count));
  707   TRACK_PUSH((pool_item_t*)p, p->length, global->size);
  708 
  709   pool_central_t* top;
  710 #ifdef PLATFORM_IS_X86
  711   PONY_ABA_PROTECTED_PTR(pool_central_t) cmp;
  712   PONY_ABA_PROTECTED_PTR(pool_central_t) xchg;
  713   cmp.object = global->central.object;
  714   cmp.counter = global->central.counter;
  715 
  716   xchg.object = p;
  717 #else
  718   top = atomic_load_explicit(&global->central, memory_order_relaxed);
  719 #endif
  720 
  721   do
  722   {
  723 #ifdef PLATFORM_IS_X86
  724     top = cmp.object;
  725     xchg.counter = cmp.counter + 1;
  726 #endif
  727     p->central = top;
  728 
  729 #ifdef USE_VALGRIND
  730     ANNOTATE_HAPPENS_BEFORE(&global->central);
  731 #endif
  732   }
  733 #ifdef PLATFORM_IS_X86
  734   while(!bigatomic_compare_exchange_weak_explicit(&global->central, &cmp,
  735     xchg, memory_order_release, memory_order_relaxed));
  736 #else
  737   while(!atomic_compare_exchange_weak_explicit(&global->central, &top, p,
  738     memory_order_release, memory_order_relaxed));
  739 #endif
  740 }
  741 
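      // Pop one central free list for this size class off the global list. The first
      // item is returned to the caller and the remainder becomes the thread-local
      // free list.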
  742 static pool_item_t* pool_pull(pool_local_t* thread, pool_global_t* global)
  743 {
  744   pool_central_t* top;
  745 #ifdef PLATFORM_IS_X86
  746   PONY_ABA_PROTECTED_PTR(pool_central_t) cmp;
  747   PONY_ABA_PROTECTED_PTR(pool_central_t) xchg;
  748   cmp.object = global->central.object;
  749   cmp.counter = global->central.counter;
  750 #else
  751   top = atomic_load_explicit(&global->central, memory_order_relaxed);
  752 #endif
  753   pool_central_t* next;
  754 
  755   do
  756   {
  757 #ifdef PLATFORM_IS_X86
  758     top = cmp.object;
  759 #endif
  760     if(top == NULL)
  761       return NULL;
  762 
  763     atomic_thread_fence(memory_order_acquire);
  764 #ifdef USE_VALGRIND
  765     ANNOTATE_HAPPENS_AFTER(&global->central);
  766 #endif
  767     next = top->central;
  768 #ifdef PLATFORM_IS_X86
  769     xchg.object = next;
  770     xchg.counter = cmp.counter + 1;
  771 #endif
  772   }
  773 #ifdef PLATFORM_IS_X86
  774   while(!bigatomic_compare_exchange_weak_explicit(&global->central, &cmp,
  775     xchg, memory_order_acquire, memory_order_relaxed));
  776 #else
  777   while(!atomic_compare_exchange_weak_explicit(&global->central, &top, next,
  778     memory_order_acquire, memory_order_relaxed));
  779 #endif
  780 
  781   // We need to synchronise twice on global->central to make sure we see every
  782   // previous write to the memory we're going to use before we use it.
  783 #ifdef USE_VALGRIND
  784   ANNOTATE_HAPPENS_AFTER(&global->central);
  785 #endif
  786 
  787   pool_item_t* p = (pool_item_t*)top;
  788 
  789   pony_assert((top->length > 0) && (top->length <= global->count));
  790   TRACK_PULL(p, top->length, global->size);
  791 
  792   thread->pool = p->next;
  793   thread->length = top->length - 1;
  794 
  795   return p;
  796 }
  797 
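      // Per-size-class allocation path: take from the thread-local free list, then
      // try to pull a whole list from the global list, then (for classes smaller than
      // POOL_ALIGN) bump-allocate from the thread-local block, and finally fall back
      // to the page-level allocator.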
  798 static void* pool_get(pool_local_t* pool, size_t index)
  799 {
  800   // Try per-size thread-local free list first.
  801   pool_local_t* thread = &pool[index];
  802   pool_global_t* global = &pool_global[index];
  803   pool_item_t* p = thread->pool;
  804 
  805   if(p != NULL)
  806   {
  807     thread->pool = p->next;
  808     thread->length--;
  809     return p;
  810   }
  811 
  812   if(TRACK_EXTERNAL())
  813   {
  814     // Try to get a new free list from the per-size global list of free lists.
  815     p = pool_pull(thread, global);
  816 
  817     if(p != NULL)
  818       return p;
  819   }
  820 
  821   if(global->size < POOL_ALIGN)
  822   {
  823     // Check our per-size thread-local free block.
  824     if(thread->start < thread->end)
  825     {
  826       void* p = thread->start;
  827       thread->start += global->size;
  828       return p;
  829     }
  830 
  831     // Use the pool allocator to get a block POOL_ALIGN bytes in size
  832     // and treat it as a free block.
  833     char* mem = (char*)pool_get(pool, POOL_ALIGN_INDEX);
  834     thread->start = mem + global->size;
  835     thread->end = mem + POOL_ALIGN;
  836     return mem;
  837   }
  838 
  839   // Pull size bytes from the list of free blocks. Don't use a size-specific
  840   // free block.
  841   return pool_alloc_pages(global->size);
  842 }
  843 
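      // Public entry point: allocate one block from size class `index`. The block is
      // expected to be released with ponyint_pool_free using the same index; pool.h is
      // assumed to provide POOL_ALLOC/POOL_FREE convenience macros over these calls.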
  844 void* ponyint_pool_alloc(size_t index)
  845 {
  846 #ifdef USE_VALGRIND
  847   VALGRIND_DISABLE_ERROR_REPORTING;
  848 #endif
  849 
  850   pool_local_t* pool = pool_local;
  851   void* p = pool_get(pool, index);
  852   TRACK_ALLOC(p, POOL_MIN << index);
  853 
  854 #ifdef USE_VALGRIND
  855   VALGRIND_ENABLE_ERROR_REPORTING;
  856   VALGRIND_HG_CLEAN_MEMORY(p, POOL_SIZE(index));
  857   VALGRIND_MALLOCLIKE_BLOCK(p, POOL_SIZE(index), 0, 0);
  858 #endif
  859 
  860   return p;
  861 }
  862 
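      // Public entry point: return a block to size class `index`. It goes on the
      // thread-local free list; once that list holds `count` items it is pushed to
      // the global list so other threads can reuse it.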
  863 void ponyint_pool_free(size_t index, void* p)
  864 {
  865 #ifdef USE_VALGRIND
  866   VALGRIND_HG_CLEAN_MEMORY(p, POOL_SIZE(index));
  867   VALGRIND_DISABLE_ERROR_REPORTING;
  868 #endif
  869 
  870   pony_assert(index < POOL_COUNT);
  871   TRACK_FREE(p, POOL_MIN << index);
  872 
  873   pool_local_t* thread = &pool_local[index];
  874   pool_global_t* global = &pool_global[index];
  875 
  876   if(thread->length == global->count)
  877     pool_push(thread, global);
  878 
  879   pony_assert(thread->length < global->count);
  880   pool_item_t* lp = (pool_item_t*)p;
  881   lp->next = thread->pool;
  882   thread->pool = lp;
  883   thread->length++;
  884 
  885 #ifdef USE_VALGRIND
  886   VALGRIND_ENABLE_ERROR_REPORTING;
  887   VALGRIND_FREELIKE_BLOCK(p, 0);
  888 #endif
  889 }
  890 
  891 static void* pool_alloc_size(size_t size)
  892 {
  893 #ifdef USE_VALGRIND
  894   VALGRIND_DISABLE_ERROR_REPORTING;
  895 #endif
  896 
  897   void* p = pool_alloc_pages(size);
  898   TRACK_ALLOC(p, size);
  899 
  900 #ifdef USE_VALGRIND
  901   VALGRIND_ENABLE_ERROR_REPORTING;
  902   VALGRIND_HG_CLEAN_MEMORY(p, size);
  903   VALGRIND_MALLOCLIKE_BLOCK(p, size, 0, 0);
  904 #endif
  905 
  906   return p;
  907 }
  908 
  909 void* ponyint_pool_alloc_size(size_t size)
  910 {
  911   size_t index = ponyint_pool_index(size);
  912 
  913   if(index < POOL_COUNT)
  914     return ponyint_pool_alloc(index);
  915 
  916   size = ponyint_pool_adjust_size(size);
  917   void* p = pool_alloc_size(size);
  918 
  919   return p;
  920 }
  921 
  922 static void pool_free_size(size_t size, void* p)
  923 {
  924 #ifdef USE_VALGRIND
  925   VALGRIND_HG_CLEAN_MEMORY(p, size);
  926   VALGRIND_DISABLE_ERROR_REPORTING;
  927 #endif
  928 
  929   TRACK_FREE(p, size);
  930   pool_free_pages(p, size);
  931 
  932 #ifdef USE_VALGRIND
  933   VALGRIND_ENABLE_ERROR_REPORTING;
  934   VALGRIND_FREELIKE_BLOCK(p, 0);
  935 #endif
  936 }
  937 
  938 void ponyint_pool_free_size(size_t size, void* p)
  939 {
  940   size_t index = ponyint_pool_index(size);
  941 
  942   if(index < POOL_COUNT)
  943     return ponyint_pool_free(index, p);
  944 
  945   size = ponyint_pool_adjust_size(size);
  946   pool_free_size(size, p);
  947 }
  948 
  949 void* ponyint_pool_realloc_size(size_t old_size, size_t new_size, void* p)
  950 {
  951   // Can only reuse the old pointer if the old index/adjusted size is equal to
  952   // the new one, not greater.
  953 
  954   if(p == NULL)
  955     return ponyint_pool_alloc_size(new_size);
  956 
  957   size_t old_index = ponyint_pool_index(old_size);
  958   size_t new_index = ponyint_pool_index(new_size);
  959   size_t old_adj_size = 0;
  960 
  961   void* new_p;
  962 
  963   if(new_index < POOL_COUNT)
  964   {
  965     if(old_index == new_index)
  966       return p;
  967 
  968     new_p = ponyint_pool_alloc(new_index);
  969   } else {
  970     size_t new_adj_size = ponyint_pool_adjust_size(new_size);
  971 
  972     if(old_index >= POOL_COUNT)
  973     {
  974       old_adj_size = ponyint_pool_adjust_size(old_size);
  975 
  976       if(old_adj_size == new_adj_size)
  977         return p;
  978     }
  979 
  980     new_p = pool_alloc_size(new_adj_size);
  981   }
  982 
  983   memcpy(new_p, p, old_size < new_size ? old_size : new_size);
  984 
  985   if(old_index < POOL_COUNT)
  986     ponyint_pool_free(old_index, p);
  987   else
  988     pool_free_size(old_adj_size, p);
  989 
  990   return new_p;
  991 }
  992 
  993 void ponyint_pool_thread_cleanup()
  994 {
  995   for(size_t index = 0; index < POOL_COUNT; index++)
  996   {
  997     pool_local_t* thread = &pool_local[index];
  998     pool_global_t* global = &pool_global[index];
  999 
 1000     while(thread->start < thread->end)
 1001     {
 1002       if(thread->length == global->count)
 1003         pool_push(thread, global);
 1004 
 1005       pool_item_t* item = (pool_item_t*)thread->start;
 1006       thread->start += global->size;
 1007       item->next = thread->pool;
 1008       thread->pool = item;
 1009       thread->length++;
 1010     }
 1011 
 1012     if(thread->length > 0)
 1013       pool_push(thread, global);
 1014   }
 1015 
 1016   pool_block_t* block = pool_block_header.head;
 1017   while(block != NULL)
 1018   {
 1019     pool_block_t* next = block->next;
 1020     pool_block_remove(block);
 1021     pool_block_push(block);
 1022     block = next;
 1023   }
 1024 
 1025   pool_block_header.total_size = 0;
 1026   pool_block_header.largest_size = 0;
 1027 }
 1028 
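      // Map an allocation size to its size class index. Assuming POOL_MIN_BITS = 5
      // (POOL_MIN = 32): sizes 1..32 map to 0, 33..64 to 1, 65..128 to 2, and so on.
      // The !(size & (size - 1)) term keeps exact powers of two in their own class
      // rather than the next one up.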
 1029 size_t ponyint_pool_index(size_t size)
 1030 {
 1031 #ifdef PLATFORM_IS_ILP32
 1032 #define BITS (32 - POOL_MIN_BITS)
 1033 #else
 1034 #define BITS (64 - POOL_MIN_BITS)
 1035 #endif
 1036 
 1037   // The condition is in that order for better branch prediction: non-zero pool
 1038   // indices are more likely than zero pool indices.
 1039   if(size > POOL_MIN)
 1040     return (size_t)BITS - __pony_clzzu(size) - (!(size & (size - 1)));
 1041 
 1042   return 0;
 1043 }
 1044 
 1045 size_t ponyint_pool_used_size(size_t size)
 1046 {
 1047   size_t index = ponyint_pool_index(size);
 1048 
 1049   if(index < POOL_COUNT)
 1050     return POOL_SIZE(index);
 1051 
 1052   return ponyint_pool_adjust_size(size);
 1053 }
 1054 
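      // Round an oversized request (one that does not fit a size class) up to a
      // multiple of POOL_ALIGN. For example, assuming POOL_ALIGN = 1024, a request of
      // 1048577 bytes is adjusted to 1049600.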
 1055 size_t ponyint_pool_adjust_size(size_t size)
 1056 {
 1057   if((size & POOL_ALIGN_MASK) != 0)
 1058     size = (size & ~POOL_ALIGN_MASK) + POOL_ALIGN;
 1059 
 1060   // If rounding up wrapped `size` to 0 we've overflowed `size_t`; clamp to the max.
 1061   if(size == 0)
 1062     size = size - 1;
 1063 
 1064   return size;
 1065 }