"Fossies" - the Fresh Open Source Software Archive

Member "openpa-1.0.4/src/opa_queue.h" (5 Dec 2012, 14699 Bytes) of package /linux/misc/openpa-1.0.4.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "opa_queue.h" see the Fossies "Dox" file reference documentation.

    1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
    2 /*  
    3  *  (C) 2008 by Argonne National Laboratory.
    4  *      See COPYRIGHT in top-level directory.
    5  */
    6 
    7 /* Implements a fast, lockfree, multi-producer, single-consumer queue.  It's
    8  * important to note the *single-consumer* piece of this, since multithreaded
    9  * consumption will surely lead to data corruption and/or other problems. */
   10 
   11 #ifndef OPA_QUEUE_H_INCLUDED
   12 #define OPA_QUEUE_H_INCLUDED
   13 
   14 #include "opa_primitives.h"
   15 #ifdef OPA_HAVE_STDDEF_H
   16 #include <stddef.h>
   17 #endif /* OPA_HAVE_STDDEF_H */
   18 
   19 /* This value is used to indicate NULL in the OPA_Shm_asymm_base_addr
   20    variable.  It is non-zero because one of the likely base addresses is zero
   21    (indicating memory is actually symmetrically mapped).  The value 64 was used
   22    because it is unlikely that mmap will choose an address this low for a
   23    mapping.  */
   24 /* XXX DJG TODO put some conditionally compiled error checking in that uses this */
   25 #define OPA_SHM_ASYMM_NULL_VAL    64
   26 
   27 extern char *OPA_Shm_asymm_base_addr;
   28 
   29 /* Used to initialize the base address for relative pointers.  This interface
   30    assumes that there is only one shared memory segment.  If this turns out to
   31    not be the case in the future, we should probably add support for multiple
   32    shm segments.
   33    
   34    This function will return an error if it has already been called. */
   35 int OPA_Shm_asymm_init(char *base);
   36 
/* Relative addressing macros.  These are for manipulating addresses relative
   to the start of a shared memory region. */
/* the distinguished "null" offset value for relative pointers */
#define OPA_SHM_REL_NULL (0x0)
/* nonzero iff rel_ptr holds the relative null value */
#define OPA_SHM_IS_REL_NULL(rel_ptr) (OPA_load_ptr(&(rel_ptr).offset) == OPA_SHM_REL_NULL)
/* atomically set rel_ptr to the relative null value */
#define OPA_SHM_SET_REL_NULL(rel_ptr) (OPA_store_ptr(&(rel_ptr).offset, OPA_SHM_REL_NULL))
/* nonzero iff both relative pointers hold the same offset */
#define OPA_SHM_REL_ARE_EQUAL(rel_ptr1, rel_ptr2) \
    (OPA_load_ptr(&(rel_ptr1).offset) == OPA_load_ptr(&(rel_ptr2).offset))
   44 
   45 /* This structure exists such that it is possible to expand the expressiveness
   46    of a relative address at some point in the future.  It also provides a
   47    modicum of type safety to help prevent certain flavors of errors.
   48    
   49    For example, instead of referencing an offset from a global base address, it
   50    might make sense for there to be multiple base addresses.  These base
   51    addresses could correspond to the start of a segment or region of shared
   52    memory.  This structure could store the segment number that is used to lookup
   53    a base address in a non-shared table.  Note that you would have to be very
   54    careful about all of this because if you add the segment number as a separate
   55    field you can no longer (compare and) swap a relative address atomically.  So
   56    you'll either have to shave bits from the pointer or make some sort of
   57    requirement that relative addresses can only be swapped within the same
   58    segment.  */
/* A relative address: a byte offset from OPA_Shm_asymm_base_addr, stored as
   an atomic pointer.  Wrapping the offset in a struct gives a modicum of type
   safety and leaves room for future expansion (see discussion above). */
typedef struct OPA_Shm_rel_addr_t {
    OPA_ptr_t offset; /* offset from the shm base address, accessed atomically */
} OPA_Shm_rel_addr_t;
   62 
   63 /* converts a relative pointer to an absolute pointer */
   64 static _opa_inline
   65 void *OPA_Shm_rel_to_abs(OPA_Shm_rel_addr_t r)
   66 {
   67     void *offset = OPA_load_ptr(&r.offset);
   68     OPA_assert((size_t)OPA_Shm_asymm_base_addr != OPA_SHM_ASYMM_NULL_VAL);
   69     return (void*)(OPA_Shm_asymm_base_addr + (size_t)offset);
   70 }
   71 
   72 /* converts an absolute pointer to a relative pointer */
   73 static _opa_inline
   74 OPA_Shm_rel_addr_t OPA_Shm_abs_to_rel(void *a)
   75 {
   76     OPA_Shm_rel_addr_t ret;
   77 
   78     OPA_assert((size_t)OPA_Shm_asymm_base_addr != OPA_SHM_ASYMM_NULL_VAL);
   79     OPA_store_ptr(&ret.offset, (void*)((size_t)a - (size_t)OPA_Shm_asymm_base_addr));
   80     return ret;
   81 }
   82 
/* Atomically swaps the relative pointer at *addr with newv and returns the
   previous value.  Implemented as an atomic swap on the underlying offset. */
static _opa_inline
OPA_Shm_rel_addr_t OPA_Shm_swap_rel(OPA_Shm_rel_addr_t *addr, OPA_Shm_rel_addr_t newv) {
    OPA_Shm_rel_addr_t oldv;
    OPA_store_ptr(&oldv.offset, OPA_swap_ptr(&addr->offset, OPA_load_ptr(&newv.offset)));
    return oldv;
}
   89 
/* Compare the relative pointer to (relative) null and swap if equal.  Prevents
   the guts of the _rel_addr_t abstraction from bleeding up into the
   enqueue/dequeue operations.

   Returns the value previously stored at *addr; the swap to REL_NULL occurred
   iff that value equals oldv. */
static _opa_inline
OPA_Shm_rel_addr_t OPA_Shm_cas_rel_null(OPA_Shm_rel_addr_t *addr, OPA_Shm_rel_addr_t oldv) {
    OPA_Shm_rel_addr_t prev;
    OPA_store_ptr(&prev.offset, OPA_cas_ptr(&(addr->offset), OPA_load_ptr(&oldv.offset), (void*)OPA_SHM_REL_NULL));
    return prev;
}
   99 
  100 /* The shadow head pointer makes this queue implementation perform much better
  101    than a standard queue.  Unfortunately, it also makes it a bit non-intuitive
  102    when read the code.  The following is an excerpt from "Design and Evaluation
  103    of Nemesis,  a Scalable, Low-Latency, Message-Passing Communication
  104    Subsystem" by D. Buntinas, G.  Mercier, and W. Gropp that gives an
  105    explanation:
  106 
  107       A process must access both the head and tail of the queue when it is
  108       enqueuing an element on an empty queue or when it is dequeuing an element
  109       that is the last element in the queue. In these cases, if the head and
  110       tail were in the same cache line, only one L2 cache miss would be
  111       encountered. If the queue has more elements in it, however, then the
  112       enqueuer only needs to access the tail, and the dequeuer only needs to
  113       access the head. If the head and tail were in the same cache line, then
  114       there would be L2 misses encountered as a result of false sharing each
  115       time a process enqueues an element after another has been dequeued from
  116       the same queue, and vice versa. In this case it would be better if the
  117       head and tail were in separate cache lines.
  118 
  119       Our solution is to put the head and tail in the same cache line and have a
  120       shadow head pointer in a separate cache line. The shadow head is
  121       initialized to NULL. The dequeuer uses the shadow head in place of the
  122       real head except when the shadow head is NULL, meaning that the queue has
  123       become empty. If the shadow head is NULL when the dequeuer tries to
  124       dequeue, it checks the value of the real head. If the real head is not
  125       NULL, meaning that an element has been enqueued on the queue since the
  126       last time the queue became empty, the dequeuer initializes its shadow head
  127       to the value of the real head and sets the real head to NULL. In this way,
  128       only one L2 cache miss is encountered when enqueuing onto an empty queue
  129       or dequeuing from a queue with one element. And because the tail and
  130       shadow head are in separate cache lines, there are no L2 cache misses from
  131       false sharing. 
  132 
  133       We found that using a shadow head pointer reduced one-way latency by about
  134       200 ns on a dual 2 GHz Xeon node.
  135 */
  136 
  137 /* Pick an arbitrary cacheline size for now, we can setup a mechanism to detect
  138    it at build time later on.  This should work well on most intel systems at
  139    the very least. */
  140 #define OPA_QUEUE_CACHELINE_PADDING 128
  141 
/* All absolute and relative pointers point to the start of the enclosing element. */
typedef struct OPA_Queue_info_t {
    OPA_Shm_rel_addr_t head; /* holds the offset pointer, not the abs ptr */
    OPA_Shm_rel_addr_t tail; /* holds the offset pointer, not the abs ptr */
    /* padding places shadow_head in a separate cache line from head/tail to
       avoid false sharing between enqueuer and dequeuer (see the shadow-head
       discussion above) */
    char padding1[OPA_QUEUE_CACHELINE_PADDING-2*sizeof(OPA_Shm_rel_addr_t)];
    OPA_Shm_rel_addr_t  shadow_head; /* holds the offset pointer, not the abs ptr */
    /* pad out the remainder of shadow_head's cache line */
    char padding2[OPA_QUEUE_CACHELINE_PADDING-sizeof(OPA_Shm_rel_addr_t)];
} OPA_Queue_info_t;
  150 
/* Using this header struct even though it's just one element gives us the
   opportunity to vary the implementation more easily in the future without
   updating all callers.  Embed this struct in the client element type
   (linux-kernel-list style) and name its member in the enqueue/dequeue
   macros below. */
typedef struct OPA_Queue_element_hdr_t {
    OPA_Shm_rel_addr_t next; /* holds the offset pointer, not the abs ptr */
} OPA_Queue_element_hdr_t;
  157 
  158 
  159 /* Used to initialize a queue structure. */
  160 void OPA_Queue_init(OPA_Queue_info_t *qhead);
  161 
  162 /* Used to initialize a queue element header. */
  163 void OPA_Queue_header_init(OPA_Queue_element_hdr_t *hdr);
  164 
  165 static _opa_inline
  166 int OPA_Queue_is_empty(OPA_Queue_info_t *qhead)
  167 {
  168     int __ret = 0;
  169     if (OPA_SHM_IS_REL_NULL (qhead->shadow_head)) {
  170         if (OPA_SHM_IS_REL_NULL(qhead->head)) {
  171             __ret = 1;
  172         }
  173         else {
  174             qhead->shadow_head = qhead->head;
  175             OPA_SHM_SET_REL_NULL(qhead->head); /* reset it for next time */
  176         }
  177     }
  178     return __ret;
  179 }
  180 
  181 /* Returns a pointer to the element at the head of the queue.  The current
  182    implementation of these queue algorithms imposes several notable
  183    restrictions on the use of this function:
  184     - The caller must currently hold the critical section, just as if you were
  185       calling OPA_Queue_dequeue.  Failure to do could easily produce incorrect
  186       results.
  187     - OPA_Queue_is_empty must be called on this queue prior to calling this
  188       function.  Furthermore, there must be no intervening calls to any
  189       OPA_Queue_* functions (for this queue) between _is_empty and _peek_head.
  190       Failure to do so will produce incorrect results.
  191 
  192    This operation is effectively the same as the dequeue operation (insofar as
  193    it shares the same calling restrictions) but it does not disturb the actual
  194    contents of the queue. */
  195 static _opa_inline
  196 void *OPA_Queue_peek_head(OPA_Queue_info_t *qhead_ptr)
  197 {
  198     OPA_assert(qhead_ptr != NULL);
  199 
  200     if (OPA_SHM_IS_REL_NULL(qhead_ptr->shadow_head)) {
  201         return NULL;
  202     }
  203     return OPA_Shm_rel_to_abs(qhead_ptr->shadow_head);
  204 }
  205 
  206 /* This macro atomically enqueues an element (elt for short) into the queue
  207    indicated by qhead_ptr.  You need to pass several arguments:
  208      qhead_ptr - a pointer to a OPA_Queue_info_t structure to which the
  209                  element should be enqueued
  210      elt_ptr - a pointer to an element structure type that is to be enqueued
  211      elt_type - The base type of elt_ptr.  That is, if elt_ptr is a
  212                 '(struct example_t *)' then elt_type is 'struct example_t'.
  213      elt_hdr_field - This is the member name of elt_type that is a
  214                      OPA_Queue_element_hdr_t.
  215 
  216     This queue implementation is loosely modeled after the linked lists used in
  217     the linux kernel.  You put the list header structure inside of the client
  218     structure, rather than the other way around.
  219    */
/* Producer side.  Lock-free and multi-producer safe: the only contended
   atomic operation is the swap on the tail pointer. */
#define OPA_Queue_enqueue(qhead_ptr, elt_ptr, elt_type, elt_hdr_field)    \
do {                                                                      \
    OPA_Shm_rel_addr_t r_prev;                                            \
    OPA_Shm_rel_addr_t r_elt = OPA_Shm_abs_to_rel(elt_ptr);               \
                                                                          \
    /* the new element will be last: terminate its next link */           \
    OPA_SHM_SET_REL_NULL((elt_ptr)->elt_hdr_field.next);                  \
                                                                          \
    /* make the element's contents globally visible before it is          \
       published by the tail swap below */                                \
    OPA_write_barrier();                                                  \
                                                                          \
    /* atomically publish: install ourselves as the new tail */           \
    r_prev = OPA_Shm_swap_rel(&((qhead_ptr)->tail), r_elt);               \
                                                                          \
    if (OPA_SHM_IS_REL_NULL(r_prev)) {                                    \
        /* queue was empty: we are also the new head */                   \
        (qhead_ptr)->head = r_elt;                                        \
    }                                                                     \
    else {                                                                \
        /* link the previous tail element to us */                        \
        elt_type *abs_prev = (elt_type *)OPA_Shm_rel_to_abs(r_prev);      \
        abs_prev->elt_hdr_field.next = r_elt;                             \
    }                                                                     \
} while (0)
  239 
  240 /* This macro atomically dequeues an element (elt for short) from the queue
  241    indicated by qhead_ptr.  You need to pass several arguments:
  242      qhead_ptr - a pointer to a OPA_Queue_info_t structure to which the
  243                  element should be dequeued
  244      elt_ptr - A pointer to an element structure type that should be populated
  245                with the dequeued value.  Must be an lvalue.
  246      elt_type - The base type of elt_ptr.  That is, if elt_ptr is a
  247                 '(struct example_t *)' then elt_type is 'struct example_t'.
  248      elt_hdr_field - This is the member name of elt_type that is a
  249                      OPA_Queue_element_hdr_t.
  250 
  251     This queue implementation is loosely modeled after the linked lists used in
  252     the linux kernel.  You put the list header structure inside of the client
  253     structure, rather than the other way around.
  254 
  255     NOTE: you must *always* call _is_empty() prior to this function */
/* Consumer side.  Single-consumer only; OPA_Queue_is_empty must have been
   called first (it refreshes the shadow head that this macro reads). */
#define OPA_Queue_dequeue(qhead_ptr, elt_ptr, elt_type, elt_hdr_field)        \
do {                                                                          \
    elt_type *_e;                                                             \
    OPA_Shm_rel_addr_t _r_e;                                                  \
                                                                              \
    /* consume from the shadow head (see shadow-head discussion above) */     \
    _r_e = (qhead_ptr)->shadow_head;                                          \
    _e = OPA_Shm_rel_to_abs(_r_e);                                            \
                                                                              \
    if (!OPA_SHM_IS_REL_NULL(_e->elt_hdr_field.next)) {                       \
        /* more elements behind us: just advance the shadow head */           \
        (qhead_ptr)->shadow_head = _e->elt_hdr_field.next;                    \
    }                                                                         \
    else {                                                                    \
        /* we appear to be the last element; queue becomes empty */           \
        OPA_Shm_rel_addr_t old_tail;                                          \
                                                                              \
        OPA_SHM_SET_REL_NULL((qhead_ptr)->shadow_head);                       \
                                                                              \
        /* if the tail still points at us, atomically null it out */          \
        old_tail = OPA_Shm_cas_rel_null(&((qhead_ptr)->tail), _r_e);          \
                                                                              \
        if (!OPA_SHM_REL_ARE_EQUAL(old_tail, _r_e)) {                         \
            /* a producer swapped the tail after us but has not yet linked    \
               its element to ours: spin until the link appears */            \
            while (OPA_SHM_IS_REL_NULL(_e->elt_hdr_field.next)) {             \
                OPA_busy_wait();                                              \
            }                                                                 \
            (qhead_ptr)->shadow_head = _e->elt_hdr_field.next;                \
        }                                                                     \
    }                                                                         \
    OPA_SHM_SET_REL_NULL(_e->elt_hdr_field.next);                             \
    OPA_read_barrier();                                                       \
    elt_ptr = _e;                                                             \
} while (0)
  285 
  286 #endif /* OPA_QUEUE_H_INCLUDED */