"Fossies" - the Fresh Open Source Software Archive

Member "vfplot-1.0.15/src/libvfplot/dim2.c" (7 Nov 2019, 37698 Bytes) of package /linux/privat/vfplot-1.0.15.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "dim2.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.0.14_vs_1.0.15.

    1 /*
    2   dim2.c
    3   vfplot adaptive plot,  dimension 2
    4   J.J.Green 2007,  2012
    5 */
    6 
    7 /*
    8   the _GNU_SOURCE needed to enable the use of strsignal()
    9   which is a gnu extension to POSIX.  On non-gnu systems
   10   this will have no effect and the strsignal() will be
   11   ifdef-ed out by configure anyway
   12 */
   13 
   14 #define _GNU_SOURCE
   15 
   16 #ifdef HAVE_CONFIG_H
   17 #include <config.h>
   18 #endif
   19 
   20 #include <math.h>
   21 #include <stdlib.h>
   22 #include <string.h>
   23 
   24 #ifdef HAVE_PTHREAD_H
   25 #include <pthread.h>
   26 #ifdef WITH_PTHREAD_EXTRA
   27 #include "pthreadextra.h"
   28 #endif
   29 #endif
   30 
   31 #include "dim2.h"
   32 
   33 #include "constants.h"
   34 #include "error.h"
   35 #include "evaluate.h"
   36 #include "contact.h"
   37 #include "potential.h"
   38 #include "rmdup.h"
   39 #include "flag.h"
   40 #include "macros.h"
   41 #include "status.h"
   42 
   43 #include <kdtree.h>
   44 
   45 #ifdef HAVE_SIGNAL_H
   46 #include <signal.h>
   47 #endif
   48 
   49 
   50 #ifndef INFINITY
   51 #define INFINITY HUGE_VALF
   52 #endif
   53 
   54 /*
   55   the schedule defines a series of parameters
   56   varied over the course of the dynamics run
   57   (essentially an annealing schedule) and applied
   58   over a subset of the particles.
   59 
   60   charge : the charge is multipled by factor
   61   mass   : likewise
   62   rt     : potential truncation radius (from slj.h)
   63   rd     : deletion radius
   64   dmax   : maximum number deleted
   65 */
   66 
   67 typedef struct
   68 {
   69   double charge, mass, rd, rt;
   70   size_t dmax;
   71 } schedule_t;
   72 
   73 /* particle system */
   74 
   75 #define PARTICLE_FIXED  FLAG(0)
   76 #define PARTICLE_STALE  FLAG(1)
   77 
   78 typedef struct
   79 {
   80   flag_t flag;
   81   double charge, mass;
   82   double major, minor;
   83 #ifdef MINPW
   84   double minpw;
   85 #endif
   86   m2_t M;
   87   vector_t v, dv, F;
   88 } particle_t;
   89 
   90 /*
   91    we use pthreads for force accumulation and use
   92    these structures to pass arguments to the threads.
   93 
   94    tdata_t has the thread number and the offset and
   95    size of the edges array it processes. F and flag
   96    are private arrays of the forces and flags for the
   97    dim2 ellipses (so of size n2). tshared_t holds the
   98    shared data used by all threads.
   99 
  100    In the case that threads are not supported then
  101    we still used this structure, but all of the
  102    thread-specific parts of the structure are ifdef-ed
  103    out (and are, obviously, not used in this case)
  104 */
  105 
  106 #ifdef HAVE_PTHREAD_H
  107 #define PTHREAD_FORCES
  108 #endif
  109 
  110 typedef struct
  111 {
  112   int *edge;
  113   particle_t *p;
  114   double rd, rt;
  115   size_t n1, n2;
  116 
  117 #ifdef PTHREAD_FORCES
  118   pthread_mutex_t mutex;
  119   pthread_barrier_t barrier[2];
  120   bool terminate;
  121 #endif
  122 
  123 } tshared_t;
  124 
  125 typedef struct
  126 {
  127   size_t id, off, size;
  128   vector_t *F;
  129   flag_t *flag;
  130   tshared_t *shared;
  131 } tdata_t;
  132 
  133 static int subdivide(size_t, size_t, size_t*, size_t*);
  134 static void* forces(tdata_t*);
  135 
  136 #ifdef PTHREAD_FORCES
  137 
  138 static void* forces_worker(tdata_t*);
  139 static int get_terminate_status(pthread_mutex_t*, bool*, bool*, int);
  140 static int set_terminate_status(pthread_mutex_t*, bool*, bool);
  141 
  142 #endif
  143 
  144 /* temporary pw-distance struct */
  145 
  146 typedef struct
  147 {
  148   int id;
  149   double d;
  150 } pw_t;
  151 
  152 /* expand p so it fits n1+n2 particles (and put its new size in na) */
  153 
  154 static int ensure_alloc(int n1, int n2, particle_t **pp, int *na)
  155 {
  156   particle_t *p;
  157 
  158   if (n1+n2 <= *na)
  159     return 0;
  160 
  161   if ((p = realloc(*pp, (n1+n2)*sizeof(particle_t))) == NULL)
  162     return 1;
  163 
  164   *pp = p;
  165   *na = n1+n2;
  166 
  167   return 0;
  168 }
  169 
  170 /*
  171    signal handler
  172 
  173    the dim2 iteration can take a while, so we install
  174    a handler for SIGINT (control-c) which schedules
  175    a graceful exit at the end of the next cycle.
  176 
  177    this needs a file-scope value exitflag which is
  178    checked at the end of each outer iteration
  179 */
  180 
  181 #ifdef HAVE_SIGNAL_H
  182 
  183 static int exitflag = 0;
  184 
  185 static void setexitflag(int sig)
  186 {
  187   exitflag = 1;
  188 
  189 #ifdef HAVE_STRSIGNAL
  190   fprintf(stderr,
  191       "[signal] caught %i (%s), halt scheduled\n",
  192       sig, strsignal(sig));
  193 #else
  194   fprintf(stderr, "[signal] caught %i, halt scheduled\n", sig);
  195 #endif
  196 }
  197 
  198 #endif
  199 
  200 static int neighbours(particle_t*, int, int, int**, int*);
  201 static nbs_t* nbs_populate(int, int*, int, particle_t*);
  202 
  203 /* compares particles by flag */
  204 
  205 static int ptcomp(particle_t *a, particle_t *b)
  206 {
  207   return
  208     GET_FLAG(a->flag, PARTICLE_STALE) -
  209     GET_FLAG(b->flag, PARTICLE_STALE);
  210 }
  211 
  212 /* compare pw_ts by d */
  213 
  214 static int pwcomp(pw_t *a, pw_t *b)
  215 {
  216   return a->d > b->d;
  217 }
  218 
  219 /*
  220   for t in [0, 1], defines a spline which is constant z0 in [0, t0],
  221   constant z1 in [t1, 1], and a sinusoid inbetween.
  222 */
  223 
  224 static double sinspline(double t, double t0, double z0, double t1, double z1)
  225 {
  226   if (t<t0) return z0;
  227   if (t>t1) return z1;
  228 
  229   return z0 + (z1-z0)*(1.0 - cos(M_PI*(t-t0)/(t1-t0)))/2.0;
  230 }
  231 
  232 /*
  233   phases of the schedule
  234   - start, for charge buildup
  235   - clean, delete overlappers
  236   - detrunc, de-truncate the potential
  237 */
  238 
  239 #define START_T0 0.0
  240 #define START_T1 0.1
  241 
  242 #define CONTAIN_T0 0.0
  243 #define CONTAIN_T1 0.8
  244 
  245 #define CLEAN_T0 0.3
  246 #define CLEAN_T1 0.6
  247 
  248 #define CLEAN_RADIUS 0.5
  249 #define CLEAN_DELMAX 128
  250 
  251 #define DETRUNC_T0 0.6
  252 #define DETRUNC_T1 0.7
  253 
  254 #define DETRUNC_R0 0.90
  255 #define DETRUNC_R1 0.00
  256 
  257 /* breakpoints defined in terms of these */
  258 
  259 #define BREAK_SUPER     (0.95*CLEAN_T0)
  260 #define BREAK_MIDCLEAN  ((CLEAN_T1+CLEAN_T0)/2.0)
  261 #define BREAK_POSTCLEAN (1.05*CLEAN_T1)
  262 
  263 /*
  264    the boundary schedule is functionally moot (only
  265    the charge is used, and that doesn't vary over
  266    time) but it does provide a certain symmetry in
  267    the handling of the boundary and interior ellipses.
  268    perhaps remove it at some point
  269 */
  270 
  271 #define BOUNDARY_CHARGE 2.5
  272 
  273 static void boundary_schedule(double t, schedule_t *s)
  274 {
  275   s->mass   = 1.0;
  276   s->charge = BOUNDARY_CHARGE;
  277   s->rd     = 0.0;
  278   s->rt     = 0.0;
  279   s->dmax   = 0;
  280 }
  281 
  282 static void interior_schedule(double t, schedule_t *s)
  283 {
  284   s->mass   = 1.0;
  285   s->charge = sinspline(t,
  286             START_T0, 0.0,
  287             START_T1, 1.0);
  288   s->rd     = sinspline(t,
  289             CLEAN_T0, 0.0,
  290             CLEAN_T1, CLEAN_RADIUS);
  291   s->rt     = sinspline(t,
  292             DETRUNC_T0, DETRUNC_R0,
  293             DETRUNC_T1, DETRUNC_R1);
  294   s->dmax   = sinspline(t,
  295             CLEAN_T0, 0.0,
  296             CLEAN_T1, CLEAN_DELMAX);
  297 }
  298 
  299 static void schedule(double t, schedule_t *sB, schedule_t *sI)
  300 {
  301   boundary_schedule(t, sB);
  302   interior_schedule(t, sI);
  303 }
  304 
  305 /* perram-werthiem distance failures, always a bug */
  306 
  307 static void pw_error_p(size_t k, particle_t p)
  308 {
  309   fprintf(stderr, "  e%i (%g, %g), [%g, %g, %g]\n",
  310       (int)k,
  311       p.v.x, p.v.y,
  312       M2A(p.M), M2B(p.M), M2D(p.M));
  313 }
  314 
  315 static void pw_error(vector_t rAB, particle_t p1, particle_t p2)
  316 {
  317   fprintf(stderr, "BUG: pw fails, rAB = (%g, %g)\n", rAB.x, rAB.y);
  318   pw_error_p(1, p1);
  319   pw_error_p(2, p2);
  320 }
  321 
  322 /*
  323   set mass & charge on a particle p, for a circle equal to the
  324   radius by mutiplied by a time dependant constant
  325 */
  326 
  327 static void set_mq(particle_t *p, double mC, double qC)
  328 {
  329   double r =
  330 #if 0
  331     sqrt(p->minor * p->major)
  332 #else
  333     /*
  334       here one can play with various scalings, if these
  335       are circles then the above is right (ie, large and
  336       small exert the same pressure) but for ellipses
  337       r ~ major might be best (by anisotropy the pressure
  338       is felt along the major axis). Anything superlinear
  339       should favour the large against the small, a bit of
  340       this might be desirable. so mayby pow(p->major, 1.2)
  341       or something?
  342     */
  343     p->major
  344 #endif
  345     ;
  346 
  347   p->mass   = r * mC;
  348   p->charge = r * qC;
  349 }
  350 
  351 /*
  352   the force function is parameterised by x and x0,
  353   it is constant for d<x, linear from x<d<1 and zero
  354   for 1<d.  When x=x0, this constant is 1, and the
  355   gradient from x<d<1 is constant.
  356 
  357   the idea is that we have plateau up to x and then
  358   a linear spline to zero, but as we reduce x the
  359   plateau from 0 to x gets higher
  360 
  361   this should be moved to potential.c FIXME
  362 */
  363 
  364 static double force(double d, double x, double x0)
  365 {
  366   double K = 1/(1-x0);
  367 
  368   if (d < x) return K*(1-x);
  369   if (d < 1) return K*(1-d);
  370   return 0;
  371 }
  372 
  373 /* utility struct for kinetic energy drop -k option */
  374 
  375 typedef struct {
  376   int iter;
  377   bool done;
  378   double kedB, drop;
  379 } wait_t;
  380 
  381 extern int dim2(dim2_opt_t *opt, size_t *nA, arrow_t **pA, size_t *nN, nbs_t **pN)
  382 {
  383   int err;
  384   vector_t zero = {0, 0};
  385 
  386   /* timestep */
  387 
  388   double dt = opt->v.place.adaptive.timestep;
  389 
  390   /* initialise schedules */
  391 
  392   schedule_t schedB, schedI;
  393 
  394   schedule(0.0, &schedB, &schedI);
  395 
  396   /*
  397     n1 number of dim 0/1 arrows
  398     n2 number of dim 2 arrows
  399     na number of arrows allocated
  400   */
  401 
  402   int n1, n2, na;
  403 
  404   n2 = 0;
  405   n1 = na = *nA;
  406 
  407   /* domain dimensions */
  408 
  409   double
  410     w  = bbox_width(opt->v.bbox),
  411     h  = bbox_height(opt->v.bbox),
  412     x0 = opt->v.bbox.x.min,
  413     y0 = opt->v.bbox.y.min;
  414 
  415   /*
  416     darea is supposed to be the area of the domain which
  417     should properly be calculated from the domain (a
  418     triangulation would do it, but seems excessive), for
  419     now we take the area of the domain of definition of
  420     the metric tensor, which will be no greater than the
  421     real domain area.
  422   */
  423 
  424   double darea;
  425 
  426   if (bilinear_defarea(opt->mt.a, &darea) != 0) return ERROR_BUG;
  427 
  428   /*
  429     estimate number we can fit in, the density of the optimal
  430     circle packing is pi/sqrt(12), the area of the ellipse is
  431     opt->area - then we account for the ones there already
  432   */
  433 
  434 #define EPACKOPT (M_PI/sqrt(12))
  435 
  436   int
  437     no = darea*EPACKOPT/opt->area,
  438     ni = no-n1;
  439 
  440   if (opt->v.verbose) status("estimate", no);
  441 
  442   ni *= opt->v.place.adaptive.overfill;
  443 
  444   if (ni<1)
  445     {
  446       fprintf(stderr, "bad dim2 estimate, dim1 %i, dim2 %i\n", n1, ni);
  447       return ERROR_NODATA;
  448     }
  449 
  450   /* find the grid size */
  451 
  452   double R = w/h;
  453   int
  454     nx = (int)floor(sqrt(ni*R)),
  455     ny = (int)floor(sqrt(ni/R));
  456 
  457   if ((nx<1) || (ny<1))
  458     {
  459       fprintf(stderr,
  460           "bad initial dim2 grid is %ix%i, strange domain?\n", nx, ny);
  461       return ERROR_NODATA;
  462     }
  463 
  464   if (opt->v.verbose) status("fill grid", nx*ny);
  465 
  466   /*
  467      allocate for ni > nx.ny, we will probably be
  468      adding more arrows later
  469   */
  470 
  471   particle_t *p = NULL;
  472 
  473   if (ensure_alloc(n1, ni, &p, &na) != 0)
  474     return ERROR_MALLOC;
  475 
  476   /* transfer dim 0/1 arrows */
  477 
  478   for (int i = 0 ; i < n1 ; i++)
  479     {
  480       ellipse_t E;
  481 
  482       /* FIXME - use mt instead */
  483 
  484       arrow_ellipse((*pA)+i, &(E));
  485 
  486       p[i].M     = ellipse_mt(E);
  487       p[i].major = E.major;
  488       p[i].minor = E.minor;
  489 
  490 #ifdef MINPW
  491       p[i].minpw = INFINITY;
  492 #endif
  493 
  494       p[i].v     = E.centre;
  495       p[i].dv    = zero;
  496       p[i].F     = zero;
  497       p[i].flag  = 0;
  498 
  499       SET_FLAG(p[i].flag, PARTICLE_FIXED);
  500     }
  501 
  502 #ifdef PTHREAD_FORCES
  503 
  504   size_t nt = opt->v.threads;
  505 
  506 #else
  507 
  508   size_t nt = 1;
  509 
  510 #endif
  511 
  512 #ifdef HAVE_SIGNAL_H
  513 
  514   static struct sigaction act, oldact;
  515 
  516   act.sa_handler = setexitflag;
  517   act.sa_flags   = 0;
  518   sigemptyset(&act.sa_mask);
  519 
  520   if (sigaction(SIGINT, &act, &oldact) == -1)
  521     {
  522       fprintf(stderr, "failed to install signal handler\n");
  523     }
  524 
  525 #endif
  526 
  527   /* ratio of total ellipse area to domain area */
  528 
  529   double eprop = 0.0;
  530 
  531   /* filename extension */
  532 
  533   char ext[4];
  534 
  535   switch (opt->v.file.output.format)
  536     {
  537     case output_format_eps:
  538       strcpy(ext, "eps");
  539       break;
  540     case output_format_povray:
  541       strcpy(ext, "inc");
  542       break;
  543     }
  544 
  545   /* generate an initial dim2 particle set on a regular grid */
  546 
  547   double dx = w/(nx+2);
  548   double dy = h/(ny+2);
  549 
  550   for (int i = 0 ; i < nx ; i++)
  551     {
  552       double x = x0 + (i+1.5)*dx;
  553 
  554       for (int j = 0 ; j < ny ; j++)
  555         {
  556           double y = y0 + (j+1.5)*dy;
  557           vector_t v = {x, y};
  558 
  559           if (! domain_inside(v, opt->dom)) continue;
  560 
  561       arrow_t A;
  562 
  563           A.centre = v;
  564 
  565           err = evaluate(&A);
  566 
  567           switch (err)
  568             {
  569           ellipse_t E;
  570 
  571             case ERROR_OK :
  572           arrow_ellipse(&A, &E);
  573           p[n1+n2].v     = E.centre;
  574           p[n1+n2].dv    = zero;
  575           p[n1+n2].M     = ellipse_mt(E);
  576           p[n1+n2].major = E.major;
  577           p[n1+n2].minor = E.minor;
  578           p[n1+n2].flag  = 0;
  579           n2++ ;
  580           break;
  581             case ERROR_NODATA: break;
  582             default: return err;
  583             }
  584         }
  585     }
  586 
  587   if (opt->v.verbose) status("initial", n1+n2);
  588 
  589   /* initial neighbour mesh */
  590 
  591   int
  592     nedge = 0,
  593     *edge = NULL;
  594 
  595   if ((err = neighbours(p, n1, n2, &edge, &nedge)) != ERROR_OK)
  596     {
  597       fprintf(stderr, "failed to generate initial neighbour mesh\n");
  598       return err;
  599     }
  600 
  601   if (nedge < 2)
  602     {
  603       fprintf(stderr, "only %i edges\n", nedge);
  604       return ERROR_NODATA;
  605     }
  606 
  607   /*
  608      pw-distance histogram - note that hist_st needs to be
  609      initialised before any possble jumps to output:
  610   */
  611 
  612 #define HIST_BINWIDTH 0.025
  613 #define HIST_BINS 80
  614 #define HIST_DP 3
  615 
  616   FILE *hist_st = NULL;
  617 
  618   if (opt->v.place.adaptive.histogram)
  619     {
  620       hist_st = fopen(opt->v.place.adaptive.histogram, "w");
  621 
  622       if (! hist_st)
  623     {
  624       fprintf(stderr, "failed to open %s for writing\n",
  625          opt->v.place.adaptive.histogram);
  626     }
  627     }
  628 
  629   /* setup data shared between threads */
  630 
  631   tshared_t tshared;
  632 
  633 #ifdef PTHREAD_FORCES
  634 
  635   tshared.terminate = false;
  636 
  637   /* mutex (for global flags) and barriers */
  638 
  639   if ((err = pthread_mutex_init(&(tshared.mutex), NULL)) != 0)
  640     {
  641       fprintf(stderr, "failed to init mutex: %s\n", strerror(err));
  642       return ERROR_PTHREAD;
  643     }
  644 
  645   pthread_barrierattr_t battr;
  646 
  647   if ((err = pthread_barrierattr_init(&battr)) != 0)
  648     {
  649       fprintf(stderr, "error at barrier attribute init: %s\n",
  650           strerror(err));
  651       return ERROR_PTHREAD;
  652     }
  653 
  654   if ((err = pthread_barrierattr_setpshared(&battr, PTHREAD_PROCESS_PRIVATE)) != 0)
  655     {
  656       fprintf(stderr, "error at barrier set shared: %s\n",
  657           strerror(err));
  658       return ERROR_PTHREAD;
  659     }
  660   else
  661     {
  662       for (int k = 0 ; k < 2 ; k++)
  663     {
  664       if ((err = pthread_barrier_init(tshared.barrier + k, &battr, nt+1)) != 0)
  665         {
  666           fprintf(stderr, "error at barrier %i init: %s\n",
  667               k, strerror(err));
  668           return ERROR_PTHREAD;
  669         }
  670     }
  671 
  672       if ((err = pthread_barrierattr_destroy(&battr)) != 0)
  673     {
  674       fprintf(stderr, "failed to destroy thread attribute: %s\n",
  675           strerror(err));
  676       return ERROR_PTHREAD;
  677     }
  678     }
  679 
  680 #endif
  681 
  682   /* thread data */
  683 
  684   tdata_t tdata[nt];
  685 
  686 #ifdef PTHREAD_FORCES
  687 
  688   /* thread attribute */
  689 
  690   pthread_attr_t attr;
  691 
  692   if ((err = pthread_attr_init(&attr)) != 0)
  693     {
  694       fprintf(stderr, "failed to init thread attribute: %s\n", strerror(err));
  695       return ERROR_PTHREAD;
  696     }
  697 
  698   if ((err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE)) != 0)
  699     {
  700       fprintf(stderr, "failed to set detach state: %s\n", strerror(err));
  701       return ERROR_PTHREAD;
  702     }
  703 
  704   pthread_t thread[nt];
  705 
  706   for (int k = 0 ; k<nt ; k++)
  707     {
  708       tdata[k].id = k;
  709       tdata[k].shared = &tshared;
  710       err = pthread_create(thread+k,
  711                &attr,
  712                (void* (*)(void*))forces_worker,
  713                (void*)(tdata+k));
  714       if (err)
  715     {
  716       fprintf(stderr, "failed to create thread %i: %s\n",
  717           k, strerror(err));
  718       return ERROR_PTHREAD;
  719     }
  720     }
  721 
  722   /* clean up attribute */
  723 
  724   if ((err = pthread_attr_destroy(&attr)) != 0)
  725     {
  726       fprintf(stderr, "failed to destroy thread attribute: %s\n",
  727           strerror(err));
  728       return ERROR_PTHREAD;
  729     }
  730 
  731 #else
  732 
  733   tdata[0].shared = &tshared;
  734 
  735 #endif
  736 
  737   /* grid break */
  738 
  739   if (opt->v.place.adaptive.breakout == break_grid)
  740     {
  741       if (opt->v.verbose) printf("[break at grid generation]\n");
  742       goto output;
  743     }
  744 
  745   /* set the initial physics */
  746 
  747   for (int i = 0 ; i < n1 ; i++)
  748     set_mq(p+i, schedB.mass, schedB.charge);
  749 
  750   for (int i = n1 ; i < n1+n2 ; i++)
  751     set_mq(p+i, schedI.mass, schedI.charge);
  752 
  753   /* particle cycle */
  754 
  755   const char
  756     hline[] = "-------------------------------------------\n",
  757     head[]  = "  n glyph ocl  edge   e/g       ke  prop\n";
  758 
  759   if (opt->v.verbose)
  760     {
  761       printf(hline);
  762       printf(head);
  763       printf(hline);
  764     }
  765 
  766   iterations_t iter = opt->v.place.adaptive.iter;
  767   wait_t wait;
  768 
  769   wait.drop = opt->v.place.adaptive.kedrop;
  770   wait.iter = iter.main * DETRUNC_T1;
  771   wait.kedB = 0.0;
  772   wait.done = ! (wait.drop > 0);
  773 
  774   for (int i = 0 ; (i < iter.main) || (!wait.done) ; i++)
  775     {
  776       if (hist_st)
  777     {
  778       unsigned int hist[HIST_BINS] = {0};
  779 
  780       /* pw distance histogram */
  781 
  782       for (int j = 0 ; j < nedge ; j++)
  783         {
  784           int id[2] = {edge[2*j], edge[2*j+1]};
  785 
  786           vector_t rAB = vsub(p[id[1]].v, p[id[0]].v);
  787           double x = contact_mt(rAB, p[id[0]].M, p[id[1]].M);
  788 
  789           if (x<0)
  790         {
  791           pw_error(rAB, p[id[0]], p[id[1]]);
  792           continue;
  793         }
  794 
  795           double d = sqrt(x);
  796           size_t bid = (int)round(d/HIST_BINWIDTH);
  797 
  798           if (bid < HIST_BINS) hist[bid]++;
  799         }
  800 
  801       for (int j=0 ; j<HIST_BINS ; j++)
  802         fprintf(hist_st, "%i %.*f %u\n", i,
  803             HIST_DP, j*HIST_BINWIDTH, hist[j]);
  804     }
  805 
  806 #ifdef MINPW
  807 
  808       /*
  809      calculate the minimum pw-distance of each particle
  810      to its neighbours - this should be moved to force()
  811      if it is integrated into the program proper
  812       */
  813 
  814 #define MINPWBUF 32
  815 
  816       char minpwname[MINPWBUF];
  817       snprintf(minpwname, MINPWBUF, "minpw.%03i.dat", i);
  818 
  819       FILE *minpw_st = fopen(minpwname, "w");
  820 
  821       if (minpw_st)
  822     {
  823       for (int j = n1 ; j < n1+n2 ; j++)
  824         p[j].minpw = INFINITY;
  825 
  826       for (int j = 0 ; j < nedge ; j++)
  827         {
  828           int id[2] = {edge[2*j], edge[2*j+1]};
  829 
  830           vector_t rAB = vsub(p[id[1]].v, p[id[0]].v);
  831           double x = contact_mt(rAB, p[id[0]].M, p[id[1]].M);
  832 
  833           if (x < 0)
  834         {
  835           pw_error(rAB, p[id[0]], p[id[1]]);
  836           continue;
  837         }
  838 
  839           double d = sqrt(x);
  840 
  841           p[id[0]].minpw = MIN(p[id[0]].minpw, d);
  842           p[id[1]].minpw = MIN(p[id[1]].minpw, d);
  843         }
  844 
  845       for (int j = n1 ; j < n1+n2 ; j++)
  846         fprintf(minpw_st, "%f %f\n", p[j].minor*p[j].major, p[j].minpw);
  847 
  848       fclose(minpw_st);
  849     }
  850 
  851 #endif
  852 
  853       /*
  854      inner cycle which should be short -- a duration
  855      over which the neighbours network is valid.
  856       */
  857 
  858       double T = 0;
  859       int nesc = 0;
  860 
  861       for (int j = 0 ; j < iter.euler ; j++)
  862     {
  863       T = ((double)(i*iter.euler + j))/((double)(iter.euler*iter.main));
  864 
  865       schedule(T, &schedB, &schedI);
  866 
  867       if (opt->v.place.adaptive.animate)
  868         {
  869           int  bufsz = 32;
  870           char buf[bufsz];
  871 
  872           vfp_opt_t v = opt->v;
  873 
  874           snprintf(buf, bufsz, "anim.%.4i.%.4i.%s", i, j, ext);
  875 
  876           v.file.output.path = buf;
  877           v.verbose = 0;
  878 
  879           if (n1+n2 > *nA)
  880         {
  881           arrow_t* A = realloc(*pA, (n1+n2)*sizeof(arrow_t));
  882 
  883           if (!A) return ERROR_MALLOC;
  884 
  885           *pA = A;
  886         }
  887 
  888           *nA = n1 + n2;
  889 
  890           for (int k = n1 ; k < n1+n2 ; k++)
  891         {
  892           (*pA)[k].centre = p[k].v;
  893           evaluate((*pA)+k);
  894         }
  895 
  896           nbs_t* nbs = nbs_populate(nedge, edge, n1+n2, p);
  897           if (!nbs) return ERROR_BUG;
  898 
  899           if ((err = vfplot_output(opt->dom, *nA, *pA, nedge, nbs, &v))
  900           != ERROR_OK)
  901         {
  902           fprintf(stderr, "failed animate write of %zi arrows to %s\n",
  903               *nA, v.file.output.path);
  904           return err;
  905         }
  906 
  907           free(nbs);
  908         }
  909 
  910       /* reset forces */
  911 
  912       for (int k = n1 ; k < n1+n2 ; k++)
  913         p[k].F = zero;
  914 
  915       /* accumulate forces */
  916 
  917           size_t eoff[nt];
  918       size_t esz[nt];
  919 
  920       if (subdivide(nt, nedge, eoff, esz) != 0)
  921         {
  922           fprintf(stderr, "failed %zi-partition of ellipse set\n", nt);
  923           return ERROR_BUG;
  924         }
  925       else
  926         {
  927           /*
  928          each thread gets its own array of vectors
  929          to store the accumulated forces, so there
  930          is no need for a mutex
  931           */
  932 
  933           /*
  934         clang static analysis reports a possible problem
  935         here with the case n2 = 0 - FIXME
  936           */
  937 
  938           vector_t F[nt*n2];
  939           flag_t flag[nt*n2];
  940 
  941           for (int k = 0 ; k < nt*n2 ; k++)
  942         {
  943           F[k] = zero;
  944           flag[k] = 0;
  945         }
  946 
  947           tshared.edge = edge;
  948           tshared.p = p;
  949           tshared.rd = schedI.rd;
  950           tshared.rt = schedI.rt;
  951           tshared.n1 = n1;
  952           tshared.n2 = n2;
  953 
  954           for (int k = 0 ; k < nt ; k++)
  955         {
  956           tdata[k].off = eoff[k];
  957           tdata[k].size = esz[k];
  958           tdata[k].F = F + k*n2;
  959           tdata[k].flag = flag + k*n2;
  960         }
  961 
  962 #ifdef PTHREAD_FORCES
  963 
  964           err = pthread_barrier_wait( &(tshared.barrier[0]) );
  965           if ((err != 0) && (err != PTHREAD_BARRIER_SERIAL_THREAD) )
  966         {
  967           fprintf(stderr, "error on barrier 0 wait: %s\n",
  968               strerror(err));
  969           return ERROR_PTHREAD;
  970         }
  971 
  972           /* the threads calculate forces here */
  973 
  974           err = pthread_barrier_wait( &(tshared.barrier[1]) );
  975           if ((err != 0) && (err != PTHREAD_BARRIER_SERIAL_THREAD) )
  976         {
  977           fprintf(stderr, "error on barrier 1 wait: %s\n",
  978               strerror(err));
  979           return ERROR_PTHREAD;
  980         }
  981 
  982 #else
  983           /*
  984         in the non-threaded version we just call the
  985         forces() function directly, rather than
  986         having each thread's forces_worker()
  987         do it -- we pass it the thread data struct,
  988         which, due to ifdefs, contains no information
  989         about threads (so this a bad name for it)
  990           */
  991 
  992           forces((void*)&tdata);
  993 #endif
  994           /*
  995         now dump the forces which are nt blocks of size n2
  996 
  997            F = [F1, ... Fn2, F1, ... Fn2, ... ]
  998 
  999             into the particle array
 1000           */
 1001 
 1002           for (int k = 0 ; k < n2 ; k++)
 1003         {
 1004           vector_t Fsum = zero;
 1005 
 1006           for (int m = 0 ; m<nt ; m++)
 1007             {
 1008               Fsum = vadd(Fsum, F[k+n2*m]);
 1009 
 1010               if (GET_FLAG(flag[k+n2*m], PARTICLE_STALE))
 1011             SET_FLAG(p[n1+k].flag, PARTICLE_STALE);
 1012             }
 1013 
 1014           p[n1+k].F = Fsum;
 1015         }
 1016 
 1017 #ifdef DUMP_THREAD_DATA
 1018 
 1019 #define THREAD_DATA "thread.dat"
 1020 
 1021           FILE* st = fopen(THREAD_DATA, "w");
 1022 
 1023           for (int k = 0 ; k < n2 ; k++)
 1024         {
 1025           vector_t F = p[n1+k].F;
 1026 
 1027           fprintf(st, "%i %f %f\n", (int)(p[n1+k].flag), F.x, F.y);
 1028         }
 1029 
 1030           fclose(st);
 1031 
 1032           printf("thread data in %s, terminating\n", THREAD_DATA);
 1033 
 1034           return ERROR_OK;
 1035 
 1036 #endif
 1037         }
 1038 
 1039       /*
 1040          this implements the leapfrog method commonly used
 1041          in molecular dynamics
 1042 
 1043            v(t+dt/2) = v(t-dt/2) + a(t) dt
 1044            x(t+dt)   = x(t) + v(t+dt/2) dt
 1045 
 1046          here x, v, a are the position, velocity, acceleration;
 1047          our struct uses different conventions.
 1048       */
 1049 
 1050       for (int k = n1 ; k < n1+n2 ; k++)
 1051         {
 1052           /*
 1053          scale invariant viscosity - we originally
 1054          had viscous force F1 = Cd v = O(L), but this
 1055          force should be O(L^2) so that the acceleration
 1056          produced by it is O(L). So we multiply the
 1057          viscous force by the mass (which is proportional
 1058          to L), which could be interpreted as the viscous
 1059          force being proportional to the area presented
 1060          to the medium in real-world physics
 1061 
 1062          FIXME - write this in terms of the acceleration
 1063          and so remove the mass mult/div
 1064           */
 1065 
 1066               double   Cd = 14.5;
 1067           vector_t F1 = smul(-Cd*p[k].mass, p[k].dv);
 1068               vector_t F = vadd(p[k].F, F1);
 1069 
 1070           p[k].dv = vadd(p[k].dv, smul(dt/p[k].mass, F));
 1071           p[k].v  = vadd(p[k].v, smul(dt, p[k].dv));
 1072         }
 1073 
 1074       /* reset the physics */
 1075 
 1076       for (int k = 0 ; k < n1 ; k++)
 1077         set_mq(p+k, schedB.mass, schedB.charge);
 1078 
 1079       for (int k = n1 ; k < n1+n2 ; k++)
 1080         set_mq(p+k, schedI.mass, schedI.charge);
 1081     }
 1082 
 1083       /* back in the main iteration */
 1084 
 1085       /*
 1086      mark those with overclose neighbours, here we
 1087      - for each internal particle find the minimal
 1088        pw-distance from amongst its neighbours
 1089      - sort by this value and take the top 2n
 1090      - create the intersection graph from this 2n
 1091        and ensure there are no intersections
 1092      - take the top n of the remander and mark those
 1093        as stale
 1094       */
 1095 
 1096       int nocl = 0;
 1097 
 1098       if ((n2>0) && (schedI.dmax>0) && (schedI.rd>0.0))
 1099     {
 1100           pw_t pw[n2];
 1101 
 1102           for (int j = 0 ; j < n2 ; j++)
 1103             {
 1104               pw[j].d = INFINITY;
 1105           pw[j].id = j+n1;
 1106             }
 1107 
 1108           for (int j = 0 ; j < nedge ; j++)
 1109             {
 1110               int id[2] = {edge[2*j], edge[2*j+1]};
 1111 
 1112           /*
 1113         we are only interested in internal points
 1114         (and edges are increasing pairs, so this
 1115         catches them all)
 1116           */
 1117 
 1118           if (id[0] < n1) continue;
 1119 
 1120               vector_t rAB = vsub(p[id[1]].v, p[id[0]].v);
 1121               double x = contact_mt(rAB, p[id[0]].M, p[id[1]].M);
 1122 
 1123               if (x<0)
 1124         {
 1125           pw_error(rAB, p[id[0]], p[id[1]]);
 1126           continue;
 1127         }
 1128 
 1129               double d = sqrt(x);
 1130 
 1131           /*
 1132         only going to k=1 means the minimum is attached
 1133         to the smaller id - quick hack till we implement
 1134         the non-interesct subset check
 1135           */
 1136 
 1137               for (int k = 0 ; k < 1 ; k++)
 1138                 {
 1139           size_t idk = id[k] - n1;
 1140           double d1 = pw[idk].d;
 1141 
 1142                   pw[idk].d = MIN(d, d1);
 1143                 }
 1144             }
 1145 
 1146       qsort(pw, n2, sizeof(pw_t), (int(*)(const void*, const void*))pwcomp);
 1147 
 1148       double r = pw[0].d;
 1149 
 1150           for (int j = 0 ;
 1151            (r < schedI.rd) && (nocl < schedI.dmax) && (j < n2) ;
 1152            r = pw[++j].d)
 1153             {
 1154           SET_FLAG(p[pw[j].id].flag, PARTICLE_STALE);
 1155           nocl++;
 1156             }
 1157     }
 1158 
 1159       /* re-evaluate */
 1160 
 1161       for (int j = n1 ; j < n1+n2 ; j++)
 1162     {
 1163       if (GET_FLAG(p[j].flag, PARTICLE_STALE)) continue;
 1164 
 1165       switch (err = metric_tensor(p[j].v, opt->mt, &(p[j].M)))
 1166         {
 1167           ellipse_t E;
 1168 
 1169         case ERROR_OK:
 1170           if ((err = mt_ellipse(p[j].M, &E)) != ERROR_OK)
 1171         return err;
 1172           p[j].major = E.major;
 1173           p[j].minor = E.minor;
 1174           break;
 1175 
 1176         case ERROR_NODATA:
 1177           SET_FLAG(p[j].flag, PARTICLE_STALE);
 1178           break;
 1179 
 1180         default: return err;
 1181         }
 1182     }
 1183 
 1184       /* mark escapees */
 1185 
 1186       for (int j = n1 ; j < n1+n2 ; j++)
 1187     {
 1188       if (GET_FLAG(p[j].flag, PARTICLE_STALE)) continue;
 1189 
 1190       if (! domain_inside(p[j].v, opt->dom))
 1191         {
 1192           SET_FLAG(p[j].flag, PARTICLE_STALE);
 1193           nesc++;
 1194         }
 1195     }
 1196 
 1197       /*
 1198      sort to get stale particles at the end
 1199      note that this destroys the validity of the
 1200      neigbour network so must be outside the inner
 1201      loop (unless you want neighbour lists), this
 1202      could be done faster with a "packing"
 1203       */
 1204 
 1205       qsort(p+n1, n2, sizeof(particle_t), (int (*)(const void*, const void*))ptcomp);
 1206 
 1207       /* adjust n2 to discard stale particles */
 1208 
 1209       while (n2 && GET_FLAG(p[n1+n2-1].flag, PARTICLE_STALE)) n2--;
 1210 
 1211       if (!n2)
 1212     {
 1213       fprintf(stderr, "all glyphs lost, bad topology?\n");
 1214       return ERROR_NODATA;
 1215     }
 1216 
 1217       /* recreate neighbours for the next cycle */
 1218 
 1219       free(edge); edge = NULL;
 1220 
 1221       if ((err = neighbours(p, n1, n2, &edge, &nedge)) != ERROR_OK)
 1222     {
 1223       fprintf(stderr, "failed to generate neighbour mesh\n");
 1224       return err;
 1225     }
 1226 
 1227       /* kinetic energy */
 1228 
 1229       double ke = 0.0;
 1230 
 1231       for (int j = n1 ; j < n1+n2 ; j++)
 1232     {
 1233       double v2 = vabs2(p[j].dv);
 1234       ke += p[j].mass * v2;
 1235     }
 1236 
 1237       ke = ke/(2.0*n2);
 1238 
 1239       /* handle db drop wait */
 1240 
 1241       if (i == wait.iter)
 1242     {
 1243       wait.kedB = 10*log10(ke);
 1244 
 1245 #ifdef WAIT_DEBUG
 1246       printf("waiting for %f\n", wait.kedB - wait.drop);
 1247 #endif
 1248     }
 1249       else if (i > wait.iter)
 1250     {
 1251       if (10*log10(ke) < wait.kedB - wait.drop)
 1252         {
 1253           wait.done = true;
 1254 
 1255 #ifdef WAIT_DEBUG
 1256           printf("waiting over\n");
 1257 #endif
 1258         }
 1259     }
 1260 
 1261       /* ellipse area and proportion of domain */
 1262 
 1263       double earea = 0.0;
 1264 
 1265       for (int j = 0 ; j < n1+n2 ; j++)
 1266     earea += p[j].minor * p[j].major;
 1267 
 1268       earea *= M_PI;
 1269 
 1270       eprop = earea/darea;
 1271 
 1272       /* edges per point */
 1273 
 1274       double epp = ((double)nedge)/(n1+n2);
 1275 
 1276       /* user statistics */
 1277 
 1278       if (opt->v.verbose)
 1279     printf("%3i %5i %3i %5i %6.3f %7.2f %5.3f\n",
 1280            i, n1+n2, nocl, nedge, epp,
 1281            (ke > 0 ? 10*log10(ke) : -INFINITY),
 1282            eprop);
 1283 
 1284       /* breakouts */
 1285 
 1286       switch (opt->v.place.adaptive.breakout)
 1287     {
 1288     case break_super:
 1289       if (T > BREAK_SUPER)
 1290         {
 1291           if (opt->v.verbose) printf("[break during superposition]\n");
 1292           goto output;
 1293         }
 1294       break;
 1295 
 1296     case break_midclean:
 1297       if (T > BREAK_MIDCLEAN)
 1298         {
 1299           if (opt->v.verbose) printf("[break in middle of cleaning]\n");
 1300           goto output;
 1301         }
 1302       break;
 1303 
 1304     case break_postclean:
 1305       if ((T > BREAK_POSTCLEAN) && (nocl == 0))
 1306         {
 1307           if (opt->v.verbose) printf("[break after cleaning]\n");
 1308           goto output;
 1309         }
 1310       break;
 1311 
 1312     default:
 1313       break;
 1314     }
 1315 
 1316 #ifdef HAVE_SIGNAL_H
 1317 
 1318       if (exitflag)
 1319     {
 1320       if (sigaction(SIGINT, &oldact, NULL) == -1)
 1321         fprintf(stderr, "failed to restore signal handler\n");
 1322 
 1323       goto output;
 1324     }
 1325 
 1326 #endif
 1327 
 1328     }
 1329 
 1330   if (opt->v.verbose)
 1331     printf(hline);
 1332 
 1333  output:
 1334 
 1335 #ifdef PTHREAD_FORCES
 1336 
 1337   if (set_terminate_status(&(tshared.mutex), &(tshared.terminate), true) != 0)
 1338     return 0;
 1339 
 1340   err = pthread_barrier_wait( &(tshared.barrier[0]) );
 1341   if ((err != 0) && (err != PTHREAD_BARRIER_SERIAL_THREAD) )
 1342     {
 1343       fprintf(stderr, "error on barrier 0 wait: %s\n",
 1344           strerror(err));
 1345       return ERROR_PTHREAD;
 1346     }
 1347 
 1348 #endif
 1349 
 1350   /* report results */
 1351 
 1352 #define EDENS_UNDERFULL 0.9
 1353 #define EDENS_OVERFULL  1.2
 1354 #define EDENS_DEFECT    0.2
 1355 
 1356   if (opt->v.verbose)
 1357     {
 1358       double
 1359     earat = eprop/EPACKOPT,
 1360     edens = (double)(n1+n2)/(double)no;
 1361 
 1362       printf("ellipse area ratio %.0f%%, density %.0f%%\n",
 1363          100.0*earat, 100.0*edens);
 1364 
 1365       if (edens < EDENS_UNDERFULL)
 1366     printf("looks underfull, try larger overfill\n");
 1367 
 1368       if ((edens > EDENS_OVERFULL) || (edens - earat > EDENS_DEFECT))
 1369     printf("looks overfull, try more iterations\n");
 1370     }
 1371 
 1372   /* close histogram stream */
 1373 
 1374   if (hist_st)
 1375     {
 1376       if (fclose(hist_st) != 0)
 1377         fprintf(stderr, "failed to close histogram stream\n");
 1378     }
 1379 
 1380   /*
 1381      encapulate the network data in array of nbr_t
 1382      for output
 1383   */
 1384 
 1385   nbs_t *nbs = nbs_populate(nedge, edge, n1+n2, p);
 1386 
 1387   if (!nbs) return ERROR_BUG;
 1388 
 1389   *nN = nedge;
 1390   *pN = nbs;
 1391 
 1392   free(edge);
 1393 
 1394   /* reallocate the output arrow array and transfer data from p */
 1395 
 1396   if (n1+n2 > *nA)
 1397     {
 1398       arrow_t* A = realloc(*pA, (n1+n2)*sizeof(arrow_t));
 1399 
 1400       if (!A)
 1401     return ERROR_MALLOC;
 1402 
 1403       *pA = A;
 1404     }
 1405 
 1406   for (int i = n1 ; i < n1+n2 ; i++)
 1407     {
 1408       (*pA)[i].centre = p[i].v;
 1409       evaluate((*pA)+i);
 1410     }
 1411 
 1412   *nA = n1+n2;
 1413 
 1414   free(p);
 1415 
 1416 #ifdef PTHREAD_FORCES
 1417 
 1418   /* harvest workers as they exit */
 1419 
 1420   for (int k = 0 ; k < nt ; k++)
 1421     {
 1422       err = pthread_join(thread[k], NULL);
 1423       if (err)
 1424     {
 1425       fprintf(stderr, "error joining thread %i: %s\n",
 1426           k, strerror(err));
 1427 
 1428       return ERROR_PTHREAD;
 1429     }
 1430     }
 1431 
 1432   /* destroy barriers */
 1433 
 1434   for (int k = 0 ; k < 2 ; k++)
 1435     {
 1436       err = pthread_barrier_destroy( tshared.barrier + k );
 1437       if (err)
 1438     {
 1439       fprintf(stderr, "error on barrier %i destroy: %s\n",
 1440           k, strerror(err));
 1441       return ERROR_PTHREAD;
 1442     }
 1443     }
 1444 
 1445 #endif
 1446 
 1447   return ERROR_OK;
 1448 }
 1449 
 1450 /*
 1451   return a nbs array populated from the edge list
 1452 */
 1453 
 1454 static nbs_t* nbs_populate(int nedge, int* edge, int np, particle_t *p)
 1455 {
 1456   nbs_t *nbs = malloc(nedge*sizeof(nbs_t));
 1457 
 1458   if (!nbs)
 1459     return NULL;
 1460 
 1461   for (int i = 0 ; i < nedge ; i++)
 1462     {
 1463       int id[2];
 1464 
 1465       for (int j = 0 ; j < 2 ; j++)
 1466     {
 1467       int idj = edge[2*i+j];
 1468 
 1469       if ((idj < 0) || (idj >= np))
 1470         {
 1471           fprintf(stderr, "edge (%i, %i) id of %i\n", i, j, idj);
 1472           goto cleanup;
 1473         }
 1474 
 1475       id[j] = idj;
 1476     }
 1477 
 1478       nbs[i].a.id = id[0];
 1479       nbs[i].b.id = id[1];
 1480 
 1481       nbs[i].a.v = p[id[0]].v;
 1482       nbs[i].b.v = p[id[1]].v;
 1483     }
 1484 
 1485   return nbs;
 1486 
 1487  cleanup:
 1488 
 1489   free(nbs);
 1490 
 1491   return NULL;
 1492 }
 1493 
 1494 /*
 1495    kd-tree neighbour parameters
 1496 
 1497    The KD_RNG_INITIAL = r has a big effect on performance,
 1498    a factor of 2 for r=2 rather than r=4.  Because the
 1499    potential is zero for argument bigger than one, there
 1500    is a value of r above which the dynamics are independant
 1501    of r. I tried a binary subdivision to determine this
 1502    visually and find r around 2.125 -- we take a value a
 1503    little larger, at 2.3 the output is diff-identical
 1504    (at least on the cylinder test problem)
 1505 */
 1506 
 1507 #define KD_RNG_INITIAL   2.3
 1508 #define KD_EXPAND_MAX    3.0
 1509 #define KD_EXPAND_FACTOR 1.5
 1510 #define KD_NBS_MIN    4
 1511 #define KD_NBS_MAX   32
 1512 
 1513 typedef struct
 1514 {
 1515   int n[2];
 1516   double d2;
 1517 } edged_t;
 1518 
 1519 static int edcmp(const edged_t *ed1, const edged_t *ed2)
 1520 {
 1521   return ed1->d2 > ed2->d2;
 1522 }
 1523 
 1524 static int ecmp(const int *e1, const int *e2)
 1525 {
 1526   int i;
 1527 
 1528   for (i=0 ; i<2 ; i++)
 1529     {
 1530       if (e1[i] > e2[i]) return  1;
 1531       if (e1[i] < e2[i]) return -1;
 1532     }
 1533 
 1534   return 0;
 1535 }
 1536 
 1537 static int neighbours(particle_t* p, int n1, int n2, int **pe, int *pne)
 1538 {
 1539   int np = n1+n2, id[np], e[2*np*KD_NBS_MAX], ne = 0;
 1540   struct kdtree *kd = kd_create(2);
 1541 
 1542   *pe  = NULL;
 1543   *pne = 0;
 1544 
 1545   for (int i = 0 ; i < np ; i++) id[i] = i;
 1546 
 1547   if (!kd) return ERROR_BUG;
 1548 
 1549   for (int i = 0 ; i < np ; i++)
 1550     {
 1551       double
 1552     v[2] = {p[i].v.x, p[i].v.y};
 1553 
 1554       kd_insert(kd, v, id+i);
 1555     }
 1556 
 1557   struct {int nx, nn; } stat = {0, 0};
 1558 
 1559   for (int i = n1 ; i < np ; i++)
 1560     {
 1561       double
 1562     v[2] = {p[i].v.x, p[i].v.y},
 1563     rng  = KD_RNG_INITIAL * p[i].major;
 1564       struct kdres *res;
 1565 
 1566       /* find neighbours */
 1567 
 1568       if (!(res = kd_nearest_range(kd, v, rng))) return ERROR_BUG;
 1569 
 1570       int n = kd_res_size(res);
 1571 
 1572       /* expand search radius if required */
 1573 
 1574       for (int j = 0 ; (j < KD_EXPAND_MAX) && (n < KD_NBS_MIN) ; j++)
 1575     {
 1576       kd_res_free(res);
 1577       rng *= KD_EXPAND_FACTOR;
 1578 
 1579       stat.nx++;
 1580 
 1581       if (!(res = kd_nearest_range(kd, v, rng))) return ERROR_BUG;
 1582       n = kd_res_size(res);
 1583     }
 1584 
 1585       stat.nn += n;
 1586 
 1587       /* select nearest if too many */
 1588 
 1589       if (n > 0)
 1590     {
 1591       /* dump results to temporary edge & distance array */
 1592 
 1593       int ned=0;
 1594       edged_t ed[n];
 1595 
 1596       while (! kd_res_end(res))
 1597         {
 1598           int
 1599         *nid = kd_res_item_data(res),
 1600         j = *nid;
 1601 
 1602           kd_res_next(res);
 1603 
 1604           if (i == j) continue;
 1605 
 1606           ed[ned].n[i>j] = i;
 1607           ed[ned].n[i<j] = j;
 1608 
 1609           ed[ned].d2 = vabs2(vsub(p[i].v, p[j].v));
 1610 
 1611           ned++;
 1612         }
 1613 
 1614       /* sort by distance */
 1615 
 1616       qsort((void*)ed,
 1617         (size_t)ned,
 1618         sizeof(edged_t),
 1619         (int (*)(const void*, const void*))edcmp);
 1620 
 1621       /* transfer at most KD_NBS_MAX to edge struct */
 1622 
 1623       ned = MIN(ned, KD_NBS_MAX);
 1624 
 1625       for (int k = 0 ; k < ned ; k++)
 1626         {
 1627           e[2*(ne+k)]   = ed[k].n[0];
 1628           e[2*(ne+k)+1] = ed[k].n[1];
 1629         }
 1630 
 1631       ne += ned;
 1632     }
 1633       else
 1634     {
 1635       /*
 1636         this can happen and means a particle is nowhere
 1637         near any of the others, often caused by naive
 1638         initial placement and small ellipse margins.
 1639         We just ignore it -- it is probably too small to
 1640         see.
 1641       */
 1642     }
 1643 
 1644       kd_res_free(res);
 1645     }
 1646 
 1647   kd_free(kd);
 1648 
 1649   if (!ne) return ERROR_NODATA;
 1650 
 1651   /* now remove duplicates from e */
 1652 
 1653   qsort((void*)e,
 1654     (size_t)ne,
 1655     2*sizeof(int),
 1656     (int (*)(const void*, const void*))ecmp);
 1657 
 1658   ne = rmdup((void*)e,
 1659          (size_t)ne,
 1660          2*sizeof(int),
 1661          (int (*)(const void*, const void*))ecmp);
 1662 
 1663   if (!ne) return ERROR_NODATA;
 1664 
 1665   /* allocated edge array to return */
 1666 
 1667   size_t aesz = 2*ne*sizeof(int);
 1668 
 1669   int *ae = malloc(aesz);
 1670 
 1671   if (!ae) return ERROR_MALLOC;
 1672 
 1673   memcpy(ae, e, aesz);
 1674 
 1675   *pe  = ae;
 1676   *pne = ne;
 1677 
 1678   return ERROR_OK;
 1679 }
 1680 
 1681 /*
 1682   subdivide a range 0..ne into nt subranges specified
 1683   by offset and size. eg 0..20 by 2 -> 0..10, 11..20
 1684 */
 1685 
 1686 static int subdivide(size_t nt, size_t ne, size_t* off, size_t* size)
 1687 {
 1688   if ((nt<1) || (ne<1)) return 1;
 1689 
 1690   size_t m = ne/nt;
 1691 
 1692   for (int i = 0 ; i < nt-1 ; i++)
 1693     {
 1694       off[i] = i*m;
 1695       size[i] = m;
 1696     }
 1697 
 1698   off[nt-1] = (nt-1)*m;
 1699   size[nt-1] = ne - (nt-1)*m;
 1700 
 1701   return 0;
 1702 }
 1703 
 1704 #ifdef PTHREAD_FORCES
 1705 
 1706 static int get_terminate_status(pthread_mutex_t *mutex, bool *pterm, bool *pval, int id)
 1707 {
 1708   int err;
 1709 
 1710   err = pthread_mutex_lock(mutex);
 1711   if (err)
 1712     {
 1713       fprintf(stderr, "error on terminate check mutex for thread %i: %s\n",
 1714           id, strerror(err));
 1715       return 1;
 1716     }
 1717 
 1718   *pval = *pterm;
 1719 
 1720   err = pthread_mutex_unlock(mutex);
 1721   if (err)
 1722     {
 1723       fprintf(stderr,
 1724           "error on terminate check mutex for thread %i: %s\n",
 1725           id, strerror(err));
 1726       return 1;
 1727     }
 1728 
 1729   return 0;
 1730 }
 1731 
 1732 static int set_terminate_status(pthread_mutex_t* mutex, bool* pterm, bool value)
 1733 {
 1734   int err;
 1735 
 1736   err = pthread_mutex_lock(mutex);
 1737   if (err)
 1738     {
 1739       fprintf(stderr, "error on terminate set mutex: %s\n",
 1740           strerror(err));
 1741       return 1;
 1742     }
 1743 
 1744   *pterm = value;
 1745 
 1746   err = pthread_mutex_unlock(mutex);
 1747   if (err)
 1748     {
 1749       fprintf(stderr,
 1750           "error on terminate set mute: %s\n",
 1751           strerror(err));
 1752       return 1;
 1753     }
 1754 
 1755   return 0;
 1756 }
 1757 
 1758 /* FIXME use a sensible return value here */
 1759 
 1760 static void* forces_worker(tdata_t* pt)
 1761 {
 1762   int err, id = pt->id;
 1763   bool terminate;
 1764 
 1765   while (1)
 1766     {
 1767       err = pthread_barrier_wait(&(pt->shared->barrier[0]));
 1768       if ((err != 0) && (err != PTHREAD_BARRIER_SERIAL_THREAD))
 1769     {
 1770       fprintf(stderr, "error at barrier 0 wait for thread %i: %s\n",
 1771           id, strerror(err));
 1772       return NULL;
 1773     }
 1774 
 1775       if ((get_terminate_status(&(pt->shared->mutex),
 1776                 &(pt->shared->terminate),
 1777                 &terminate, id) != 0) || terminate )
 1778     return NULL;
 1779 
 1780       forces(pt);
 1781 
 1782       err = pthread_barrier_wait(&(pt->shared->barrier[1]));
 1783       if ((err != 0) && (err != PTHREAD_BARRIER_SERIAL_THREAD))
 1784     {
 1785       fprintf(stderr, "error at barrier 1 wait for thread %i: %s\n",
 1786           id, strerror(err));
 1787       return NULL;
 1788     }
 1789     }
 1790 }
 1791 
 1792 #endif
 1793 
 1794 /*
 1795   this accumulates the forces for the edges
 1796   g.edge[t.off] ... g.edge[t.off + t.size -1]
 1797   and puts the results in t.F, a private vector
 1798   array (so no mutex required).
 1799 */
 1800 
 1801 static void* forces(tdata_t* pt)
 1802 {
 1803   tdata_t t = *pt;
 1804   tshared_t s = *(t.shared);
 1805 
 1806   for (int i = 0 ; i < t.size ; i++)
 1807     {
 1808       int k = i+t.off;
 1809       int idA = s.edge[2*k], idB = s.edge[2*k+1];
 1810       vector_t
 1811     rAB = vsub(s.p[idB].v, s.p[idA].v),
 1812     uAB = vunit(rAB);
 1813 
 1814       double x = contact_mt(rAB, s.p[idA].M, s.p[idB].M);
 1815 
 1816       if (x<0)
 1817     {
 1818       pw_error(rAB, s.p[idA], s.p[idB]);
 1819       continue;
 1820     }
 1821 
 1822       double d = sqrt(x);
 1823       double f =
 1824     force(d, s.rt, DETRUNC_R0) *
 1825     s.p[idA].charge *
 1826     s.p[idB].charge * 60;
 1827 
 1828       /*
 1829      note that we read data from the particle
 1830      array s.p[], but write to our private data
 1831      area. since the force ids idA, idB are the
 1832      offsets in the paticle array we must
 1833      subtract n1 (only the forces on particles
 1834      n1 .. n2-1  are calculated)
 1835       */
 1836 
 1837       if (GET_FLAG(s.p[idA].flag, PARTICLE_FIXED))
 1838     {
 1839       if (! GET_FLAG(s.p[idB].flag, PARTICLE_FIXED))
 1840         {
 1841           t.F[idB-s.n1] = vadd(t.F[idB-s.n1], smul(f, uAB));
 1842 
 1843           if (d < s.rd)
 1844         SET_FLAG(t.flag[idB-s.n1], PARTICLE_STALE);
 1845         }
 1846     }
 1847       else
 1848     {
 1849       t.F[idA-s.n1] = vadd(t.F[idA-s.n1], smul(-f, uAB));
 1850 
 1851       if (GET_FLAG(s.p[idB].flag, PARTICLE_FIXED))
 1852         {
 1853           if (d < s.rd)
 1854         SET_FLAG(t.flag[idA-s.n1], PARTICLE_STALE);
 1855         }
 1856       else
 1857         t.F[idB-s.n1] = vadd(t.F[idB-s.n1], smul(f, uAB));
 1858     }
 1859     }
 1860 
 1861   return NULL;
 1862 }