"Fossies" - the Fresh Open Source Software Archive

Member "PDL-2.080/Basic/Primitive/primitive.pd" (25 May 2022, 109916 Bytes) of package /linux/misc/PDL-2.080.tar.gz:


As a special service "Fossies" has tried to format the requested text file into HTML format (style: standard) with prefixed line numbers. Alternatively you can here view or download the uninterpreted source code file. See also the latest Fossies "Diffs" side-by-side code changes report for "primitive.pd": 2.079_vs_2.080.

    1 use strict;
    2 use warnings;
    3 use PDL::Types qw(ppdefs_all types);
    4 my $F = [map $_->ppsym, grep $_->real && !$_->integer, types()];
    5 
    6 pp_addpm({At=>'Top'},<<'EOD');
    7 use strict;
    8 use warnings;
    9 use PDL::Slices;
   10 use Carp;
   11 
   12 { package PDL;
   13   use overload (
   14     'x' => sub {
   15       PDL::Primitive::matmult(@_[0,1], my $foo=$_[0]->null());
   16       $foo;
   17     },
   18   );
   19 }
   20 
   21 =head1 NAME
   22 
   23 PDL::Primitive - primitive operations for pdl
   24 
   25 =head1 DESCRIPTION
   26 
   27 This module provides some primitive and useful functions defined
   28 using PDL::PP and able to use the new indexing tricks.
   29 
   30 See L<PDL::Indexing> for how to use indices creatively.
   31 For explanation of the signature format, see L<PDL::PP>.
   32 
   33 =head1 SYNOPSIS
   34 
   35  # Pulls in PDL::Primitive, among other modules.
   36  use PDL;
   37 
   38  # Only pull in PDL::Primitive:
   39  use PDL::Primitive;
   40 
   41 =cut
   42 
   43 EOD
   44 
   45 ################################################################
   46 #  a whole bunch of quite basic functions for inner, outer
   47 #  and matrix products (operations that are not normally
   48 #  available via operator overloading)
   49 ################################################################
   50 
   51 pp_def(
   52        'inner',
   53        HandleBad => 1,
   54        Pars => 'a(n); b(n); [o]c();',
   55        GenericTypes => [ppdefs_all],
   56        Code =>
   57        'double tmp = 0;
   58         int badflag = 0;
   59         loop(n) %{
   60            PDL_IF_BAD(if (!($ISGOOD(a()) && $ISGOOD(b()))) { badflag = 1; break; }
   61            else,)                                          { tmp += $a() * $b(); }
   62         %}
   63         PDL_IF_BAD(if (badflag) { $SETBAD(c()); $PDLSTATESETBAD(c); }
   64         else,)                  { $c() = tmp;  }',
   65        Doc => '
   66 =for ref
   67 
   68 Inner product over one dimension
   69 
   70  c = sum_i a_i * b_i
   71 ',
   72        BadDoc => '
   73 =for bad
   74 
   75 If C<a() * b()> contains only bad data,
   76 C<c()> is set bad. Otherwise C<c()> will have its bad flag cleared,
   77 as it will not contain any bad values.
   78 ',
   79        ); # pp_def( inner )
   80 
   81 pp_def(
   82        'outer',
   83        HandleBad => 1,
   84        Pars => 'a(n); b(m); [o]c(n,m);',
   85        GenericTypes => [ppdefs_all],
   86        Code =>
   87        'loop(n,m) %{
   88           PDL_IF_BAD(if ( $ISBAD(a()) || $ISBAD(b()) ) {
   89              $SETBAD(c());
   90           } else,) {
   91              $c() = $a() * $b();
   92           }
   93         %}',
   94        Doc => '
   95 =for ref
   96 
   97 outer product over one dimension
   98 
   99 Naturally, it is possible to achieve the effects of outer
  100 product simply by broadcasting over the "C<*>"
  101 operator but this function is provided for convenience.
  102 '); # pp_def( outer )
  103 
  104 pp_addpm(<<'EOD');
  105 =head2 x
  106 
  107 =for sig
  108 
  109  Signature: (a(i,z), b(x,i),[o]c(x,z))
  110 
  111 =for ref
  112 
  113 Matrix multiplication
  114 
  115 PDL overloads the C<x> operator (normally the repeat operator) for
  116 matrix multiplication.  The number of columns (size of the 0
  117 dimension) in the left-hand argument must normally equal the number of
  118 rows (size of the 1 dimension) in the right-hand argument.
  119 
  120 Row vectors are represented as (N x 1) two-dimensional PDLs, or you
  121 may be sloppy and use a one-dimensional PDL.  Column vectors are
  122 represented as (1 x N) two-dimensional PDLs.
  123 
  124 Broadcasting occurs in the usual way, but as both the 0 and 1 dimension
  125 (if present) are included in the operation, you must be sure that
  126 you don't try to broadcast over either of those dims.
  127 
  128 Of note, due to how Perl v5.14.0 and above implement operator overloading of
  129 the C<x> operator, the use of parentheses for the left operand creates a list
  130 context, that is
  131 
  132  pdl> ( $x * $y ) x $z
  133  ERROR: Argument "..." isn't numeric in repeat (x) ...
  134 
  135 treats C<$z> as a numeric count for the list repeat operation and does not call
  136 the scalar form of the overloaded operator. To use the operator in this case,
  137 use a scalar context:
  138 
  139  pdl> scalar( $x * $y ) x $z
  140 
  141 or by calling L</matmult> directly:
  142 
  143  pdl> ( $x * $y )->matmult( $z )
  144 
  145 EXAMPLES
  146 
  147 Here are some simple ways to define vectors and matrices:
  148 
  149  pdl> $r = pdl(1,2);                # A row vector
  150  pdl> $c = pdl([[3],[4]]);          # A column vector
  151  pdl> $c = pdl(3,4)->(*1);          # A column vector, using NiceSlice
  152  pdl> $m = pdl([[1,2],[3,4]]);      # A 2x2 matrix
  153 
  154 Now that we have a few objects prepared, here is how to
  155 matrix-multiply them:
  156 
  157  pdl> print $r x $m                 # row x matrix = row
  158  [
  159   [ 7 10]
  160  ]
  161 
  162  pdl> print $m x $r                 # matrix x row = ERROR
  163  PDL: Dim mismatch in matmult of [2x2] x [2x1]: 2 != 1
  164 
  165  pdl> print $m x $c                 # matrix x column = column
  166  [
  167   [ 5]
  168   [11]
  169  ]
  170 
  171  pdl> print $m x 2                  # Trivial case: scalar mult.
  172  [
  173   [2 4]
  174   [6 8]
  175  ]
  176 
  177  pdl> print $r x $c                 # row x column = scalar
  178  [
  179   [11]
  180  ]
  181 
  182  pdl> print $c x $r                 # column x row = matrix
  183  [
  184   [3 6]
  185   [4 8]
  186  ]
  187 
  188 INTERNALS
  189 
  190 The mechanics of the multiplication are carried out by the
  191 L</matmult> method.
  192 
  193 =cut
  194 
  195 EOD
  196 
  197 pp_def('matmult',
  198 	HandleBad=>0,
  199 	Pars => 'a(t,h); b(w,t); [o]c(w,h);',
  200 	GenericTypes => [ppdefs_all],
  201 	PMCode => <<'EOPM',
  202 sub PDL::matmult {
  203     my ($x,$y,$c) = @_;
  204     $y = PDL->topdl($y);
  205     $c = PDL->null unless do { local $@; eval { $c->isa('PDL') } };
  206     while($x->getndims < 2) {$x = $x->dummy(-1)}
  207     while($y->getndims < 2) {$y = $y->dummy(-1)}
  208     return ($c .= $x * $y) if( ($x->dim(0)==1 && $x->dim(1)==1) ||
  209                                ($y->dim(0)==1 && $y->dim(1)==1) );
  210     barf sprintf 'Dim mismatch in matmult of [%1$dx%2$d] x [%3$dx%4$d]: %1$d != %4$d',$x->dim(0),$x->dim(1),$y->dim(0),$y->dim(1)
  211       if $y->dim(1) != $x->dim(0);
  212     PDL::_matmult_int($x,$y,$c);
  213     $c;
  214 }
  215 EOPM
  216 	Code => <<'EOC',
  217 	PDL_Indx ih, iw, it, ow, oh, ot, wlim, hlim, tlim;
  218 	PDL_Indx tsiz = 8 * sizeof(double) / sizeof($GENERIC());
  219 
  220 	// Cache the dimincs to avoid constant lookups
  221 	PDL_Indx atdi = PDL_REPRINCS($PDL(a))[0];
  222 	PDL_Indx btdi = PDL_REPRINCS($PDL(b))[1];
  223 
  224 	// Loop over tiles
  225 	for(   oh=0;   oh < $SIZE(h);   oh += tsiz   ) {
  226 	   hlim = PDLMIN(oh + tsiz, $SIZE(h));
  227 
  228 	   for(   ow=0;   ow < $SIZE(w);   ow += tsiz   ) {
  229 	      wlim = PDLMIN(ow + tsiz, $SIZE(w));
  230 
  231 	      // Zero the output for this tile
  232 	      for(  ih=oh; ih<hlim; ih++  )
  233 		 for(  iw=ow; iw<wlim; iw++  )
  234 		    $c(w=>iw, h=>ih) = 0;
  235 
  236 	      for(   ot=0;   ot < $SIZE(t);  ot += tsiz   ) {
  237 	         tlim = PDLMIN(ot + tsiz, $SIZE(t));
  238 
  239 	         for(  ih=oh; ih<hlim; ih++  ) {
  240 		    for(  iw=ow; iw<wlim; iw++  ) {
  241 		       // Cache the accumulated value for the output
  242 		       $GENERIC() cc = $c(w=>iw, h=>ih);
  243 
  244 		       // Cache data pointers before 't' run through tile
  245 		       $GENERIC() *ad = &($a(t=>ot, h=>ih));
  246 		       $GENERIC() *bd = &($b(w=>iw, t=>ot));
  247 
  248 		       // Hotspot - run the 't' summation
  249 		       for( it=ot; it<tlim; it++  ) {
  250 			    cc += *ad * *bd;
  251 			    ad += atdi;
  252 			    bd += btdi;
  253 		       }
  254 
  255 		       // put the output back to be further accumulated later
  256 		       $c(w=>iw, h=>ih) = cc;
  257 		    }
  258 		 }
  259 	      }
  260 	   }
  261 	}
  262 EOC
  263 	Doc => <<'EOD'
  264 =for ref
  265 
  266 Matrix multiplication
  267 
  268 Notionally, matrix multiplication $x x $y is equivalent to the
  269 broadcasting expression
  270 
  271     $x->dummy(1)->inner($y->xchg(0,1)->dummy(2),$c);
  272 
  273 but for large matrices that breaks CPU cache and is slow.  Instead,
  274 matmult calculates its result in 32x32x32 tiles, to keep the memory
  275 footprint within cache as long as possible on most modern CPUs.
  276 
  277 For usage, see L</x>, a description of the overloaded 'x' operator
  278 
  279 EOD
  280 	);
  281 
  282 pp_def(
  283        'innerwt',
  284        HandleBad => 1,
  285        Pars => 'a(n); b(n); c(n); [o]d();',
  286        GenericTypes => [ppdefs_all],
  287        Code =>
  288        'long double tmp = 0;
  289         int flag = 0;
  290 	loop(n) %{
  291            PDL_IF_BAD(if ( $ISGOOD(a()) && $ISGOOD(b()) && $ISGOOD(c()) ),) {
  292               tmp += $a() * $b() * $c();
  293               flag = 1;
  294            }
  295         %}
  296         PDL_IF_BAD(if (!flag) { $SETBAD(d()); }
  297         else,)                { $d() = tmp; }',
  298        Doc => '
  299 
  300 =for ref
  301 
  302 Weighted (i.e. triple) inner product
  303 
  304  d = sum_i a(i) b(i) c(i)
  305 
  306 =cut
  307 
  308 '
  309        );
  310 
  311 pp_def(
  312        'inner2',
  313        HandleBad => 1,
  314        Pars => 'a(n); b(n,m); c(m); [o]d();',
  315        GenericTypes => [ppdefs_all],
  316        Code =>
  317        'long double tmp = 0;
  318         int flag = 0;
  319 	loop(n,m) %{
  320            PDL_IF_BAD(if ( $ISGOOD(a()) && $ISGOOD(b()) && $ISGOOD(c()) ),) {
  321               tmp += $a() * $b() * $c();
  322               flag = 1;
  323            }
  324         %}
  325         PDL_IF_BAD(if (!flag) { $SETBAD(d()); }
  326         else,)                { $d() = tmp; }',
  327        Doc => '
  328 =for ref
  329 
  330 Inner product of two vectors and a matrix
  331 
  332  d = sum_ij a(i) b(i,j) c(j)
  333 
  334 Note that you should probably not broadcast over C<a> and C<c> since that would be
  335 very wasteful. Instead, you should use a temporary for C<b*c>.
  336 '
  337        );
  338 
  339 pp_def(
  340        'inner2d',
  341        HandleBad => 1,
  342        Pars => 'a(n,m); b(n,m); [o]c();',
  343        GenericTypes => [ppdefs_all],
  344        Code =>
  345        'long double tmp = 0;
  346         int flag = 0;
  347 	loop(n,m) %{
  348            PDL_IF_BAD(if ( $ISGOOD(a()) && $ISGOOD(b()) ),) {
  349               tmp += $a() * $b();
  350               flag = 1;
  351            }
  352         %}
  353         PDL_IF_BAD(if (!flag) { $SETBAD(c()); }
  354         else,)                { $c() = tmp; }',
  355        Doc => '
  356 
  357 =for ref
  358 
  359 Inner product over 2 dimensions.
  360 
  361 Equivalent to
  362 
  363  $c = inner($x->clump(2), $y->clump(2))
  364 
  365 =cut
  366 
  367 '
  368        );
  369 
  370 pp_def(
  371        'inner2t',
  372        HandleBad => 1,
  373        Pars => 'a(j,n); b(n,m); c(m,k); [t]tmp(n,k); [o]d(j,k));',
  374        GenericTypes => [ppdefs_all],
  375        Code =>
  376        'loop(n,k) %{
  377            long double tmp0 = 0;
  378            int flag = 0;
  379 	   loop(m) %{
  380               PDL_IF_BAD(if ( $ISGOOD(b()) && $ISGOOD(c()) ),) {
  381                  tmp0 += $b() * $c();
  382                  flag = 1;
  383               }
  384            %}
  385            PDL_IF_BAD(if (!flag) { $SETBAD(tmp()); }
  386            else,)                { $tmp() = tmp0; }
  387 	%}
  388 	loop(j,k) %{
  389            long double tmp1 = 0;
  390            int flag = 0;
  391 	   loop(n) %{
  392               if ( $ISGOOD(a()) && $ISGOOD(tmp()) ) {
  393                  tmp1 += $a() * $tmp();
  394                  flag = 1;
  395               }
  396            %}
  397            PDL_IF_BAD(if (!flag) { $SETBAD(d()); }
  398            else,)                { $d() = tmp1; }
  399 	%}',
  400        Doc => '
  401 =for ref
  402 
  403 Efficient Triple matrix product C<a*b*c>
  404 
  405 Efficiency comes from by using the temporary C<tmp>. This operation only
  406 scales as C<N**3> whereas broadcasting using L</inner2> would scale
  407 as C<N**4>.
  408 
  409 The reason for having this routine is that you do not need to
  410 have the same broadcast-dimensions for C<tmp> as for the other arguments,
  411 which in case of large numbers of matrices makes this much more
  412 memory-efficient.
  413 
  414 It is hoped that things like this could be taken care of as a kind of
  415 closures at some point.
  416 
  417 =cut
  418 '
  419        ); # pp_def inner2t()
  420 
  421 # a helper function for the cross product definition
  422 sub crassgn {
  423   "\$c(tri => $_[0]) = \$a(tri => $_[1])*\$b(tri => $_[2]) -
  424 	\$a(tri => $_[2])*\$b(tri => $_[1]);"
  425 }
  426 
  427 pp_def('crossp',
  428        Doc => <<'EOD',
  429 
  430 =for ref
  431 
  432 Cross product of two 3D vectors
  433 
  434 After
  435 
  436 =for example
  437 
  438  $c = crossp $x, $y
  439 
  440 the inner product C<$c*$x> and C<$c*$y> will be zero, i.e. C<$c> is
  441 orthogonal to C<$x> and C<$y>
  442 
  443 =cut
  444 
  445 EOD
  446        Pars => 'a(tri=3); b(tri); [o] c(tri)',
  447        GenericTypes => [ppdefs_all],
  448        Code =>
  449        crassgn(0,1,2)."\n".
  450        crassgn(1,2,0)."\n".
  451        crassgn(2,0,1),
  452        );
  453 
  454 pp_def('norm',
  455        HandleBad => 1,
  456        Pars => 'vec(n); [o] norm(n)',
  457        GenericTypes => [ppdefs_all],
  458        Doc => 'Normalises a vector to unit Euclidean length',
  459        Code =>
  460        'long double sum=0;
  461         int flag = 0;
  462 	loop(n) %{
  463            PDL_IF_BAD(if ( $ISGOOD(vec()) ) {,)
  464               sum +=
  465                 PDL_IF_GENTYPE_REAL(
  466                   $vec()*$vec(),
  467                   creall($vec())*creall($vec()) + cimagl($vec())*cimagl($vec())
  468                 );
  469            PDL_IF_BAD(flag = 1; },)
  470         %}
  471         PDL_IF_BAD(if ( flag ) {,)
  472 	   if (sum > 0) {
  473 	      sum = sqrt(sum);
  474 	      loop(n) %{
  475                  PDL_IF_BAD(if ( $ISBAD(vec()) ) { $SETBAD(norm()); }
  476                             else                 {,)
  477                     $norm() = $vec()/sum;
  478                  PDL_IF_BAD(},)
  479               %}
  480 	   } else {
  481 	      loop(n) %{
  482                  if ( $ISBAD(vec()) ) { $SETBAD(norm()); }
  483                  else                 { $norm() = $vec(); }
  484               %}
  485            }
  486         PDL_IF_BAD(} else {
  487 	   loop(n) %{
  488               $SETBAD(norm());
  489            %}
  490 	},)
  491 	',
  492 );
  493 
  494 # this one was motivated by the need to compute
  495 # the circular mean efficiently
  496 # without it could not be done efficiently or without
  497 # creating large intermediates (check pdl-porters for
  498 # discussion)
  499 # see PDL::ImageND for info about the circ_mean function
  500 
  501 pp_def(
  502     'indadd',
  503     HandleBad => 1,
  504     Pars => 'input(n); indx ind(n); [io] sum(m)',
  505     GenericTypes => [ppdefs_all],
  506     Code =>
  507     'loop(n) %{
  508         register PDL_Indx this_ind = $ind();
  509         if ( PDL_IF_BAD($ISBADVAR(this_ind,ind) ||,) this_ind<0 || this_ind>=$SIZE(m) )
  510            $CROAK("invalid index");
  511         PDL_IF_BAD(
  512         if ( $ISBAD(input()) ) { $SETBAD(sum(m => this_ind)); }
  513         else,)                 { $sum(m => this_ind) += $input(); }
  514     %}',
  515     BadDoc => '
  516 =for bad
  517 
  518 The routine barfs on bad indices, and bad inputs set target outputs bad.
  519 
  520 =cut
  521 ',
  522     Doc=>'
  523 =for ref
  524 
  525 Broadcasting index add: add C<input> to the C<ind> element of C<sum>, i.e:
  526 
  527  sum(ind) += input
  528 
  529 =for example
  530 
  531 Simple example:
  532 
  533   $x = 2;
  534   $ind = 3;
  535   $sum = zeroes(10);
  536   indadd($x,$ind, $sum);
  537   print $sum
  538   #Result: ( 2 added to element 3 of $sum)
  539   # [0 0 0 2 0 0 0 0 0 0]
  540 
  541 Broadcasting example:
  542 
  543   $x = pdl( 1,2,3);
  544   $ind = pdl( 1,4,6);
  545   $sum = zeroes(10);
  546   indadd($x,$ind, $sum);
  547   print $sum."\n";
  548   #Result: ( 1, 2, and 3 added to elements 1,4,6 $sum)
  549   # [0 1 0 0 2 0 3 0 0 0]
  550 
  551 =cut
  552 ');
  553 
  554 # 1D convolution
  555 # useful for broadcasted 1D filters
  556 pp_addhdr('
  557 /* Fast Modulus with proper negative behaviour */
  558 #define REALMOD(a,b) while ((a)>=(b)) (a) -= (b); while ((a)<0) (a) += (b);
  559 ');
  560 pp_def('conv1d',
  561        Doc => << 'EOD',
  562 
  563 =for ref
  564 
  565 1D convolution along first dimension
  566 
  567 The m-th element of the discrete convolution of an input ndarray
  568 C<$a> of size C<$M>, and a kernel ndarray C<$kern> of size C<$P>, is
  569 calculated as
  570 
  571                               n = ($P-1)/2
  572                               ====
  573                               \
  574   ($a conv1d $kern)[m]   =     >      $a_ext[m - n] * $kern[n]
  575                               /
  576                               ====
  577                               n = -($P-1)/2
  578 
  579 where C<$a_ext> is either the periodic (or reflected) extension of
  580 C<$a> so it is equal to C<$a> on C< 0..$M-1 > and equal to the
  581 corresponding periodic/reflected image of C<$a> outside that range.
  582 
  583 
  584 =for example
  585 
  586   $con = conv1d sequence(10), pdl(-1,0,1);
  587 
  588   $con = conv1d sequence(10), pdl(-1,0,1), {Boundary => 'reflect'};
  589 
  590 By default, periodic boundary conditions are assumed (i.e. wrap around).
  591 Alternatively, you can request reflective boundary conditions using
  592 the C<Boundary> option:
  593 
  594   {Boundary => 'reflect'} # case in 'reflect' doesn't matter
  595 
  596 The convolution is performed along the first dimension. To apply it across
  597 another dimension use the slicing routines, e.g.
  598 
  599   $y = $x->mv(2,0)->conv1d($kernel)->mv(0,2); # along third dim
  600 
  601 This function is useful for broadcasted filtering of 1D signals.
  602 
  603 Compare also L<conv2d|PDL::Image2D/conv2d>, L<convolve|PDL::ImageND/convolve>,
  604 L<fftconvolve|PDL::FFT/fftconvolve()>, L<fftwconv|PDL::FFTW/fftwconv>,
  605 L<rfftwconv|PDL::FFTW/rfftwconv>
  606 
  607 =for bad
  608 
  609 WARNING: C<conv1d> processes bad values in its inputs as
  610 the numeric value of C<< $pdl->badvalue >> so it is not
  611 recommended for processing pdls with bad values in them
  612 unless special care is taken.
  613 
  614 =cut
  615 
  616 EOD
  617         Pars => 'a(m); kern(p); [o]b(m);',
  618         GenericTypes => [ppdefs_all],
  619         OtherPars => 'int reflect;',
  620         HandleBad => 0,
  621         PMCode => '
  622 
  623 sub PDL::conv1d {
  624    my $opt = pop @_ if ref($_[-1]) eq \'HASH\';
  625    die \'Usage: conv1d( a(m), kern(p), [o]b(m), {Options} )\'
  626       if @_<2 || @_>3;
  627    my($x,$kern) = @_;
  628    my $c = @_ == 3 ? $_[2] : PDL->null;
  629    PDL::_conv1d_int($x,$kern,$c,
  630 		     !(defined $opt && exists $$opt{Boundary}) ? 0 :
  631 		     lc $$opt{Boundary} eq "reflect");
  632    return $c;
  633 }
  634 
  635 ',
  636         Code => '
  637            int i,i1,i2,poff,pflip;
  638            double tmp;
  639            int reflect = $COMP(reflect);
  640            int m_size = $SIZE(m);
  641            int p_size = $SIZE(p);
  642 
  643            poff = (p_size-1)/2;
  644            for(i=0; i<m_size; i++) {
  645               tmp = 0;
  646                   for(i1=0; i1<p_size; i1++) {
  647                      pflip = p_size - 1 - i1;
  648                      i2 = i+i1 - poff;
  649                      if (reflect && i2<0)
  650                      	i2 = -i2;
  651                      if (reflect && i2>=m_size)
  652                      	i2 = m_size-(i2-m_size+1);
  653 
  654                      REALMOD(i2,m_size);
  655                      tmp += $a(m=>i2) * $kern(p=>pflip);
  656                   }
  657               $b(m=>i) = tmp;
  658            }
  659 ');
  660 
  661 
  662 # this can be achieved by
  663 #  ($x->dummy(0) == $y)->orover
  664 # but this one avoids a larger intermediate and potentially shortcuts
  665 pp_def('in',
  666 	Pars => 'a(); b(n); [o] c()',
  667         GenericTypes => [ppdefs_all],
  668 	Code => '$c() = 0;
  669 		 loop(n) %{ if ($a() == $b()) {$c() = 1; break;} %}',
  670 	Doc => <<'EOD',
  671 
  672 =for ref
  673 
  674 test if a is in the set of values b
  675 
  676 =for example
  677 
  678    $goodmsk = $labels->in($goodlabels);
  679    print pdl(3,1,4,6,2)->in(pdl(2,3,3));
  680   [1 0 0 0 1]
  681 
  682 C<in> is akin to the I<is an element of> of set theory. In principle,
  683 PDL broadcasting could be used to achieve its functionality by using a
  684 construct like
  685 
  686    $msk = ($labels->dummy(0) == $goodlabels)->orover;
  687 
  688 However, C<in> doesn't create a (potentially large) intermediate
  689 and is generally faster.
  690 
  691 =cut
  692 
  693 EOD
  694 );
  695 
  696 pp_add_exported ('', 'uniq');
  697 pp_addpm (<< 'EOPM');
  698 =head2 uniq
  699 
  700 =for ref
  701 
  702 return all unique elements of an ndarray
  703 
  704 The unique elements are returned in ascending order.
  705 
  706 =for example
  707 
  708   PDL> p pdl(2,2,2,4,0,-1,6,6)->uniq
  709   [-1 0 2 4 6]     # 0 is returned 2nd (sorted order)
  710 
  711   PDL> p pdl(2,2,2,4,nan,-1,6,6)->uniq
  712   [-1 2 4 6 nan]   # NaN value is returned at end
  713 
  714 Note: The returned pdl is 1D; any structure of the input
  715 ndarray is lost.  C<NaN> values are never compare equal to
  716 any other values, even themselves.  As a result, they are
  717 always unique. C<uniq> returns the NaN values at the end
  718 of the result ndarray.  This follows the Matlab usage.
  719 
  720 See L</uniqind> if you need the indices of the unique
  721 elements rather than the values.
  722 
  723 =for bad
  724 
  725 Bad values are not considered unique by uniq and are ignored.
  726 
  727  $x=sequence(10);
  728  $x=$x->setbadif($x%3);
  729  print $x->uniq;
  730  [0 3 6 9]
  731 
  732 =cut
  733 
  734 *uniq = \&PDL::uniq;
  735 # return unique elements of array
  736 # find as jumps in the sorted array
  737 # flattens in the process
  738 sub PDL::uniq {
  739    use PDL::Core 'barf';
  740    my ($arr) = @_;
  741    return $arr if($arr->nelem == 0); # The null list is unique (CED)
  742    my $srt  = $arr->clump(-1)->where($arr==$arr)->qsort;  # no NaNs or BADs for qsort
  743    my $nans = $arr->clump(-1)->where($arr!=$arr);
  744    my $uniq = ($srt->nelem > 0) ? $srt->where($srt != $srt->rotate(-1)) : $srt;
  745    # make sure we return something if there is only one value
  746    my $answ = $nans;  # NaN values always uniq
  747    if ( $uniq->nelem > 0 ) {
  748       $answ = $uniq->append($answ);
  749    } else {
  750       $answ = ( ($srt->nelem == 0) ?  $srt : PDL::pdl( ref($srt), [$srt->index(0)] ) )->append($answ);
  751    }
  752    return $answ;
  753 }
  754 EOPM
  755 
  756 pp_add_exported ('', 'uniqind');
  757 pp_addpm (<< 'EOPM');
  758 =head2 uniqind
  759 
  760 =for ref
  761 
  762 Return the indices of all unique elements of an ndarray
  763 The order is in the order of the values to be consistent
  764 with uniq. C<NaN> values never compare equal with any
  765 other value and so are always unique.  This follows the
  766 Matlab usage.
  767 
  768 =for example
  769 
  770   PDL> p pdl(2,2,2,4,0,-1,6,6)->uniqind
  771   [5 4 1 3 6]     # the 0 at index 4 is returned 2nd, but...
  772 
  773   PDL> p pdl(2,2,2,4,nan,-1,6,6)->uniqind
  774   [5 1 3 6 4]     # ...the NaN at index 4 is returned at end
  775 
  776 
  777 Note: The returned pdl is 1D; any structure of the input
  778 ndarray is lost.
  779 
  780 See L</uniq> if you want the unique values instead of the
  781 indices.
  782 
  783 =for bad
  784 
  785 Bad values are not considered unique by uniqind and are ignored.
  786 
  787 =cut
  788 
  789 *uniqind = \&PDL::uniqind;
  790 # return unique elements of array
  791 # find as jumps in the sorted array
  792 # flattens in the process
  793 sub PDL::uniqind {
  794   use PDL::Core 'barf';
  795   my ($arr) = @_;
  796   return $arr if($arr->nelem == 0); # The null list is unique (CED)
  797   # Different from uniq we sort and store the result in an intermediary
  798   my $aflat = $arr->flat;
  799   my $nanind = which($aflat!=$aflat);                        # NaN indexes
  800   my $good = PDL->sequence(indx, $aflat->dims)->where($aflat==$aflat);  # good indexes
  801   my $i_srt = $aflat->where($aflat==$aflat)->qsorti;         # no BAD or NaN values for qsorti
  802   my $srt = $aflat->where($aflat==$aflat)->index($i_srt);
  803   my $uniqind;
  804   if ($srt->nelem > 0) {
  805      $uniqind = which($srt != $srt->rotate(-1));
  806      $uniqind = $i_srt->slice('0') if $uniqind->isempty;
  807   } else {
  808      $uniqind = which($srt);
  809   }
  810   # Now map back to the original space
  811   my $ansind = $nanind;
  812   if ( $uniqind->nelem > 0 ) {
  813      $ansind = ($good->index($i_srt->index($uniqind)))->append($ansind);
  814   } else {
  815      $ansind = $uniqind->append($ansind);
  816   }
  817   return $ansind;
  818 }
  819 
  820 EOPM
  821 
  822 pp_add_exported ('', 'uniqvec');
  823 pp_addpm (<< 'EOPM');
  824 =head2 uniqvec
  825 
  826 =for ref
  827 
  828 Return all unique vectors out of a collection
  829 
  830   NOTE: If any vectors in the input ndarray have NaN values
  831   they are returned at the end of the non-NaN ones.  This is
  832   because, by definition, NaN values never compare equal with
  833   any other value.
  834 
  835   NOTE: The current implementation does not sort the vectors
  836   containing NaN values.
  837 
  838 The unique vectors are returned in lexicographically sorted
  839 ascending order. The 0th dimension of the input PDL is treated
  840 as a dimensional index within each vector, and the 1st and any
  841 higher dimensions are taken to run across vectors. The return
  842 value is always 2D; any structure of the input PDL (beyond using
  843 the 0th dimension for vector index) is lost.
  844 
  845 See also L</uniq> for a unique list of scalars; and
  846 L<qsortvec|PDL::Ufunc/qsortvec> for sorting a list of vectors
  847 lexicographcally.
  848 
  849 =for bad
  850 
  851 If a vector contains all bad values, it is ignored as in L</uniq>.
  852 If some of the values are good, it is treated as a normal vector. For
  853 example, [1 2 BAD] and [BAD 2 3] could be returned, but [BAD BAD BAD]
  854 could not.  Vectors containing BAD values will be returned after any
  855 non-NaN and non-BAD containing vectors, followed by the NaN vectors.
  856 
  857 =cut
  858 
  859 sub PDL::uniqvec {
  860    my($pdl) = shift;
  861 
  862    return $pdl if ( $pdl->nelem == 0 || $pdl->ndims < 2 );
  863    return $pdl if ( $pdl->slice("(0)")->nelem < 2 );                     # slice isn't cheap but uniqvec isn't either
  864 
  865    my $pdl2d = $pdl->clump(1..$pdl->ndims-1);
  866    my $ngood = $pdl2d->ngoodover;
  867    $pdl2d = $pdl2d->mv(0,-1)->dice($ngood->which)->mv(-1,0);             # remove all-BAD vectors
  868 
  869    my $numnan = ($pdl2d!=$pdl2d)->sumover;                                  # works since no all-BADs to confuse
  870 
  871    my $presrt = $pdl2d->mv(0,-1)->dice($numnan->not->which)->mv(0,-1);      # remove vectors with any NaN values
  872    my $nanvec = $pdl2d->mv(0,-1)->dice($numnan->which)->mv(0,-1);           # the vectors with any NaN values
  873 
  874    my $srt = $presrt->qsortvec->mv(0,-1);                                   # BADs are sorted by qsortvec
  875    my $srtdice = $srt;
  876    my $somebad = null;
  877    if ($srt->badflag) {
  878       $srtdice = $srt->dice($srt->mv(0,-1)->nbadover->not->which);
  879       $somebad = $srt->dice($srt->mv(0,-1)->nbadover->which);
  880    }
  881 
  882    my $uniq = $srtdice->nelem > 0
  883      ? ($srtdice != $srtdice->rotate(-1))->mv(0,-1)->orover->which
  884      : $srtdice->orover->which;
  885 
  886    my $ans = $uniq->nelem > 0 ? $srtdice->dice($uniq) :
  887       ($srtdice->nelem > 0) ? $srtdice->slice("0,:") :
  888       $srtdice;
  889    return $ans->append($somebad)->append($nanvec->mv(0,-1))->mv(0,-1);
  890 }
  891 
  892 EOPM
  893 
  894 #####################################################################
  895 #  clipping routines
  896 #####################################################################
  897 
  898 # clipping
  899 
  900 for my $opt (
  901 	     ['hclip','PDLMIN'],
  902 	     ['lclip','PDLMAX']
  903 	     ) {
  904     my $name = $opt->[0];
  905     my $op   = $opt->[1];
  906     my $code = '$c() = '.$op.'($b(), $a());';
  907     pp_def(
  908 	   $name,
  909 	   HandleBad => 1,
  910 	   Pars => 'a(); b(); [o] c()',
  911 	   Code =>
  912 	   'PDL_IF_BAD(if ( $ISBAD(a()) || $ISBAD(b()) ) {
  913                $SETBAD(c());
  914             } else,) { '.$code.' }',
  915 	   Doc =>  'clip (threshold) C<$a> by C<$b> (C<$b> is '.
  916 	   ($name eq 'hclip' ? 'upper' : 'lower').' bound)',
  917           PMCode=><<"EOD",
  918 sub PDL::$name {
  919    my (\$x,\$y) = \@_;
  920    my \$c;
  921    if (\$x->is_inplace) {
  922        \$x->set_inplace(0); \$c = \$x;
  923    } elsif (\@_ > 2) {\$c=\$_[2]} else {\$c=PDL->nullcreate(\$x)}
  924    PDL::_${name}_int(\$x,\$y,\$c);
  925    return \$c;
  926 }
  927 EOD
  928     ); # pp_def $name
  929 
  930 } # for: my $opt
  931 
  932 pp_add_exported('', 'clip');
  933 pp_addpm(<<'EOD');
  934 =head2 clip
  935 
  936 =for ref
  937 
  938 Clip (threshold) an ndarray by (optional) upper or lower bounds.
  939 
  940 =for usage
  941 
  942  $y = $x->clip(0,3);
  943  $c = $x->clip(undef, $x);
  944 
  945 =for bad
  946 
  947 clip handles bad values since it is just a
  948 wrapper around L</hclip> and
  949 L</lclip>.
  950 
  951 =cut
  952 EOD
  953 
  954 pp_def(
  955 	'clip',
  956 	HandleBad => 1,
  957 	Pars => 'a(); l(); h(); [o] c()',
  958 	Code => <<'EOBC',
  959          PDL_IF_BAD(
  960 	 if( $ISBAD(a()) || $ISBAD(l()) || $ISBAD(h()) ) {
  961 	   $SETBAD(c());
  962          } else,) {
  963            $c() = PDLMIN($h(), PDLMAX($l(), $a()));
  964          }
  965 EOBC
  966        PMCode => <<'EOPM',
  967 *clip = \&PDL::clip;
  968 sub PDL::clip {
  969   my($x, $l, $h) = @_;
  970   my $d;
  971   unless(defined($l) || defined($h)) {
  972       # Deal with pathological case
  973       if($x->is_inplace) {
  974 	  $x->set_inplace(0);
  975 	  return $x;
  976       } else {
  977 	  return $x->copy;
  978       }
  979   }
  980 
  981   if($x->is_inplace) {
  982       $x->set_inplace(0); $d = $x
  983   } elsif (@_ > 3) {
  984       $d=$_[3]
  985   } else {
  986       $d = PDL->nullcreate($x);
  987   }
  988   if(defined($l) && defined($h)) {
  989       PDL::_clip_int($x,$l,$h,$d);
  990   } elsif( defined($l) ) {
  991       PDL::_lclip_int($x,$l,$d);
  992   } elsif( defined($h) ) {
  993       PDL::_hclip_int($x,$h,$d);
  994   } else {
  995       die "This can't happen (clip contingency) - file a bug";
  996   }
  997 
  998   return $d;
  999 }
 1000 EOPM
 1001     ); # end of clip pp_def call
 1002 
 1003 ############################################################
 1004 # elementary statistics and histograms
 1005 ############################################################
 1006 
 1007 pp_def(
 1008        'wtstat',
 1009        HandleBad => 1,
 1010        Pars => 'a(n); wt(n); avg(); [o]b();',
 1011        GenericTypes => [ppdefs_all],
 1012        OtherPars => 'int deg',
 1013        Code =>
 1014        'double wtsum = 0;
 1015 	double statsum = 0;
 1016         int flag = 0;
 1017         loop(n) %{
 1018            PDL_IF_BAD(if ($ISGOOD(wt()) && $ISGOOD(a()) && $ISGOOD(avg())),) {
 1019               long double tmp;
 1020               PDL_Indx i;
 1021               wtsum += $wt();
 1022               tmp=1;
 1023               for(i=0; i<$COMP(deg); i++)
 1024                  tmp *= $a();
 1025               statsum += $wt() * (tmp - $avg());
 1026               flag = 1;
 1027            }
 1028         %}
 1029         PDL_IF_BAD(if (!flag) { $SETBAD(b()); $PDLSTATESETBAD(b); }
 1030         else,)                { $b() = statsum / wtsum; }',
 1031        Doc => '
 1032 =for ref
 1033 
 1034 Weighted statistical moment of given degree
 1035 
 1036 This calculates a weighted statistic over the vector C<a>.
 1037 The formula is
 1038 
 1039  b() = (sum_i wt_i * (a_i ** degree - avg)) / (sum_i wt_i)
 1040 ',
 1041        BadDoc => '
 1042 =for bad
 1043 
 1044 Bad values are ignored in any calculation; C<$b> will only
 1045 have its bad flag set if the output contains any bad data.
 1046 ',
 1047        );
 1048 
 1049 pp_def('statsover',
 1050 	HandleBad => 1,
 1051 	Pars => 'a(n); w(n); float+ [o]avg(); float+ [o]prms(); int+ [o]median(); int+ [o]min(); int+ [o]max(); float+ [o]adev(); float+ [o]rms()',
 1052 	Code =>
 1053         '$GENERIC(avg) tmp = 0;
 1054          $GENERIC(avg) tmp1 = 0;
 1055          $GENERIC(avg) diff = 0;
 1056          $GENERIC(min) curmin = 0, curmax = 0;
 1057          $GENERIC(w) norm = 0;
 1058          int flag = 0;
 1059          loop(n) %{             /* Accumulate sum and summed weight. */
 1060             /* perhaps should check w() for bad values too ? */
 1061             PDL_IF_BAD(if ( $ISGOOD(a()) ),) {
 1062               tmp += $a()*$w();
 1063               norm += ($GENERIC(avg)) $w();
 1064               if (!flag) { curmin = $a(); curmax = $a(); flag=1; }
 1065               if ($a() < curmin) {
 1066                 curmin = $a();
 1067               } else if ($a() > curmax) {
 1068                 curmax = $a();
 1069               }
 1070             }
 1071          %}
 1072          /* have at least one valid point if flag true */
 1073          PDL_IF_BAD(if ( !flag ) {
 1074            $SETBAD(avg());  $PDLSTATESETBAD(avg);
 1075            $SETBAD(rms());  $PDLSTATESETBAD(rms);
 1076            $SETBAD(adev()); $PDLSTATESETBAD(adev);
 1077            $SETBAD(min());  $PDLSTATESETBAD(min);
 1078            $SETBAD(max());  $PDLSTATESETBAD(max);
 1079            $SETBAD(prms()); $PDLSTATESETBAD(prms);
 1080          } else,) {
 1081            $avg() = tmp / norm; /* Find mean */
 1082            $min() = curmin;
 1083            $max() = curmax;
 1084            /* Calculate the RMS and standard deviation. */
 1085            tmp = 0;
 1086            loop(n) %{
 1087               if ($ISGOOD(a())) {
 1088                  diff = $a()-$avg();
 1089                  tmp += diff * diff * $w();
 1090                  tmp1 += fabs(diff) * $w();
 1091               }
 1092            %}
 1093            $rms() = sqrt( tmp/norm );
 1094            if(norm>1)
 1095                    $prms() =  sqrt( tmp/(norm-1) );
 1096            else
 1097                    PDL_IF_BAD($SETBAD(prms()),$prms() = 0);
 1098            $adev() = tmp1 / norm ;
 1099          }',
 1100       PMCode => '
 1101 sub PDL::statsover {
 1102    barf(\'Usage: ($mean,[$prms, $median, $min, $max, $adev, $rms]) = statsover($data,[$weights])\') if @_>2;
 1103    my ($data, $weights) = @_;
 1104    $weights //= $data->ones();
 1105    my $median = $data->medover;
 1106    my $mean = PDL->nullcreate($data);
 1107    my $rms = PDL->nullcreate($data);
 1108    my $min = PDL->nullcreate($data);
 1109    my $max = PDL->nullcreate($data);
 1110    my $adev = PDL->nullcreate($data);
 1111    my $prms = PDL->nullcreate($data);
 1112    PDL::_statsover_int($data, $weights, $mean, $prms, $median, $min, $max, $adev, $rms);
 1113    wantarray ? ($mean, $prms, $median, $min, $max, $adev, $rms) : $mean;
 1114 }
 1115 ',
 1116       Doc => '
 1117 =for ref
 1118 
 1119 Calculate useful statistics over a dimension of an ndarray
 1120 
 1121 =for usage
 1122 
 1123   ($mean,$prms,$median,$min,$max,$adev,$rms) = statsover($ndarray, $weights);
 1124 
 1125 This utility function calculates various useful
 1126 quantities of an ndarray. These are:
 1127 
 1128 =over 3
 1129 
 1130 =item * the mean:
 1131 
 1132   MEAN = sum (x)/ N
 1133 
 1134 with C<N> being the number of elements in x
 1135 
 1136 =item * the population RMS deviation from the mean:
 1137 
 1138   PRMS = sqrt( sum( (x-mean(x))^2 )/(N-1)
 1139 
 1140 The population deviation is the best-estimate of the deviation
 1141 of the population from which a sample is drawn.
 1142 
 1143 =item * the median
 1144 
 1145 The median is the 50th percentile data value.  Median is found by
 1146 L<medover|PDL::Ufunc/medover>, so WEIGHTING IS IGNORED FOR THE MEDIAN CALCULATION.
 1147 
 1148 =item * the minimum
 1149 
 1150 =item * the maximum
 1151 
 1152 =item * the average absolute deviation:
 1153 
 1154   AADEV = sum( abs(x-mean(x)) )/N
 1155 
 1156 =item * RMS deviation from the mean:
 1157 
 1158   RMS = sqrt(sum( (x-mean(x))^2 )/N)
 1159 
 1160 (also known as the root-mean-square deviation, or the square root of the
 1161 variance)
 1162 
 1163 =back
 1164 
 1165 This operator is a projection operator so the calculation
 1166 will take place over the final dimension. Thus if the input
 1167 is N-dimensional each returned value will be N-1 dimensional,
 1168 to calculate the statistics for the entire ndarray either
 1169 use C<clump(-1)> directly on the ndarray or call C<stats>.
 1170 
 1171 =cut
 1172 ',
 1173      BadDoc =>'
 1174 =for bad
 1175 
 1176 Bad values are simply ignored in the calculation, effectively reducing
 1177 the sample size.  If all data are bad then the output data are marked bad.
 1178 
 1179 =cut
 1180 ',
 1181 );
 1182 
 1183 pp_add_exported('','stats');
 1184 pp_addpm(<<'EOD');
 1185 =head2 stats
 1186 
 1187 =for ref
 1188 
 1189 Calculates useful statistics on an ndarray
 1190 
 1191 =for usage
 1192 
 1193  ($mean,$prms,$median,$min,$max,$adev,$rms) = stats($ndarray,[$weights]);
 1194 
 1195 This utility calculates all the most useful quantities in one call.
 1196 It works the same way as L</statsover>, except that the quantities are
 1197 calculated considering the entire input PDL as a single sample, rather
 1198 than as a collection of rows. See L</statsover> for definitions of the
 1199 returned quantities.
 1200 
 1201 =for bad
 1202 
 1203 Bad values are handled; if all input values are bad, then all of the output
 1204 values are flagged bad.
 1205 
 1206 =cut
 1207 
 1208 *stats	  = \&PDL::stats;
 1209 sub PDL::stats {
 1210     barf('Usage: ($mean,[$rms]) = stats($data,[$weights])') if @_>2;
 1211     my ($data,$weights) = @_;
 1212 
 1213     # Ensure that $weights is properly broadcasted over; this could be
 1214     # done rather more efficiently...
 1215     if(defined $weights) {
 1216 	$weights = pdl($weights) unless UNIVERSAL::isa($weights,'PDL');
 1217 	if( ($weights->ndims != $data->ndims) or
 1218 	    (pdl($weights->dims) != pdl($data->dims))->or
 1219 	  ) {
 1220 		$weights = $weights + zeroes($data)
 1221 	}
 1222 	$weights = $weights->flat;
 1223     }
 1224 
 1225     return PDL::statsover($data->flat,$weights);
 1226 }
 1227 EOD
 1228 
 1229 my $histogram_doc = <<'EOD';
 1230 =for ref
 1231 
 1232 Calculates a histogram for given stepsize and minimum.
 1233 
 1234 =for usage
 1235 
 1236  $h = histogram($data, $step, $min, $numbins);
 1237  $hist = zeroes $numbins;  # Put histogram in existing ndarray.
 1238  histogram($data, $hist, $step, $min, $numbins);
 1239 
 1240 The histogram will contain C<$numbins> bins starting from C<$min>, each
 1241 C<$step> wide. The value in each bin is the number of
 1242 values in C<$data> that lie within the bin limits.
 1243 
 1244 
 1245 Data below the lower limit is put in the first bin, and data above the
 1246 upper limit is put in the last bin.
 1247 
 1248 The output is reset in a different broadcastloop so that you
 1249 can take a histogram of C<$a(10,12)> into C<$b(15)> and get the result
 1250 you want.
 1251 
 1252 For a higher-level interface, see L<hist|PDL::Basic/hist>.
 1253 
 1254 =for example
 1255 
 1256  pdl> p histogram(pdl(1,1,2),1,0,3)
 1257  [0 2 1]
 1258 
 1259 =cut
 1260 
 1261 EOD
 1262 
 1263 my $whistogram_doc = <<'EOD';
 1264 =for ref
 1265 
 1266 Calculates a histogram from weighted data for given stepsize and minimum.
 1267 
 1268 =for usage
 1269 
 1270  $h = whistogram($data, $weights, $step, $min, $numbins);
 1271  $hist = zeroes $numbins;  # Put histogram in existing ndarray.
 1272  whistogram($data, $weights, $hist, $step, $min, $numbins);
 1273 
 1274 The histogram will contain C<$numbins> bins starting from C<$min>, each
 1275 C<$step> wide. The value in each bin is the sum of the values in C<$weights>
 1276 that correspond to values in C<$data> that lie within the bin limits.
 1277 
 1278 Data below the lower limit is put in the first bin, and data above the
 1279 upper limit is put in the last bin.
 1280 
 1281 The output is reset in a different broadcastloop so that you
 1282 can take a histogram of C<$a(10,12)> into C<$b(15)> and get the result
 1283 you want.
 1284 
 1285 =for example
 1286 
 1287  pdl> p whistogram(pdl(1,1,2), pdl(0.1,0.1,0.5), 1, 0, 4)
 1288  [0 0.2 0.5 0]
 1289 
 1290 =cut
 1291 EOD
 1292 
 1293 for(
 1294     {Name => 'histogram',
 1295      WeightPar => '',
 1296      HistType => 'int+',
 1297      HistOp => '++',
 1298      Doc => $histogram_doc,
 1299      },
 1300     {Name => 'whistogram',
 1301      WeightPar => 'float+ wt(n);',
 1302      HistType => 'float+',
 1303      HistOp => '+= $wt()',
 1304      Doc => $whistogram_doc,
 1305      }
 1306     )
 1307 {
 1308 	my $code = pp_line_numbers __LINE__-1, '
 1309 	if ($SIZE(m) == 0) $CROAK("called with m dim of 0");
 1310 	register int j;
 1311 	register int maxj = $SIZE(m)-1;
 1312 	register double min  = $COMP(min);
 1313 	register double step = $COMP(step);
 1314 	broadcastloop %{
 1315 	   loop(m) %{ $hist() = 0; %}
 1316 	%}
 1317 	broadcastloop %{
 1318 	   loop(n) %{
 1319 	     PDL_IF_BAD(if ( $ISGOOD(in()) ),) {
 1320 	      j = (int) (($in()-min)/step);
 1321 	      if (j<0) j=0;
 1322 	      if (j > maxj) j = maxj;
 1323 	      ($hist(m => j))'.$_->{HistOp}.';
 1324              }
 1325 	   %}
 1326 	%}';
 1327 pp_def($_->{Name},
 1328        Pars => 'in(n); '.$_->{WeightPar}.$_->{HistType}.  '[o] hist(m)',
 1329        GenericTypes => [ppdefs_all],
 1330        # set outdim by Par!
 1331        OtherPars => 'double step; double min; int msize => m',
 1332        HandleBad => 1,
 1333        Code => $code,
 1334        Doc=>$_->{Doc});
 1335 }
 1336 
 1337 my $histogram2d_doc = <<'EOD';
 1338 =for ref
 1339 
 1340 Calculates a 2d histogram.
 1341 
 1342 =for usage
 1343 
 1344  $h = histogram2d($datax, $datay, $stepx, $minx,
 1345        $nbinx, $stepy, $miny, $nbiny);
 1346  $hist = zeroes $nbinx, $nbiny;  # Put histogram in existing ndarray.
 1347  histogram2d($datax, $datay, $hist, $stepx, $minx,
 1348        $nbinx, $stepy, $miny, $nbiny);
 1349 
 1350 The histogram will contain C<$nbinx> x C<$nbiny> bins, with the lower
 1351 limits of the first one at C<($minx, $miny)>, and with bin size
 1352 C<($stepx, $stepy)>.
 1353 The value in each bin is the number of
 1354 values in C<$datax> and C<$datay> that lie within the bin limits.
 1355 
 1356 Data below the lower limit is put in the first bin, and data above the
 1357 upper limit is put in the last bin.
 1358 
 1359 =for example
 1360 
 1361  pdl> p histogram2d(pdl(1,1,1,2,2),pdl(2,1,1,1,1),1,0,3,1,0,3)
 1362  [
 1363   [0 0 0]
 1364   [0 2 2]
 1365   [0 1 0]
 1366  ]
 1367 
 1368 =cut
 1369 EOD
 1370 
 1371 my $whistogram2d_doc = <<'EOD';
 1372 =for ref
 1373 
 1374 Calculates a 2d histogram from weighted data.
 1375 
 1376 =for usage
 1377 
 1378  $h = whistogram2d($datax, $datay, $weights,
 1379        $stepx, $minx, $nbinx, $stepy, $miny, $nbiny);
 1380  $hist = zeroes $nbinx, $nbiny;  # Put histogram in existing ndarray.
 1381  whistogram2d($datax, $datay, $weights, $hist,
 1382        $stepx, $minx, $nbinx, $stepy, $miny, $nbiny);
 1383 
 1384 The histogram will contain C<$nbinx> x C<$nbiny> bins, with the lower
 1385 limits of the first one at C<($minx, $miny)>, and with bin size
 1386 C<($stepx, $stepy)>.
 1387 The value in each bin is the sum of the values in
 1388 C<$weights> that correspond to values in C<$datax> and C<$datay> that lie within the bin limits.
 1389 
 1390 Data below the lower limit is put in the first bin, and data above the
 1391 upper limit is put in the last bin.
 1392 
 1393 =for example
 1394 
 1395  pdl> p whistogram2d(pdl(1,1,1,2,2),pdl(2,1,1,1,1),pdl(0.1,0.2,0.3,0.4,0.5),1,0,3,1,0,3)
 1396  [
 1397   [  0   0   0]
 1398   [  0 0.5 0.9]
 1399   [  0 0.1   0]
 1400  ]
 1401 
 1402 =cut
 1403 EOD
 1404 
 1405 for(
 1406     {Name => 'histogram2d',
 1407      WeightPar => '',
 1408      HistType => 'int+',
 1409      HistOp => '++',
 1410      Doc => $histogram2d_doc,
 1411 	},
 1412     {Name => 'whistogram2d',
 1413      WeightPar => 'float+ wt(n);',
 1414      HistType => 'float+',
 1415      HistOp => '+= $wt()',
 1416      Doc => $whistogram2d_doc,
 1417 	}
 1418     )
 1419 {
 1420 	my $code = pp_line_numbers __LINE__-1, '
 1421 	if ($SIZE(ma) == 0) $CROAK("called with ma dim of 0");
 1422 	if ($SIZE(mb) == 0) $CROAK("called with mb dim of 0");
 1423 	register int ja,jb;
 1424 	register int maxja = $SIZE(ma)-1;
 1425 	register int maxjb = $SIZE(mb)-1;
 1426 	register double mina = $COMP(mina);
 1427 	register double minb = $COMP(minb);
 1428 	register double stepa = $COMP(stepa);
 1429 	register double stepb = $COMP(stepb);
 1430 	broadcastloop %{
 1431 	   loop(ma,mb) %{ $hist() = 0; %}
 1432 	%}
 1433 	broadcastloop %{
 1434 	   loop(n) %{
 1435 	     PDL_IF_BAD(if ( $ISGOOD(ina()) && $ISGOOD(inb()) ),) {
 1436 	      ja = (int) (($ina()-mina)/stepa);
 1437 	      jb = (int) (($inb()-minb)/stepb);
 1438 	      if (ja<0) ja=0;
 1439 	      if (ja > maxja) ja = maxja;
 1440 	      if (jb<0) jb=0;
 1441 	      if (jb > maxjb) jb = maxjb;
 1442 	      ($hist(ma => ja,mb => jb))'.$_->{HistOp}.';
 1443 	     }
 1444 	   %}
 1445 	%}';
 1446 pp_def($_->{Name},
 1447        Pars => 'ina(n); inb(n); '.$_->{WeightPar}.$_->{HistType}.  '[o] hist(ma,mb)',
 1448        GenericTypes => [ppdefs_all],
 1449        # set outdim by Par!
 1450        OtherPars => 'double stepa; double mina; int masize => ma;
 1451 	             double stepb; double minb; int mbsize => mb;',
 1452        HandleBad => 1,
 1453        Code => $code,
 1454        Doc=> $_->{Doc});
 1455 }
 1456 
 1457 
 1458 ###########################################################
 1459 # a number of constructors: fibonacci, append, axisvalues &
 1460 # random numbers
 1461 ###########################################################
 1462 
 1463 pp_def('fibonacci',
 1464         Pars => 'i(n); indx [o]x(n)',
 1465         Inplace => 1,
 1466         GenericTypes => [ppdefs_all],
 1467 	Doc=>'Constructor - a vector with Fibonacci\'s sequence',
 1468 	PMFunc=>'',
 1469 	PMCode=><<'EOD',
 1470 sub fibonacci { ref($_[0]) && ref($_[0]) ne 'PDL::Type' ? $_[0]->fibonacci : PDL->fibonacci(@_) }
 1471 sub PDL::fibonacci{
 1472    my $x = &PDL::Core::_construct;
 1473    my $is_inplace = $x->is_inplace;
 1474    my ($in, $out) = $x->clump(-1);
 1475    $out = $is_inplace ? $in->inplace : PDL->null;
 1476    PDL::_fibonacci_int($in, $out);
 1477    $out;
 1478 }
 1479 EOD
 1480      Code => '
 1481         PDL_Indx i=0;
 1482         $GENERIC() x1, x2;
 1483         x1 = 1; x2 = 0;
 1484         loop(n) %{
 1485            $x() = x1 + x2;
 1486            if (i++>0) {
 1487               x2 = x1;
 1488               x1 = $x();
 1489            }
 1490         %}
 1491 ');
 1492 
 1493 pp_def('append',
 1494 	Pars => 'a(n); b(m); [o] c(mn)',
 1495         GenericTypes => [ppdefs_all],
 1496         PMCode => pp_line_numbers(__LINE__-1, '
 1497 sub PDL::append {
 1498   my ($i1, $i2, $o) = map PDL->topdl($_), @_;
 1499   if (grep $_->isempty, $i1, $i2) {
 1500     if (!defined $o) {
 1501       return $i2->isnull ? PDL->zeroes(0) : $i2->copy if $i1->isempty;
 1502       return $i1->isnull ? PDL->zeroes(0) : $i1->copy;
 1503     } else {
 1504       $o .= $i2->isnull ? PDL->zeroes(0) : $i2, return $o if $i1->isempty;
 1505       $o .= $i1->isnull ? PDL->zeroes(0) : $i1, return $o;
 1506     }
 1507   }
 1508   $o //= PDL->null;
 1509   PDL::_append_int($i1, $i2->convert($i1->type), $o);
 1510   $o;
 1511 }
 1512         '),
 1513 	RedoDimsCode => '$SIZE(mn) = $SIZE(m)+$SIZE(n);',
 1514 	Code => 'register PDL_Indx mnp;
 1515 		 PDL_Indx ns = $SIZE(n);
 1516                  broadcastloop %{
 1517                        loop(n) %{ $c(mn => n) = $a(); %}
 1518 		       loop(m) %{ mnp = m+ns; $c(mn => mnp) = $b(); %}
 1519 		 %}',
 1520 	Doc => '
 1521 =for ref
 1522 
 1523 append two ndarrays by concatenating along their first dimensions
 1524 
 1525 =for example
 1526 
 1527  $x = ones(2,4,7);
 1528  $y = sequence 5;
 1529  $c = $x->append($y);  # size of $c is now (7,4,7) (a jumbo-ndarray ;)
 1530 
 1531 C<append> appends two ndarrays along their first dimensions. The rest of the
 1532 dimensions must be compatible in the broadcasting sense. The resulting
 1533 size of the first dimension is the sum of the sizes of the first dimensions
 1534 of the two argument ndarrays - i.e. C<n + m>.
 1535 
 1536 Similar functions include L</glue> (below), which can append more
 1537 than two ndarrays along an arbitrary dimension, and
 1538 L<cat|PDL::Core/cat>, which can append more than two ndarrays that all
 1539 have the same sized dimensions.
 1540 '
 1541    );
 1542 
 1543 pp_addpm(<<'EOD');
 1544 =head2 glue
 1545 
 1546 =for usage
 1547 
 1548   $c = $x->glue(<dim>,$y,...)
 1549 
 1550 =for ref
 1551 
 1552 Glue two or more PDLs together along an arbitrary dimension
 1553 (N-D L</append>).
 1554 
 1555 Sticks $x, $y, and all following arguments together along the
 1556 specified dimension.  All other dimensions must be compatible in the
 1557 broadcasting sense.
 1558 
 1559 Glue is permissive, in the sense that every PDL is treated as having an
 1560 infinite number of trivial dimensions of order 1 -- so C<< $x->glue(3,$y) >>
 1561 works, even if $x and $y are only one dimensional.
 1562 
 1563 If one of the PDLs has no elements, it is ignored.  Likewise, if one
 1564 of them is actually the undefined value, it is treated as if it had no
 1565 elements.
 1566 
 1567 If the first parameter is a defined perl scalar rather than a pdl,
 1568 then it is taken as a dimension along which to glue everything else,
 1569 so you can say C<$cube = PDL::glue(3,@image_list);> if you like.
 1570 
 1571 C<glue> is implemented in pdl, using a combination of L<xchg|PDL::Slices/xchg> and
 1572 L</append>.  It should probably be updated (one day) to a pure PP
 1573 function.
 1574 
 1575 Similar functions include L</append> (above), which appends
 1576 only two ndarrays along their first dimension, and
 1577 L<cat|PDL::Core/cat>, which can append more than two ndarrays that all
 1578 have the same sized dimensions.
 1579 
 1580 =cut
 1581 
 1582 sub PDL::glue{
 1583     my($x) = shift;
 1584     my($dim) = shift;
 1585 
 1586     ($dim, $x) = ($x, $dim) if defined $x && !ref $x;
 1587     confess 'dimension must be Perl scalar' if ref $dim;
 1588 
 1589     if(!defined $x || $x->nelem==0) {
 1590 	return $x unless(@_);
 1591 	return shift() if(@_<=1);
 1592 	$x=shift;
 1593 	return PDL::glue($x,$dim,@_);
 1594     }
 1595 
 1596     if($dim - $x->dim(0) > 100) {
 1597 	print STDERR "warning:: PDL::glue allocating >100 dimensions!\n";
 1598     }
 1599     while($dim >= $x->ndims) {
 1600 	$x = $x->dummy(-1,1);
 1601     }
 1602     $x = $x->xchg(0,$dim);
 1603 
 1604     while(scalar(@_)){
 1605 	my $y = shift;
 1606 	next unless(defined $y && $y->nelem);
 1607 
 1608 	while($dim >= $y->ndims) {
 1609 		$y = $y->dummy(-1,1);
 1610         }
 1611 	$y = $y->xchg(0,$dim);
 1612 	$x = $x->append($y);
 1613     }
 1614     $x->xchg(0,$dim);
 1615 }
 1616 
 1617 EOD
 1618 
 1619 pp_def( 'axisvalues',
 1620 	Pars => 'i(n); [o]a(n)',
 1621 	Inplace => 1,
 1622 	Code => 'loop(n) %{ $a() = n; %}',
 1623 	GenericTypes => [ppdefs_all],
 1624 	Doc => undef,
 1625        ); # pp_def: axisvalues
 1626 
 1627 pp_add_macros(
 1628   CMPVEC => sub {
 1629     my ($a, $b, $dim, $ret, $anybad) = @_;
 1630     my $badbit = !defined $anybad ? '' : <<EOF;
 1631 PDL_IF_BAD(if (\$ISBAD($a) || \$ISBAD($b)) { $anybad = 1; break; } else,)
 1632 EOF
 1633     <<EOF;
 1634   $ret = 0;
 1635   loop($dim) %{ $badbit if ($a != $b) { $ret = $a < $b ? -1 : 1; break; } %}
 1636 EOF
 1637   },
 1638 );
 1639 
 1640 pp_def(
 1641     'cmpvec',
 1642     HandleBad => 1,
 1643     Pars => 'a(n); b(n); sbyte [o]c();',
 1644     Code => '
 1645      char anybad = 0;
 1646      broadcastloop %{ $CMPVEC($a(), $b(), n, $c(), anybad); %}
 1647      PDL_IF_BAD(if (anybad) { $SETBAD(c()); $PDLSTATESETBAD(c); },)
 1648     ',
 1649     Doc => '
 1650 =for ref
 1651 
 1652 Compare two vectors lexicographically.
 1653 
 1654 Returns -1 if a is less, 1 if greater, 0 if equal.
 1655 ',
 1656     BadDoc => '
 1657 The output is bad if any input values up to the point of inequality are
 1658 bad - any after are ignored.
 1659 ',
 1660 );
 1661 
 1662 pp_def(
 1663     'eqvec',
 1664     HandleBad => 1,
 1665     Pars => 'a(n); b(n); sbyte [o]c();',
 1666     Code => '
 1667      char anybad = 0;
 1668      broadcastloop %{
 1669        $c() = 1;
 1670        loop(n) %{
 1671          PDL_IF_BAD(if ($ISBAD(a()) || $ISBAD(b())) { $SETBAD(c()); anybad = 1; break; }
 1672          else,) if ($a() != $b()) { $c() = 0; PDL_IF_BAD(,break;) }
 1673        %}
 1674      %}
 1675      PDL_IF_BAD(if (anybad) $PDLSTATESETBAD(c);,)
 1676     ',
 1677     Doc => '
 1678 =for ref
 1679 
 1680 Compare two vectors, returning 1 if equal, 0 if not equal.
 1681 ',
 1682     BadDoc => 'The output is bad if any input values are bad.',
 1683 );
 1684 
 1685 pp_def('enumvec',
 1686        Pars => 'v(M,N); indx [o]k(N)',
 1687        Code =><<'EOC',
 1688   PDL_Indx vn, kn, sn=$SIZE(N), matches;
 1689   for (vn=0; vn<sn; vn=kn) {
 1690      for (kn=vn, matches=1; matches && kn<sn; ) {
 1691        $k(N=>kn) = kn-vn;
 1692        ++kn;
 1693        loop (M) %{
 1694          if ($v(N=>vn) != $v(N=>kn)) {
 1695            matches=0;
 1696            break;
 1697          }
 1698        %}
 1699      }
 1700    }
 1701 EOC
 1702        Doc =><<'EOD',
 1703 =for ref
 1704 
 1705 Enumerate a list of vectors with locally unique keys.
 1706 
 1707 Given a sorted list of vectors $v, generate a vector $k containing locally unique keys for the elements of $v
 1708 (where an "element" is a vector of length $M ocurring in $v).
 1709 
 1710 Note that the keys returned in $k are only unique over a run of a single vector in $v,
 1711 so that each unique vector in $v has at least one 0 (zero) index in $k associated with it.
 1712 If you need global keys, see enumvecg().
 1713 
 1714 Contributed by Bryan Jurish E<lt>moocow@cpan.orgE<gt>.
 1715 EOD
 1716 
 1717 );
 1718 
 1719 ##------------------------------------------------------
 1720 ## enumvecg()
 1721 pp_def('enumvecg',
 1722        Pars => 'v(M,N); indx [o]k(N)',
 1723        Code =><<'EOC',
 1724   PDL_Indx vn, vnprev, sn=$SIZE(N), ki;
 1725   if (sn > 0) {
 1726     $k(N=>0) = ki = 0;
 1727     for (vnprev=0, vn=1; vn<sn; vnprev=vn++) {
 1728        loop (M) %{
 1729          if ($v(N=>vnprev) != $v(N=>vn)) {
 1730            ++ki;
 1731            break;
 1732          }
 1733        %}
 1734        $k(N=>vn) = ki;
 1735      }
 1736    }
 1737 EOC
 1738        Doc =><<'EOD',
 1739 =for ref
 1740 
 1741 Enumerate a list of vectors with globally unique keys.
 1742 
 1743 Given a sorted list of vectors $v, generate a vector $k containing globally unique keys for the elements of $v
 1744 (where an "element" is a vector of length $M ocurring in $v).
 1745 Basically does the same thing as:
 1746 
 1747  $k = $v->vsearchvec($v->uniqvec);
 1748 
 1749 ... but somewhat more efficiently.
 1750 
 1751 Contributed by Bryan Jurish E<lt>moocow@cpan.orgE<gt>.
 1752 EOD
 1753 );
 1754 
 1755 pp_def('vsearchvec',
 1756    Pars => 'find(M); which(M,N); indx [o]found();',
 1757    Code => q(
 1758  int carp=0;
 1759 broadcastloop %{
 1760  PDL_Indx sizeM=$SIZE(M), sizeN=$SIZE(N), n1=sizeN-1;
 1761  PDL_Indx nlo=-1, nhi=n1, nn;
 1762  int cmpval, is_asc_sorted;
 1763  //
 1764  //-- get sort direction
 1765  $CMPVEC($which(N=>n1),$which(N=>0),M,cmpval);
 1766  is_asc_sorted = (cmpval > 0);
 1767  //
 1768  //-- binary search
 1769  while (nhi-nlo > 1) {
 1770    nn = (nhi+nlo) >> 1;
 1771    $CMPVEC($find(),$which(N=>nn),M,cmpval);
 1772    if (cmpval > 0 == is_asc_sorted)
 1773      nlo=nn;
 1774    else
 1775      nhi=nn;
 1776  }
 1777  if (nlo==-1) {
 1778    nhi=0;
 1779  } else if (nlo==n1) {
 1780    $CMPVEC($find(),$which(N=>n1),M,cmpval);
 1781    if (cmpval != 0) carp = 1;
 1782    nhi = n1;
 1783  } else {
 1784    nhi = nlo+1;
 1785  }
 1786  $found() = nhi;
 1787 %}
 1788  if (carp) warn("some values had to be extrapolated");
 1789 ),
 1790   Doc=><<'EOD'
 1791 =for ref
 1792 
 1793 Routine for searching N-dimensional values - akin to vsearch() for vectors.
 1794 
 1795 =for usage
 1796 
 1797  $found   = vsearchvec($find, $which);
 1798  $nearest = $which->dice_axis(1,$found);
 1799 
 1800 Returns for each row-vector in C<$find> the index along dimension N
 1801 of the least row vector of C<$which>
 1802 greater or equal to it.
 1803 C<$which> should be sorted in increasing order.
 1804 If the value of C<$find> is larger
 1805 than any member of C<$which>, the index to the last element of C<$which> is
 1806 returned.
 1807 
 1808 See also: L</vsearch>.
 1809 Contributed by Bryan Jurish E<lt>moocow@cpan.orgE<gt>.
 1810 EOD
 1811 );
 1812 
 1813 pp_def('unionvec',
 1814    Pars => 'a(M,NA); b(M,NB); [o]c(M,NC); indx [o]nc()',
 1815    RedoDimsCode => '$SIZE(NC) = $SIZE(NA) + $SIZE(NB);',
 1816    PMCode=> q(
 1817  sub PDL::unionvec {
 1818    my ($a,$b,$c,$nc) = @_;
 1819    $c = PDL->null if (!defined($nc));
 1820    $nc = PDL->null if (!defined($nc));
 1821    PDL::_unionvec_int($a,$b,$c,$nc);
 1822    return ($c,$nc) if (wantarray);
 1823    return $c->slice(",0:".($nc->max-1));
 1824  }
 1825 ),
 1826    Code => q(
 1827  PDL_Indx nai=0, nbi=0, nci=0, sizeNA=$SIZE(NA), sizeNB=$SIZE(NB), sizeNC=$SIZE(NC);
 1828  int cmpval;
 1829  for ( ; nci < sizeNC; nci++) {
 1830    if (nai < sizeNA && nbi < sizeNB) {
 1831      $CMPVEC($a(NA=>nai),$b(NB=>nbi),M,cmpval);
 1832    }
 1833    else if (nai < sizeNA) { cmpval = -1; }
 1834    else if (nbi < sizeNB) { cmpval =  1; }
 1835    else                   { break; }
 1836    //
 1837    if (cmpval < 0) {
 1838      //-- CASE: a < b
 1839      loop (M) %{ $c(NC=>nci) = $a(NA=>nai); %}
 1840      nai++;
 1841    }
 1842    else if (cmpval > 0) {
 1843      //-- CASE: a > b
 1844      loop (M) %{ $c(NC=>nci) = $b(NB=>nbi); %}
 1845      nbi++;
 1846    }
 1847    else {
 1848      //-- CASE: a == b
 1849      loop (M) %{ $c(NC=>nci) = $a(NA=>nai); %}
 1850      nai++;
 1851      nbi++;
 1852    }
 1853  }
 1854  $nc() = nci;
 1855  for ( ; nci < sizeNC; nci++) {
 1856   //-- zero unpopulated outputs
 1857   loop(M) %{ $c(NC=>nci) = 0; %}
 1858  }
 1859 ),
 1860    Doc=><<'EOD'
 1861 =for ref
 1862 
 1863 Union of two vector-valued PDLs.
 1864 
 1865 Input PDLs $a() and $b() B<MUST> be sorted in lexicographic order.
 1866 On return, $nc() holds the actual number of vector-values in the union.
 1867 
 1868 In scalar context, slices $c() to the actual number of elements in the union
 1869 and returns the sliced PDL.
 1870 
 1871 Contributed by Bryan Jurish E<lt>moocow@cpan.orgE<gt>.
 1872 EOD
 1873 );
 1874 
 1875 pp_def('intersectvec',
 1876    Pars => 'a(M,NA); b(M,NB); [o]c(M,NC); indx [o]nc()',
 1877    RedoDimsCode => '$SIZE(NC) = PDLMIN($SIZE(NA),$SIZE(NB));',
 1878    PMCode=> q(
 1879  sub PDL::intersectvec {
 1880    my ($a,$b,$c,$nc) = @_;
 1881    $c = PDL->null if (!defined($c));
 1882    $nc = PDL->null if (!defined($nc));
 1883    PDL::_intersectvec_int($a,$b,$c,$nc);
 1884    return ($c,$nc) if (wantarray);
 1885    my $nc_max = $nc->max;
 1886    return ($nc_max > 0
 1887 	   ? $c->slice(",0:".($nc_max-1))
 1888 	   : $c->reshape($c->dim(0), 0, ($c->dims)[2..($c->ndims-1)]));
 1889  }
 1890 ),
 1891    Code => q(
 1892  PDL_Indx nai=0, nbi=0, nci=0, sizeNA=$SIZE(NA), sizeNB=$SIZE(NB), sizeNC=$SIZE(NC);
 1893  int cmpval;
 1894  for ( ; nci < sizeNC && nai < sizeNA && nbi < sizeNB; ) {
 1895    $CMPVEC($a(NA=>nai),$b(NB=>nbi),M,cmpval);
 1896    //
 1897    if (cmpval < 0) {
 1898      //-- CASE: a < b
 1899      nai++;
 1900    }
 1901    else if (cmpval > 0) {
 1902      //-- CASE: a > b
 1903      nbi++;
 1904    }
 1905    else {
 1906      //-- CASE: a == b
 1907      loop (M) %{ $c(NC=>nci) = $a(NA=>nai); %}
 1908      nai++;
 1909      nbi++;
 1910      nci++;
 1911    }
 1912  }
 1913  $nc() = nci;
 1914  for ( ; nci < sizeNC; nci++) {
 1915   //-- zero unpopulated outputs
 1916   loop(M) %{ $c(NC=>nci) = 0; %}
 1917  }
 1918 ),
 1919    Doc=><<'EOD'
 1920 =for ref
 1921 
 1922 Intersection of two vector-valued PDLs.
 1923 Input PDLs $a() and $b() B<MUST> be sorted in lexicographic order.
 1924 On return, $nc() holds the actual number of vector-values in the intersection.
 1925 
 1926 In scalar context, slices $c() to the actual number of elements in the intersection
 1927 and returns the sliced PDL.
 1928 
 1929 Contributed by Bryan Jurish E<lt>moocow@cpan.orgE<gt>.
 1930 EOD
 1931 );
 1932 
 1933 pp_def('setdiffvec',
 1934    Pars => 'a(M,NA); b(M,NB); [o]c(M,NC); indx [o]nc()',
 1935    RedoDimsCode => '$SIZE(NC) = $SIZE(NA);',
 1936    PMCode=> q(
 1937  sub PDL::setdiffvec {
 1938   my ($a,$b,$c,$nc) = @_;
 1939   $c = PDL->null if (!defined($c));
 1940   $nc = PDL->null if (!defined($nc));
 1941   PDL::_setdiffvec_int($a,$b,$c,$nc);
 1942   return ($c,$nc) if (wantarray);
 1943   my $nc_max = $nc->max;
 1944   return ($nc_max > 0
 1945 	  ? $c->slice(",0:".($nc_max-1))
 1946 	  : $c->reshape($c->dim(0), 0, ($c->dims)[2..($c->ndims-1)]));
 1947  }
 1948 ),
 1949    Code => q(
 1950  PDL_Indx nai=0, nbi=0, nci=0, sizeNA=$SIZE(NA), sizeNB=$SIZE(NB), sizeNC=$SIZE(NC);
 1951  int cmpval;
 1952  for ( ; nci < sizeNC && nai < sizeNA && nbi < sizeNB ; ) {
 1953    $CMPVEC($a(NA=>nai),$b(NB=>nbi),M,cmpval);
 1954    //
 1955    if (cmpval < 0) {
 1956      //-- CASE: a < b
 1957      loop (M) %{ $c(NC=>nci) = $a(NA=>nai); %}
 1958      nai++;
 1959      nci++;
 1960    }
 1961    else if (cmpval > 0) {
 1962      //-- CASE: a > b
 1963      nbi++;
 1964    }
 1965    else {
 1966      //-- CASE: a == b
 1967      nai++;
 1968      nbi++;
 1969    }
 1970  }
 1971  for ( ; nci < sizeNC && nai < sizeNA ; nai++,nci++ ) {
 1972    loop (M) %{ $c(NC=>nci) = $a(NA=>nai); %}
 1973  }
 1974  $nc() = nci;
 1975  for ( ; nci < sizeNC; nci++) {
 1976   //-- zero unpopulated outputs
 1977   loop (M) %{ $c(NC=>nci) = 0; %}
 1978  }
 1979 ),
 1980    Doc=><<'EOD'
 1981 =for ref
 1982 
 1983 Set-difference ($a() \ $b()) of two vector-valued PDLs.
 1984 
 1985 Input PDLs $a() and $b() B<MUST> be sorted in lexicographic order.
 1986 On return, $nc() holds the actual number of vector-values in the computed vector set.
 1987 
 1988 In scalar context, slices $c() to the actual number of elements in the output vector set
 1989 and returns the sliced PDL.
 1990 
 1991 Contributed by Bryan Jurish E<lt>moocow@cpan.orgE<gt>.
 1992 EOD
 1993 );
 1994 
 1995 pp_add_macros(
 1996   CMPVAL => sub {
 1997     my ($val1, $val2) = @_;
 1998     "(($val1) < ($val2) ? -1 : ($val1) > ($val2) ? 1 : 0)";
 1999   },
 2000 );
 2001 
 2002 pp_def('union_sorted',
 2003    Pars => 'a(NA); b(NB); [o]c(NC); indx [o]nc()',
 2004    RedoDimsCode => '$SIZE(NC) = $SIZE(NA) + $SIZE(NB);',
 2005    PMCode=> q(
 2006  sub PDL::union_sorted {
 2007    my ($a,$b,$c,$nc) = @_;
 2008    $c = PDL->null if (!defined($c));
 2009    $nc = PDL->null if (!defined($nc));
 2010    PDL::_union_sorted_int($a,$b,$c,$nc);
 2011    return ($c,$nc) if (wantarray);
 2012    return $c->slice("0:".($nc->max-1));
 2013  }
 2014 ),
 2015    Code => q(
 2016  PDL_Indx nai=0, nbi=0, nci=0, sizeNA=$SIZE(NA), sizeNB=$SIZE(NB), sizeNC=$SIZE(NC);
 2017  int cmpval;
 2018  for ( ; nci < sizeNC; nci++) {
 2019    if (nai < sizeNA && nbi < sizeNB) {
 2020      cmpval = $CMPVAL($a(NA=>nai), $b(NB=>nbi));
 2021    }
 2022    else if (nai < sizeNA) { cmpval = -1; }
 2023    else if (nbi < sizeNB) { cmpval =  1; }
 2024    else                   { break; }
 2025    //
 2026    if (cmpval < 0) {
 2027      //-- CASE: a < b
 2028      $c(NC=>nci) = $a(NA=>nai);
 2029      nai++;
 2030    }
 2031    else if (cmpval > 0) {
 2032      //-- CASE: a > b
 2033      $c(NC=>nci) = $b(NB=>nbi);
 2034      nbi++;
 2035    }
 2036    else {
 2037      //-- CASE: a == b
 2038      $c(NC=>nci) = $a(NA=>nai);
 2039      nai++;
 2040      nbi++;
 2041    }
 2042  }
 2043  $nc() = nci;
 2044  for ( ; nci < sizeNC; nci++) {
 2045   //-- zero unpopulated outputs
 2046   $c(NC=>nci) = 0;
 2047  }
 2048 ),
 2049    Doc=><<'EOD'
 2050 =for ref
 2051 
 2052 Union of two flat sorted unique-valued PDLs.
 2053 Input PDLs $a() and $b() B<MUST> be sorted in lexicographic order and contain no duplicates.
 2054 On return, $nc() holds the actual number of values in the union.
 2055 
 2056 In scalar context, reshapes $c() to the actual number of elements in the union and returns it.
 2057 
 2058 Contributed by Bryan Jurish E<lt>moocow@cpan.orgE<gt>.
 2059 EOD
 2060 );
 2061 
 2062 pp_def('intersect_sorted',
 2063    Pars => 'a(NA); b(NB); [o]c(NC); indx [o]nc()',
 2064    RedoDimsCode => '$SIZE(NC) = PDLMIN($SIZE(NA),$SIZE(NB));',
 2065    PMCode=> q(
 2066  sub PDL::intersect_sorted {
 2067    my ($a,$b,$c,$nc) = @_;
 2068    $c = PDL->null if (!defined($c));
 2069    $nc = PDL->null if (!defined($nc));
 2070    PDL::_intersect_sorted_int($a,$b,$c,$nc);
 2071    return ($c,$nc) if (wantarray);
 2072    my $nc_max = $nc->max;
 2073    return ($nc_max > 0
 2074 	   ? $c->slice("0:".($nc_max-1))
 2075 	   : $c->reshape(0, ($c->dims)[1..($c->ndims-1)]));
 2076  }
 2077 ),
 2078    Code => q(
 2079  PDL_Indx nai=0, nbi=0, nci=0, sizeNA=$SIZE(NA), sizeNB=$SIZE(NB), sizeNC=$SIZE(NC);
 2080  int cmpval;
 2081  for ( ; nci < sizeNC && nai < sizeNA && nbi < sizeNB; ) {
 2082    cmpval = $CMPVAL($a(NA=>nai),$b(NB=>nbi));
 2083    //
 2084    if (cmpval < 0) {
 2085      //-- CASE: a < b
 2086      nai++;
 2087    }
 2088    else if (cmpval > 0) {
 2089      //-- CASE: a > b
 2090      nbi++;
 2091    }
 2092    else {
 2093      //-- CASE: a == b
 2094      $c(NC=>nci) = $a(NA=>nai);
 2095      nai++;
 2096      nbi++;
 2097      nci++;
 2098    }
 2099  }
 2100  $nc() = nci;
 2101  for ( ; nci < sizeNC; nci++) {
 2102   //-- zero unpopulated outputs
 2103   $c(NC=>nci) = 0;
 2104  }
 2105 ),
 2106    Doc=><<'EOD'
 2107 =for ref
 2108 
 2109 Intersection of two flat sorted unique-valued PDLs.
 2110 Input PDLs $a() and $b() B<MUST> be sorted in lexicographic order and contain no duplicates.
 2111 On return, $nc() holds the actual number of values in the intersection.
 2112 
 2113 In scalar context, reshapes $c() to the actual number of elements in the intersection and returns it.
 2114 
 2115 Contributed by Bryan Jurish E<lt>moocow@cpan.orgE<gt>.
 2116 EOD
 2117 );
 2118 
 2119 pp_def('setdiff_sorted',
 2120    Pars => 'a(NA); b(NB); [o]c(NC); indx [o]nc()',
 2121    RedoDimsCode => '$SIZE(NC) = $SIZE(NA);',
 2122    PMCode=> q(
 2123  sub PDL::setdiff_sorted {
 2124    my ($a,$b,$c,$nc) = @_;
 2125    $c = PDL->null if (!defined($c));
 2126    $nc = PDL->null if (!defined($nc));
 2127    PDL::_setdiff_sorted_int($a,$b,$c,$nc);
 2128    return ($c,$nc) if (wantarray);
 2129    my $nc_max = $nc->max;
 2130    return ($nc_max > 0
 2131 	   ? $c->slice("0:".($nc_max-1))
 2132 	   : $c->reshape(0, ($c->dims)[1..($c->ndims-1)]));
 2133  }
 2134 ),
 2135    Code => q(
 2136  PDL_Indx nai=0, nbi=0, nci=0, sizeNA=$SIZE(NA), sizeNB=$SIZE(NB), sizeNC=$SIZE(NC);
 2137  int cmpval;
 2138  for ( ; nci < sizeNC && nai < sizeNA && nbi < sizeNB ; ) {
 2139    cmpval = $CMPVAL($a(NA=>nai),$b(NB=>nbi));
 2140    //
 2141    if (cmpval < 0) {
 2142      //-- CASE: a < b
 2143      $c(NC=>nci) = $a(NA=>nai);
 2144      nai++;
 2145      nci++;
 2146    }
 2147    else if (cmpval > 0) {
 2148      //-- CASE: a > b
 2149      nbi++;
 2150    }
 2151    else {
 2152      //-- CASE: a == b
 2153      nai++;
 2154      nbi++;
 2155    }
 2156  }
 2157  for ( ; nci < sizeNC && nai < sizeNA ; nai++,nci++ ) {
 2158    $c(NC=>nci) = $a(NA=>nai);
 2159  }
 2160  $nc() = nci;
 2161  for ( ; nci < sizeNC; nci++) {
 2162   //-- zero unpopulated outputs
 2163   $c(NC=>nci) = 0;
 2164  }
 2165 ),
 2166    Doc=><<'EOD'
 2167 =for ref
 2168 
 2169 Set-difference ($a() \ $b()) of two flat sorted unique-valued PDLs.
 2170 
 2171 Input PDLs $a() and $b() B<MUST> be sorted in lexicographic order and contain no duplicate values.
 2172 On return, $nc() holds the actual number of values in the computed vector set.
 2173 
 2174 In scalar context, reshapes $c() to the actual number of elements in the difference set and returns it.
 2175 
 2176 Contributed by Bryan Jurish E<lt>moocow@cpan.orgE<gt>.
 2177 EOD
 2178 );
 2179 
 2180 pp_addhdr(<<'EOH');
 2181 extern int pdl_srand_threads;
 2182 extern uint64_t *pdl_rand_state;
 2183 void pdl_srand(uint64_t **s, uint64_t seed, int n);
 2184 double pdl_drand(uint64_t *s);
 2185 #define PDL_MAYBE_SRAND \
 2186   if (pdl_srand_threads < 0) \
 2187     pdl_srand(&pdl_rand_state, PDL->pdl_seed(), PDL->online_cpus());
 2188 #define PDL_RAND_SET_OFFSET(v, thr, pdl) \
 2189   if (v < 0) { \
 2190     if (thr.mag_nthr >= 0) { \
 2191       int thr_no = PDL->magic_get_thread(pdl); \
 2192       if (thr_no < 0) return PDL->make_error_simple(PDL_EFATAL, "Invalid pdl_magic_get_thread!"); \
 2193       v = thr_no == 0 ? thr_no : thr_no % PDL->online_cpus(); \
 2194     } else { \
 2195       v = 0; \
 2196     } \
 2197   }
 2198 EOH
 2199 
 2200 pp_def(
 2201 	'srand',
 2202 	Pars=>'a();',
 2203 	GenericTypes => ['Q'],
 2204 	Code => <<'EOF',
 2205 pdl_srand(&pdl_rand_state, (uint64_t)$a(), PDL->online_cpus());
 2206 EOF
 2207 	NoPthread => 1,
 2208 	HaveBroadcasting => 0,
 2209 	Doc=> <<'EOF',
 2210 =for ref
 2211 
 2212 Seed random-number generator with a 64-bit int. Will generate seed data
 2213 for a number of threads equal to the return-value of
 2214 L<PDL::Core/online_cpus>.
 2215 
 2216 =for usage
 2217 
 2218  srand(); # uses current time
 2219  srand(5); # fixed number e.g. for testing
 2220 
 2221 EOF
 2222 	PMCode=><<'EOD',
 2223 *srand = \&PDL::srand;
 2224 sub PDL::srand { PDL::_srand_int($_[0] // PDL::Core::seed()) }
 2225 EOD
 2226 );
 2227 
 2228 pp_def(
 2229 	'random',
 2230 	Pars=>'a();',
 2231         GenericTypes => [ppdefs_all],
 2232 	PMFunc => '',
 2233 	Code => <<'EOF',
 2234 PDL_MAYBE_SRAND
 2235 int rand_offset = -1;
 2236 broadcastloop %{
 2237   PDL_RAND_SET_OFFSET(rand_offset, $PRIV(broadcast), $PDL(a));
 2238   $a() = pdl_drand(pdl_rand_state + 4*rand_offset);
 2239 %}
 2240 EOF
 2241 	Doc=> <<'EOF',
 2242 =for ref
 2243 
 2244 Constructor which returns ndarray of random numbers
 2245 
 2246 =for usage
 2247 
 2248  $x = random([type], $nx, $ny, $nz,...);
 2249  $x = random $y;
 2250 
 2251 etc (see L<zeroes|PDL::Core/zeroes>).
 2252 
 2253 This is the uniform distribution between 0 and 1 (assumedly
 2254 excluding 1 itself). The arguments are the same as C<zeroes>
 2255 (q.v.) - i.e. one can specify dimensions, types or give
 2256 a template.
 2257 
 2258 You can use the PDL function L</srand> to seed the random generator.
 2259 If it has not been called yet, it will be with the current time.
 2260 EOF
 2261 	PMCode=><<'EOD',
 2262 sub random { ref($_[0]) && ref($_[0]) ne 'PDL::Type' ? $_[0]->random : PDL->random(@_) }
 2263 sub PDL::random {
 2264    my $class = shift;
 2265    my $x = scalar(@_)? $class->new_from_specification(@_) : $class->new_or_inplace;
 2266    PDL::_random_int($x);
 2267    return $x;
 2268 }
 2269 EOD
 2270 );
 2271 
 2272 pp_def(
 2273 	'randsym',
 2274 	Pars=>'a();',
 2275         GenericTypes => [ppdefs_all],
 2276 	PMFunc => '',
 2277 	Code => <<'EOF',
 2278 PDL_MAYBE_SRAND
 2279 int rand_offset = -1;
 2280 broadcastloop %{
 2281   PDL_RAND_SET_OFFSET(rand_offset, $PRIV(broadcast), $PDL(a));
 2282   double tmp;
 2283   do tmp = pdl_drand(pdl_rand_state + 4*rand_offset); while (tmp == 0.0); /* 0 < tmp < 1 */
 2284   $a() = tmp;
 2285 %}
 2286 EOF
 2287 	Doc=> <<'EOF',
 2288 =for ref
 2289 
 2290 Constructor which returns ndarray of random numbers
 2291 
 2292 =for usage
 2293 
 2294  $x = randsym([type], $nx, $ny, $nz,...);
 2295  $x = randsym $y;
 2296 
 2297 etc (see L<zeroes|PDL::Core/zeroes>).
 2298 
 2299 This is the uniform distribution between 0 and 1 (excluding both 0 and
 2300 1, cf L</random>). The arguments are the same as C<zeroes> (q.v.) -
 2301 i.e. one can specify dimensions, types or give a template.
 2302 
 2303 You can use the PDL function L</srand> to seed the random generator.
 2304 If it has not been called yet, it will be with the current time.
 2305 EOF
 2306 	PMCode=><<'EOD',
 2307 sub randsym { ref($_[0]) && ref($_[0]) ne 'PDL::Type' ? $_[0]->randsym : PDL->randsym(@_) }
 2308 sub PDL::randsym {
 2309    my $class = shift;
 2310    my $x = scalar(@_)? $class->new_from_specification(@_) : $class->new_or_inplace;
 2311    PDL::_randsym_int($x);
 2312    return $x;
 2313 }
 2314 EOD
 2315 );
 2316 
 2317 pp_add_exported('','grandom');
 2318 pp_addpm(<<'EOD');
 2319 =head2 grandom
 2320 
 2321 =for ref
 2322 
 2323 Constructor which returns ndarray of Gaussian random numbers
 2324 
 2325 =for usage
 2326 
 2327  $x = grandom([type], $nx, $ny, $nz,...);
 2328  $x = grandom $y;
 2329 
 2330 etc (see L<zeroes|PDL::Core/zeroes>).
 2331 
 2332 This is generated using the math library routine C<ndtri>.
 2333 
 2334 Mean = 0, Stddev = 1
 2335 
 2336 You can use the PDL function L</srand> to seed the random generator.
 2337 If it has not been called yet, it will be with the current time.
 2338 
 2339 =cut
 2340 
 2341 sub grandom { ref($_[0]) && ref($_[0]) ne 'PDL::Type' ? $_[0]->grandom : PDL->grandom(@_) }
 2342 sub PDL::grandom {
 2343    my $class = shift;
 2344    my $x = scalar(@_)? $class->new_from_specification(@_) : $class->new_or_inplace;
 2345    use PDL::Math 'ndtri';
 2346    $x .= ndtri(randsym($x));
 2347    return $x;
 2348 }
 2349 EOD
 2350 
 2351 ###############################################################
 2352 # binary searches in an ndarray; various forms
 2353 ###############################################################
 2354 
 2355 # generic front end; defaults to vsearch_sample for backwards compatibility
 2356 
 2357 pp_add_exported('','vsearch');
 2358 pp_addpm(<<'EOD');
 2359 =head2 vsearch
 2360 
 2361 =for sig
 2362 
 2363   Signature: ( vals(); xs(n); [o] indx(); [\%options] )
 2364 
 2365 =for ref
 2366 
 2367 Efficiently search for values in a sorted ndarray, returning indices.
 2368 
 2369 =for usage
 2370 
 2371   $idx = vsearch( $vals, $x, [\%options] );
 2372   vsearch( $vals, $x, $idx, [\%options ] );
 2373 
 2374 B<vsearch> performs a binary search in the ordered ndarray C<$x>,
 2375 for the values from C<$vals> ndarray, returning indices into C<$x>.
 2376 What is a "match", and the meaning of the returned indices, are determined
 2377 by the options.
 2378 
 2379 The C<mode> option indicates which method of searching to use, and may
 2380 be one of:
 2381 
 2382 =over
 2383 
 2384 =item C<sample>
 2385 
 2386 invoke L<B<vsearch_sample>|/vsearch_sample>, returning indices appropriate for sampling
 2387 within a distribution.
 2388 
 2389 =item C<insert_leftmost>
 2390 
 2391 invoke L<B<vsearch_insert_leftmost>|/vsearch_insert_leftmost>, returning the left-most possible
 2392 insertion point which still leaves the ndarray sorted.
 2393 
 2394 =item C<insert_rightmost>
 2395 
 2396 invoke L<B<vsearch_insert_rightmost>|/vsearch_insert_rightmost>, returning the right-most possible
 2397 insertion point which still leaves the ndarray sorted.
 2398 
 2399 =item C<match>
 2400 
 2401 invoke L<B<vsearch_match>|/vsearch_match>, returning the index of a matching element,
 2402 else -(insertion point + 1)
 2403 
 2404 =item C<bin_inclusive>
 2405 
 2406 invoke L<B<vsearch_bin_inclusive>|/vsearch_bin_inclusive>, returning an index appropriate for binning
 2407 on a grid where the left bin edges are I<inclusive> of the bin. See
 2408 below for further explanation of the bin.
 2409 
 2410 =item C<bin_exclusive>
 2411 
 2412 invoke L<B<vsearch_bin_exclusive>|/vsearch_bin_exclusive>, returning an index appropriate for binning
 2413 on a grid where the left bin edges are I<exclusive> of the bin. See
 2414 below for further explanation of the bin.
 2415 
 2416 =back
 2417 
 2418 The default value of C<mode> is C<sample>.
 2419 
 2420 =for example
 2421 
 2422   use PDL;
 2423   
 2424   my @modes = qw( sample insert_leftmost insert_rightmost match
 2425                   bin_inclusive bin_exclusive );
 2426   
 2427   # Generate a sequence of 3 zeros, 3 ones, ..., 3 fours.
 2428   my $x = zeroes(3,5)->yvals->flat;
 2429   
 2430   for my $mode ( @modes ) {
 2431     # if the value is in $x
 2432     my $contained = 2;
 2433     my $idx_contained = vsearch( $contained, $x, { mode => $mode } );
 2434     my $x_contained = $x->copy;
 2435     $x_contained->slice( $idx_contained ) .= 9;
 2436     
 2437     # if the value is not in $x
 2438     my $not_contained = 1.5;
 2439     my $idx_not_contained = vsearch( $not_contained, $x, { mode => $mode } );
 2440     my $x_not_contained = $x->copy;
 2441     $x_not_contained->slice( $idx_not_contained ) .= 9;
 2442     
 2443     print sprintf("%-23s%30s\n", '$x', $x);
 2444     print sprintf("%-23s%30s\n",   "$mode ($contained)", $x_contained);
 2445     print sprintf("%-23s%30s\n\n", "$mode ($not_contained)", $x_not_contained);
 2446   }
 2447   
 2448   # $x                     [0 0 0 1 1 1 2 2 2 3 3 3 4 4 4]
 2449   # sample (2)             [0 0 0 1 1 1 9 2 2 3 3 3 4 4 4]
 2450   # sample (1.5)           [0 0 0 1 1 1 9 2 2 3 3 3 4 4 4]
 2451   # 
 2452   # $x                     [0 0 0 1 1 1 2 2 2 3 3 3 4 4 4]
 2453   # insert_leftmost (2)    [0 0 0 1 1 1 9 2 2 3 3 3 4 4 4]
 2454   # insert_leftmost (1.5)  [0 0 0 1 1 1 9 2 2 3 3 3 4 4 4]
 2455   # 
 2456   # $x                     [0 0 0 1 1 1 2 2 2 3 3 3 4 4 4]
 2457   # insert_rightmost (2)   [0 0 0 1 1 1 2 2 2 9 3 3 4 4 4]
 2458   # insert_rightmost (1.5) [0 0 0 1 1 1 9 2 2 3 3 3 4 4 4]
 2459   # 
 2460   # $x                     [0 0 0 1 1 1 2 2 2 3 3 3 4 4 4]
 2461   # match (2)              [0 0 0 1 1 1 2 9 2 3 3 3 4 4 4]
 2462   # match (1.5)            [0 0 0 1 1 1 2 2 9 3 3 3 4 4 4]
 2463   # 
 2464   # $x                     [0 0 0 1 1 1 2 2 2 3 3 3 4 4 4]
 2465   # bin_inclusive (2)      [0 0 0 1 1 1 2 2 9 3 3 3 4 4 4]
 2466   # bin_inclusive (1.5)    [0 0 0 1 1 9 2 2 2 3 3 3 4 4 4]
 2467   # 
 2468   # $x                     [0 0 0 1 1 1 2 2 2 3 3 3 4 4 4]
 2469   # bin_exclusive (2)      [0 0 0 1 1 9 2 2 2 3 3 3 4 4 4]
 2470   # bin_exclusive (1.5)    [0 0 0 1 1 9 2 2 2 3 3 3 4 4 4]
 2471 
 2472 
 2473 Also see
 2474 L<B<vsearch_sample>|/vsearch_sample>,
 2475 L<B<vsearch_insert_leftmost>|/vsearch_insert_leftmost>,
 2476 L<B<vsearch_insert_rightmost>|/vsearch_insert_rightmost>,
 2477 L<B<vsearch_match>|/vsearch_match>,
 2478 L<B<vsearch_bin_inclusive>|/vsearch_bin_inclusive>, and
 2479 L<B<vsearch_bin_exclusive>|/vsearch_bin_exclusive>
 2480 
 2481 =cut
 2482 
 2483 sub vsearch {
 2484     my $opt = 'HASH' eq ref $_[-1]
 2485             ? pop
 2486 	    : { mode => 'sample' };
 2487 
 2488     croak( "unknown options to vsearch\n" )
 2489 	if ( ! defined $opt->{mode} && keys %$opt )
 2490 	|| keys %$opt > 1;
 2491 
 2492     my $mode = $opt->{mode};
 2493     goto
 2494         $mode eq 'sample'           ? \&vsearch_sample
 2495       : $mode eq 'insert_leftmost'  ? \&vsearch_insert_leftmost
 2496       : $mode eq 'insert_rightmost' ? \&vsearch_insert_rightmost
 2497       : $mode eq 'match'            ? \&vsearch_match
 2498       : $mode eq 'bin_inclusive'    ? \&vsearch_bin_inclusive
 2499       : $mode eq 'bin_exclusive'    ? \&vsearch_bin_exclusive
 2500       :                               croak( "unknown vsearch mode: $mode\n" );
 2501 }
 2502 
 2503 *PDL::vsearch = \&vsearch;
 2504 
 2505 EOD
 2506 
 2507 use Text::Tabs qw[ expand ];
 2508 sub undent {
 2509     my $txt = expand( shift );
 2510 
 2511     $txt =~ s/^([ \t]+)-{4}.*$//m;
 2512     $txt =~ s/^$1//mg
 2513       if defined $1;
 2514     $txt;
 2515 }
 2516 
 2517 for my $func ( [
 2518         vsearch_sample => {
 2519             low  => -1,
 2520             high => '$SIZE(n)',
 2521 	    up   => '($x(n => n1) > $x(n => 0))',
 2522             code => q[
 2523                    while ( high - low > 1 ) {
 2524                        mid = %MID%;
 2525                        if ( ( value > $x(n => mid ) ) == up ) low = mid;
 2526                        else                                   high = mid;
 2527                    }
 2528                    $idx() = low >= n1 ? n1
 2529                          : up        ? low + 1
 2530                          : PDLMAX(low, 0);
 2531                    ----
 2532            ],
 2533             ref =>
 2534               'Search for values in a sorted array, return index appropriate for sampling from a distribution',
 2535 
 2536             doc_pre => q[
 2537 			 B<%FUNC%> returns an index I<I> for each value I<V> of C<$vals> appropriate
 2538                          for sampling C<$vals>
 2539 			 ----
 2540 			 ],
 2541             doc_incr => q[
 2542 				   V <= x[0]  : I = 0
 2543 			   x[0]  < V <= x[-1] : I s.t. x[I-1] < V <= x[I]
 2544 			   x[-1] < V          : I = $x->nelem -1
 2545 			 ----
 2546 			 ],
 2547 
 2548             doc_decr => q[
 2549 				    V > x[0]  : I = 0
 2550 			   x[0]  >= V > x[-1] : I s.t. x[I] >= V > x[I+1]
 2551 			   x[-1] >= V         : I = $x->nelem - 1
 2552 			 ----
 2553 			 ],
 2554 
 2555             doc_post => q[
 2556 			  If all elements of C<$x> are equal, I<< I = $x->nelem - 1 >>.
 2557 
 2558 			  If C<$x> contains duplicated elements, I<I> is the index of the
 2559 			  leftmost (by position in array) duplicate if I<V> matches.
 2560 
 2561 			  =for example
 2562 
 2563 			  This function is useful e.g. when you have a list of probabilities
 2564 			  for events and want to generate indices to events:
 2565 
 2566 			   $x = pdl(.01,.86,.93,1); # Barnsley IFS probabilities cumulatively
 2567 			   $y = random 20;
 2568 			   $c = %FUNC%($y, $x); # Now, $c will have the appropriate distr.
 2569 
 2570 			  It is possible to use the L<cumusumover|PDL::Ufunc/cumusumover>
 2571 			  function to obtain cumulative probabilities from absolute probabilities.
 2572 			  ----
 2573 			  ],
 2574 
 2575         },
 2576     ],
 2577 
 2578     [
 2579         # return left-most possible insertion point.
 2580         # lowest index where x[i] >= value
 2581         vsearch_insert_leftmost => {
 2582             low  => 0,
 2583             high => 'n1',
 2584             code => q[
 2585 		    while (low <= high ) {
 2586                         mid = %MID%;
 2587 			if ( ( $x(n => mid) >= value ) == up ) high = mid - 1;
 2588 			else                                   low  = mid + 1;
 2589 		    }
 2590 		    $idx() = up ? low : high;
 2591 	    ],
 2592             ref =>
 2593               'Determine the insertion point for values in a sorted array, inserting before duplicates.',
 2594 
 2595             doc_pre => q[
 2596 			 B<%FUNC%> returns an index I<I> for each value I<V> of
 2597 			 C<$vals> equal to the leftmost position (by index in array) within
 2598 			 C<$x> that I<V> may be inserted and still maintain the order in
 2599 			 C<$x>.
 2600 
 2601 			 Insertion at index I<I> involves shifting elements I<I> and higher of
 2602 			 C<$x> to the right by one and setting the now empty element at index
 2603 			 I<I> to I<V>.
 2604 			 ----
 2605 			 ],
 2606             doc_incr => q[
 2607 				   V <= x[0]  : I = 0
 2608 			   x[0]  < V <= x[-1] : I s.t. x[I-1] < V <= x[I]
 2609 			   x[-1] < V          : I = $x->nelem
 2610 			 ----
 2611 			 ],
 2612 
 2613             doc_decr => q[
 2614 				    V >  x[0]  : I = -1
 2615 			   x[0]  >= V >= x[-1] : I s.t. x[I] >= V > x[I+1]
 2616 			   x[-1] >= V          : I = $x->nelem -1
 2617 			 ----
 2618 			 ],
 2619 
 2620             doc_post => q[
 2621 			  If all elements of C<$x> are equal,
 2622 
 2623 			      i = 0
 2624 
 2625 			  If C<$x> contains duplicated elements, I<I> is the index of the
 2626 			  leftmost (by index in array) duplicate if I<V> matches.
 2627 			  ----
 2628 			  ],
 2629 
 2630         },
 2631     ],
 2632 
 2633     [
 2634         # return right-most possible insertion point.
 2635         # lowest index where x[i] > value
 2636         vsearch_insert_rightmost => {
 2637             low  => 0,
 2638             high => 'n1',
 2639             code => q[
 2640 		   while (low <= high ) {
 2641 		       mid = %MID%;
 2642 		       if ( ( $x(n => mid) > value ) == up ) high = mid - 1;
 2643 		       else                                  low  = mid + 1;
 2644 		   }
 2645 		   $idx() = up ? low : high;
 2646             ],
 2647             ref =>
 2648               'Determine the insertion point for values in a sorted array, inserting after duplicates.',
 2649 
 2650             doc_pre => q[
 2651 			 B<%FUNC%> returns an index I<I> for each value I<V> of
 2652 			 C<$vals> equal to the rightmost position (by index in array) within
 2653 			 C<$x> that I<V> may be inserted and still maintain the order in
 2654 			 C<$x>.
 2655 
 2656 			 Insertion at index I<I> involves shifting elements I<I> and higher of
 2657 			 C<$x> to the right by one and setting the now empty element at index
 2658 			 I<I> to I<V>.
 2659 			 ----
 2660 			 ],
 2661             doc_incr => q[
 2662 				    V < x[0]  : I = 0
 2663 			   x[0]  <= V < x[-1] : I s.t. x[I-1] <= V < x[I]
 2664 			   x[-1] <= V         : I = $x->nelem
 2665 			 ----
 2666 			 ],
 2667 
 2668             doc_decr => q[
 2669 				   V >= x[0]  : I = -1
 2670 			   x[0]  > V >= x[-1] : I s.t. x[I] >= V > x[I+1]
 2671 			   x[-1] > V          : I = $x->nelem -1
 2672 			 ----
 2673 			 ],
 2674 
 2675             doc_post => q[
 2676 			  If all elements of C<$x> are equal,
 2677 
 2678 			      i = $x->nelem - 1
 2679 
 2680 			  If C<$x> contains duplicated elements, I<I> is the index of the
 2681 			  leftmost (by index in array) duplicate if I<V> matches.
 2682 			  ----
 2683 			  ],
 2684 
 2685         },
 2686 
 2687     ],
 2688     [
 2689         # return index of matching element, else -( insertion point + 1 )
 2690         # patterned after the Java binarySearch interface; see
 2691         # http://docs.oracle.com/javase/7/docs/api/java/util/Arrays.html
 2692         vsearch_match => {
 2693             low  => 0,
 2694             high => 'n1',
 2695             code => q[
 2696                    int done = 0;
 2697 
 2698                    while (low <= high ) {
 2699                        $GENERIC() mid_value;
 2700 
 2701                        mid = %MID%;
 2702 
 2703                        mid_value = $x(n=>mid);
 2704 
 2705                        if ( up ) {
 2706 			   if      ( mid_value > value ) { high = mid - 1; }
 2707 			   else if ( mid_value < value ) { low  = mid + 1; }
 2708 			   else                          { done = 1; break; }
 2709                        }
 2710                        else {
 2711 			   if      ( mid_value < value ) { high = mid - 1; }
 2712 			   else if ( mid_value > value ) { low  = mid + 1; }
 2713 			   else                          { done = 1; break; }
 2714                        }
 2715                    }
 2716                    $idx() = done ? mid
 2717                          :   up ? - ( low  + 1 )
 2718                          :        - ( high + 1 );
 2719 	       ],
 2720             ref => 'Match values against a sorted array.',
 2721 
 2722             doc_pre => q[
 2723 			 B<%FUNC%> returns an index I<I> for each value I<V> of
 2724 			 C<$vals>.  If I<V> matches an element in C<$x>, I<I> is the
 2725 			 index of that element, otherwise it is I<-( insertion_point + 1 )>,
 2726 			 where I<insertion_point> is an index in C<$x> where I<V> may be
 2727 			 inserted while maintaining the order in C<$x>.  If C<$x> has
 2728 			 duplicated values, I<I> may refer to any of them.
 2729 			 ----
 2730 			 ],
 2731 
 2732         },
 2733     ],
 2734     [
 2735         # x[i] is the INclusive left edge of the bin
 2736         # return i, s.t. x[i] <= value < x[i+1].
 2737         # returns -1 if x[0] > value
 2738         # returns N-1 if x[-1] <= value
 2739         vsearch_bin_inclusive => {
 2740             low  => 0,
 2741             high => 'n1',
 2742             code => q[
 2743                    while (low <= high ) {
 2744                        mid = %MID%;
 2745                        if ( ( $x(n => mid) <= value ) == up ) low  = mid + 1;
 2746                        else                                   high = mid - 1;
 2747                    }
 2748                    $idx() = up ? high: low;
 2749             ],
 2750             ref =>
 2751               'Determine the index for values in a sorted array of bins, lower bound inclusive.',
 2752 
 2753             doc_pre => q[
 2754 			 C<$x> represents the edges of contiguous bins, with the first and
 2755 			 last elements representing the outer edges of the outer bins, and the
 2756 			 inner elements the shared bin edges.
 2757 
 2758 			 The lower bound of a bin is inclusive to the bin, its outer bound is exclusive to it.
 2759                          B<%FUNC%> returns an index I<I> for each value I<V> of C<$vals>
 2760 			 ----
 2761 			 ],
 2762             doc_incr => q[
 2763 				    V < x[0]  : I = -1
 2764 			   x[0]  <= V < x[-1] : I s.t. x[I] <= V < x[I+1]
 2765 			   x[-1] <= V         : I = $x->nelem - 1
 2766 			 ----
 2767 			 ],
 2768 
 2769             doc_decr => q[
 2770 				    V >= x[0]  : I = 0
 2771 			   x[0]  >  V >= x[-1] : I s.t. x[I+1] > V >= x[I]
 2772 			   x[-1] >  V          : I = $x->nelem
 2773 			 ----
 2774 			 ],
 2775 
 2776             doc_post => q[
 2777 			  If all elements of C<$x> are equal,
 2778 
 2779 			      i = $x->nelem - 1
 2780 
 2781 			  If C<$x> contains duplicated elements, I<I> is the index of the
 2782 			  righmost (by index in array) duplicate if I<V> matches.
 2783 			  ----
 2784 			  ],
 2785 
 2786         },
 2787     ],
 2788 
 2789     [
 2790         # x[i] is the EXclusive left edge of the bin
 2791         # return i, s.t. x[i] < value <= x[i+1].
 2792         # returns -1 if x[0] >= value
 2793         # returns N-1 if x[-1] < value
 2794         vsearch_bin_exclusive => {
 2795             low  => 0,
 2796             high => 'n1',
 2797             code => q[
 2798                    while (low <= high ) {
 2799                        mid = %MID%;
 2800                        if ( ( $x(n => mid) <  value ) == up ) low  = mid + 1;
 2801                        else                                   high = mid - 1;
 2802                    }
 2803                    $idx() = up ? high: low;
 2804             ],
 2805             ref =>
 2806               'Determine the index for values in a sorted array of bins, lower bound exclusive.',
 2807 
 2808             doc_pre => q[
 2809 			 C<$x> represents the edges of contiguous bins, with the first and
 2810 			 last elements representing the outer edges of the outer bins, and the
 2811 			 inner elements the shared bin edges.
 2812 
 2813 			 The lower bound of a bin is exclusive to the bin, its upper bound is inclusive to it.
 2814 			 B<%FUNC%> returns an index I<I> for each value I<V> of C<$vals>.
 2815 			 ----
 2816 			 ],
 2817             doc_incr => q[
 2818 				    V <= x[0]  : I = -1
 2819 			   x[0]  <  V <= x[-1] : I s.t. x[I] < V <= x[I+1]
 2820 			   x[-1] <  V          : I = $x->nelem - 1
 2821 			 ----
 2822 			 ],
 2823 
 2824             doc_decr => q[
 2825 				    V >  x[0]  : I = 0
 2826 			   x[0]  >= V >  x[-1] : I s.t. x[I-1] >= V > x[I]
 2827 			   x[-1] >= V          : I = $x->nelem
 2828 			 ----
 2829 			 ],
 2830 
 2831             doc_post => q[
 2832 			  If all elements of C<$x> are equal,
 2833 
 2834 			      i = $x->nelem - 1
 2835 
 2836 			  If C<$x> contains duplicated elements, I<I> is the index of the
 2837 			  righmost (by index in array) duplicate if I<V> matches.
 2838 			  ----
 2839 			  ],
 2840 
 2841         }
 2842     ],
 2843 
 2844   )
 2845 
 2846 {
 2847     my ( $func, $algo ) = @$func;
 2848 
 2849     my %replace = (
 2850         # calculate midpoint of range; ensure we don't overflow
 2851         # (low+high)>>1 for large values of low + high
 2852 	# see sf.net bug #360
 2853         '%MID%'  => 'low + (( high - low )>> 1);',
 2854 
 2855 	# determine which way the data are sorted.  vsearch_sample
 2856 	# overrides this.
 2857         '%UP%' => '$x(n => n1) >= $x(n => 0)',
 2858 
 2859         '%FUNC%' => $func,
 2860 
 2861         '%PRE%' => undent(
 2862             q[
 2863 		    %DOC_PRE%
 2864                     ----
 2865 		   ]
 2866         ),
 2867 
 2868 
 2869 
 2870         '%BODY%' => undent(
 2871             q[
 2872 		   I<I> has the following properties:
 2873 
 2874 		   =over
 2875 
 2876 		   =item *
 2877 
 2878 		   if C<$x> is sorted in increasing order
 2879 
 2880 		   %DOC_INCR%
 2881 
 2882 		   =item *
 2883 
 2884 		   if C<$x> is sorted in decreasing order
 2885 
 2886 		   %DOC_DECR%
 2887 
 2888 		   =back
 2889 		   ----
 2890                    ]
 2891         ),
 2892 
 2893         '%POST%' => undent(
 2894             q[
 2895                    %DOC_POST%
 2896 		   ----
 2897                    ]
 2898         ),
 2899 
 2900         map { ( "%\U$_%" => undent( $algo->{$_} ) ) } keys %$algo,
 2901     );
 2902 
 2903     $replace{'%PRE%'} = '' unless defined $replace{'%DOC_PRE%'};
 2904     $replace{'%BODY%'} = ''
 2905       unless defined $replace{'%DOC_INCR%'} || defined $replace{'%DOC_DECR%'};
 2906     $replace{'%POST%'} = '' unless defined $replace{'%DOC_POST%'};
 2907 
 2908 
 2909     my $code = undent q[
 2910                    PDL_Indx n1 = $SIZE(n)-1;
 2911                    PDL_Indx low = %LOW%;
 2912                    PDL_Indx high = %HIGH%;
 2913                    PDL_Indx mid;
 2914 
 2915 		   $GENERIC() value = $vals();
 2916 
 2917                    /* determine sort order of data */
 2918                    int up = %UP%;
 2919                    %CODE%
 2920                    ----
 2921                ];
 2922 
 2923     my $doc = undent q[
 2924 		   =for ref
 2925 
 2926 		   %REF%
 2927 
 2928 		   =for usage
 2929 
 2930 		     $idx = %FUNC%($vals, $x);
 2931 
 2932 		   C<$x> must be sorted, but may be in decreasing or increasing
 2933 		   order.
 2934 
 2935                    %PRE%
 2936 		   %BODY%
 2937 		   %POST%
 2938                    ----
 2939 		   ];
 2940 
 2941 
 2942     # redo until nothing changes
 2943     for my $tref ( \$code, \$doc ) {
 2944         1 while $$tref =~ s/(%[\w_]+%)/$replace{$1}/ge;
 2945     }
 2946 
 2947     pp_def(
 2948         $func,
 2949         HandleBad    => 0,
 2950         BadDoc       => 'needs major (?) work to handles bad values',
 2951         Pars         => 'vals(); x(n); indx [o]idx()',
 2952         GenericTypes => $F,    # too restrictive ?
 2953         Code         => $code,
 2954 	Doc          => $doc,
 2955     );
 2956 }
 2957 
 2958 ###############################################################
 2959 # routines somehow related to interpolation
 2960 ###############################################################
 2961 
 2962 pp_def('interpolate',
 2963        HandleBad => 0,
 2964        BadDoc => 'needs major (?) work to handles bad values',
 2965 	Pars => 'xi(); x(n); y(n); [o] yi(); int [o] err()',
 2966 	GenericTypes => $F, # too restrictive ?
 2967 	Code => '
 2968 		 $GENERIC() d;
 2969 		 PDL_Indx n  = $SIZE(n);
 2970 		 PDL_Indx n1 = n-1;
 2971 		 int up = ($x(n => n1) > $x(n => 0));
 2972                  PDL_Indx jl, jh, m;
 2973                  int carp;
 2974 
 2975                  broadcastloop %{
 2976                    jl = -1;
 2977                    jh = n;
 2978                    carp = 0;
 2979   		   while (jh-jl > 1)  /* binary search */
 2980     			{
 2981       				m = (jh+jl) >> 1;
 2982 				if (($xi() > $x(n => m)) == up)
 2983 					jl = m;
 2984       				else
 2985 					jh = m;
 2986     			}
 2987 		   if (jl == -1) {
 2988 			if ($xi() != $x(n => 0)) carp = 1;
 2989 			jl = 0;
 2990                    } else if (jh == n) {
 2991 			if ($xi() != $x(n => n1)) carp = 1;
 2992 			jl = n1-1;
 2993 		   }
 2994 		   jh = jl+1;
 2995 		   if ((d = $x(n => jh)-$x(n => jl)) == 0)
 2996 			$CROAK("identical abscissas");
 2997 		   d = ($x(n => jh)-$xi())/d;
 2998 		   $yi() = d*$y(n => jl) + (1-d)*$y(n => jh);
 2999                    $err() = carp;
 3000 		%}
 3001 ', Doc=><<'EOD');
 3002 
 3003 =for ref
 3004 
 3005 routine for 1D linear interpolation
 3006 
 3007 =for usage
 3008 
 3009  ( $yi, $err ) = interpolate($xi, $x, $y)
 3010 
 3011 Given a set of points C<($x,$y)>, use linear interpolation
 3012 to find the values C<$yi> at a set of points C<$xi>.
 3013 
 3014 C<interpolate> uses a binary search to find the suspects, er...,
 3015 interpolation indices and therefore abscissas (ie C<$x>)
 3016 have to be I<strictly> ordered (increasing or decreasing).
 3017 For interpolation at lots of
 3018 closely spaced abscissas an approach that uses the last index found as
 3019 a start for the next search can be faster (compare Numerical Recipes
 3020 C<hunt> routine). Feel free to implement that on top of the binary
 3021 search if you like. For out of bounds values it just does a linear
 3022 extrapolation and sets the corresponding element of C<$err> to 1,
 3023 which is otherwise 0.
 3024 
 3025 See also L</interpol>, which uses the same routine,
 3026 differing only in the handling of extrapolation - an error message
 3027 is printed rather than returning an error ndarray.
 3028 
 3029 =cut
 3030 
 3031 EOD
 3032 
 3033 pp_add_exported('', 'interpol');
 3034 pp_addpm(<<'EOD');
 3035 =head2 interpol
 3036 
 3037 =for sig
 3038 
 3039  Signature: (xi(); x(n); y(n); [o] yi())
 3040 
 3041 =for ref
 3042 
 3043 routine for 1D linear interpolation
 3044 
 3045 =for usage
 3046 
 3047  $yi = interpol($xi, $x, $y)
 3048 
 3049 C<interpol> uses the same search method as L</interpolate>,
 3050 hence C<$x> must be I<strictly> ordered (either increasing or decreasing).
 3051 The difference occurs in the handling of out-of-bounds values; here
 3052 an error message is printed.
 3053 
 3054 =cut
 3055 
 3056 # kept in for backwards compatability
 3057 sub interpol ($$$;$) {
 3058     my $xi = shift;
 3059     my $x  = shift;
 3060     my $y  = shift;
 3061     my $yi = @_ == 1 ? $_[0] : PDL->null;
 3062     interpolate( $xi, $x, $y, $yi, my $err = PDL->null );
 3063     print "some values had to be extrapolated\n"
 3064 	if any $err;
 3065     return $yi if @_ == 0;
 3066 } # sub: interpol()
 3067 *PDL::interpol = \&interpol;
 3068 
 3069 EOD
 3070 
 3071 pp_add_exported('','interpND');
 3072 pp_addpm(<<'EOD');
 3073 =head2 interpND
 3074 
 3075 =for ref
 3076 
 3077 Interpolate values from an N-D ndarray, with switchable method
 3078 
 3079 =for example
 3080 
 3081   $source = 10*xvals(10,10) + yvals(10,10);
 3082   $index = pdl([[2.2,3.5],[4.1,5.0]],[[6.0,7.4],[8,9]]);
 3083   print $source->interpND( $index );
 3084 
 3085 InterpND acts like L<indexND|PDL::Slices/indexND>,
 3086 collapsing C<$index> by lookup
 3087 into C<$source>; but it does interpolation rather than direct sampling.
 3088 The interpolation method and boundary condition are switchable via
 3089 an options hash.
 3090 
 3091 By default, linear or sample interpolation is used, with constant
 3092 value outside the boundaries of the source pdl.  No dataflow occurs,
 3093 because in general the output is computed rather than indexed.
 3094 
 3095 All the interpolation methods treat the pixels as value-centered, so
 3096 the C<sample> method will return C<< $a->(0) >> for coordinate values on
 3097 the set [-0.5,0.5), and all methods will return C<< $a->(1) >> for
 3098 a coordinate value of exactly 1.
 3099 
 3100 
 3101 Recognized options:
 3102 
 3103 =over 3
 3104 
 3105 =item method
 3106 
 3107 Values can be:
 3108 
 3109 =over 3
 3110 
 3111 =item * 0, s, sample, Sample (default for integer source types)
 3112 
 3113 The nearest value is taken. Pixels are regarded as centered on their
 3114 respective integer coordinates (no offset from the linear case).
 3115 
 3116 =item * 1, l, linear, Linear (default for floating point source types)
 3117 
 3118 The values are N-linearly interpolated from an N-dimensional cube of size 2.
 3119 
 3120 =item * 3, c, cube, cubic, Cubic
 3121 
 3122 The values are interpolated using a local cubic fit to the data.  The
 3123 fit is constrained to match the original data and its derivative at the
 3124 data points.  The second derivative of the fit is not continuous at the
 3125 data points.  Multidimensional datasets are interpolated by the
 3126 successive-collapse method.
 3127 
 3128 (Note that the constraint on the first derivative causes a small amount
 3129 of ringing around sudden features such as step functions).
 3130 
 3131 =item * f, fft, fourier, Fourier
 3132 
 3133 The source is Fourier transformed, and the interpolated values are
 3134 explicitly calculated from the coefficients.  The boundary condition
 3135 option is ignored -- periodic boundaries are imposed.
 3136 
 3137 If you pass in the option "fft", and it is a list (ARRAY) ref, then it
 3138 is a stash for the magnitude and phase of the source FFT.  If the list
 3139 has two elements then they are taken as already computed; otherwise
 3140 they are calculated and put in the stash.
 3141 
 3142 =back
 3143 
 3144 =item b, bound, boundary, Boundary
 3145 
 3146 This option is passed unmodified into L<indexND|PDL::Slices/indexND>,
 3147 which is used as the indexing engine for the interpolation.
 3148 Some current allowed values are 'extend', 'periodic', 'truncate', and 'mirror'
 3149 (default is 'truncate').
 3150 
 3151 =item bad
 3152 
 3153 contains the fill value used for 'truncate' boundary.  (default 0)
 3154 
 3155 =item fft
 3156 
 3157 An array ref whose associated list is used to stash the FFT of the source
 3158 data, for the FFT method.
 3159 
 3160 =back
 3161 
 3162 =cut
 3163 
 3164 *interpND = *PDL::interpND;
 3165 sub PDL::interpND {
 3166   my $source = shift;
 3167   my $index = shift;
 3168   my $options = shift;
 3169 
 3170   barf 'Usage: interp_nd($source,$index,[{%options}])\n'
 3171     if(defined $options   and    ref $options ne 'HASH');
 3172 
 3173   my($opt) = (defined $options) ? $options : {};
 3174 
 3175   my($method)   = $opt->{m} || $opt->{meth} || $opt->{method} || $opt->{Method};
 3176   $method //= $source->type->integer ? 'sample' : 'linear';
 3177 
 3178   my($boundary) = $opt->{b} || $opt->{boundary} || $opt->{Boundary} || $opt->{bound} || $opt->{Bound} || 'extend';
 3179   my($bad) = $opt->{bad} || $opt->{Bad} || 0.0;
 3180 
 3181   if($method =~ m/^s(am(p(le)?)?)?/i) {
 3182     return $source->range(PDL::Math::floor($index+0.5),0,$boundary);
 3183   }
 3184 
 3185   elsif (($method eq 1) || $method =~ m/^l(in(ear)?)?/i) {
 3186     ## key: (ith = index broadcast; cth = cube broadcast; sth = source broadcast)
 3187     my $d = $index->dim(0);
 3188     my $di = $index->ndims - 1;
 3189 
 3190     # Grab a 2-on-a-side n-cube around each desired pixel
 3191     my $samp = $source->range($index->floor,2,$boundary); # (ith, cth, sth)
 3192 
 3193     # Reorder to put the cube dimensions in front and convert to a list
 3194     $samp = $samp->reorder( $di .. $di+$d-1,
 3195 			    0 .. $di-1,
 3196 			    $di+$d .. $samp->ndims-1) # (cth, ith, sth)
 3197                   ->clump($d); # (clst, ith, sth)
 3198 
 3199     # Enumerate the corners of an n-cube and convert to a list
 3200     # (the 'x' is the normal perl repeat operator)
 3201     my $crnr = PDL::Basic::ndcoords( (2) x $index->dim(0) ) # (index,cth)
 3202              ->mv(0,-1)->clump($index->dim(0))->mv(-1,0); # (index, clst)
 3203 
 3204     # a & b are the weighting coefficients.
 3205     my($x,$y);
 3206     my($indexwhere);
 3207     ($indexwhere = $index->where( 0 * $index )) .= -10; # Change NaN to invalid
 3208     {
 3209       my $bb = PDL::Math::floor($index);
 3210       $x = ($index - $bb)     -> dummy(1,$crnr->dim(1)); # index, clst, ith
 3211       $y = ($bb + 1 - $index) -> dummy(1,$crnr->dim(1)); # index, clst, ith
 3212     }
 3213 
 3214     # Use 1/0 corners to select which multiplier happens, multiply
 3215     # 'em all together to get sample weights, and sum to get the answer.
 3216     my $out0 =  ( ($x * ($crnr==1) + $y * ($crnr==0)) #index, clst, ith
 3217 		 -> prodover                          #clst, ith
 3218 		 );
 3219 
 3220     my $out = ($out0 * $samp)->sumover; # ith, sth
 3221 
 3222     # Work around BAD-not-being-contagious bug in PDL <= 2.6 bad handling code  --CED 3-April-2013
 3223     if ($source->badflag) {
 3224 	my $baddies = $samp->isbad->orover;
 3225 	$out = $out->setbadif($baddies);
 3226     }
 3227 
 3228     return $out;
 3229 
 3230   } elsif(($method eq 3) || $method =~ m/^c(u(b(e|ic)?)?)?/i) {
 3231 
 3232       my ($d,@di) = $index->dims;
 3233       my $di = $index->ndims - 1;
 3234 
 3235       # Grab a 4-on-a-side n-cube around each desired pixel
 3236       my $samp = $source->range($index->floor - 1,4,$boundary) #ith, cth, sth
 3237 	  ->reorder( $di .. $di+$d-1, 0..$di-1, $di+$d .. $source->ndims-1 );
 3238 	                   # (cth, ith, sth)
 3239 
 3240       # Make a cube of the subpixel offsets, and expand its dims to
 3241       # a 4-on-a-side N-1 cube, to match the slices of $samp (used below).
 3242       my $y = $index - $index->floor;
 3243       for my $i(1..$d-1) {
 3244 	  $y = $y->dummy($i,4);
 3245       }
 3246 
 3247       # Collapse by interpolation, one dimension at a time...
 3248       for my $i(0..$d-1) {
 3249 	  my $a0 = $samp->slice("(1)");    # Just-under-sample
 3250 	  my $a1 = $samp->slice("(2)");    # Just-over-sample
 3251 	  my $a1a0 = $a1 - $a0;
 3252 
 3253 	  my $gradient = 0.5 * ($samp->slice("2:3")-$samp->slice("0:1"));
 3254 	  my $s0 = $gradient->slice("(0)");   # Just-under-gradient
 3255 	  my $s1 = $gradient->slice("(1)");   # Just-over-gradient
 3256 
 3257 	  my $bb = $y->slice("($i)");
 3258 
 3259 	  # Collapse the sample...
 3260 	  $samp = ( $a0 +
 3261 		    $bb * (
 3262 			   $s0  +
 3263 			   $bb * ( (3 * $a1a0 - 2*$s0 - $s1) +
 3264 				   $bb * ( $s1 + $s0 - 2*$a1a0 )
 3265 				   )
 3266 			   )
 3267 		    );
 3268 
 3269 	  # "Collapse" the subpixel offset...
 3270 	  $y = $y->slice(":,($i)");
 3271       }
 3272 
 3273       return $samp;
 3274 
 3275   } elsif($method =~ m/^f(ft|ourier)?/i) {
 3276 
 3277      local $@;
 3278      eval "use PDL::FFT;";
 3279      my $fftref = $opt->{fft};
 3280      $fftref = [] unless(ref $fftref eq 'ARRAY');
 3281      if(@$fftref != 2) {
 3282 	 my $x = $source->copy;
 3283 	 my $y = zeroes($source);
 3284 	 fftnd($x,$y);
 3285 	 $fftref->[0] = sqrt($x*$x+$y*$y) / $x->nelem;
 3286 	 $fftref->[1] = - atan2($y,$x);
 3287      }
 3288 
 3289      my $i;
 3290      my $c = PDL::Basic::ndcoords($source);               # (dim, source-dims)
 3291      for $i(1..$index->ndims-1) {
 3292 	 $c = $c->dummy($i,$index->dim($i))
 3293      }
 3294      my $id = $index->ndims-1;
 3295      my $phase = (($c * $index * 3.14159 * 2 / pdl($source->dims))
 3296 		  ->sumover) # (index-dims, source-dims)
 3297  	          ->reorder($id..$id+$source->ndims-1,0..$id-1); # (src, index)
 3298 
 3299      my $phref = $fftref->[1]->copy;        # (source-dims)
 3300      my $mag = $fftref->[0]->copy;          # (source-dims)
 3301 
 3302      for $i(1..$index->ndims-1) {
 3303 	 $phref = $phref->dummy(-1,$index->dim($i));
 3304 	 $mag = $mag->dummy(-1,$index->dim($i));
 3305      }
 3306      my $out = cos($phase + $phref ) * $mag;
 3307      $out = $out->clump($source->ndims)->sumover;
 3308 
 3309      return $out;
 3310  }  else {
 3311      barf("interpND: unknown method '$method'; valid ones are 'linear' and 'sample'.\n");
 3312  }
 3313 }
 3314 
 3315 EOD
 3316 
 3317 ##############################################################
 3318 # things related to indexing: one2nd, which, where
 3319 ##############################################################
 3320 
 3321 pp_add_exported("", 'one2nd');
 3322 pp_addpm(<<'EOD');
 3323 =head2 one2nd
 3324 
 3325 =for ref
 3326 
 3327 Converts a one dimensional index ndarray to a set of ND coordinates
 3328 
 3329 =for usage
 3330 
 3331  @coords=one2nd($x, $indices)
 3332 
 3333 returns an array of ndarrays containing the ND indexes corresponding to
 3334 the one dimensional list indices. The indices are assumed to
 3335 correspond to array C<$x> clumped using C<clump(-1)>. This routine is
 3336 used in the old vector form of L</whichND>, but is useful on
 3337 its own occasionally.
 3338 
 3339 Returned ndarrays have the L<indx|PDL::Core/indx> datatype.  C<$indices> can have
 3340 values larger than C<< $x->nelem >> but negative values in C<$indices>
 3341 will not give the answer you expect.
 3342 
 3343 =for example
 3344 
 3345  pdl> $x=pdl [[[1,2],[-1,1]], [[0,-3],[3,2]]]; $c=$x->clump(-1)
 3346  pdl> $maxind=maximum_ind($c); p $maxind;
 3347  6
 3348  pdl> print one2nd($x, maximum_ind($c))
 3349  0 1 1
 3350  pdl> p $x->at(0,1,1)
 3351  3
 3352 
 3353 =cut
 3354 
 3355 *one2nd = \&PDL::one2nd;
 3356 sub PDL::one2nd {
 3357   barf "Usage: one2nd \$array \$indices\n" if @_ != 2;
 3358   my ($x, $ind)=@_;
 3359   my @dimension=$x->dims;
 3360   $ind = indx($ind);
 3361   my(@index);
 3362   my $count=0;
 3363   foreach (@dimension) {
 3364     $index[$count++]=$ind % $_;
 3365     $ind /= $_;
 3366   }
 3367   return @index;
 3368 }
 3369 
 3370 EOD
 3371 
 3372 my $doc_which = <<'EOD';
 3373 
 3374 =for ref
 3375 
 3376 Returns indices of non-zero values from a 1-D PDL
 3377 
 3378 =for usage
 3379 
 3380  $i = which($mask);
 3381 
 3382 returns a pdl with indices for all those elements that are nonzero in
 3383 the mask. Note that the returned indices will be 1D. If you feed in a
 3384 multidimensional mask, it will be flattened before the indices are
 3385 calculated.  See also L</whichND> for multidimensional masks.
 3386 
 3387 If you want to index into the original mask or a similar ndarray
 3388 with output from C<which>, remember to flatten it before calling index:
 3389 
 3390   $data = random 5, 5;
 3391   $idx = which $data > 0.5; # $idx is now 1D
 3392   $bigsum = $data->flat->index($idx)->sum;  # flatten before indexing
 3393 
 3394 Compare also L</where> for similar functionality.
 3395 
 3396 SEE ALSO:
 3397 
 3398 L</which_both> returns separately the indices of both nonzero and zero
 3399 values in the mask.
 3400 
 3401 L</where_both> returns separately slices of both nonzero and zero
 3402 values in the mask.
 3403 
 3404 L</where> returns associated values from a data PDL, rather than
 3405 indices into the mask PDL.
 3406 
 3407 L</whichND> returns N-D indices into a multidimensional PDL.
 3408 
 3409 =for example
 3410 
 3411  pdl> $x = sequence(10); p $x
 3412  [0 1 2 3 4 5 6 7 8 9]
 3413  pdl> $indx = which($x>6); p $indx
 3414  [7 8 9]
 3415 
 3416 =cut
 3417 
 3418 EOD
 3419 
 3420 my $doc_which_both = <<'EOD';
 3421 
 3422 =for ref
 3423 
 3424 Returns indices of nonzero and zero values in a mask PDL
 3425 
 3426 =for usage
 3427 
 3428  ($i, $c_i) = which_both($mask);
 3429 
 3430 This works just as L</which>, but the complement of C<$i> will be in
 3431 C<$c_i>.
 3432 
 3433 =for example
 3434 
 3435  pdl> p $x = sequence(10)
 3436  [0 1 2 3 4 5 6 7 8 9]
 3437  pdl> ($big, $small) = which_both($x >= 5); p "$big\n$small"
 3438  [5 6 7 8 9]
 3439  [0 1 2 3 4]
 3440 
 3441 =cut
 3442 
 3443 EOD
 3444 
 3445     for (
 3446 	 {Name=>'which',
 3447 	  Pars => 'mask(n); indx [o] inds(n); indx [o]lastout()',
 3448 	  Variables => 'PDL_Indx dm=0;',
 3449 	  Elseclause => "",
 3450 	  Outclause => '$lastout() = dm; while (dm < $SIZE(n)) $inds(n=>dm++) = -1;',
 3451 	  Doc => $doc_which,
 3452 	  PMCode=><<'EOD',
 3453    sub which { my ($this,$out) = @_;
 3454 		$this = $this->flat;
 3455 		$out //= $this->nullcreate;
 3456 		PDL::_which_int($this,$out,my $lastout = $this->nullcreate);
 3457 		my $lastoutmax = $lastout->max->sclr;
 3458 		$lastoutmax ? $out->slice('0:'.($lastoutmax-1))->sever : empty(indx);
 3459    }
 3460    *PDL::which = \&which;
 3461 EOD
 3462 	  },
 3463 	 {Name => 'which_both',
 3464 	  Pars => 'mask(n); indx [o] inds(n); indx [o]notinds(n); indx [o]lastout(); indx [o]lastoutn()',
 3465 	  Variables => 'PDL_Indx dm=0; int dm2=0;',
 3466 	  Elseclause => "else { \n          \$notinds(n => dm2)=n; \n           dm2++;\n     }",
 3467 	  Outclause => '$lastout() = dm; $lastoutn() = dm2; while (dm < $SIZE(n)) $inds(n=>dm++) = -1; while (dm2 < $SIZE(n)) $notinds(n=>dm2++) = -1;',
 3468 	  Doc => $doc_which_both,
 3469 	  PMCode=><<'EOD',
 3470    sub which_both { my ($this,$outi,$outni) = @_;
 3471 		$this = $this->flat;
 3472 		$outi //= $this->nullcreate;
 3473 		$outni //= $this->nullcreate;
 3474 		PDL::_which_both_int($this,$outi,$outni,my $lastout = $this->nullcreate,my $lastoutn = $this->nullcreate);
 3475 		my $lastoutmax = $lastout->max->sclr;
 3476 		$outi = $lastoutmax ? $outi->slice('0:'.($lastoutmax-1))->sever : empty(indx);
 3477 		return $outi if !wantarray;
 3478 		my $lastoutnmax = $lastoutn->max->sclr;
 3479 		($outi, $lastoutnmax ? $outni->slice('0:'.($lastoutnmax-1))->sever : empty(indx));
 3480    }
 3481    *PDL::which_both = \&which_both;
 3482 EOD
 3483 	  }
 3484 	 )
 3485 {
 3486     pp_def($_->{Name},
 3487 	   HandleBad => 1,
 3488 	   Doc => $_->{Doc},
 3489 	   Pars => $_->{Pars},
 3490 	   GenericTypes => [ppdefs_all],
 3491 	   PMCode => $_->{PMCode},
 3492 	   Code => $_->{Variables} .'
 3493                  loop(n) %{
 3494 			if ( $mask() PDL_IF_BAD(&& $ISGOOD($mask()),) ) {
 3495 				$inds(n => dm) = n;
 3496 				dm++;
 3497 			}'.$_->{Elseclause}.'
 3498 		 %}'.$_->{Outclause},
 3499     );
 3500 }
 3501 
 3502 pp_add_exported("", 'where');
 3503 pp_addpm(<<'EOD');
 3504 =head2 where
 3505 
 3506 =for ref
 3507 
 3508 Use a mask to select values from one or more data PDLs
 3509 
 3510 C<where> accepts one or more data ndarrays and a mask ndarray.  It
 3511 returns a list of output ndarrays, corresponding to the input data
 3512 ndarrays.  Each output ndarray is a 1-dimensional list of values in its
 3513 corresponding data ndarray. The values are drawn from locations where
 3514 the mask is nonzero.
 3515 
 3516 The output PDLs are still connected to the original data PDLs, for the
 3517 purpose of dataflow.
 3518 
 3519 C<where> combines the functionality of L</which> and L<index|PDL::Slices/index>
 3520 into a single operation.
 3521 
 3522 BUGS:
 3523 
 3524 While C<where> works OK for most N-dimensional cases, it does not
 3525 broadcast properly over (for example) the (N+1)th dimension in data
 3526 that is compared to an N-dimensional mask.  Use C<whereND> for that.
 3527 
 3528 =for usage
 3529 
 3530  $i = $x->where($x+5 > 0); # $i contains those elements of $x
 3531                            # where mask ($x+5 > 0) is 1
 3532  $i .= -5;  # Set those elements (of $x) to -5. Together, these
 3533             # commands clamp $x to a maximum of -5.
 3534 
 3535 It is also possible to use the same mask for several ndarrays with
 3536 the same call:
 3537 
 3538  ($i,$j,$k) = where($x,$y,$z, $x+5>0);
 3539 
 3540 Note: C<$i> is always 1-D, even if C<$x> is E<gt>1-D.
 3541 
 3542 WARNING: The first argument
 3543 (the values) and the second argument (the mask) currently have to have
 3544 the exact same dimensions (or horrible things happen). You *cannot*
 3545 broadcast over a smaller mask, for example.
 3546 
 3547 =cut
 3548 
 3549 sub PDL::where {
 3550     barf "Usage: where( \$pdl1, ..., \$pdlN, \$mask )\n" if @_ == 1;
 3551     if(@_ == 2) {
 3552 	my($data,$mask) = @_;
 3553 	$data = $_[0]->clump(-1) if $_[0]->getndims>1;
 3554 	$mask = $_[1]->clump(-1) if $_[0]->getndims>1;
 3555 	return $data->index($mask->which());
 3556     } else {
 3557 	if($_[-1]->getndims > 1) {
 3558 	    my $mask = $_[-1]->clump(-1)->which;
 3559 	    return map {$_->clump(-1)->index($mask)} @_[0..$#_-1];
 3560 	} else {
 3561 	    my $mask = $_[-1]->which;
 3562 	    return map {$_->index($mask)} @_[0..$#_-1];
 3563 	}
 3564     }
 3565 }
 3566 *where = \&PDL::where;
 3567 
 3568 EOD
 3569 
 3570 pp_add_exported("", 'where_both');
 3571 pp_addpm(<<'EOD');
 3572 =head2 where_both
 3573 
 3574 =for ref
 3575 
 3576 Returns slices (non-zero in mask, zero) of an ndarray according to a mask
 3577 
 3578 =for usage
 3579 
 3580  ($match_vals, $non_match_vals) = where_both($pdl, $mask);
 3581 
 3582 This works like L</which_both>, but (flattened) data-flowing slices
 3583 rather than index-sets are returned.
 3584 
 3585 =for example
 3586 
 3587  pdl> p $x = sequence(10) + 2
 3588  [2 3 4 5 6 7 8 9 10 11]
 3589  pdl> ($big, $small) = where_both($x, $x > 5); p "$big\n$small"
 3590  [6 7 8 9 10 11]
 3591  [2 3 4 5]
 3592  pdl> p $big += 2, $small -= 1
 3593  [8 9 10 11 12 13] [1 2 3 4]
 3594  pdl> p $x
 3595  [1 2 3 4 8 9 10 11 12 13]
 3596 
 3597 =cut
 3598 
 3599 sub PDL::where_both {
 3600   barf "Usage: where_both(\$pdl, \$mask)\n" if @_ != 2;
 3601   my ($arr, $mask) = @_;  # $mask has 0==false, 1==true
 3602   my $arr_flat = $arr->clump(-1);
 3603   map $arr_flat->index1d($_), PDL::which_both($mask);
 3604 }
 3605 *where_both = \&PDL::where_both;
 3606 EOD
 3607 
 3608 pp_add_exported("", 'whereND');
 3609 pp_addpm(<<'EOD');
 3610 =head2 whereND
 3611 
 3612 =for ref
 3613 
 3614 C<where> with support for ND masks and broadcasting
 3615 
 3616 C<whereND> accepts one or more data ndarrays and a
 3617 mask ndarray.  It returns a list of output ndarrays,
 3618 corresponding to the input data ndarrays.  The values
 3619 are drawn from locations where the mask is nonzero.
 3620 
 3621 C<whereND> differs from C<where> in that the mask
 3622 dimensionality is preserved which allows for
 3623 proper broadcasting of the selection operation over
 3624 higher dimensions.
 3625 
 3626 As with C<where> the output PDLs are still connected
 3627 to the original data PDLs, for the purpose of dataflow.
 3628 
 3629 =for usage
 3630 
 3631   $sdata = whereND $data, $mask
 3632   ($s1, $s2, ..., $sn) = whereND $d1, $d2, ..., $dn, $mask
 3633 
 3634   where
 3635 
 3636     $data is M dimensional
 3637     $mask is N < M dimensional
 3638     dims($data) 1..N == dims($mask) 1..N
 3639     with broadcasting over N+1 to M dimensions
 3640 
 3641 =for example
 3642 
 3643   $data   = sequence(4,3,2);   # example data array
 3644   $mask4  = (random(4)>0.5);   # example 1-D mask array, has $n4 true values
 3645   $mask43 = (random(4,3)>0.5); # example 2-D mask array, has $n43 true values
 3646   $sdat4  = whereND $data, $mask4;   # $sdat4 is a [$n4,3,2] pdl
 3647   $sdat43 = whereND $data, $mask43;  # $sdat43 is a [$n43,2] pdl
 3648 
 3649 Just as with C<where>, you can use the returned value in an
 3650 assignment. That means that both of these examples are valid:
 3651 
 3652   # Used to create a new slice stored in $sdat4:
 3653   $sdat4 = $data->whereND($mask4);
 3654   $sdat4 .= 0;
 3655   # Used in lvalue context:
 3656   $data->whereND($mask4) .= 0;
 3657 
 3658 SEE ALSO:
 3659 
 3660 L</whichND> returns N-D indices into a multidimensional PDL, from a mask.
 3661 
 3662 =cut
 3663 
 3664 sub PDL::whereND :lvalue {
 3665    barf "Usage: whereND( \$pdl1, ..., \$pdlN, \$mask )\n" if @_ == 1;
 3666    my $mask = pop @_;  # $mask has 0==false, 1==true
 3667    my @to_return;
 3668    my $n = PDL::sum($mask);
 3669    foreach my $arr (@_) {
 3670       my $sub_i = $mask * ones($arr);
 3671       my $where_sub_i = PDL::where($arr, $sub_i);
 3672       # count the number of dims in $mask and $arr
 3673       # $mask = a b c d e f.....
 3674       my @idims = dims($arr);
 3675       # ...and pop off the number of dims in $mask
 3676       foreach ( dims($mask) ) { shift(@idims) };
 3677       my $ndim = 0;
 3678       foreach my $id ($n, @idims[0..($#idims-1)]) {
 3679          $where_sub_i = $where_sub_i->splitdim($ndim++,$id) if $n>0;
 3680       }
 3681       push @to_return, $where_sub_i;
 3682    }
 3683    return (@to_return == 1) ? $to_return[0] : @to_return;
 3684 }
 3685 *whereND = \&PDL::whereND;
 3686 
 3687 EOD
 3688 
 3689 pp_add_exported("", 'whichND');
 3690 pp_addpm(<<'EOD');
 3691 =head2 whichND
 3692 
 3693 =for ref
 3694 
 3695 Return the coordinates of non-zero values in a mask.
 3696 
 3697 =for usage
 3698 
 3699 WhichND returns the N-dimensional coordinates of each nonzero value in
 3700 a mask PDL with any number of dimensions.  The returned values arrive
 3701 as an array-of-vectors suitable for use in
 3702 L<indexND|PDL::Slices/indexND> or L<range|PDL::Slices/range>.
 3703 
 3704  $coords = whichND($mask);
 3705 
 3706 returns a PDL containing the coordinates of the elements that are non-zero
 3707 in C<$mask>, suitable for use in L<PDL::Slices/indexND>. The 0th dimension contains the
 3708 full coordinate listing of each point; the 1st dimension lists all the points.
 3709 For example, if $mask has rank 4 and 100 matching elements, then $coords has
 3710 dimension 4x100.
 3711 
 3712 If no such elements exist, then whichND returns a structured empty PDL:
 3713 an Nx0 PDL that contains no values (but matches, broadcasting-wise, with
 3714 the vectors that would be produced if such elements existed).
 3715 
 3716 DEPRECATED BEHAVIOR IN LIST CONTEXT:
 3717 
 3718 whichND once delivered different values in list context than in scalar
 3719 context, for historical reasons.  In list context, it returned the
 3720 coordinates transposed, as a collection of 1-PDLs (one per dimension)
 3721 in a list.  This usage is deprecated in PDL 2.4.10, and will cause a
 3722 warning to be issued every time it is encountered.  To avoid the
 3723 warning, you can set the global variable "$PDL::whichND" to 's' to
 3724 get scalar behavior in all contexts, or to 'l' to get list behavior in
 3725 list context.
 3726 
 3727 In later versions of PDL, the deprecated behavior will disappear.  Deprecated
 3728 list context whichND expressions can be replaced with:
 3729 
 3730     @list = $x->whichND->mv(0,-1)->dog;
 3731 
 3732 SEE ALSO:
 3733 
 3734 L</which> finds coordinates of nonzero values in a 1-D mask.
 3735 
 3736 L</where> extracts values from a data PDL that are associated
 3737 with nonzero values in a mask PDL.
 3738 
 3739 L<PDL::Slices/indexND> can be fed the coordinates to return the values.
 3740 
 3741 =for example
 3742 
 3743  pdl> $s=sequence(10,10,3,4)
 3744  pdl> ($x, $y, $z, $w)=whichND($s == 203); p $x, $y, $z, $w
 3745  [3] [0] [2] [0]
 3746  pdl> print $s->at(list(cat($x,$y,$z,$w)))
 3747  203
 3748 
 3749 =cut
 3750 
 3751 *whichND = \&PDL::whichND;
 3752 sub PDL::whichND {
 3753   my $mask = PDL->topdl(shift);
 3754 
 3755   # List context: generate a perl list by dimension
 3756   if(wantarray) {
 3757       if(!defined($PDL::whichND)) {
 3758 	  printf STDERR "whichND: WARNING - list context deprecated. Set \$PDL::whichND. Details in pod.";
 3759       } elsif($PDL::whichND =~ m/l/i) {
 3760 	  # old list context enabled by setting $PDL::whichND to 'l'
 3761 	  my $ind=($mask->clump(-1))->which;
 3762 	  return $mask->one2nd($ind);
 3763       }
 3764       # if $PDL::whichND does not contain 'l' or 'L', fall through to scalar context
 3765   }
 3766 
 3767   # Scalar context: generate an N-D index ndarray
 3768   return PDL->new_from_specification(indx,$mask->ndims,0) if !$mask->nelem;
 3769   return $mask ? pdl(indx,0) : PDL->new_from_specification(indx,0)
 3770     if !$mask->getndims;
 3771 
 3772   my $ind = $mask->flat->which->dummy(0,$mask->getndims)->make_physical;
 3773   # In the empty case, explicitly return the correct type of structured empty
 3774   return PDL->new_from_specification(indx,$mask->ndims, 0) if !$ind->nelem;
 3775 
 3776   my $mult = ones(indx, $mask->getndims);
 3777   my @mdims = $mask->dims;
 3778 
 3779   for my $i (0..$#mdims-1) {
 3780    # use $tmp for 5.005_03 compatibility
 3781    (my $tmp = $mult->index($i+1)) .= $mult->index($i)*$mdims[$i];
 3782   }
 3783 
 3784   for my $i (0..$#mdims) {
 3785    my($s) = $ind->index($i);
 3786    $s /= $mult->index($i);
 3787    $s %= $mdims[$i];
 3788   }
 3789 
 3790   return $ind;
 3791 }
 3792 EOD
 3793 
 3794 #
 3795 # Set operations suited for manipulation of the operations above.
 3796 #
 3797 
 3798 pp_add_exported("", 'setops');
 3799 pp_addpm(<<'EOD');
 3800 =head2 setops
 3801 
 3802 =for ref
 3803 
 3804 Implements simple set operations like union and intersection
 3805 
 3806 =for usage
 3807 
 3808    Usage: $set = setops($x, <OPERATOR>, $y);
 3809 
 3810 The operator can be C<OR>, C<XOR> or C<AND>. This is then applied
 3811 to C<$x> viewed as a set and C<$y> viewed as a set. Set theory says
 3812 that a set may not have two or more identical elements, but setops
 3813 takes care of this for you, so C<$x=pdl(1,1,2)> is OK. The functioning
 3814 is as follows:
 3815 
 3816 =over
 3817 
 3818 =item C<OR>
 3819 
 3820 The resulting vector will contain the elements that are either in C<$x>
 3821 I<or> in C<$y> or both. This is the union in set operation terms
 3822 
 3823 =item C<XOR>
 3824 
 3825 The resulting vector will contain the elements that are either in C<$x>
 3826 or C<$y>, but not in both. This is
 3827 
 3828      Union($x, $y) - Intersection($x, $y)
 3829 
 3830 in set operation terms.
 3831 
 3832 =item C<AND>
 3833 
 3834 The resulting vector will contain the intersection of C<$x> and C<$y>, so
 3835 the elements that are in both C<$x> and C<$y>. Note that for convenience
 3836 this operation is also aliased to L</intersect>.
 3837 
 3838 =back
 3839 
 3840 It should be emphasized that these routines are used when one or both of
 3841 the sets C<$x>, C<$y> are hard to calculate or that you get from a separate
 3842 subroutine.
 3843 
 3844 Finally IDL users might be familiar with Craig Markwardt's C<cmset_op.pro>
 3845 routine which has inspired this routine although it was written independently
 3846 However the present routine has a few less options (but see the examples)
 3847 
 3848 =for example
 3849 
 3850 You will very often use these functions on an index vector, so that is
 3851 what we will show here. We will in fact something slightly silly. First
 3852 we will find all squares that are also cubes below 10000.
 3853 
 3854 Create a sequence vector:
 3855 
 3856   pdl> $x = sequence(10000)
 3857 
 3858 Find all odd and even elements:
 3859 
 3860   pdl> ($even, $odd) = which_both( ($x % 2) == 0)
 3861 
 3862 Find all squares
 3863 
 3864   pdl> $squares= which(ceil(sqrt($x)) == floor(sqrt($x)))
 3865 
 3866 Find all cubes (being careful with roundoff error!)
 3867 
 3868   pdl> $cubes= which(ceil($x**(1.0/3.0)) == floor($x**(1.0/3.0)+1e-6))
 3869 
 3870 Then find all squares that are cubes:
 3871 
 3872   pdl> $both = setops($squares, 'AND', $cubes)
 3873 
 3874 And print these (assumes that C<PDL::NiceSlice> is loaded!)
 3875 
 3876   pdl> p $x($both)
 3877    [0 1 64 729 4096]
 3878 
 3879 Then find all numbers that are either cubes or squares, but not both:
 3880 
 3881   pdl> $cube_xor_square = setops($squares, 'XOR', $cubes)
 3882 
 3883   pdl> p $cube_xor_square->nelem()
 3884    112
 3885 
 3886 So there are a total of 112 of these!
 3887 
 3888 Finally find all odd squares:
 3889 
 3890   pdl> $odd_squares = setops($squares, 'AND', $odd)
 3891 
 3892 
 3893 Another common occurrence is to want to get all objects that are
 3894 in C<$x> and in the complement of C<$y>. But it is almost always best
 3895 to create the complement explicitly since the universe that both are
 3896 taken from is not known. Thus use L</which_both> if possible
 3897 to keep track of complements.
 3898 
 3899 If this is impossible the best approach is to make a temporary:
 3900 
 3901 This creates an index vector the size of the universe of the sets and
 3902 set all elements in C<$y> to 0
 3903 
 3904   pdl> $tmp = ones($n_universe); $tmp($y) .= 0;
 3905 
 3906 This then finds the complement of C<$y>
 3907 
 3908   pdl> $C_b = which($tmp == 1);
 3909 
 3910 and this does the final selection:
 3911 
 3912   pdl> $set = setops($x, 'AND', $C_b)
 3913 
 3914 =cut
 3915 
 3916 *setops = \&PDL::setops;
 3917 
 3918 sub PDL::setops {
 3919 
 3920   my ($x, $op, $y)=@_;
 3921 
 3922   # Check that $x and $y are 1D.
 3923   if ($x->ndims() > 1 || $y->ndims() > 1) {
 3924      warn 'setops: $x and $y must be 1D - flattening them!'."\n";
 3925      $x = $x->flat;
 3926      $y = $y->flat;
 3927   }
 3928 
 3929   #Make sure there are no duplicate elements.
 3930   $x=$x->uniq;
 3931   $y=$y->uniq;
 3932 
 3933   my $result;
 3934 
 3935   if ($op eq 'OR') {
 3936     # Easy...
 3937     $result = uniq(append($x, $y));
 3938   } elsif ($op eq 'XOR') {
 3939     # Make ordered list of set union.
 3940     my $union = append($x, $y)->qsort;
 3941     # Index lists.
 3942     my $s1=zeroes(byte, $union->nelem());
 3943     my $s2=zeroes(byte, $union->nelem());
 3944 
 3945     # Find indices which are duplicated - these are to be excluded
 3946     #
 3947     # We do this by comparing x with x shifted each way.
 3948     my $i1 = which($union != rotate($union, 1));
 3949     my $i2 = which($union != rotate($union, -1));
 3950     #
 3951     # We then mark/mask these in the s1 and s2 arrays to indicate which ones
 3952     # are not equal to their neighbours.
 3953     #
 3954     my $ts;
 3955     ($ts = $s1->index($i1)) .= 1 if $i1->nelem() > 0;
 3956     ($ts = $s2->index($i2)) .= 1 if $i2->nelem() > 0;
 3957 
 3958     my $inds=which($s1 == $s2);
 3959 
 3960     if ($inds->nelem() > 0) {
 3961       return $union->index($inds);
 3962     } else {
 3963       return $inds;
 3964     }
 3965 
 3966   } elsif ($op eq 'AND') {
 3967     # The intersection of the arrays.
 3968 
 3969     # Make ordered list of set union.
 3970     my $union = append($x, $y)->qsort;
 3971 
 3972     return $union->where($union == rotate($union, -1));
 3973   } else {
 3974     print "The operation $op is not known!";
 3975     return -1;
 3976   }
 3977 
 3978 }
 3979 EOD
 3980 
 3981 pp_add_exported("", 'intersect');
 3982 pp_addpm(<<'EOD');
 3983 =head2 intersect
 3984 
 3985 =for ref
 3986 
 3987 Calculate the intersection of two ndarrays
 3988 
 3989 =for usage
 3990 
 3991    Usage: $set = intersect($x, $y);
 3992 
 3993 This routine is merely a simple interface to L</setops>. See
 3994 that for more information
 3995 
 3996 =for example
 3997 
 3998 Find all numbers less that 100 that are of the form 2*y and 3*x
 3999 
 4000  pdl> $x=sequence(100)
 4001  pdl> $factor2 = which( ($x % 2) == 0)
 4002  pdl> $factor3 = which( ($x % 3) == 0)
 4003  pdl> $ii=intersect($factor2, $factor3)
 4004  pdl> p $x($ii)
 4005  [0 6 12 18 24 30 36 42 48 54 60 66 72 78 84 90 96]
 4006 
 4007 =cut
 4008 
 4009 *intersect = \&PDL::intersect;
 4010 
 4011 sub PDL::intersect {
 4012 
 4013    return setops($_[0], 'AND', $_[1]);
 4014 
 4015 }
 4016 EOD
 4017 
 4018 pp_addpm({At=>'Bot'},<<'EOD');
 4019 
 4020 =head1 AUTHOR
 4021 
 4022 Copyright (C) Tuomas J. Lukka 1997 (lukka@husc.harvard.edu). Contributions
 4023 by Christian Soeller (c.soeller@auckland.ac.nz), Karl Glazebrook
 4024 (kgb@aaoepp.aao.gov.au), Craig DeForest (deforest@boulder.swri.edu)
 4025 and Jarle Brinchmann (jarle@astro.up.pt)
 4026 All rights reserved. There is no warranty. You are allowed
 4027 to redistribute this software / documentation under certain
 4028 conditions. For details, see the file COPYING in the PDL
 4029 distribution. If this file is separated from the PDL distribution,
 4030 the copyright notice should be included in the file.
 4031 
 4032 Updated for CPAN viewing compatibility by David Mertens.
 4033 
 4034 =cut
 4035 
 4036 EOD
 4037 
 4038 pp_done();