"Fossies" - the Fresh Open Source Software Archive

Member "harminv-1.4.2/harminv.c" (6 Feb 2023, 26804 Bytes) of package /linux/privat/harminv-1.4.2.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "harminv.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.4.1_vs_1.4.2.

    1 /* Copyright (C) 2017 Massachusetts Institute of Technology.
    2  *
    3  * This program is free software; you can redistribute it and/or modify
    4  * it under the terms of the GNU General Public License as published by
    5  * the Free Software Foundation; either version 2 of the License, or
    6  * (at your option) any later version.
    7  *
    8  * This program is distributed in the hope that it will be useful,
    9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   11  * GNU General Public License for more details.
   12  *
   13  * You should have received a copy of the GNU General Public License
   14  * along with this program; if not, write to the Free Software
   15  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   16  */
   17 
   18 #include <stdlib.h>
   19 #include <stdio.h>
   20 #include <math.h>
   21 
   22 #include "harminv-int.h"
   23 #include "check.h"
   24 
   25 /**************************************************************************/
   26 
   27 /* Workaround for weird problem observed on Debian/stable with glibc
   28    2.2.5 and gcc 2.95.4, 3.3.1, and 3.4.0: the stand-alone code works
   29    fine, but crashes in clog once I link from another (C++) program.
   30    I can't reproduce this on Debian/testing with glibc 2.3.2, and it
   31    passes valgrind on that machine (valgrind crashes on the first
   32    machine), so I'm guessing this is some weird glibc bug...using
   33    my "own" clog function seems to work around the problem. */
   34 
   35 #undef clog
   36 #define clog my_clog
   37 
   38 static cmplx my_clog(cmplx z) { return (log(cabs(z)) + I * carg(z)); }
   39 
   40 /**************************************************************************/
   41 
   42 /* The harminv routines are designed to perform "harmonic inversion."
   43    That is, given a signal (a set of samples as a function of time),
   44    they decompose the signal into a finite number of (possibly
   45    exponentially-decaying) sinusoids.
   46 
   47    Essentially, because we assume that the signal has this form, we
   48    can determine the frequencies, decay constants, and amplitudes of
   49    the sinusoids much more accurately than we could via taking the FFT
   50    and looking at the peaks, for the same number of samples.
   51 
   52    We use a low-storage "filter diagonalization method" (FDM) for finding
   53    the sinusoids near a given frequency interval, described in:
   54 
   55    V. A. Mandelshtam and H. S. Taylor, "Harmonic inversion of time
   56    signals," J. Chem. Phys., vol. 107, no. 17, p. 6756-6769 (Nov. 1
   57    1997).  See also erratum, ibid, vol. 109, no. 10, p. 4128 (Sep. 8
   58    1998).
   59 
   60    with a refinement (for generate_U below) described in:
   61 
   62    Rongqing Chen and Hua Guo, "Efficient calculation of matrix
   63    elements in low storate filter diagonalization," J. Chem. Phys.,
   64    vol. 111, no. 2, p. 464-471(Jul. 8 1999).
   65 
   66    The seminal work (though less practical than the M&T algorithm) on
   67    this class of methods was done by:
   68 
   69    Michael R. Wall and Daniel Neuhauser, "Extraction, through
   70    filter-diagonalization, of general quantum eigenvalues or classical
   71    normal mode frequencies from a small number of residues or a
   72    short-time segment of a signal. I. Theory and application to a
   73    quantum-dynamics model," J. Chem. Phys., 102, no. 20, p. 8011-8022
   74    (May 22 1995).
   75 
   76    A more recent reference is:
   77 
   78    V. A. Mandelshtam, "On harmonic inversion of cross-correlation
   79    functions by the filter diagonalization method," J. Theoretical and
   80    Computational Chemistry 2 (4), 497-505 (2003).
   81 
   82 */
   83 
   84 /**************************************************************************/
   85 
   86 #define TWOPI 6.2831853071795864769252867665590057683943388
   87 
   88 /**************************************************************************/
   89 
   90 /* Crays have float == double, and don't have the Z* functions in
   91    LAPACK/BLAS...we have to use C*.  Sigh. */
   92 #if defined(CRAY) || defined(_UNICOS) || defined(_CRAYMPP)
   93 #define BLAS_FUNC(x, X) F77_FUNC(c##x, C##X)
   94 #else /* ! CRAY */
   95 #define BLAS_FUNC(x, X) F77_FUNC(z##x, Z##X)
   96 #endif /* ! CRAY */
   97 
   98 #define ZGEEV BLAS_FUNC(geev, GEEV)
   99 #define ZGGEVX BLAS_FUNC(ggevx, GGEVX)
  100 #define ZGGEV BLAS_FUNC(ggev, GGEV)
  101 #define ZGEMM BLAS_FUNC(gemm, GEMM)
  102 #define ZCOPY BLAS_FUNC(copy, COPY)
  103 #define ZAXPY BLAS_FUNC(axpy, AXPY)
  104 #define ZGEMV BLAS_FUNC(gemv, GEMV)
  105 #define ZSCAL BLAS_FUNC(scal, SCAL)
  106 
  107 #ifdef __cplusplus
  108 extern "C" {
  109 #endif
  110 
  111 /* We have to pass strings in special ways on Crays, even
  112    for passing a single character as with LAPACK.  Sigh. */
  113 #if defined(CRAY) || defined(_UNICOS) || defined(_CRAYMPP)
  114 #include <fortran.h>
  115 #define FCHARP _fcd
  116 #define F_(s) _cptofcd(s, 1) /* second argument is the string length */
  117 #else                        /* ! CRAY */
  118 #define FCHARP const char *
  119 #define F_(s) (s)
  120 #endif
  121 
  122 extern void ZGEEV(FCHARP, FCHARP, int *, cmplx *, int *, cmplx *, cmplx *, int *, cmplx *, int *,
  123                   cmplx *, int *, double *, int *);
  124 extern void ZGGEVX(FCHARP balanc, FCHARP jobvl, FCHARP jobvr, FCHARP sense, int *n, cmplx *a,
  125                    int *lda, cmplx *b, int *ldb, cmplx *alpha, cmplx *beta, cmplx *vl, int *,
  126                    cmplx *vr, int *, int *ilo, int *ihi, double *lscale, double *rscale,
  127                    double *abnrm, double *bbnrm, double *rconde, double *rcondv, cmplx *wrk,
  128                    int *lwork, double *rwork, int *iwork, int *bwork, int *info);
  129 extern void ZGGEV(FCHARP jobvl, FCHARP jobvr, int *n, cmplx *a, int *lda, cmplx *b, int *ldb,
  130                   cmplx *alpha, cmplx *beta, cmplx *vl, int *, cmplx *vr, int *, cmplx *wrk,
  131                   int *lwork, double *rwork, int *info);
  132 extern void ZGEMM(FCHARP, FCHARP, int *, int *, int *, cmplx *, cmplx *, int *, cmplx *, int *,
  133                   cmplx *, cmplx *, int *);
  134 extern void ZCOPY(int *, const cmplx *, int *, cmplx *, int *);
  135 extern void ZAXPY(int *, cmplx *, cmplx *, int *, cmplx *, int *);
  136 extern void ZGEMV(FCHARP, int *, int *, cmplx *, cmplx *, int *, cmplx *, int *, cmplx *, cmplx *,
  137                   int *);
  138 extern void ZSCAL(int *, cmplx *, cmplx *, int *);
  139 extern void HARMINV_ZDOTU(cmplx *, int *, cmplx *, int *, cmplx *, int *);
  140 
  141 #ifdef __cplusplus
  142 } /* extern "C" */
  143 #endif
  144 
  145 /**************************************************************************/
  146 
  147 /* compute c^n, where n is an integer: */
  148 static cmplx cpow_i(cmplx c, int n) {
  149   if (n < 0)
  150     return (1.0 / cpow_i(c, -n));
  151   else {
  152     cmplx result = 1;
  153     while (n > 1) {
  154       if (n % 2 == 1) result *= c;
  155       c *= c;
  156       n /= 2;
  157     }
  158     if (n > 0) result *= c;
  159     return result;
  160   }
  161 }
  162 
  163 /* Computing powers by cumulative multiplication, below, is faster
  164    than calling cpow_i repeatedly, but accumulates O(n) floating-point
  165    error.  As a compromise, we call cpow_i every NPOW iterations, which
  166    accumulates only O(NPOW) error. */
  167 #define NPOW 8
  168 
  169 /* FIXME: instead of this, we should really do the first-order expansion
  170    of the U matrix in |z - z2|, below. */
  171 #define SMALL (1e-12)
  172 #define C_CLOSE(c1, c2) (cabs((c1) - (c2)) < SMALL)
  173 
  174 /**************************************************************************/
  175 
  176 /* Initialize the JxJ2 matrix U = U_p(z,z2), as described in M&T.
  177    Also, if U1 != NULL, then set U1 = U_{p+1}(z,z2).  If z == z2, it
  178    must be the case that no two elements of z are the same and that
  179    J2 == J1; in this case the matrix U will be symmetric.  Note that c
  180    must be an array whose size n, is at least 2*K+p elements.  */
  181 static void generate_U(cmplx *U, cmplx *U1, int p, const cmplx *c, int n, int K, int J, int J2,
  182                        const cmplx *z, const cmplx *z2, cmplxl **G0, cmplxl **G0_M, cmplxl **D0) {
  183   int M = K - 1;
  184   int i, j, m;
  185   /* temp. arrays for 1/z, z^(-m), z^(-M), the G function of C&G,
  186      and the diagonal elements D[i] = U(z[i],z[i]): */
  187   cmplx *z_inv, *z_m, *z_M;
  188   cmplxl *G, *G_M, *D;
  189   cmplx *z2_inv, *z2_m, *z2_M;
  190   cmplxl *G2, *G2_M;
  191 
  192   CHECK(U && c && z && z2, "invalid arguments to generate_U");
  193   CHECK(n >= 2 * K + p, "too few coefficients in generate_U");
  194   CHECK(z != z2 || J == J2, "invalid sizes passed to generate_U");
  195   CHECK((!G0 && !G0_M && !D0) || (G0 && G0_M && D0), "G0/G0_M/D0 must be all-non-NULL/all-NULL");
  196   CHECK(!G0 || (!*G0 && !*G0_M && !*D0) || (*G0 && *G0_M && *D0),
  197         "*G0/*G0_M/*D0 must be all-non-NULL/all-NULL");
  198 
  199   /* Now, compute U according to eqs. 25-27 of Chen & Guo, but
  200      using the notation of eq. 25 of M&T.  This operation has
  201      complexity O(N*J + J*J).  At the same time, we can compute the
  202      matrix U1 as well by eqs. 29-30 of C&G, saving an extra pass
  203      over the input array. */
  204 
  205   /* first, set up some temporary arrays for caching things like
  206      z^m and 1/z, so we don't need to recompute them all the time. */
  207   CHK_MALLOC(z_inv, cmplx, J);
  208   CHK_MALLOC(z_m, cmplx, J);
  209   CHK_MALLOC(z_M, cmplx, J);
  210   if (G0 && *G0) {
  211     G = *G0;
  212     G_M = *G0_M;
  213     D = *D0;
  214   }
  215   else {
  216     CHK_MALLOC(G, cmplxl, J);
  217     CHK_MALLOC(G_M, cmplxl, J);
  218     CHK_MALLOC(D, cmplxl, J);
  219     for (i = 0; i < J; ++i) {
  220       D[i] = G[i] = G_M[i] = 0;
  221     }
  222   }
  223   for (i = 0; i < J; ++i) {
  224     z_inv[i] = 1.0 / z[i];
  225     z_m[i] = 1;
  226     z_M[i] = cpow_i(z[i], -M);
  227   }
  228   if (z2 != z) {
  229     CHK_MALLOC(z2_inv, cmplx, J2);
  230     CHK_MALLOC(z2_m, cmplx, J2);
  231     CHK_MALLOC(z2_M, cmplx, J2);
  232     CHK_MALLOC(G2, cmplxl, J2);
  233     CHK_MALLOC(G2_M, cmplxl, J2);
  234     for (i = 0; i < J2; ++i) {
  235       z2_inv[i] = 1.0 / z2[i];
  236       z2_m[i] = 1;
  237       z2_M[i] = cpow_i(z2[i], -M);
  238       G2[i] = G2_M[i] = 0;
  239     }
  240   }
  241   else {
  242     z2_inv = z2_m = z2_M = NULL;
  243     G2 = G2_M = NULL;
  244   }
  245 
  246   /* First, loop over the signal array (c), building up the
  247      spectral functions G and G_M (corresponding to G_p and
  248      G_{p+M+1} in C&G), as well as the diagonal matrix entries: */
  249   for (m = 0; m <= M; ++m) {
  250     cmplx c1 = c[m + p], c2 = c[m + p + M + 1];
  251     double d = m + 1;  /* M - fabs(M - m) + 1 */
  252     double d2 = M - m; /* M - fabs(M - (m + M + 1)) + 1 */
  253 
  254     if (!G0 || !*G0) {
  255       for (i = 0; i < J; ++i) {
  256         cmplx x1 = z_m[i] * c1;
  257         cmplx x2 = z_m[i] * c2;
  258         G[i] += x1;
  259         G_M[i] += x2;
  260         D[i] += x1 * d + x2 * d2 * z_M[i] * z_inv[i];
  261         if (m % NPOW == NPOW - 1)
  262           z_m[i] = cpow_i(z_inv[i], m + 1);
  263         else
  264           z_m[i] *= z_inv[i];
  265       }
  266     }
  267     if (z2 != z)
  268       for (i = 0; i < J2; ++i) {
  269         G2[i] += z2_m[i] * c1;
  270         G2_M[i] += z2_m[i] * c2;
  271         if (m % NPOW == NPOW - 1)
  272           z2_m[i] = cpow_i(z2_inv[i], m + 1);
  273         else
  274           z2_m[i] *= z2_inv[i];
  275       }
  276   }
  277 
  278   /* Compute U (or just the upper part if U is symmetric), via the
  279      formula from C&G; compute U1 at the same time as in C&G. */
  280   if (z2 != z) {
  281     for (i = 0; i < J; ++i)
  282       for (j = 0; j < J2; ++j) {
  283         if (C_CLOSE(z[i], z2[j]))
  284           U[i * J2 + j] = D[i];
  285         else
  286           U[i * J2 + j] =
  287               (z[i] * G2[j] - z2[j] * G[i] + z2_M[j] * G_M[i] - z_M[i] * G2_M[j]) / (z[i] - z2[j]);
  288       }
  289 
  290     if (U1)
  291       for (i = 0; i < J; ++i)
  292         for (j = 0; j < J2; ++j) {
  293           if (C_CLOSE(z[i], z2[j]))
  294             U1[i * J2 + j] = z[i] * (D[i] - G[i]) + z_M[i] * G_M[i];
  295           else
  296             U1[i * J2 + j] = (z[i] * z2[j] * (G2[j] - G[i]) + z2_M[j] * z[i] * G_M[i] -
  297                               z_M[i] * z2[j] * G2_M[j]) /
  298                              (z[i] - z2[j]);
  299         }
  300   }
  301   else { /* z == z2 */
  302     for (i = 0; i < J; ++i) {
  303       U[i * J + i] = D[i];
  304       for (j = i + 1; j < J; ++j) {
  305         U[i * J + j] =
  306             (z[i] * G[j] - z[j] * G[i] + z_M[j] * G_M[i] - z_M[i] * G_M[j]) / (z[i] - z[j]);
  307       }
  308     }
  309 
  310     if (U1)
  311       for (i = 0; i < J; ++i) {
  312         U1[i * J + i] = z[i] * (D[i] - G[i]) + z_M[i] * G_M[i];
  313         for (j = i + 1; j < J; ++j) {
  314           U1[i * J + j] =
  315               (z[i] * z[j] * (G[j] - G[i]) + z_M[j] * z[i] * G_M[i] - z_M[i] * z[j] * G_M[j]) /
  316               (z[i] - z[j]);
  317         }
  318       }
  319   }
  320 
  321   /* finally, copy the upper to the lower triangle if U is symmetric: */
  322   if (z == z2) {
  323     for (i = 0; i < J; ++i)
  324       for (j = i + 1; j < J; ++j)
  325         U[j * J + i] = U[i * J + j];
  326     if (U1)
  327       for (i = 0; i < J; ++i)
  328         for (j = i + 1; j < J; ++j)
  329           U1[j * J + i] = U1[i * J + j];
  330   }
  331 
  332   free(G2_M);
  333   free(G2);
  334   free(z2_M);
  335   free(z2_m);
  336   free(z2_inv);
  337 
  338   if (G0 && !*G0) {
  339     *G0 = G;
  340     *G0_M = G_M;
  341     *D0 = D;
  342   }
  343   else if (!G0) {
  344     free(D);
  345     free(G_M);
  346     free(G);
  347   }
  348   free(z_M);
  349   free(z_m);
  350   free(z_inv);
  351 }
  352 
  353 /**************************************************************************/
  354 
  355 static void init_z(harminv_data d, int J, cmplx *z) {
  356   d->J = J;
  357   d->z = z;
  358   CHK_MALLOC(d->U0, cmplx, J * J);
  359   CHK_MALLOC(d->U1, cmplx, J * J);
  360   generate_U(d->U0, d->U1, 0, d->c, d->n, d->K, d->J, d->J, d->z, d->z, &d->G0, &d->G0_M, &d->D0);
  361 }
  362 
  363 /**************************************************************************/
  364 
  365 harminv_data harminv_data_create(int n, const cmplx *signal, double fmin, double fmax, int nf) {
  366   int i;
  367   harminv_data d;
  368 
  369   CHECK(nf > 1, "# frequencies must > 1");
  370   CHECK(n > 0, "invalid number of data points");
  371   CHECK(signal, "invalid NULL signal array");
  372   CHECK(fmin < fmax, "should have fmin < fmax");
  373 
  374   CHK_MALLOC(d, struct harminv_data_struct, 1);
  375   d->c = signal;
  376   d->n = n;
  377   d->K = n / 2 - 1;
  378   d->fmin = fmin;
  379   d->fmax = fmax;
  380   d->nfreqs = -1; /* we haven't computed eigen-solutions yet */
  381   d->B = d->u = d->amps = d->U0 = d->U1 = (cmplx *)NULL;
  382   d->G0 = d->G0_M = d->D0 = (cmplxl *)NULL;
  383   d->errs = (double *)NULL;
  384 
  385   CHK_MALLOC(d->z, cmplx, nf);
  386   for (i = 0; i < nf; ++i)
  387     d->z[i] = cexp(-I * TWOPI * (fmin + i * ((fmax - fmin) / (nf - 1))));
  388 
  389   init_z(d, nf, d->z);
  390 
  391   return d;
  392 }
  393 
  394 /**************************************************************************/
  395 
  396 void harminv_data_destroy(harminv_data d) {
  397   if (d) {
  398     free(d->u);
  399     free(d->B);
  400     free(d->U1);
  401     free(d->U0);
  402     free(d->G0);
  403     free(d->G0_M);
  404     free(d->D0);
  405     free(d->z);
  406     free(d->amps);
  407     free(d->errs);
  408     free(d);
  409   }
  410 }
  411 
  412 /**************************************************************************/
  413 
  414 /* Compute the symmetric dot product of x and y, both vectors of
  415    length n.  If they are column-vectors, this is: transpose(x) * y.
  416    (We could use the BLAS ZDOTU function for this, but calling Fortran
  417     functions, as opposed to subroutines, from C is problematic.) */
  418 static cmplx symmetric_dot(int n, cmplx *x, cmplx *y) {
  419   cmplxl dot = 0;
  420   int i;
  421   for (i = 0; i < n; ++i)
  422     dot += x[i] * y[i];
  423   return dot;
  424 }
  425 
  426 /**************************************************************************/
  427 
  428 /**************************************************************************/
  429 
  430 /* Solve for the eigenvalues (v) and eigenvectors (rows of V) of the
  431    complex-symmetric n x n matrix A.  The eigenvectors are normalized
  432    to 1 according to the symmetric dot product (i.e. no complex
  433    conjugation). */
  434 static void solve_eigenvects(int n, const cmplx *A0, cmplx *V, cmplx *v) {
  435   int lwork, info;
  436   cmplx *work;
  437   double *rwork;
  438   cmplx *A;
  439 
  440   /* according to the ZGEEV documentation, the matrix A is overwritten,
  441      and we don't want to overwrite our input A0 */
  442   CHK_MALLOC(A, cmplx, n * n);
  443   {
  444     int n2 = n * n, one = 1;
  445     ZCOPY(&n2, A0, &one, A, &one);
  446   }
  447 
  448   /* Unfortunately, LAPACK doesn't have a special solver for the
  449      complex-symmetric eigenproblem.  For now, just use the general
  450      non-symmetric solver, and realize that the left eigenvectors
  451      are the complex-conjugates of the right eigenvectors. */
  452 
  453 #if 0 /* LAPACK seems to be buggy here, returning ridiculous sizes at times */
  454      cmplx wsize;
  455      lwork = -1; /* compute optimal workspace size */
  456      ZGEEV(F_("N"), F_("V"), &n, A, &n, v, V, &n, V, &n, &wsize, &lwork, rwork, &info);
  457      if (info == 0)
  458       lwork = floor(creal(wsize) + 0.5);
  459      else
  460       lwork = 2*n;
  461      CHECK(lwork > 0, "zgeev is not returning a positive work size!");
  462 #else
  463   lwork = 4 * n; /* minimum is 2*n; we'll be generous. */
  464 #endif
  465 
  466   CHK_MALLOC(rwork, double, 2 * n);
  467   CHK_MALLOC(work, cmplx, lwork);
  468 
  469   ZGEEV(F_("N"), F_("V"), &n, A, &n, v, V, &n, V, &n, work, &lwork, rwork, &info);
  470 
  471   free(work);
  472   free(rwork);
  473   free(A);
  474 
  475   CHECK(info >= 0, "invalid argument to ZGEEV");
  476   CHECK(info <= 0, "failed convergence in ZGEEV");
  477 
  478   /* Finally, we need to fix the normalization of the eigenvectors,
  479      since LAPACK normalizes them under the ordinary dot product,
  480      i.e. with complex conjugation.  (In principle, do we also need
  481      to re-orthogonalize, for the case of degenerate eigenvalues?) */
  482   {
  483     int i, one = 1;
  484     for (i = 0; i < n; ++i) {
  485       cmplx norm = 1.0 / csqrt(symmetric_dot(n, V + i * n, V + i * n));
  486       ZSCAL(&n, &norm, V + i * n, &one);
  487     }
  488   }
  489 }
  490 
  491 /**************************************************************************/
  492 
  493 /* how conservative do we need to be for this? */
  494 #define SINGULAR_THRESHOLD 1e-5
  495 
  496 /* Solve the eigenvalue problem U1 b = u U0 b, where b is the eigenvector
  497    and u is the eigenvalue.  u = exp(iwt - at) then contains both the
  498    frequency and the decay constant. */
  499 void harminv_solve_once(harminv_data d) {
  500   int J, i, one = 1;
  501   cmplx zone = 1.0, zzero = 0.0;
  502   cmplx *V0, *v0, *H1, *V1; /* for eigensolutions of U0 and U1 */
  503   double max_v0 = 0.0;
  504 
  505   J = d->J;
  506   CHK_MALLOC(V0, cmplx, J * J);
  507   CHK_MALLOC(v0, cmplx, J);
  508 
  509   /* Unfortunately, U0 is very likely to be singular, so we must
  510      first extract the non-singular eigenvectors and only work in
  511      that sub-space.  See the Wall & Neuhauser paper.  */
  512 
  513   solve_eigenvects(J, d->U0, V0, v0);
  514 
  515   /* find maximum |eigenvalue| */
  516   for (i = 0; i < J; ++i) {
  517     double v = cabs(v0[i]);
  518     if (v > max_v0) max_v0 = v;
  519   }
  520 
  521   /* we must remove the singular components of U0, those
  522      that are less than some threshold times the maximum eigenvalue.
  523      Also, we need to scale the eigenvectors by 1/sqrt(eigenval). */
  524   d->nfreqs = J;
  525   for (i = 0; i < J; ++i) {
  526     if (cabs(v0[i]) < SINGULAR_THRESHOLD * max_v0) {
  527       v0[i] = 0; /* tag as a "hole" */
  528       d->nfreqs -= 1;
  529     }
  530     else { /* not singular */
  531       cmplx s;
  532       int j;
  533       /* move the eigenvector to the first "hole" left by
  534          deleting singular eigenvalues: */
  535       for (j = 0; j < i && v0[j] != 0.0; ++j)
  536         ;
  537       if (j < i) {
  538         ZCOPY(&J, V0 + i * J, &one, V0 + j * J, &one);
  539         v0[j] = v0[i];
  540         v0[i] = 0; /* tag as a "hole" */
  541       }
  542       s = 1.0 / csqrt(v0[j]);
  543       ZSCAL(&J, &s, V0 + j * J, &one);
  544     }
  545   }
  546 
  547   CHK_MALLOC(d->B, cmplx, d->nfreqs * J);
  548   CHK_MALLOC(d->u, cmplx, d->nfreqs);
  549   CHK_MALLOC(V1, cmplx, d->nfreqs * d->nfreqs);
  550   CHK_MALLOC(H1, cmplx, d->nfreqs * d->nfreqs);
  551 
  552   /* compute H1 = V0 * U1 * V0': */
  553 
  554   /* B = V0 * U1: */
  555   ZGEMM(F_("N"), F_("N"), &J, &d->nfreqs, &J, &zone, d->U1, &J, V0, &J, &zzero, d->B, &J);
  556   /* H1 = B * transpose(V0) */
  557   ZGEMM(F_("T"), F_("N"), &d->nfreqs, &d->nfreqs, &J, &zone, V0, &J, d->B, &J, &zzero, H1,
  558         &d->nfreqs);
  559 
  560   /* Finally, we can find the eigenvalues and eigenvectors: */
  561   solve_eigenvects(d->nfreqs, H1, V1, d->u);
  562   /* B = V1 * V0: */
  563   ZGEMM(F_("N"), F_("N"), &J, &d->nfreqs, &d->nfreqs, &zone, V0, &J, V1, &d->nfreqs, &zzero, d->B,
  564         &J);
  565 
  566   free(H1);
  567   free(V1);
  568   free(v0);
  569   free(V0);
  570 }
  571 
  572 /**************************************************************************/
  573 
  574 /* After solving once, solve again using the solutions from last
  575    time as the input to the spectra estimator this time.
  576 
  577    Optionally, if mode_ok is not NULL, we can only use the solutions k from
  578    last time that pass ok(d, k, ok_d).  Currently, this is not recommended
  579    as it seems to make things worse.  */
  580 void harminv_solve_again(harminv_data d, harminv_mode_ok_func ok, void *ok_d) {
  581   int i, j;
  582   char *mode_ok = 0;
  583   CHECK(d->nfreqs >= 0, "haven't computed eigensolutions yet");
  584 
  585   if (!d->nfreqs) return; /* no eigensolutions to work with */
  586 
  587   if (ok) {
  588     CHK_MALLOC(mode_ok, char, d->nfreqs);
  589     ok(d, -1, ok_d); /* initialize */
  590     for (i = 0; i < d->nfreqs; ++i)
  591       mode_ok[i] = ok(d, i, ok_d);
  592   }
  593 
  594   free(d->B);
  595   free(d->U1);
  596   free(d->U0);
  597   free(d->G0);
  598   free(d->G0_M);
  599   free(d->D0);
  600   free(d->z);
  601   free(d->amps);
  602   free(d->errs);
  603   d->B = d->U1 = d->U0 = d->z = d->amps = (cmplx *)NULL;
  604   d->G0 = d->G0_M = d->D0 = (cmplxl *)NULL;
  605   d->errs = (double *)NULL;
  606 
  607   /* Spectral grid needs to be on the unit circle or system is unstable: */
  608   for (i = j = 0; i < d->nfreqs; ++i)
  609     if (!ok || mode_ok[i]) d->u[j++] = d->u[i] / cabs(d->u[i]);
  610   d->nfreqs = j;
  611 
  612   if (ok) {
  613     ok(d, -2, ok_d); /* finish */
  614     free(mode_ok);
  615   }
  616 
  617   d->u = (cmplx *)realloc(d->u, sizeof(cmplx) * d->nfreqs);
  618 
  619   if (!d->nfreqs) return; /* no eigensolutions to work with */
  620 
  621   init_z(d, d->nfreqs, d->u);
  622 
  623   d->nfreqs = 0;
  624   d->B = d->u = NULL;
  625 
  626   harminv_solve_once(d);
  627 }
  628 
  629 /**************************************************************************/
  630 
  631 /* Keep re-solving as long as spurious solutions are eliminated.
  632 
  633    Currently, it is recommended that you use harminv_solve (i.e. pass ok = 0);
  634    see harminv_solve_again. */
  635 void harminv_solve_ok_modes(harminv_data d, harminv_mode_ok_func ok, void *ok_d) {
  636 
  637   harminv_solve_once(d);
  638 
  639   /* This is not in the papers, but seems to be a good idea:
  640      plug the u's back in as z's for another pass, and repeat
  641      as long as the number of eigenvalues decreases.   Effectively,
  642      this gives us more basis functions where the modes are. */
  643   {
  644     int prev_nf, cur_nf, nf_ok;
  645     cur_nf = harminv_get_num_freqs(d);
  646     do {
  647       prev_nf = cur_nf;
  648       harminv_solve_again(d, ok, ok_d);
  649       cur_nf = harminv_get_num_freqs(d);
  650       if (ok) {
  651         ok(d, -1, ok_d); /* initialize */
  652         for (nf_ok = 0; nf_ok < cur_nf && ok(d, nf_ok, ok_d); ++nf_ok)
  653           ;
  654         ok(d, -2, ok_d); /* finish */
  655       }
  656       else
  657         nf_ok = cur_nf;
  658     } while (cur_nf < prev_nf || nf_ok < cur_nf);
  659     /* FIXME: solve one more time for good measure? */
  660   }
  661 }
  662 
  663 /**************************************************************************/
  664 
  665 void harminv_solve(harminv_data d) { harminv_solve_ok_modes(d, NULL, NULL); }
  666 
  667 /**************************************************************************/
  668 
  669 #define NORMSQR(c) (creal(c) * creal(c) + cimag(c) * cimag(c))
  670 
  671 /* Returns an array (of size harminv_get_num_freqs(d)) of estimates
  672    for the |error| in the solution frequencies.  Solutions with
  673    errors much larger than the smallest error are likely to be spurious. */
  674 double *harminv_compute_frequency_errors(harminv_data d) {
  675   int i, J2, one = 1;
  676   cmplx *U2, *U2b;
  677   double *freq_err;
  678 
  679   CHECK(d->nfreqs >= 0, "haven't computed eigensolutions yet");
  680   if (!d->nfreqs) return NULL;
  681 
  682   CHK_MALLOC(freq_err, double, d->nfreqs);
  683 
  684   J2 = d->J * d->J;
  685   CHK_MALLOC(U2, cmplx, J2);
  686   generate_U(U2, NULL, 2, d->c, d->n, d->K, d->J, d->J, d->z, d->z, NULL, NULL, NULL);
  687   CHK_MALLOC(U2b, cmplx, d->J);
  688 
  689   /* For each eigenstate, compute an estimate of the error, roughly
  690      as suggested in W&N, eq. (2.19). */
  691 
  692   for (i = 0; i < d->nfreqs; ++i) {
  693     cmplx zone = 1.0, zzero = 0.0;
  694 
  695     /* compute U2b = U2 * B[i] */
  696     ZGEMV(F_("T"), &d->J, &d->J, &zone, U2, &d->J, d->B + i * d->J, &one, &zzero, U2b, &one);
  697 
  698     /* ideally, B[i] should satisfy U2 B[i] = u^2 U0 B[i].
  699        since B U0 B = 1, then we can get a second estimate
  700        for u by sqrt(B[i] U2 B[i]), and from this we compute
  701        the relative error in the (complex) frequency. */
  702 
  703     freq_err[i] = cabs(clog(csqrt(symmetric_dot(d->J, d->B + i * d->J, U2b)) / d->u[i])) /
  704                   cabs(clog(d->u[i]));
  705   }
  706 
  707   free(U2b);
  708   free(U2);
  709   return freq_err;
  710 }
  711 
  712 /**************************************************************************/
  713 
  714 #define UNITY_THRESH 1e-4 /* FIXME? */
  715 
  716 /* true if UNITY_THRESH < |u|^n < 1/UNITY_THRESH. */
  717 static int u_near_unity(cmplx u, int n) {
  718   double nlgabsu = n * log(cabs(u));
  719   return (log(UNITY_THRESH) < nlgabsu && nlgabsu < -log(UNITY_THRESH));
  720 }
  721 
  722 /* Return an array (of size harminv_get_num_freqs(d)) of complex
  723    amplitudes of each sinusoid in the solution. */
  724 cmplx *harminv_compute_amplitudes(harminv_data d) {
  725   int k, j;
  726   cmplx *u;
  727   cmplx *Uu;
  728   cmplx *a; /* the amplitudes of the eigenfrequencies */
  729   int ku, nu;
  730 
  731   CHECK(d->nfreqs >= 0, "haven't computed eigensolutions yet");
  732   if (!d->nfreqs) return NULL;
  733 
  734   CHK_MALLOC(a, cmplx, d->nfreqs);
  735   CHK_MALLOC(u, cmplx, d->nfreqs);
  736 
  737   for (k = ku = 0; k < d->nfreqs; ++k)
  738     if (u_near_unity(d->u[k], d->n)) u[ku++] = d->u[k];
  739   nu = ku;
  740 
  741   CHK_MALLOC(Uu, cmplx, d->J * nu);
  742   generate_U(Uu, NULL, 0, d->c, d->n, d->K, d->J, nu, d->z, u, &d->G0, &d->G0_M, &d->D0);
  743 
  744   /* compute the amplitudes via eq. 27 of M&T, except when |u| is
  745      too small..in that case, the computation of Uu is unstable,
  746      and we use eq. 26 instead (which doesn't use half of the data,
  747      but doesn't blow up either): */
  748   for (k = ku = 0; k < d->nfreqs; ++k) {
  749     cmplxl asum = 0;
  750     if (u_near_unity(d->u[k], d->n)) { /* eq. 27 */
  751       for (j = 0; j < d->J; ++j)
  752         asum += d->B[k * d->J + j] * Uu[j * nu + ku];
  753       asum /= d->K;
  754       ku++;
  755     }
  756     else { /* eq. 26 */
  757       for (j = 0; j < d->J; ++j)
  758         asum += d->B[k * d->J + j] * d->G0[j];
  759     }
  760     a[k] = asum * asum;
  761   }
  762 
  763   free(Uu);
  764   free(u);
  765   return a;
  766 }
  767 
  768 /**************************************************************************/
  769 
  770 int harminv_get_num_freqs(const harminv_data d) { return d->nfreqs; }
  771 
  772 double harminv_get_freq(harminv_data d, int k) {
  773   CHECK(d->nfreqs >= 0, "haven't computed eigensolutions yet");
  774   CHECK(k >= 0 && k < d->nfreqs, "argument out of range in harminv_get_freq");
  775   return (-carg(d->u[k]) / TWOPI);
  776 }
  777 
  778 double harminv_get_decay(harminv_data d, int k) {
  779   CHECK(d->nfreqs >= 0, "haven't computed eigensolutions yet");
  780   CHECK(k >= 0 && k < d->nfreqs, "argument out of range in harminv_get_decay");
  781   return (-log(cabs(d->u[k])));
  782 }
  783 
  784 double harminv_get_Q(harminv_data d, int k) {
  785   CHECK(k >= 0 && k < d->nfreqs, "argument out of range in harminv_get_Q");
  786   return (TWOPI * fabs(harminv_get_freq(d, k)) / (2 * harminv_get_decay(d, k)));
  787 }
  788 
  789 void harminv_get_omega(cmplx *omega, harminv_data d, int k) {
  790   CHECK(d->nfreqs >= 0, "haven't computed eigensolutions yet");
  791   CHECK(k >= 0 && k < d->nfreqs, "argument out of range in harminv_get_omega");
  792   *omega = (I * clog(d->u[k]));
  793   return;
  794 }
  795 
  796 void harminv_get_amplitude(cmplx *amplitude, harminv_data d, int k) {
  797   CHECK(k >= 0 && k < d->nfreqs, "argument out of range in harminv_get_amplitude");
  798   if (!d->amps) d->amps = harminv_compute_amplitudes(d);
  799   *amplitude = d->amps[k];
  800   return;
  801 }
  802 
  803 double harminv_get_freq_error(harminv_data d, int k) {
  804   CHECK(k >= 0 && k < d->nfreqs, "argument out of range in harminv_get_freq_error");
  805   if (!d->errs) d->errs = harminv_compute_frequency_errors(d);
  806   return d->errs[k];
  807 }
  808 
  809 /**************************************************************************/