"Fossies" - the Fresh Open Source Software Archive

Member "gretl-2020e/lib/src/estimate.c" (9 Sep 2020, 102467 Bytes) of package /linux/misc/gretl-2020e.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "estimate.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 2020d_vs_2020e.

    1 /*
    2  *  gretl -- Gnu Regression, Econometrics and Time-series Library
    3  *  Copyright (C) 2001 Allin Cottrell and Riccardo "Jack" Lucchetti
    4  *
    5  *  This program is free software: you can redistribute it and/or modify
    6  *  it under the terms of the GNU General Public License as published by
    7  *  the Free Software Foundation, either version 3 of the License, or
    8  *  (at your option) any later version.
    9  *
   10  *  This program is distributed in the hope that it will be useful,
   11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13  *  GNU General Public License for more details.
   14  *
   15  *  You should have received a copy of the GNU General Public License
   16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
   17  *
   18  */
   19 
   20 /* estimate.c - basic gretl estimation procedures */
   21 
   22 #include "libgretl.h"
   23 #include "qr_estimate.h"
   24 #include "gretl_panel.h"
   25 #include "libset.h"
   26 #include "compat.h"
   27 #include "missing_private.h"
   28 #include "estim_private.h"
   29 #include "system.h"
   30 #include "tsls.h"
   31 #include "nls.h"
   32 
   33 #ifdef WIN32
   34 # include "gretl_win32.h"
   35 #endif
   36 
   37 /**
   38  * SECTION:estimate
   39  * @short_description: estimation of regression models
   40  * @title: Estimation
   41  * @include: gretl/libgretl.h
   42  *
   43  * Most libgretl functions that estimate regression models are
   44  * collected here.
   45  *
   46  * Please note that most of these functions return a #MODEL
   47  * struct -- literally a struct, not a pointer to one.
   48  * To handle such a return value in C you should either (a)
   49  * declare a MODEL and assign to it, or (b) allocate a MODEL
   50  * "on the heap" and then assign to its content using the
   51  * indirection operator. The code fragment below illustrates
   52  * both approaches.
   53  *
   54  * <informalexample><programlisting>
   55  * #include &lt;gretl/libgretl.h&gt;
   56  *
   57  * int ols_using_gretl (const int *list, DATASET *dset,
   58  *                      PRN *prn)
   59  * {
   60  *     MODEL model;
   61  *     MODEL *pmod;
   62  *     int err;
   63  *
   64  *     // method (a)
   65  *     model = lsq(list, dset, OLS, OPT_NONE);
   66  *     err = model.errcode;
   67  *     if (!err) {
   68  *         printmodel(&amp;model, dset, OPT_NONE, prn);
   69  *     }
   70  *     clear_model(&amp;model);
   71  *
   72  *     // method (b)
   73  *     pmod = gretl_model_new();
   74  *     *pmod = lsq(list, dset, OLS, OPT_NONE);
   75  *     err = pmod->errcode;
   76  *     if (!err) {
   77  *         printmodel(pmod, dset, OPT_NONE, prn);
   78  *     }
   79  *     gretl_model_free(pmod);
   80  *
   81  *     return err;
   82  * }
   83  * </programlisting></informalexample>
   84  *
   85  */
   86 
   87 /* Comment on 'TINY': It's the minimum value for 'test' (see below)
   88    that libgretl's Cholesky decomposition routine will accept before
   89    rejecting a data matrix as too highly collinear.  If you set it too
   90    high, data sets for which Cholesky could produce reasonable
   91    estimates will be rejected.  If you set it too low (and 100 *
   92    DBL_EPSILON is definitely too low), gretl will produce more or less
   93    worthless coefficient estimates when given highly collinear data.
   94    Before making a permanent change to the value of TINY, check how
   95    gretl does on the NIST reference data sets for linear regression
   96    and ensure you're not getting any garbage results.  The current
   97    value enables us to get decent results on the NIST nonlinear
   98    regression test suite; it might be a bit too low for some contexts.
   99 */
  100 
  101 #define TINY      8.0e-09 /* was 2.1e-09 (last changed 2007/01/20) */
  102 #define SMALL     2.0e-08 /* threshold for printing a warning for collinearity */
  103 #define YBARZERO  0.5e-14 /* threshold for treating mean of dependent
  104                  variable as effectively zero */
  105 #define ESSZERO   1.0e-22 /* threshold for considering a tiny error-sum-of-
  106                  squares value to be effectively zero */
  107 
  108 #define XPX_DEBUG 0
  109 
  110 static void regress (MODEL *pmod, double *xpy,
  111              double ysum, double ypy,
  112              const DATASET *dset,
  113              double rho, gretlopt opt);
  114 static void omitzero (MODEL *pmod, const DATASET *dset,
  115               gretlopt opt);
  116 static int lagdepvar (const int *list, const DATASET *dset);
  117 static double estimate_rho (const int *list, DATASET *dset,
  118                 gretlopt opt, PRN *prn, int *err);
  119 
  120 /* compute statistics for the dependent variable in a model */
  121 
  122 static void model_depvar_stats (MODEL *pmod, DATASET *dset)
  123 {
  124     double xx, sum = 0.0;
  125     int yno = pmod->list[1];
  126     int t, dwt = 0;
  127 
  128     if (pmod->ci == WLS && gretl_model_get_int(pmod, "wt_dummy")) {
  129     dwt = pmod->nwt;
  130     }
  131 
  132     pmod->ybar = pmod->sdy = NADBL;
  133 
  134     if (pmod->nobs <= 0) {
  135     return;
  136     }
  137 
  138     for (t=pmod->t1; t<=pmod->t2; t++) {
  139     if (dwt && dset->Z[pmod->nwt][t] == 0.0) {
  140         continue;
  141     }
  142     if (!model_missing(pmod, t)) {
  143         sum += dset->Z[yno][t];
  144     }
  145     }
  146 
  147     pmod->ybar = sum / pmod->nobs;
  148 
  149     sum = 0.0;
  150     for (t=pmod->t1; t<=pmod->t2; t++) {
  151     if (dwt && dset->Z[pmod->nwt][t] == 0.0) {
  152         continue;
  153     }
  154     if (!model_missing(pmod, t)) {
  155         sum += (dset->Z[yno][t] - pmod->ybar);
  156     }
  157     }
  158 
  159     pmod->ybar = pmod->ybar + sum / pmod->nobs;
  160 
  161     if (fabs(pmod->ybar) < YBARZERO) {
  162     pmod->ybar = 0.0;
  163     }
  164 
  165     sum = 0.0;
  166     for (t=pmod->t1; t<=pmod->t2; t++) {
  167     if (dwt && dset->Z[pmod->nwt][t] == 0.0) {
  168         continue;
  169     }
  170     if (!model_missing(pmod, t)) {
  171         xx = dset->Z[yno][t] - pmod->ybar;
  172         sum += xx * xx;
  173     }
  174     }
  175 
  176     sum = (pmod->nobs > 1)? sum / (pmod->nobs - 1) : 0.0;
  177 
  178     pmod->sdy = (sum >= 0)? sqrt(sum) : NADBL;
  179 }
  180 
  181 /* determine the degrees of freedom for a model */
  182 
  183 static int get_model_df (MODEL *pmod)
  184 {
  185     int err = 0;
  186 
  187     pmod->ncoeff = pmod->list[0] - 1;
  188     pmod->dfd = pmod->nobs - pmod->ncoeff;
  189 
  190     if (pmod->dfd < 0) {
  191     pmod->errcode = E_DF;
  192         gretl_errmsg_sprintf(_("No. of obs (%d) is less than no. "
  193                    "of parameters (%d)"), pmod->nobs,
  194                  pmod->ncoeff);
  195     err = 1;
  196     } else {
  197     pmod->dfn = pmod->ncoeff - pmod->ifc;
  198     }
  199 
  200     return err;
  201 }
  202 
  203 #define LDDEBUG 0
  204 
  205 static int transcribe_ld_vcv (MODEL *targ, MODEL *src)
  206 {
  207     int nv = targ->ncoeff;
  208     int nxpx = (nv * nv + nv) / 2;
  209     int i, j, err = 0;
  210 
  211     err = makevcv(src, src->sigma);
  212 
  213     if (!err && targ->vcv == NULL) {
  214     targ->vcv = malloc(nxpx * sizeof *targ->vcv);
  215     if (targ->vcv == NULL) {
  216         err = E_ALLOC;
  217     }
  218     }
  219 
  220     if (!err) {
  221     for (i=0; i<nv; i++) {
  222         for (j=i; j<nv; j++) {
  223         targ->vcv[ijton(i, j, nv)] =
  224             src->vcv[ijton(i, j, src->ncoeff)];
  225         }
  226     }
  227     }
  228 
  229     return err;
  230 }
  231 
  232 /* Calculate consistent standard errors (and VCV matrix) when doing
  233    AR(1) estimation of a model with lagged dependent variable.  See
  234    Ramanathan, Introductory Econometrics, 5e, p. 450.
  235 */
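
      /* A sketch of what ldepvar_std_errors() does, for orientation
         (a hedged summary of the code below, not extra functionality):
         it runs the auxiliary OLS regression

             uhat_t = b_1*(x_{1,t} - rho*x_{1,t-1}) + ...
                      + b_k*(x_{k,t} - rho*x_{k,t-1})
                      + c*uhat_{t-1} + v_t

         where uhat is the residual from the original model, then copies
         that auxiliary model's standard errors and VCV back into the
         original model. */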
  236 
  237 static int
  238 ldepvar_std_errors (MODEL *pmod, DATASET *dset)
  239 {
  240     MODEL emod;
  241     const double *x;
  242     int orig_t1 = dset->t1;
  243     int orig_t2 = dset->t2;
  244     double rho = gretl_model_get_double(pmod, "rho_gls");
  245     int origv = dset->v;
  246     int vnew = pmod->list[0] + 1 - pmod->ifc;
  247     int *list;
  248     int vi, vm;
  249     int i, t;
  250     int err = 0;
  251 
  252 #if LDDEBUG
  253     PRN *prn = gretl_print_new(GRETL_PRINT_STDOUT, NULL);
  254     printlist(pmod->list, "pmod->list");
  255     printf("vnew = %d\n", vnew);
  256     printf("rho = %g\n", rho);
  257 #endif
  258 
  259     list = gretl_list_new(vnew + pmod->ifc);
  260     if (list == NULL) {
  261     pmod->errcode = E_ALLOC;
  262     return 1;
  263     }
  264 
  265     err = dataset_add_series(dset, vnew);
  266     if (err) {
  267     free(list);
  268     pmod->errcode = E_ALLOC;
  269     return 1;
  270     }
  271 
  272     vi = origv;
  273 
  274     /* dependent var is residual from original model */
  275     for (t=0; t<dset->n; t++) {
  276     dset->Z[vi][t] = pmod->uhat[t];
  277     }
  278     strcpy(dset->varname[vi], "eps");
  279     list[1] = vi++;
  280 
  281     /* indep vars are rho-differenced vars from original model */
  282     for (i=2; i<=pmod->list[0]; i++) {
  283     vm = pmod->list[i];
  284     if (vm == 0) {
  285         list[i] = 0;
  286         continue;
  287     }
  288     sprintf(dset->varname[vi], "%.6s_r", dset->varname[vm]);
  289     x = dset->Z[vm];
  290     for (t=0; t<dset->n; t++) {
  291         if (t == 0 || na(x[t]) || na(x[t-1])) {
  292         dset->Z[vi][t] = NADBL;
  293         } else {
  294         dset->Z[vi][t] = x[t] - rho * x[t-1];
  295         }
  296     }
  297     list[i] = vi++;
  298     }
  299 
  300     /* last indep var is lagged u-hat */
  301     for (t=0; t<dset->n; t++) {
  302     if (t == 0) {
  303         dset->Z[vi][t] = NADBL;
  304     } else {
  305         dset->Z[vi][t] = dset->Z[pmod->list[1]][t-1];
  306     }
  307     if (na(dset->Z[vi][t])) {
  308         continue;
  309     }
  310     for (i=0; i<pmod->ncoeff; i++) {
  311         x = dset->Z[pmod->list[i+2]];
  312         if (na(x[t-1])) {
  313         dset->Z[vi][t] = NADBL;
  314         break;
  315         } else {
  316         dset->Z[vi][t] -= pmod->coeff[i] * x[t-1];
  317         }
  318     }
  319     }
  320 
  321     list[list[0]] = vi;
  322     strcpy(dset->varname[vi], "uhat_1");
  323 
  324     dset->t1 = pmod->t1;
  325     dset->t2 = pmod->t2;
  326 
  327     emod = lsq(list, dset, OLS, OPT_A);
  328     if (emod.errcode) {
  329     err = emod.errcode;
  330     } else {
  331 #if LDDEBUG
  332     printmodel(&emod, dset, OPT_NONE, prn);
  333     gretl_print_destroy(prn);
  334 #endif
  335     for (i=0; i<pmod->ncoeff; i++) {
  336         pmod->sderr[i] = emod.sderr[i];
  337     }
  338 
  339     err = transcribe_ld_vcv(pmod, &emod);
  340     }
  341 
  342     clear_model(&emod);
  343 
  344     free(list);
  345     dataset_drop_last_variables(dset, vnew);
  346 
  347     dset->t1 = orig_t1;
  348     dset->t2 = orig_t2;
  349 
  350     if (err && !pmod->errcode) {
  351     pmod->errcode = err;
  352     }
  353 
  354     return err;
  355 }
  356 
  357 /* special computation of statistics for autoregressive models */
  358 
  359 static int compute_ar_stats (MODEL *pmod, const DATASET *dset,
  360                  double rho)
  361 {
  362     int i, v, t, yno = pmod->list[1];
  363     int pwe = (pmod->opt & OPT_P);
  364     double x, pw1 = 0.0;
  365 
  366     if (pwe) {
  367     pw1 = sqrt(1.0 - rho * rho);
  368     }
  369 
  370     for (t=pmod->t1; t<=pmod->t2; t++) {
  371     if (t == pmod->t1 && pwe) {
  372         x = pw1 * dset->Z[yno][t];
  373         for (i=pmod->ifc; i<pmod->ncoeff; i++) {
  374         v = pmod->list[i+2];
  375         x -= pmod->coeff[i] * pw1 * dset->Z[v][t];
  376         }
  377         if (pmod->ifc) {
  378         x -= pw1 * pmod->coeff[0];
  379         }
  380     } else {
  381         x = dset->Z[yno][t] - rho*dset->Z[yno][t-1];
  382         for (i=0; i<pmod->ncoeff; i++) {
  383         v = pmod->list[i+2];
  384         x -= pmod->coeff[i] * (dset->Z[v][t] - rho*dset->Z[v][t-1]);
  385         }
  386     }
  387     pmod->uhat[t] = x;
  388     pmod->yhat[t] = dset->Z[yno][t] - x;
  389     }
  390 
  391     pmod->rsq = gretl_corr_rsq(pmod->t1, pmod->t2, dset->Z[yno], pmod->yhat);
  392     pmod->adjrsq =
  393     1.0 - ((1.0 - pmod->rsq) * (pmod->t2 - pmod->t1) /
  394            (double) pmod->dfd);
  395 
  396     return 0;
  397 }
  398 
  399 /* calculation of WLS stats (in agreement with GNU R) */
  400 
  401 static void get_wls_stats (MODEL *pmod, const DATASET *dset,
  402                gretlopt opt)
  403 {
  404     int dumwt = gretl_model_get_int(pmod, "wt_dummy");
  405     int t, wobs = pmod->nobs, yno = pmod->list[1];
  406     double x, dy, wmean = 0.0, wsum = 0.0;
  407 
  408     for (t=pmod->t1; t<=pmod->t2; t++) {
  409     if (model_missing(pmod, t)) {
  410         continue;
  411     }
  412     if (dset->Z[pmod->nwt][t] == 0.0 && !dumwt) {
  413         wobs--;
  414         pmod->dfd -= 1;
  415     } else {
  416         wmean += dset->Z[pmod->nwt][t] * dset->Z[yno][t];
  417         wsum += dset->Z[pmod->nwt][t];
  418     }
  419     }
  420 
  421     wmean /= wsum;
  422     x = 0.0;
  423 
  424     for (t=pmod->t1; t<=pmod->t2; t++) {
  425     if (model_missing(pmod, t) || dset->Z[pmod->nwt][t] == 0.0) {
  426         continue;
  427     }
  428     dy = dset->Z[yno][t] - wmean;
  429     x += dset->Z[pmod->nwt][t] * dy * dy;
  430     }
  431 
  432     if (!(opt & OPT_R)) {
  433     pmod->fstt = ((x - pmod->ess) * pmod->dfd) / (pmod->dfn * pmod->ess);
  434     }
  435     pmod->rsq = (1 - (pmod->ess / x));
  436     pmod->adjrsq = 1 - ((1 - pmod->rsq) * (pmod->nobs - 1)/pmod->dfd);
  437 }
  438 
  439 static void fix_wls_values (MODEL *pmod, const DATASET *dset)
  440 {
  441     int t;
  442 
  443     if (gretl_model_get_int(pmod, "wt_dummy")) {
  444     for (t=pmod->t1; t<=pmod->t2; t++) {
  445         if (dset->Z[pmod->nwt][t] == 0.0) {
  446         pmod->yhat[t] = pmod->uhat[t] = NADBL;
  447         }
  448     }
  449     } else {
  450     double ess_orig = 0.0;
  451     double sw, sigma_orig;
  452 
  453     for (t=pmod->t1; t<=pmod->t2; t++) {
  454         if (model_missing(pmod, t)) {
  455         continue;
  456         }
  457         if (dset->Z[pmod->nwt][t] == 0.0) {
  458         pmod->yhat[t] = pmod->uhat[t] = NADBL;
  459         pmod->nobs -= 1;
  460         } else {
  461         sw = sqrt(dset->Z[pmod->nwt][t]);
  462         pmod->yhat[t] /= sw;
  463         pmod->uhat[t] /= sw;
  464         ess_orig += pmod->uhat[t] * pmod->uhat[t];
  465         }
  466     }
  467 
  468     sigma_orig = sqrt(ess_orig / pmod->dfd);
  469     gretl_model_set_double(pmod, "ess_orig", ess_orig);
  470     gretl_model_set_double(pmod, "sigma_orig", sigma_orig);
  471     }
  472 }
  473 
  474 /* drop the weight var from the list of regressors (WLS) */
  475 
  476 static void dropwt (int *list)
  477 {
  478     int i;
  479 
  480     list[0] -= 1;
  481     for (i=1; i<=list[0]; i++) {
  482     list[i] = list[i+1];
  483     }
  484 }
  485 
  486 static int model_missval_count (const MODEL *pmod)
  487 {
  488     int mc = 0;
  489 
  490     if (pmod->missmask != NULL) {
  491     int t;
  492 
  493     for (t=pmod->t1; t<=pmod->t2; t++) {
  494         if (pmod->missmask[t] == '1') {
  495         mc++;
  496         }
  497     }
  498     }
  499 
  500     return mc;
  501 }
  502 
  503 #define SMPL_DEBUG 0
  504 
  505 static int
  506 lsq_check_for_missing_obs (MODEL *pmod, gretlopt opts, DATASET *dset,
  507                int *misst)
  508 {
  509     int ref_mask = reference_missmask_present();
  510     int missv = 0;
  511     int reject_missing = 0;
  512 
  513 #if SMPL_DEBUG
  514     fprintf(stderr, "lsq_check_for_missing_obs: ref_mask = %d\n",
  515         ref_mask);
  516 #endif
  517 
  518     if (ref_mask) {
  519     int err = apply_reference_missmask(pmod);
  520 
  521     /* If there was a reference mask present, it was put there
  522        as part of a hypothesis test on some original model, and
  523        it should be respected in estimation of this model */
  524 
  525     if (err) {
  526         pmod->errcode = E_ALLOC;
  527         return 1;
  528     } else {
  529         return 0;
  530     }
  531     }
  532 
  533     /* can't do HAC VCV with missing obs in middle */
  534     if ((opts & OPT_R) && dataset_is_time_series(dset) &&
  535     !libset_get_bool(FORCE_HC)) {
  536     reject_missing = 1;
  537     }
  538 
  539     if (opts & OPT_M) {
  540     reject_missing = 1;
  541     }
  542 
  543     if (reject_missing) {
  544     /* reject missing obs within adjusted sample */
  545     missv = model_adjust_sample(pmod, dset->n,
  546                     (const double **) dset->Z,
  547                     misst);
  548     } else {
  549     /* we'll try to work around missing obs */
  550     missv = model_adjust_sample(pmod, dset->n,
  551                     (const double **) dset->Z,
  552                     NULL);
  553     }
  554 
  555 #if SMPL_DEBUG
  556     if (1) {
  557     char t1s[OBSLEN], t2s[OBSLEN];
  558     int misscount = model_missval_count(pmod);
  559 
  560     ntolabel(t1s, pmod->t1, dset);
  561     ntolabel(t2s, pmod->t2, dset);
  562     fprintf(stderr, "*** after adjustment, t1=%d (%s), t2=%d (%s)\n",
  563         pmod->t1, t1s, pmod->t2, t2s);
  564     fprintf(stderr, "Valid observations in range = %d\n",
  565         pmod->t2 - pmod->t1 + 1 - misscount);
  566     }
  567 #endif
  568 
  569     return missv;
  570 }
  571 
  572 static int
  573 lagged_depvar_check (MODEL *pmod, const DATASET *dset)
  574 {
  575     int ldv = lagdepvar(pmod->list, dset);
  576 
  577     if (ldv) {
  578     gretl_model_set_int(pmod, "ldepvar", ldv);
  579     } else if (gretl_model_get_int(pmod, "ldepvar")) {
  580     gretl_model_destroy_data_item(pmod, "ldepvar");
  581     }
  582 
  583     return ldv;
  584 }
  585 
  586 static void
  587 log_depvar_ll (MODEL *pmod, const DATASET *dset)
  588 {
  589     char parent[VNAMELEN];
  590 
  591     if (series_is_log(dset, pmod->list[1], parent)) {
  592     double jll = pmod->lnL;
  593     int t;
  594 
  595     for (t=0; t<dset->n; t++) {
  596         if (!na(pmod->uhat[t])) {
  597         jll -= dset->Z[pmod->list[1]][t];
  598         }
  599     }
  600     gretl_model_set_double(pmod, "jll", jll);
  601     gretl_model_set_string_as_data(pmod,
  602                        "log-parent",
  603                        gretl_strdup(parent));
  604     }
  605 }
  606 
  607 static int check_weight_var (MODEL *pmod, const double *w, int *effobs)
  608 {
  609     const char *wtzero =
  610     N_("Weight variable is all zeros, aborting regression");
  611     const char *wtneg =
  612     N_("Weight variable contains negative values");
  613     int t, allzero = 1;
  614 
  615     for (t=pmod->t1; t<=pmod->t2; t++) {
  616     if (w[t] < 0.0) {
  617         gretl_errmsg_set(_(wtneg));
  618         pmod->errcode = E_DATA;
  619         return 1;
  620     } else if (w[t] > 0.0) {
  621         allzero = 0;
  622     }
  623     }
  624 
  625     if (allzero) {
  626     gretl_errmsg_set(_(wtzero));
  627     pmod->errcode = E_DATA;
  628     return 1;
  629     }
  630 
  631     *effobs = gretl_isdummy(pmod->t1, pmod->t2, w);
  632 
  633     if (*effobs) {
  634     /* the weight var is a dummy, with effobs 1s */
  635     gretl_model_set_int(pmod, "wt_dummy", 1);
  636     }
  637 
  638     return 0;
  639 }
  640 
  641 void maybe_shift_ldepvar (MODEL *pmod, DATASET *dset)
  642 {
  643     if (gretl_model_get_int(pmod, "ldepvar")) {
  644     lagged_depvar_check(pmod, dset);
  645     }
  646 }
  647 
  648 /*
  649  * XTX_XTy:
  650  * @list: list of variables in model.
  651  * @t1: starting observation.
  652  * @t2: ending observation.
  653  * @dset: pointer to dataset.
  654  * @nwt: ID number of variable used as weight, or 0.
  655  * @rho: quasi-differencing coefficient, or 0.0.
  656  * @pwe: if non-zero, use Prais-Winsten for first observation.
  657  * @xpx: on output, X'X matrix as lower triangle, stacked by columns.
  658  * @xpy: on output, X'y vector (but see below).
  659  * @ysum: location to receive \sum y_i, or NULL.
  660  * @ypy: location to receive (scalar) y'y, or NULL.
  661  * @mask: missing observations mask, or NULL.
  662  *
  663  * Calculates X'X and X'y, with various possible transformations
  664  * of the original data, depending on @nwt, @rho and @pwe.
  665  * If X'y is not required, @xpy can be given as NULL.
  666  *
  667  * Note: the y-related pointer arguments @xpy, @ysum, and @ypy
  668  * form a "package": either all should be given, or all should
  669  * be NULL.
  670  *
  671  * Returns: 0 on success, non-zero on error.
  672  */
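
      /* Summary of the transformations applied below (a restatement of
         the code, not additional behaviour): with quasi-differencing
         (rho != 0) each series x enters as

             x*_t = x_t - rho * x_{t-1}          for t > t1
             x*_t = sqrt(1 - rho^2) * x_t        at t = t1 (Prais-Winsten only)

         while with a weight series w (nwt != 0) the cross-products are
         accumulated as sum_t w_t * x_{i,t} * x_{j,t}, which amounts to
         transforming each regressor as x*_t = sqrt(w_t) * x_t. */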
  673 
  674 static int XTX_XTy (const int *list, int t1, int t2,
  675             const DATASET *dset, int nwt,
  676             double rho, int pwe,
  677             double *xpx, double *xpy,
  678             double *ysum, double *ypy,
  679             const char *mask)
  680 {
  681     int yno = list[1];
  682     int lmin = (xpy != NULL)? 2 : 1;
  683     int lmax = list[0];
  684     int qdiff = (rho != 0.0);
  685     const double *y = NULL;
  686     const double *w = NULL;
  687     const double *xi = NULL;
  688     const double *xj = NULL;
  689     double x, pw1;
  690     int i, j, t, m;
  691     int err = 0;
  692 
  693     /* Prais-Winsten term */
  694     if (qdiff && pwe) {
  695     pw1 = sqrt(1.0 - rho * rho);
  696     } else {
  697     pwe = 0;
  698     pw1 = 0.0;
  699     }
  700 
  701     /* dependent variable */
  702     y = dset->Z[yno];
  703 
  704     if (nwt) {
  705     /* weight variable */
  706     w = dset->Z[nwt];
  707     }
  708 
  709     if (xpy != NULL) {
  710     *ysum = *ypy = 0.0;
  711 
  712     for (t=t1; t<=t2; t++) {
  713         if (masked(mask, t)) {
  714         continue;
  715         }
  716         x = y[t];
  717         if (qdiff) {
  718         if (pwe && t == t1) {
  719             x = pw1 * y[t];
  720         } else {
  721             x -= rho * y[t-1];
  722         }
  723         } else if (nwt) {
  724         x *= sqrt(w[t]);
  725         }
  726         *ysum += x;
  727         *ypy += x * x;
  728     }
  729 
  730     if (*ypy <= 0.0) {
  731         /* error condition */
  732         return yno;
  733     }
  734     }
  735 
  736     m = 0;
  737 
  738     if (qdiff) {
  739     /* quasi-difference the data */
  740     for (i=lmin; i<=lmax; i++) {
  741         xi = dset->Z[list[i]];
  742         for (j=i; j<=lmax; j++) {
  743         xj = dset->Z[list[j]];
  744         x = 0.0;
  745         for (t=t1; t<=t2; t++) {
  746             if (pwe && t == t1) {
  747             x += pw1 * xi[t1] * pw1 * xj[t];
  748             } else {
  749             x += (xi[t] - rho * xi[t-1]) *
  750                 (xj[t] - rho * xj[t-1]);
  751             }
  752         }
  753         if (i == j && x < DBL_EPSILON)  {
  754             return E_SINGULAR;
  755         }
  756         xpx[m++] = x;
  757         }
  758         if (xpy != NULL) {
  759         x = 0.0;
  760         for (t=t1; t<=t2; t++) {
  761             if (pwe && t == t1) {
  762             x += pw1 * y[t] * pw1 * xi[t];
  763             } else {
  764             x += (y[t] - rho * y[t-1]) *
  765                 (xi[t] - rho * xi[t-1]);
  766             }
  767         }
  768         xpy[i-2] = x;
  769         }
  770     }
  771     } else if (nwt) {
  772     /* weight the data */
  773     for (i=lmin; i<=lmax; i++) {
  774         xi = dset->Z[list[i]];
  775         for (j=i; j<=lmax; j++) {
  776         xj = dset->Z[list[j]];
  777         x = 0.0;
  778         for (t=t1; t<=t2; t++) {
  779             if (!masked(mask, t)) {
  780             x += w[t] * xi[t] * xj[t];
  781             }
  782         }
  783         if (i == j && x < DBL_EPSILON) {
  784             return E_SINGULAR;
  785         }
  786         xpx[m++] = x;
  787         }
  788         if (xpy != NULL) {
  789         x = 0.0;
  790         for (t=t1; t<=t2; t++) {
  791             if (!masked(mask, t)) {
  792             x += w[t] * y[t] * xi[t];
  793             }
  794         }
  795         xpy[i-2] = x;
  796         }
  797     }
  798     } else if (mask != NULL) {
  799     /* plain data, but with missing obs mask */
  800     for (i=lmin; i<=lmax; i++) {
  801         xi = dset->Z[list[i]];
  802         for (j=i; j<=lmax; j++) {
  803         xj = dset->Z[list[j]];
  804         x = 0.0;
  805         for (t=t1; t<=t2; t++) {
  806             if (!masked(mask, t)) {
  807             x += xi[t] * xj[t];
  808             }
  809         }
  810         if (i == j && x < DBL_EPSILON) {
  811             return E_SINGULAR;
  812         }
  813         xpx[m++] = x;
  814         }
  815         if (xpy != NULL) {
  816         x = 0.0;
  817         for (t=t1; t<=t2; t++) {
  818             if (!masked(mask, t)) {
  819             x += y[t] * xi[t];
  820             }
  821         }
  822         xpy[i-2] = x;
  823         }
  824     }
  825     } else {
  826     /* plain data, no missing obs mask */
  827     for (i=lmin; i<=lmax; i++) {
  828         xi = dset->Z[list[i]];
  829         for (j=i; j<=lmax; j++) {
  830         xj = dset->Z[list[j]];
  831         x = 0.0;
  832         for (t=t1; t<=t2; t++) {
  833             x += xi[t] * xj[t];
  834         }
  835         if (i == j && x < DBL_EPSILON) {
  836             return E_SINGULAR;
  837         }
  838         xpx[m++] = x;
  839         }
  840         if (xpy != NULL) {
  841         x = 0.0;
  842         for (t=t1; t<=t2; t++) {
  843             x += y[t] * xi[t];
  844         }
  845         xpy[i-2] = x;
  846         }
  847     }
  848     }
  849 
  850     return err;
  851 }
  852 
  853 static int native_cholesky_regress (MODEL *pmod, const DATASET *dset,
  854                     gretlopt opt)
  855 {
  856     double rho, ysum = 0.0, ypy = 0.0;
  857     double *xpy;
  858     int k = pmod->ncoeff;
  859     int nxpx = k * (k + 1) / 2;
  860     int pwe = (pmod->opt & OPT_P);
  861     int i;
  862 
  863     if (nxpx == 0) {
  864     fprintf(stderr, "problem: nxpx = 0\n");
  865     pmod->errcode = E_DATA;
  866     return pmod->errcode;
  867     }
  868 
  869     xpy = malloc(k * sizeof *xpy);
  870     if (xpy == NULL) {
  871     pmod->errcode = E_ALLOC;
  872     return pmod->errcode;
  873     }
  874 
  875     pmod->xpx = malloc(nxpx * sizeof *pmod->xpx);
  876     pmod->coeff = malloc(k * sizeof *pmod->coeff);
  877     pmod->sderr = malloc(k * sizeof *pmod->sderr);
  878 
  879     if (pmod->yhat == NULL) {
  880     pmod->yhat = malloc(pmod->full_n * sizeof *pmod->yhat);
  881     }
  882 
  883     if (pmod->uhat == NULL) {
  884     pmod->uhat = malloc(pmod->full_n * sizeof *pmod->uhat);
  885     }
  886 
  887     if (pmod->xpx == NULL || pmod->coeff == NULL ||
  888     pmod->sderr == NULL || pmod->yhat == NULL ||
  889     pmod->uhat == NULL) {
  890     free(xpy);
  891     pmod->errcode = E_ALLOC;
  892     return pmod->errcode;
  893     }
  894 
  895     for (i=0; i<k; i++) {
  896     xpy[i] = 0.0;
  897     }
  898     for (i=0; i<nxpx; i++) {
  899     pmod->xpx[i] = 0.0;
  900     }
  901 
  902     rho = gretl_model_get_double_default(pmod, "rho_gls", 0.0);
  903 
  904     /* calculate regression results, Cholesky style */
  905     pmod->errcode = XTX_XTy(pmod->list, pmod->t1, pmod->t2, dset,
  906                 pmod->nwt, rho, pwe, pmod->xpx, xpy,
  907                 &ysum, &ypy, pmod->missmask);
  908 
  909 #if XPX_DEBUG
  910     for (i=0; i<k; i++) {
  911     fprintf(stderr, "xpy[%d] = %g\n", i, xpy[i]);
  912     }
  913     for (i=0; i<nxpx; i++) {
  914     fprintf(stderr, "xpx[%d] = %g\n", i, pmod->xpx[i]);
  915     }
  916     fputc('\n', stderr);
  917 #endif
  918 
  919     if (!pmod->errcode) {
  920     regress(pmod, xpy, ysum, ypy, dset, rho, opt);
  921     }
  922 
  923     free(xpy);
  924 
  925     return pmod->errcode;
  926 }
  927 
  928 static int gretl_null_regress (MODEL *pmod, const DATASET *dset)
  929 {
  930     double yt;
  931     int t;
  932 
  933     if (pmod->yhat == NULL) {
  934     pmod->yhat = malloc(pmod->full_n * sizeof *pmod->yhat);
  935     }
  936 
  937     if (pmod->uhat == NULL) {
  938     pmod->uhat = malloc(pmod->full_n * sizeof *pmod->uhat);
  939     }
  940 
  941     if (pmod->yhat == NULL || pmod->uhat == NULL) {
  942     pmod->errcode = E_ALLOC;
  943     return pmod->errcode;
  944     }
  945 
  946     pmod->nobs = 0;
  947     pmod->ifc = 0;
  948     pmod->ess = 0.0;
  949     pmod->rsq = pmod->adjrsq = 0.0;
  950 
  951     for (t=0; t<pmod->full_n; t++) {
  952     yt = dset->Z[pmod->list[1]][t];
  953     if (t < pmod->t1 || t > pmod->t2 || na(yt)) {
  954         pmod->uhat[t] = pmod->yhat[t] = NADBL;
  955     } else {
  956         pmod->uhat[t] = yt;
  957         pmod->yhat[t] = 0.0;
  958         pmod->ess += yt * yt;
  959         pmod->nobs += 1;
  960     }
  961     }
  962 
  963     if (pmod->ess == 0) {
  964     pmod->sigma = 0.0;
  965     } else if (pmod->nobs > 1) {
  966     pmod->sigma = sqrt(pmod->ess / (pmod->nobs - 1));
  967     } else {
  968     pmod->errcode = E_DATA;
  969     }
  970 
  971     if (pmod->errcode == 0) {
  972     ls_criteria(pmod);
  973     }
  974 
  975     return pmod->errcode;
  976 }
  977 
  978 /* check whether the variable with ID @yno is all zeros;
  979    return 1 if so, otherwise return 0 */
  980 
  981 static int depvar_zero (const MODEL *pmod, int yno, const double **Z)
  982 {
  983     double yt;
  984     int t, ret = 1;
  985 
  986     for (t=pmod->t1; t<=pmod->t2; t++) {
  987     if (model_missing(pmod, t)) {
  988         continue;
  989     }
  990     yt = Z[yno][t];
  991     if (pmod->nwt) {
  992         yt *= Z[pmod->nwt][t];
  993     }
  994     if (yt != 0.0) {
  995         ret = 0;
  996         break;
  997     }
  998     }
  999 
 1000     return ret;
 1001 }
 1002 
 1003 static int cholesky_regress (MODEL *pmod, const DATASET *dset,
 1004                  gretlopt opt)
 1005 {
 1006     int T = pmod->t2 - pmod->t1 + 1;
 1007     int k = pmod->list[0] - 1;
 1008 
 1009     if (k >= 50 || (T >= 250 && k >= 30)) {
 1010     return lapack_cholesky_regress(pmod, dset, opt);
 1011     } else {
 1012     return native_cholesky_regress(pmod, dset, opt);
 1013     }
 1014 }
 1015 
 1016 /* limited freeing of elements before passing a model
 1017    on for QR estimation in the case of (near-)singularity
 1018 */
 1019 
 1020 static void model_free_storage (MODEL *pmod)
 1021 {
 1022     free(pmod->xpx);
 1023     free(pmod->coeff);
 1024     free(pmod->sderr);
 1025 
 1026     pmod->xpx = NULL;
 1027     pmod->coeff = NULL;
 1028     pmod->sderr = NULL;
 1029 }
 1030 
 1031 /*
 1032  * ar1_lsq:
 1033  * @list: model specification.
 1034  * @dset: dataset struct.
 1035  * @ci: command index, e.g. OLS, AR1.
 1036  * @opt: option flags (see lsq and ar1_model).
 1037  * @rho: coefficient for quasi-differencing the data, or 0.
 1038  *
 1039  * Nests lsq() and ar1_model(); is also used internally with ci == OLS
 1040  * but rho != 0.0 when estimating rho via, e.g., Cochrane-Orcutt
 1041  * iteration.
 1042  *
 1043  * Returns: model struct containing the estimates.
 1044  */
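
      /* Background (standard AR(1) algebra, included here only as a
         reminder): if u_t = rho*u_{t-1} + e_t in y_t = X_t b + u_t, then

             y_t - rho*y_{t-1} = (X_t - rho*X_{t-1}) b + e_t,   t = 2,...,T

         has well-behaved errors, so OLS on the quasi-differenced data
         yields the feasible GLS estimates; Prais-Winsten (OPT_P) keeps
         t = 1 by scaling that first observation by sqrt(1 - rho^2). */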
 1045 
 1046 static MODEL ar1_lsq (const int *list, DATASET *dset,
 1047               GretlCmdIndex ci, gretlopt opt,
 1048               double rho)
 1049 {
 1050     MODEL mdl;
 1051     int effobs = 0;
 1052     int missv = 0, misst = 0;
 1053     int pwe = (opt & OPT_P);
 1054     int nullmod = 0, ldv = 0;
 1055     int save_hc = -1;
 1056     int yno, i;
 1057 
 1058     gretl_model_init(&mdl, dset);
 1059 
 1060     if (list == NULL || dset == NULL || dset->Z == NULL) {
 1061     fprintf(stderr, "E_DATA: lsq: list = %p, dset = %p\n",
 1062         (void *) list, (void *) dset);
 1063     mdl.errcode = E_DATA;
 1064         return mdl;
 1065     }
 1066 
 1067     if (opt & OPT_C) {
 1068     /* cluster option implies robust */
 1069     opt |= OPT_R;
 1070     }
 1071 
 1072     if (ci == AR1) {
 1073     if (opt & OPT_P) {
 1074         /* OPT_P: Prais-Winsten */
 1075         mdl.opt |= OPT_P;
 1076     } else if (opt & OPT_H) {
 1077         /* OPT_H: Hildreth-Lu */
 1078         mdl.opt |= OPT_H;
 1079     }
 1080     } else if (rho != 0.0 && (opt & OPT_P)) {
 1081     /* estimation of rho in progress */
 1082     mdl.opt |= OPT_P;
 1083     }
 1084 
 1085     if (list[0] == 1 && ci == OLS && (opt & OPT_U)) {
 1086     /* null model OK */
 1087     nullmod = 1;
 1088     } else if (list[0] == 1 || dset->v == 1) {
 1089     fprintf(stderr, "E_DATA: lsq: list[0] = %d, dset->v = %d\n",
 1090         list[0], dset->v);
 1091     mdl.errcode = E_DATA;
 1092         return mdl;
 1093     }
 1094 
 1095     /* preserve a copy of the list supplied, for future reference */
 1096     mdl.list = gretl_list_copy(list);
 1097     if (mdl.list == NULL) {
 1098         mdl.errcode = E_ALLOC;
 1099         return mdl;
 1100     }
 1101 
 1102     mdl.t1 = dset->t1;
 1103     mdl.t2 = dset->t2;
 1104     mdl.full_n = dset->n;
 1105     mdl.ci = ci;
 1106 
 1107     /* Doing weighted least squares? */
 1108     if (ci == WLS) {
 1109     check_weight_var(&mdl, dset->Z[mdl.list[1]], &effobs);
 1110     if (mdl.errcode) {
 1111         return mdl;
 1112     }
 1113     mdl.nwt = mdl.list[1];
 1114     } else {
 1115     mdl.nwt = 0;
 1116     }
 1117 
 1118     /* sanity check */
 1119     if (mdl.t1 < 0 || mdl.t2 > dset->n - 1) {
 1120         mdl.errcode = E_DATA;
 1121         goto lsq_abort;
 1122     }
 1123 
 1124     /* adjust sample range and check for missing obs: this
 1125        may set the model errcode */
 1126     missv = lsq_check_for_missing_obs(&mdl, opt, dset, &misst);
 1127     if (mdl.errcode) {
 1128         goto lsq_abort;
 1129     }
 1130 
 1131     /* react to presence of unhandled missing obs */
 1132     if (missv) {
 1133     gretl_errmsg_sprintf(_("Missing value encountered for "
 1134                    "variable %d, obs %d"),
 1135                  missv, misst);
 1136     mdl.errcode = E_DATA;
 1137     return mdl;
 1138     }
 1139 
 1140     if (ci == WLS) {
 1141     dropwt(mdl.list);
 1142     }
 1143 
 1144     yno = mdl.list[1];
 1145 
 1146     /* check for unknown vars in list */
 1147     for (i=1; i<=mdl.list[0]; i++) {
 1148         if (mdl.list[i] > dset->v - 1) {
 1149             mdl.errcode = E_UNKVAR;
 1150             goto lsq_abort;
 1151         }
 1152     }
 1153 
 1154     /* check for zero dependent var */
 1155     if (depvar_zero(&mdl, yno, (const double **) dset->Z)) {
 1156         mdl.errcode = E_ZERO;
 1157         goto lsq_abort;
 1158     }
 1159 
 1160     /* drop any vars that are all zero and repack the list */
 1161     omitzero(&mdl, dset, opt);
 1162 
 1163     /* if regressor list contains a constant, record this fact and
 1164        place it first among the regressors */
 1165     mdl.ifc = reglist_check_for_const(mdl.list, dset);
 1166 
 1167     /* Check for presence of lagged dependent variable?
 1168        (Don't bother if this is an auxiliary regression.) */
 1169     if (!(opt & OPT_A)) {
 1170     ldv = lagged_depvar_check(&mdl, dset);
 1171     }
 1172 
 1173     /* AR1: advance the starting observation by one? */
 1174     if (rho != 0.0 && !pwe) {
 1175     mdl.t1 += 1;
 1176     }
 1177 
 1178     mdl.ncoeff = mdl.list[0] - 1;
 1179     if (effobs) {
 1180     mdl.nobs = effobs; /* FIXME? */
 1181     } else {
 1182     mdl.nobs = mdl.t2 - mdl.t1 + 1;
 1183     if (mdl.missmask != NULL) {
 1184         mdl.nobs -= model_missval_count(&mdl);
 1185     }
 1186     }
 1187 
 1188     /* check degrees of freedom */
 1189     if (get_model_df(&mdl)) {
 1190         goto lsq_abort;
 1191     }
 1192 
 1193     /* if df correction is not wanted, record this fact */
 1194     if (opt & OPT_N) {
 1195     mdl.opt |= OPT_N;
 1196     }
 1197 
 1198     if (dataset_is_time_series(dset)) {
 1199     opt |= OPT_T;
 1200     }
 1201 
 1202     /* remove Durbin-Watson p-value flag if not appropriate */
 1203     if (ldv || !(opt & OPT_T)) {
 1204     opt &= ~OPT_I;
 1205     }
 1206 
 1207     if (opt & OPT_J) {
 1208     /* --jackknife */
 1209     save_hc = libset_get_int(HC_VERSION);
 1210     libset_set_int(HC_VERSION, 4);
 1211     opt |= OPT_R;
 1212     mdl.opt |= (OPT_J | OPT_R);
 1213     }
 1214 
 1215     if (rho != 0.0) {
 1216     gretl_model_set_double(&mdl, "rho_gls", rho);
 1217     }
 1218 
 1219     if (nullmod) {
 1220     gretl_null_regress(&mdl, dset);
 1221     } else if (libset_get_bool(USE_QR)) {
 1222     gretl_qr_regress(&mdl, dset, opt);
 1223     } else if (opt & (OPT_R | OPT_I)) {
 1224     gretl_qr_regress(&mdl, dset, opt);
 1225     } else {
 1226     cholesky_regress(&mdl, dset, opt);
 1227     if (mdl.errcode == E_SINGULAR) {
 1228         /* near-perfect collinearity is better handled by QR */
 1229         model_free_storage(&mdl);
 1230         gretl_qr_regress(&mdl, dset, opt);
 1231     }
 1232     }
 1233 
 1234     if (save_hc >= 0) {
 1235     libset_set_int(HC_VERSION, save_hc);
 1236     }
 1237 
 1238     if (mdl.errcode) {
 1239     goto lsq_abort;
 1240     }
 1241 
 1242     /* get the mean and sd of dep. var. and make available */
 1243     model_depvar_stats(&mdl, dset);
 1244 
 1245     /* Doing an autoregressive procedure? */
 1246     if (ci == AR1) {
 1247     if (compute_ar_stats(&mdl, dset, rho)) {
 1248         goto lsq_abort;
 1249     }
 1250     if (ldv) {
 1251         ldepvar_std_errors(&mdl, dset);
 1252         if (mdl.errcode) {
 1253         goto lsq_abort;
 1254         }
 1255     }
 1256     if ((opt & OPT_H) && (opt & OPT_B)) {
 1257         gretl_model_set_int(&mdl, "no-corc", 1);
 1258     }
 1259     }
 1260 
 1261     /* weighted least squares: fix yhat and uhat; add calculation of
 1262        ESS and sigma based on unweighted data
 1263     */
 1264     if (ci == WLS) {
 1265     get_wls_stats(&mdl, dset, opt);
 1266     fix_wls_values(&mdl, dset);
 1267     }
 1268 
 1269     if (mdl.missmask == NULL && (opt & OPT_T) && !(opt & OPT_I)) {
 1270     mdl.rho = rhohat(1, mdl.t1, mdl.t2, mdl.uhat);
 1271     mdl.dw = dwstat(1, &mdl, dset);
 1272     } else if (!(opt & OPT_I)) {
 1273     mdl.rho = mdl.dw = NADBL;
 1274     }
 1275 
 1276     /* special case: degenerate model */
 1277     if (mdl.ncoeff == 1 && mdl.ifc) {
 1278     mdl.rsq = mdl.adjrsq = 0.0;
 1279     mdl.fstt = NADBL;
 1280     }
 1281 
 1282     /* Generate model selection statistics */
 1283     if (ci == AR1) {
 1284     mdl.lnL = NADBL;
 1285     mle_criteria(&mdl, 0);
 1286     } else {
 1287     ls_criteria(&mdl);
 1288     }
 1289     if (!(opt & OPT_A) && !na(mdl.lnL)) {
 1290     log_depvar_ll(&mdl, dset);
 1291     }
 1292 
 1293  lsq_abort:
 1294 
 1295     if (!(opt & OPT_A)) {
 1296     /* if it's not an auxiliary regression, set an ID number
 1297        on the model */
 1298     set_model_id(&mdl, opt);
 1299     }
 1300 
 1301     return mdl;
 1302 }
 1303 
 1304 /**
 1305  * lsq:
 1306  * @list: dependent variable plus list of regressors.
 1307  * @dset: dataset struct.
 1308  * @ci: one of the command indices in #LSQ_MODEL.
 1309  * @opt: option flags: zero or more of the following --
 1310  *   OPT_R: compute robust standard errors;
 1311  *   OPT_A: treat as auxiliary regression (don't bother checking
 1312  *     for presence of lagged dependent var, don't augment model count);
 1313  *   OPT_N: don't use degrees of freedom correction for standard
 1314  *      error of regression;
 1315  *   OPT_M: reject missing observations within sample range;
 1316  *   OPT_Z: (internal use) suppress the automatic elimination of
 1317  *      perfectly collinear variables.
 1318  *   OPT_X: compute "variance matrix" as just (X'X)^{-1}
 1319  *   OPT_I: compute Durbin-Watson p-value.
 1320  *   OPT_U: treat null model as OK.
 1321  *   OPT_P: (ar1 only): use Prais-Winsten.
 1322  *
 1323  * Computes least squares estimates of the model specified by @list,
 1324  * using an estimator determined by the value of @ci.
 1325  *
 1326  * Returns: a #MODEL struct, containing the estimates.
 1327  */
 1328 
 1329 MODEL lsq (const int *list, DATASET *dset, GretlCmdIndex ci,
 1330        gretlopt opt)
 1331 {
 1332     return ar1_lsq(list, dset, ci, opt, 0.0);
 1333 }
 1334 
 1335 /**
 1336  * ar1_model:
 1337  * @list: model specification.
 1338  * @dset: dataset struct.
 1339  * @opt: option flags: may include OPT_H to use Hildreth-Lu,
 1340  *       OPT_P to use Prais-Winsten, OPT_B to suppress Cochrane-Orcutt
 1341  *       fine-tuning of Hildreth-Lu results, OPT_G to generate
 1342  *       a gnuplot graph of the search in Hildreth-Lu case.
 1343  * @prn: gretl printer.
 1344  *
 1345  * Returns: model struct containing the estimates.
 1346  */
 1347 
 1348 MODEL ar1_model (const int *list, DATASET *dset,
 1349          gretlopt opt, PRN *prn)
 1350 {
 1351     MODEL mdl;
 1352     double rho;
 1353     int err = 0;
 1354 
 1355     rho = estimate_rho(list, dset, opt, prn, &err);
 1356 
 1357     if (err) {
 1358     gretl_model_init(&mdl, NULL);
 1359     mdl.errcode = err;
 1360     return mdl;
 1361     } else {
 1362     return ar1_lsq(list, dset, AR1, opt, rho);
 1363     }
 1364 }
 1365 
 1366 static int make_ess (MODEL *pmod, const DATASET *dset)
 1367 {
 1368     int i, t, yno = pmod->list[1], l0 = pmod->list[0];
 1369     int nwt = pmod->nwt;
 1370     double yhat, ut;
 1371 
 1372     pmod->ess = 0.0;
 1373 
 1374     for (t=pmod->t1; t<=pmod->t2; t++) {
 1375     if (nwt && dset->Z[nwt][t] == 0.0) {
 1376         continue;
 1377     }
 1378     if (model_missing(pmod, t)) {
 1379         continue;
 1380     }
 1381     yhat = 0.0;
 1382     for (i=2; i<=l0; i++) {
 1383         yhat += pmod->coeff[i-2] * dset->Z[pmod->list[i]][t];
 1384     }
 1385     ut = dset->Z[yno][t] - yhat;
 1386     if (nwt) {
 1387         ut *= sqrt(dset->Z[nwt][t]);
 1388     }
 1389     pmod->ess += ut * ut;
 1390     }
 1391 
 1392     return 0;
 1393 }
 1394 
 1395 /* The heuristic used here is that the model effectively
 1396    contains a constant or intercept if the means of y and
 1397    yhat are the same, or in other words the mean residual
 1398    is zero -- but allowing for some numerical slop.
 1399 */
 1400 
 1401 int check_for_effective_const (MODEL *pmod, const DATASET *dset)
 1402 {
 1403     double ubar = 0.0, ybar = 0.0;
 1404     const double *y = dset->Z[pmod->list[1]];
 1405     int t, ret = 0;
 1406 
 1407     for (t=pmod->t1; t<=pmod->t2; t++) {
 1408     if (!na(pmod->uhat[t])) {
 1409         ubar += pmod->uhat[t];
 1410         ybar += y[t];
 1411     }
 1412     }
 1413 
 1414     ubar = fabs(ubar / pmod->nobs);
 1415     ybar = fabs(ybar / pmod->nobs);
 1416 
 1417     /* the calibration below is debatable: watch out for
 1418        "wrong" results */
 1419 
 1420     if (ubar < 1.0e-7) {
 1421     /* absolute value of mean residual small enough? */
 1422     ret = 1;
 1423     } else if (ubar < 0.01 && ybar > 1.0e-20) {
 1424     /* try scaled variant? */
 1425     ret = ubar / ybar < 2.0e-15;
 1426     }
 1427 
 1428 #if 0
 1429     fprintf(stderr, "check_for_effective_const:\n"
 1430         "ubar = %g, ybar = %g, ret = %d\n",
 1431         ubar, ybar, ret);
 1432 #endif
 1433 
 1434     if (ret) {
 1435     gretl_model_set_int(pmod, "effconst", 1);
 1436     pmod->dfn -= 1;
 1437     } else if (gretl_model_get_int(pmod, "effconst")) {
 1438     gretl_model_set_int(pmod, "effconst", 0);
 1439     pmod->dfn += 1;
 1440     }
 1441 
 1442     return ret;
 1443 }
 1444 
 1445 static void uncentered_r_squared (MODEL *pmod, const DATASET *dset)
 1446 {
 1447     double y0 = dset->Z[pmod->list[1]][pmod->t1];
 1448 
 1449     /* special computation for the case of TSS = 0, i.e.
 1450        the dependent variable is a constant */
 1451 
 1452     if (y0 != 0) {
 1453     double den = pmod->nobs * y0 * y0;
 1454 
 1455     pmod->rsq = 1 - (pmod->ess / den);
 1456     gretl_model_set_int(pmod, "uncentered", 1);
 1457     }
 1458 }
 1459 
 1460 static void compute_r_squared (MODEL *pmod, const DATASET *dset,
 1461                    int *ifc)
 1462 {
 1463     int yno = pmod->list[1];
 1464 
 1465     pmod->rsq = 1.0 - (pmod->ess / pmod->tss);
 1466 
 1467     if (*ifc == 0) {
 1468     *ifc = check_for_effective_const(pmod, dset);
 1469     }
 1470 
 1471     if (pmod->dfd > 0) {
 1472     double yt, den = 0.0;
 1473 
 1474     if (*ifc) {
 1475         den = pmod->tss * pmod->dfd;
 1476         pmod->adjrsq = 1 - (pmod->ess * (pmod->nobs - 1) / den);
 1477     } else {
 1478         /* model does not contain a constant */
 1479         int t;
 1480 
 1481         for (t=pmod->t1; t<=pmod->t2; t++) {
 1482         if (!na(pmod->yhat[t])) {
 1483             yt = dset->Z[yno][t];
 1484             den += yt * yt;
 1485         }
 1486         }
 1487 
 1488         /* make the centered R^2 available for output */
 1489         gretl_model_set_double(pmod, "centered-R2", pmod->rsq);
 1490 
 1491         /* but flag the fact that the reported R-squared is
 1492            in fact uncentered */
 1493         gretl_model_set_int(pmod, "uncentered", 1);
 1494 
 1495         /* and make the "official" figure the uncentered R^2,
 1496            as per NIST, R, Stata, SPSS, ... */
 1497         pmod->rsq = 1 - pmod->ess / den;
 1498         pmod->adjrsq =
 1499         1.0 - ((1.0 - pmod->rsq) * (pmod->nobs - 1.0) / pmod->dfd);
 1500     }
 1501     }
 1502 
 1503     if (pmod->rsq < 0.0) {
 1504     pmod->rsq = 0.0;
 1505     }
 1506 }
 1507 
 1508 /*
 1509   diaginv: solves for the diagonal elements of X'X inverse.
 1510 
 1511   @xpx = Cholesky-decomposed X'X
 1512   @tmp = work array, length >= nv
 1513   @diag = diagonal elements of {X'X}^{-1} (output)
 1514   @nv = number of regression coefficients = length of diag
 1515 */
 1516 
 1517 static void diaginv (const double *xpx, double *tmp, double *diag,
 1518              int nv)
 1519 {
 1520     int kk, l, m, k, i, j;
 1521     double d, e;
 1522 
 1523     kk = 0;
 1524 
 1525     for (l=0; l<nv-1; l++) {
 1526         d = xpx[kk];
 1527         tmp[l] = d;
 1528         e = d * d;
 1529         m = 0;
 1530         if (l > 0) {
 1531         for (j=1; j<=l; j++) {
 1532         m += nv - j;
 1533         }
 1534     }
 1535         for (i=l+1; i<nv; i++) {
 1536             d = 0.0;
 1537             k = i + m;
 1538             for (j=l; j<i; j++) {
 1539                 d += tmp[j] * xpx[k];
 1540                 k += nv - (j+1);
 1541             }
 1542             d = -d * xpx[k];
 1543             tmp[i] = d;
 1544             e += d * d;
 1545         }
 1546     kk += nv - l;
 1547         diag[l] = e;
 1548     }
 1549 
 1550     diag[nv-1] = xpx[kk] * xpx[kk];
 1551 }
 1552 
 1553 /*
 1554   cholbeta: in-place Cholesky decomposition of X'X (pmod->xpx) (lower
 1555   triangular matrix stacked in columns) plus solution for the
 1556   least-squares coefficient estimates.
 1557 
 1558   pmod->xpx = X'X on input and Cholesky decomposition on output
 1559   xpy = the X'y vector on input and Cholesky-transformed t
 1560         vector on output
 1561   rss = location to receive regression sum of squares
 1562 
 1563   The number of floating-point operations is basically 3.5 * nv^2
 1564   plus (nv^3) / 3.
 1565 */
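
      /* In matrix terms (a summary of the routine below): writing
         X'X = L L' with L lower triangular, cholbeta() overwrites
         pmod->xpx with the factor L (diagonal entries stored as
         reciprocals), overwrites xpy with the vector t that solves
         L t = X'y, sets *rss = t't (the regression sum of squares),
         and finally back-solves L' b = t for the coefficients. */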
 1566 
 1567 static int cholbeta (MODEL *pmod, double *xpy, double *rss)
 1568 {
 1569     int i, j, k, kk, l, jm1;
 1570     double e, d, d1, d2, test, xx;
 1571     double *xpx = pmod->xpx;
 1572     double *b = pmod->coeff;
 1573     int nc = pmod->ncoeff;
 1574 
 1575     if (xpx[0] <= 0.0) {
 1576     fprintf(stderr, "%s %d: xpx <= 0.0\n", __FILE__, __LINE__);
 1577     return E_NAN;
 1578     }
 1579 
 1580     e = 1.0 / sqrt(xpx[0]);
 1581     xpx[0] = e;
 1582     xpy[0] *= e;
 1583     for (i=1; i<nc; i++) {
 1584     xpx[i] *= e;
 1585     }
 1586 
 1587     kk = nc;
 1588 
 1589     for (j=1; j<nc; j++) {
 1590     /* diagonal elements */
 1591         d = d1 = 0.0;
 1592         k = jm1 = j;
 1593 
 1594         for (l=1; l<=jm1; l++) {
 1595             xx = xpx[k];
 1596             d1 += xx * xpy[l-1];
 1597             d += xx * xx;
 1598             k += nc - l;
 1599         }
 1600 
 1601         d2 = xpx[kk] - d;
 1602     test = d2 / xpx[kk];
 1603 
 1604     /* check for singularity */
 1605         if (test < TINY) {
 1606 #if 0
 1607         fprintf(stderr, "cholbeta: test[%d] = %g\n", j, test);
 1608 #endif
 1609         *rss = -1.0;
 1610         return E_SINGULAR;
 1611         } else if (test < SMALL) {
 1612         gretl_model_set_int(pmod, "near-singular", 1);
 1613     }
 1614 
 1615         e = 1 / sqrt(d2);
 1616         xpx[kk] = e;
 1617         xpy[j] = (xpy[j] - d1) * e;
 1618 
 1619     /* off-diagonal elements */
 1620         for (i=j+1; i<nc; i++) {
 1621             kk++;
 1622             d = 0.0;
 1623             k = j;
 1624             for (l=1; l<=jm1; l++) {
 1625                 d += xpx[k] * xpx[k-j+i];
 1626                 k += nc - l;
 1627             }
 1628             xpx[kk] = (xpx[kk] - d) * e;
 1629         }
 1630         kk++;
 1631     }
 1632 
 1633     kk--;
 1634 
 1635     /* calculate regression sum of squares */
 1636     d = 0.0;
 1637     for (j=0; j<nc; j++) {
 1638     d += xpy[j] * xpy[j];
 1639     }
 1640     *rss = d;
 1641 
 1642     /* back-solve for the coefficients */
 1643 
 1644     b[nc-1] = xpy[nc-1] * xpx[kk];
 1645 
 1646     for (j=nc-2; j>=0; j--) {
 1647     d = xpy[j];
 1648     for (i=nc-1; i>j; i--) {
 1649         d -= b[i] * xpx[--kk];
 1650     }
 1651     b[j] = d * xpx[--kk];
 1652     }
 1653 
 1654     for (j=0; j<nc; j++) {
 1655     if (isnan(b[j])) {
 1656         fprintf(stderr, "%s %d: coeff %d is NaN\n", __FILE__, __LINE__, j);
 1657         return E_NAN;
 1658     }
 1659     }
 1660 
 1661     return 0;
 1662 }
 1663 
 1664 /* compute fitted values and residuals */
 1665 
 1666 static int hatvars (MODEL *pmod, const DATASET *dset)
 1667 {
 1668     int yno = pmod->list[1];
 1669     int xno, i, t;
 1670     double x;
 1671 
 1672     for (t=pmod->t1; t<=pmod->t2; t++) {
 1673     if (model_missing(pmod, t)) {
 1674         continue;
 1675     }
 1676     pmod->yhat[t] = 0.0;
 1677         for (i=0; i<pmod->ncoeff; i++) {
 1678             xno = pmod->list[i+2];
 1679         x = dset->Z[xno][t];
 1680         if (pmod->nwt) {
 1681         x *= sqrt(dset->Z[pmod->nwt][t]);
 1682         }
 1683             pmod->yhat[t] += pmod->coeff[i] * x;
 1684         }
 1685     x = dset->Z[yno][t];
 1686     if (pmod->nwt) {
 1687         x *= sqrt(dset->Z[pmod->nwt][t]);
 1688     }
 1689         pmod->uhat[t] = x - pmod->yhat[t];
 1690     }
 1691 
 1692     return 0;
 1693 }
 1694 
 1695 /*
 1696   regress: takes X'X (pmod->xpx) and X'y (@xpy) and
 1697   computes OLS estimates and associated statistics.
 1698 
 1699   n = total number of observations per series in data set
 1700   pmod->ifc = 1 if constant is present in model, else = 0
 1701 
 1702   ess = error sum of squares
 1703   sigma = standard error of regression
 1704   fstt = F-statistic
 1705   pmod->coeff = array of regression coefficients
 1706   pmod->sderr = corresponding array of standard errors
 1707 */
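
      /* Note on the F statistic computed below: since rss is the
         regression sum of squares (ypy - ess) and zz = (sum y)^2 / n,
         for a model containing a constant (ifc = 1) the expression
         (rss - zz) / (s2 * dfn) reduces to the familiar

             F = [(TSS - ESS) / dfn] / [ESS / dfd]
      */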
 1708 
 1709 static void regress (MODEL *pmod, double *xpy,
 1710              double ysum, double ypy,
 1711              const DATASET *dset,
 1712              double rho, gretlopt opt)
 1713 {
 1714     int ifc = pmod->ifc;
 1715     double zz, rss = 0.0;
 1716     double s2 = 0.0;
 1717     double *diag = NULL;
 1718     int i, err = 0;
 1719 
 1720     for (i=0; i<pmod->full_n; i++) {
 1721     pmod->yhat[i] = pmod->uhat[i] = NADBL;
 1722     }
 1723 
 1724     zz = ysum * ysum / pmod->nobs;
 1725     pmod->tss = ypy - zz;
 1726 
 1727     /* Cholesky-decompose X'X and find the coefficients */
 1728     err = cholbeta(pmod, xpy, &rss);
 1729     if (err) {
 1730         pmod->errcode = err;
 1731         return;
 1732     }
 1733 
 1734     if (rho != 0.0) {
 1735     pmod->ess = ypy - rss;
 1736     } else {
 1737     make_ess(pmod, dset);
 1738     rss = ypy - pmod->ess;
 1739     }
 1740 
 1741     if (fabs(pmod->ess) < ESSZERO) {
 1742     pmod->ess = 0.0;
 1743     } else if (pmod->ess < 0.0) {
 1744     gretl_errmsg_sprintf(_("Error sum of squares (%g) is not > 0"),
 1745                  pmod->ess);
 1746         return;
 1747     }
 1748 
 1749     if (pmod->dfd == 0) {
 1750     pmod->sigma = 0.0;
 1751     pmod->adjrsq = NADBL;
 1752     } else {
 1753     if (pmod->opt & OPT_N) {
 1754         /* no-df-corr */
 1755         s2 = pmod->ess / pmod->nobs;
 1756     } else {
 1757         s2 = pmod->ess / pmod->dfd;
 1758     }
 1759     pmod->sigma = sqrt(s2);
 1760     }
 1761 
 1762     if (pmod->tss < DBL_EPSILON) {
 1763     pmod->rsq = pmod->adjrsq = NADBL;
 1764     }
 1765 
 1766     hatvars(pmod, dset);
 1767 
 1768     if (pmod->errcode) return;
 1769 
 1770     if (pmod->tss > 0.0) {
 1771     compute_r_squared(pmod, dset, &ifc);
 1772     } else if (pmod->tss == 0.0 && !(opt & OPT_A)) {
 1773     uncentered_r_squared(pmod, dset);
 1774     }
 1775 
 1776     if (s2 <= 0.0 || pmod->dfd == 0 || pmod->dfn == 0) {
 1777     pmod->fstt = NADBL;
 1778     } else if (pmod->rsq == 1.0) {
 1779     pmod->fstt = NADBL;
 1780     } else if (pmod->opt & OPT_N) {
 1781     /* for consistency with no-df-corr */
 1782     pmod->fstt = NADBL;
 1783     pmod->chisq = (rss - zz * ifc) / s2;
 1784     } else {
 1785     pmod->fstt = (rss - zz * ifc) / (s2 * pmod->dfn);
 1786     if (pmod->fstt < 0.0) {
 1787         pmod->fstt = 0.0;
 1788     }
 1789     }
 1790 
 1791     diag = malloc(pmod->ncoeff * sizeof *diag);
 1792     if (diag == NULL) {
 1793     pmod->errcode = E_ALLOC;
 1794     return;
 1795     }
 1796 
 1797     /* Note: 'xpy' is used purely as workspace in diaginv() below, as
 1798        a matter of economy/convenience; no values in this array are
 1799        used before they are written.
 1800     */
 1801     diaginv(pmod->xpx, xpy, diag, pmod->ncoeff);
 1802 
 1803     for (i=0; i<pmod->ncoeff; i++) {
 1804     if (diag[i] >= 0.0) {
 1805         pmod->sderr[i] = pmod->sigma * sqrt(diag[i]);
 1806     } else {
 1807         pmod->sderr[i] = 0.0;
 1808     }
 1809     }
 1810 
 1811     free(diag);
 1812 }
 1813 
 1814 /**
 1815  * makevcv:
 1816  * @pmod: pointer to model.
 1817  * @sigma: square root of error variance, or 1.0 to
 1818  * produce just X'X^{-1}.
 1819  *
 1820  * Inverts the Cholesky-decomposed X'X and computes the
 1821  * coefficient covariance matrix.
 1822  *
 1823  * Returns: 0 on successful completion, non-zero code on error.
 1824  */
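
      /* The quantity produced below, for reference:

             Var(b-hat) = sigma^2 * (X'X)^{-1}

         stored in pmod->vcv as a packed triangle of the same dimension
         as pmod->xpx; hence passing sigma = 1.0 yields plain
         (X'X)^{-1}. */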
 1825 
 1826 int makevcv (MODEL *pmod, double sigma)
 1827 {
 1828     int dec, mst, kk, i, j, kj, icnt, m, k, l = 0;
 1829     int nv, nxpx;
 1830     double d;
 1831 
 1832     if (pmod->vcv != NULL) {
 1833     /* already done */
 1834     return 0;
 1835     }
 1836 
 1837     if (pmod->xpx == NULL) {
 1838     /* raw material not available */
 1839     return E_BADSTAT;
 1840     }
 1841 
 1842     nv = pmod->ncoeff;
 1843     nxpx = (nv * nv + nv) / 2;
 1844     mst = nxpx;
 1845     kk = nxpx - 1;
 1846 
 1847     pmod->vcv = malloc(nxpx * sizeof *pmod->vcv);
 1848     if (pmod->vcv == NULL) {
 1849     return E_ALLOC;
 1850     }
 1851 
 1852     for (i=0; i<nv; i++) {
 1853     mst -= i;
 1854     /* find diagonal element */
 1855     d = pmod->xpx[kk];
 1856     if (i > 0) {
 1857         for (j=kk+1; j<=kk+i; j++) {
 1858         d -= pmod->xpx[j] * pmod->vcv[j];
 1859         }
 1860     }
 1861     pmod->vcv[kk] = d * pmod->xpx[kk];
 1862     /* find off-diagonal elements indexed by kj */
 1863     kj = kk;
 1864     kk = kk - i - 2;
 1865     if (i > nv - 2) {
 1866         continue;
 1867     }
 1868     for (j=i+1; j<nv; j++) {
 1869         icnt = i+1;
 1870         kj -= j;
 1871         d = 0.0;
 1872         m = mst + 1;
 1873         for (k=0; k<=j-1; k++) {
 1874         if (icnt > 0) {
 1875             dec = 1;
 1876             icnt--;
 1877         } else {
 1878             dec = k;
 1879         }
 1880         m -= dec;
 1881         l = kj + i - k;
 1882         d += pmod->vcv[m-1] * pmod->xpx[l];
 1883         }
 1884         pmod->vcv[kj] = -d * pmod->xpx[l-1];
 1885     }
 1886     }
 1887 
 1888     if (pmod->ci == LOGIT || pmod->ci == PROBIT) {
 1889     sigma = 1.0;
 1890     }
 1891 
 1892     if (sigma != 1.0) {
 1893     double s2 = sigma * sigma;
 1894 
 1895     for (i=0; i<nxpx; i++) {
 1896         pmod->vcv[i] *= s2;
 1897     }
 1898     }
 1899 
 1900     return 0;
 1901 }
 1902 
 1903 /**
 1904  * dwstat:
 1905  * @order: order of autoregression (usually 1).
 1906  * @pmod: pointer to model.
 1907  * @dset: pointer to dataset.
 1908  *
 1909  * Computes the Durbin-Watson statistic for @pmod.
 1910  *
 1911  * Returns: the D-W value, or #NADBL on error.
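 *
 * An illustrative sketch, assuming @pmod has already been estimated
 * on @dset (e.g. via lsq()) and a printer prn is available:
 *
 * <informalexample><programlisting>
 * double dw = dwstat(1, pmod, dset);
 *
 * if (!na(dw)) {
 *     pprintf(prn, "Durbin-Watson = %g\n", dw);
 * }
 * </programlisting></informalexample>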
 1912  */
 1913 
 1914 double dwstat (int order, MODEL *pmod, const DATASET *dset)
 1915 {
 1916     const double *w = NULL;
 1917     double ut, u1;
 1918     double num = 0.0;
 1919     double den = 0.0;
 1920     int t, t1;
 1921 
 1922     if (pmod->ess <= 0.0) {
 1923     return NADBL;
 1924     }
 1925 
 1926     t1 = pmod->t1 + order;
 1927 
 1928     if (pmod->nwt) {
 1929     w = dset->Z[pmod->nwt];
 1930     ut = pmod->uhat[t1 - 1];
 1931     if (!na(ut)) {
 1932         den += ut * ut;
 1933     }
 1934     } else {
 1935     den = pmod->ess;
 1936     }
 1937 
 1938     for (t=t1; t<=pmod->t2; t++)  {
 1939         ut = pmod->uhat[t];
 1940         u1 = pmod->uhat[t-1];
 1941         if (na(ut) || na(u1) ||
 1942         (w != NULL && (w[t] == 0.0 || w[t-1] == 0.0))) {
 1943         continue;
 1944     }
 1945         num += (ut - u1) * (ut - u1);
 1946     if (pmod->nwt) {
 1947         den += ut * ut;
 1948     }
 1949     }
 1950 
 1951     return num / den;
 1952 }
 1953 
 1954 /* altrho: alternative calculation of rho */
 1955 
 1956 static double altrho (int order, int t1, int t2, const double *uhat)
 1957 {
 1958     int t, n, len = t2 - (t1 + order) + 1;
 1959     double *ut, *u1;
 1960     double uht, uh1, rho;
 1961 
 1962     ut = malloc(2 * len * sizeof *ut);
 1963     if (ut == NULL) {
 1964     return NADBL;
 1965     }
 1966 
 1967     u1 = ut + len;
 1968     n = 0;
 1969 
 1970     for (t=t1+order; t<=t2; t++) {
 1971         uht = uhat[t];
 1972     uh1 = (t > 0)? uhat[t-1] : NADBL;
 1973         if (!na(uht) && !na(uh1)) {
 1974         ut[n] = uht;
 1975         u1[n] = uh1;
 1976         n++;
 1977     }
 1978     }
 1979 
 1980     rho = gretl_corr(0, n - 1, ut, u1, NULL);
 1981 
 1982     free(ut);
 1983 
 1984     return rho;
 1985 }
 1986 
 1987 /**
 1988  * rhohat:
 1989  * @order: order of autoregression, usually 1.
 1990  * @t1: start of sample range.
 1991  * @t2: end of sample range.
 1992  * @uhat: array of regression residuals.
 1993  *
 1994  * Computes the first-order serial correlation coefficient
 1995  * for @uhat, over the range @t1 + @order to @t2.
 1996  *
 1997  * Returns: the \hat{rho} value, or #NADBL on error.
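 *
 * For example (a sketch, assuming pmod points to an estimated model):
 *
 * <informalexample><programlisting>
 * double rho = rhohat(1, pmod->t1, pmod->t2, pmod->uhat);
 * </programlisting></informalexample>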
 1998  */
 1999 
 2000 double rhohat (int order, int t1, int t2, const double *uhat)
 2001 {
 2002     double ut, u1, num = 0.0, den = 0.0;
 2003     double rho;
 2004     int t;
 2005 
 2006     for (t=t1+order; t<=t2; t++) {
 2007         ut = uhat[t];
 2008         u1 = uhat[t-1];
 2009         if (na(ut) || na(u1)) {
 2010         continue;
 2011     }
 2012         num += ut * u1;
 2013         den += u1 * u1;
 2014     }
 2015 
 2016     if (den < DBL_EPSILON) {
 2017     return NADBL;
 2018     }
 2019 
 2020     rho = num / den;
 2021 
 2022     if (rho > 1.0 || rho < -1.0) {
 2023     /* out of bounds, try again */
 2024     rho = altrho(order, t1, t2, uhat);
 2025     }
 2026 
 2027     return rho;
 2028 }
 2029 
 2030 static int hilu_plot (double *ssr, double *rho, int n)
 2031 {
 2032     FILE *fp;
 2033     int i, err = 0;
 2034 
 2035     fp = open_plot_input_file(PLOT_REGULAR, 0, &err);
 2036     if (err) {
 2037     return err;
 2038     }
 2039 
 2040     fputs("# hildreth-lu\n", fp);
 2041     fputs("set xlabel 'rho'\n", fp);
 2042 
 2043     fprintf(fp, "set ylabel '%s'\n", _("ESS"));
 2044 
 2045     fputs("set nokey\n", fp);
 2046     fputs("set xrange [-1.0:1.0]\n", fp);
 2047     fputs("plot '-' using 1:2 w impulses\n", fp);
 2048 
 2049     gretl_push_c_numeric_locale();
 2050 
 2051     for (i=0; i<n; i++) {
 2052     fprintf(fp, "%g %g\n", rho[i], ssr[i]);
 2053     }
 2054     fputs("e\n", fp);
 2055 
 2056     gretl_pop_c_numeric_locale();
 2057 
 2058     return finalize_plot_input_file(fp);
 2059 }
 2060 
 2061 /* calculation of rhohat for use with CORC/PWE */
 2062 
 2063 static double autores (MODEL *pmod, DATASET *dset, gretlopt opt)
 2064 {
 2065     double *uhat = pmod->uhat;
 2066     double ut, num = 0.0, den = 0.0;
 2067     int yno = pmod->list[1];
 2068     int t1 = pmod->t1;
 2069     int t, i, v;
 2070 
 2071     if (!(opt & OPT_P)) {
 2072     /* not using Prais-Winsten */
 2073     t1--;
 2074     }
 2075 
 2076     for (t=t1; t<=pmod->t2; t++) {
 2077     ut = dset->Z[yno][t];
 2078     for (i=0; i<pmod->ncoeff; i++) {
 2079         v = pmod->list[i+2];
 2080         ut -= pmod->coeff[i] * dset->Z[v][t];
 2081     }
 2082     uhat[t] = ut;
 2083     if (t > t1) {
 2084         num += uhat[t] * uhat[t-1];
 2085         den += uhat[t-1] * uhat[t-1];
 2086     }
 2087     }
 2088 
 2089     return num / den;
 2090 }
 2091 
 2092 static int rho_val_ends_row (int k)
 2093 {
 2094     int i, endval = -80; /* rho = -0.80 */
 2095 
 2096     for (i=0; i<7; i++) {
 2097     if (k == endval) {
 2098         return 1;
 2099     }
 2100     endval += 30;
 2101     }
 2102 
 2103     return 0;
 2104 }
 2105 
 2106 static void hilu_print_iter (double r, double ess, int iter,
 2107                  double *ssr, double *rh,
 2108                  int *ip, int asciiplot,
 2109                  PRN *prn)
 2110 {
 2111     char num[8];
 2112     int k;
 2113 
 2114     sprintf(num, "%.1f", 100 * r);
 2115     k = atoi(num);
 2116 
 2117     /* we'll not print all 199 rho values */
 2118 
 2119     if (k == -99 || k == 99 || k % 10 == 0) {
 2120     if (asciiplot) {
 2121         /* recording a subset of values */
 2122         ssr[*ip] = ess;
 2123         rh[*ip] = r;
 2124         *ip += 1;
 2125     }
 2126     pprintf(prn, " %5.2f %#12.6g", r, ess);
 2127     if (rho_val_ends_row(k)) {
 2128         pputc(prn, '\n');
 2129     } else {
 2130         bufspace(3, prn);
 2131     }
 2132     }
 2133 }
 2134 
 2135 static void hilu_header (PRN *prn)
 2136 {
 2137     int i;
 2138 
 2139     pputs(prn, "\n   ");
 2140 
 2141     for (i=0; i<3; i++) {
 2142     pputs(prn, "rho");
 2143     bufspace(10, prn);
 2144     pputs(prn, _("ESS"));
 2145     if (i < 2) {
 2146         pputs(prn, "      ");
 2147     }
 2148     }
 2149 
 2150     pputc(prn, '\n');
 2151 }
 2152 
 2153 static double hilu_search (const int *list, DATASET *dset,
 2154                MODEL *pmod, gretlopt opt,
 2155                PRN *prn)
 2156 {
 2157     double *rh = NULL, *ssr = NULL;
 2158     double essmin = 1.0e200;
 2159     double ess, r, hl_rho = 0;
 2160     int quiet = (opt & OPT_Q);
 2161     int niceplot = (opt & OPT_G);
 2162     int iter = 0, i = 0;
 2163     int nplot = 0;
 2164 
 2165     if (!quiet) {
 2166     /* allocate arrays for recording plot info */
 2167     nplot = niceplot ? 199 : 21;
 2168     ssr = malloc(2 * nplot * sizeof *ssr);
 2169     if (ssr == NULL) {
 2170         pmod->errcode = E_ALLOC;
 2171         return NADBL;
 2172     }
 2173     rh = ssr + nplot;
 2174     /* and print header */
 2175     hilu_header(prn);
 2176     }
 2177 
 2178     for (r = -0.99; r < 1.0; r += .01, iter++) {
 2179     clear_model(pmod);
 2180 
 2181     *pmod = ar1_lsq(list, dset, OLS, OPT_A, r);
 2182     if (pmod->errcode) {
 2183         free(ssr);
 2184         return NADBL;
 2185     }
 2186 
 2187     ess = pmod->ess;
 2188 
 2189     if (!quiet) {
 2190         hilu_print_iter(r, ess, iter, ssr, rh, &i,
 2191                 !niceplot, prn);
 2192         if (niceplot) {
 2193         /* record full data for plotting */
 2194         ssr[i] = ess;
 2195         rh[i++] = r;
 2196         }
 2197     }
 2198 
 2199     if (iter == 0 || ess < essmin) {
 2200         essmin = ess;
 2201         hl_rho = r;
 2202     }
 2203     } /* end of basic iteration */
 2204 
 2205     if (hl_rho > 0.989) {
 2206     /* try exploring this funny region? */
 2207     for (r = 0.99; r <= 0.999; r += .001) {
 2208         clear_model(pmod);
 2209         *pmod = ar1_lsq(list, dset, OLS, OPT_A, r);
 2210         if (pmod->errcode) {
 2211         free(ssr);
 2212         return NADBL;
 2213         }
 2214         ess = pmod->ess;
 2215         if (ess < essmin) {
 2216         essmin = ess;
 2217         hl_rho = r;
 2218         }
 2219     }
 2220     }
 2221 
 2222     if (hl_rho > 0.9989) {
 2223     /* this even funnier one? */
 2224     for (r = 0.9991; r <= 0.9999; r += .0001) {
 2225         clear_model(pmod);
 2226         *pmod = ar1_lsq(list, dset, OLS, OPT_A, r);
 2227         if (pmod->errcode) {
 2228         free(ssr);
 2229         return NADBL;
 2230         }
 2231         ess = pmod->ess;
 2232         if (ess < essmin) {
 2233         essmin = ess;
 2234         hl_rho = r;
 2235         }
 2236     }
 2237     }
 2238 
 2239     if (!quiet) {
 2240     pprintf(prn, _("\n\nESS is minimized for rho = %g\n\n"), hl_rho);
 2241     if (niceplot) {
 2242         hilu_plot(ssr, rh, nplot);
 2243     } else {
 2244         /* ascii plot */
 2245         graphyx(ssr, rh, nplot, "ESS", "RHO", prn);
 2246         pputs(prn, "\n");
 2247     }
 2248     }
 2249 
 2250     free(ssr);
 2251 
 2252     return hl_rho;
 2253 }
 2254 
 2255 static void corc_header (gretlopt opt, PRN *prn)
 2256 {
 2257     if (opt & OPT_H) {
 2258     pputs(prn, _("\nFine-tune rho using the CORC procedure...\n\n"));
 2259     } else {
 2260     pputs(prn, _("\nPerforming iterative calculation of rho...\n\n"));
 2261     }
 2262 
 2263     pputs(prn, _("                 ITER       RHO        ESS"));
 2264     pputc(prn, '\n');
 2265 }
 2266 
 2267 /* Unlike plain CORC, PWE needs the term sqrt(1 - rho^2), so
 2268    we really cannot proceed if |rho| >= 1.0, even just to see
 2269    where rho ends up, as we now do with CORC.
 2270 */
 2271 
 2272 static int pwe_error (double rho, int quiet, int iter, PRN *prn)
 2273 {
 2274     gretl_errmsg_sprintf(_("Prais-Winsten: invalid rho value %g "
 2275                "(explosive error)"), rho);
 2276     if (!quiet) {
 2277     pprintf(prn, "          %10d %12.5f", iter, rho);
 2278     pprintf(prn, "        NA\n");
 2279     }
 2280 
 2281     return E_NOCONV;
 2282 }
 2283 
 2284 /*
 2285  * estimate_rho:
 2286  * @list: dependent variable plus list of regressors.
 2287  * @dset: dataset struct.
 2288  * @opt: option flags: may include OPT_H to use Hildreth-Lu,
 2289  *       OPT_P to use Prais-Winsten, OPT_B to suppress Cochrane-Orcutt
 2290  *       fine-tuning of Hildreth-Lu results, OPT_G to generate
 2291  *       a gnuplot graph of the search in the Hildreth-Lu case,
 *       OPT_Q to suppress printed output, and OPT_L to apply a
 *       looser convergence criterion.
 2292  * @prn: gretl printing struct.
 2293  * @err: location to receive error code.
 2294  *
 2295  * Estimate the quasi-differencing coefficient for use with the
 2296  * Cochrane-Orcutt, Hildreth-Lu or Prais-Winsten procedures for
 2297  * handling first-order serial correlation. Unless OPT_Q is given,
 2298  * print a trace of the search for rho.
 2299  *
 2300  * Returns: rho estimate on successful completion, #NADBL on error.
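 *
 * In outline (see ar1_lsq() for the details), each iteration regresses
 * the quasi-differenced y(t) - rho*y(t-1) on x(t) - rho*x(t-1); under
 * Prais-Winsten (OPT_P) the first observation is also retained, scaled
 * by sqrt(1 - rho^2), which is why |rho| < 1 is required in that case.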
 2301  */
 2302 
 2303 static double estimate_rho (const int *list, DATASET *dset,
 2304                 gretlopt opt, PRN *prn, int *err)
 2305 {
 2306     int save_t1 = dset->t1;
 2307     int save_t2 = dset->t2;
 2308     int quiet = (opt & OPT_Q);
 2309     int pwe = (opt & OPT_P);
 2310     double rho = 0.0;
 2311     MODEL armod;
 2312 
 2313     *err = list_adjust_sample(list, &dset->t1, &dset->t2,
 2314                   dset, NULL);
 2315     if (*err) {
 2316     goto bailout;
 2317     }
 2318 
 2319     gretl_model_init(&armod, dset);
 2320 
 2321     if (opt & OPT_H) {
 2322     /* Do Hildreth-Lu first */
 2323     rho = hilu_search(list, dset, &armod, opt, prn);
 2324     } else {
 2325     /* Initialize Cochrane-Orcutt (or Prais-Winsten) */
 2326     armod = lsq(list, dset, OLS, OPT_A);
 2327     if (!armod.errcode && armod.dfd == 0) {
 2328         armod.errcode = E_DF;
 2329     }
 2330     if (!armod.errcode) {
 2331         rho = armod.rho;
 2332     }
 2333     }
 2334 
 2335     if (armod.errcode) {
 2336     *err = armod.errcode;
 2337     } else if (!(opt & OPT_H) || !(opt & OPT_B)) {
 2338     /* either not Hildreth-Lu, or Hildreth-Lu but CORC
 2339        fine-tuning is wanted (has not been suppressed
 2340        via OPT_B)
 2341     */
 2342     gretlopt lsqopt = pwe ? (OPT_A | OPT_P) : OPT_A;
 2343     double edsmall = 5.0e-8;
 2344     double ess0 = armod.ess;
 2345     double rho0 = rho;
 2346     double rdsmall = (opt & OPT_L)? 0.001 : 1.0e-6;
 2347     int iter = 0, itermax = 100;
 2348     int converged = 0;
 2349 
 2350     if (!quiet) {
 2351         corc_header(opt, prn);
 2352     }
 2353 
 2354     while (++iter <= itermax && !converged) {
 2355         clear_model(&armod);
 2356         armod = ar1_lsq(list, dset, OLS, lsqopt, rho);
 2357         if ((*err = armod.errcode)) {
 2358         break;
 2359         }
 2360 
 2361         if (!quiet) {
 2362         pprintf(prn, "          %10d %12.5f", iter, rho);
 2363         pprintf(prn, "   %#.6g\n", armod.ess);
 2364         }
 2365 
 2366         rho = autores(&armod, dset, opt);
 2367 
 2368         if (pwe && fabs(rho) >= 1.0) {
 2369         *err = pwe_error(rho, quiet, iter + 1, prn);
 2370         break;
 2371         }
 2372 
 2373         if (armod.ess == 0.0) {
 2374         converged = 1;
 2375         } else if ((ess0 - armod.ess) / ess0 < edsmall) {
 2376         converged = 1;
 2377         } else if (fabs(rho - rho0) < rdsmall) {
 2378         converged = 1;
 2379         }
 2380 
 2381         ess0 = armod.ess;
 2382         rho0 = rho;
 2383     } /* end Cochrane-Orcutt iteration */
 2384 
 2385     if (converged && (rho >= 1.0 || rho <= -1.0)) {
 2386         gretl_errmsg_sprintf(_("%s: rho = %g, error is unbounded"),
 2387                  "ar1", rho);
 2388         converged = 0;
 2389     }
 2390 
 2391     if (!converged && !*err) {
 2392         *err = E_NOCONV;
 2393     }
 2394 
 2395     if (!quiet) {
 2396         if (*err) {
 2397         pputc(prn, '\n');
 2398         } else {
 2399         pprintf(prn, "          %10d %12.5f", iter, rho);
 2400         pprintf(prn, "   %#.6g\n", armod.ess);
 2401         }
 2402     }
 2403     }
 2404 
 2405     clear_model(&armod);
 2406 
 2407  bailout:
 2408 
 2409     dset->t1 = save_t1;
 2410     dset->t2 = save_t2;
 2411 
 2412     if (*err) {
 2413     rho = NADBL;
 2414     }
 2415 
 2416     return rho;
 2417 }
 2418 
 2419 /**
 2420  * augment_regression_list:
 2421  * @orig: list giving original regression specification.
 2422  * @aux: either %AUX_SQ, %AUX_LOG or %AUX_WHITE.
 2423  * @dset: dataset struct.
 2424  * @err: location to receive error code.
 2425  *
 2426  * Augment the regression list @orig with auxiliary terms.  If @aux
 2427  * is %AUX_SQ add the squares of the original regressors; if @aux
 2428  * is %AUX_WHITE add squares and cross-products; and if @aux is
 2429  * %AUX_LOG add the natural logs of the original regressors.
 2430  * If they are not already present, these variables are added
 2431  * to the data array.
 2432  *
 2433  * Returns: the augmented list, or NULL on failure.
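 *
 * A small sketch of typical usage (the caller owns, and must free,
 * the returned list):
 *
 * <informalexample><programlisting>
 * int err = 0;
 * int *biglist = augment_regression_list(orig, AUX_SQ, dset, &amp;err);
 *
 * if (!err) {
 *     printlist(biglist, "orig plus squares");
 *     free(biglist);
 * }
 * </programlisting></informalexample>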
 2434  */
 2435 
 2436 int *augment_regression_list (const int *orig, int aux,
 2437                   DATASET *dset, int *err)
 2438 {
 2439     int *list;
 2440     int listlen;
 2441     int cnum = 0;
 2442     int i, k;
 2443 
 2444     if (aux == AUX_WHITE) {
 2445     int cpos = gretl_list_const_pos(orig, 2, dset);
 2446     int nt, trv = orig[0] - 1;
 2447 
 2448     if (cpos > 0) {
 2449         trv--;
 2450         cnum = orig[cpos];
 2451     }
 2452     nt = (trv * trv + trv) / 2;
 2453     listlen = orig[0] + nt + 1;
 2454     } else {
 2455     listlen = 2 * orig[0];
 2456     }
 2457 
 2458     list = malloc(listlen * sizeof *list);
 2459     if (list == NULL) {
 2460     *err = E_ALLOC;
 2461     return NULL;
 2462     }
 2463 
 2464     /* transcribe original list */
 2465     for (i=0; i<=orig[0]; i++) {
 2466     list[i] = orig[i];
 2467     }
 2468 
 2469     /* add squares, cross-products or logs of independent vars */
 2470 
 2471     k = list[0];
 2472     for (i=2; i<=orig[0]; i++) {
 2473     int vnew, vi = orig[i];
 2474 
 2475     if (vi == 0) {
 2476         continue;
 2477     }
 2478 
 2479     if (aux == AUX_SQ || aux == AUX_WHITE) {
 2480         vnew = xpxgenr(vi, vi, dset);
 2481         if (vnew > 0) {
 2482         list[++k] = vnew;
 2483         }
 2484         if (aux == AUX_WHITE) {
 2485         int j, vj;
 2486 
 2487         for (j=i+1; j<=orig[0]; j++) {
 2488             vj = orig[j];
 2489             if (vj == cnum) {
 2490             continue;
 2491             }
 2492             vnew = xpxgenr(vi, vj, dset);
 2493             if (vnew > 0) {
 2494             /* ensure uniqueness of generated varnames */
 2495             sprintf(dset->varname[vnew], "X%d_X%d", i-1, j-1);
 2496             list[++k] = vnew;
 2497             }
 2498         }
 2499         }
 2500     } else if (aux == AUX_LOG) {
 2501         /* don't try to log series with non-positive values */
 2502         if (gretl_ispositive(dset->t1, dset->t2, dset->Z[vi], 1)) {
 2503         vnew = loggenr(vi, dset);
 2504         if (vnew > 0) {
 2505             list[++k] = vnew;
 2506         }
 2507         }
 2508     }
 2509     }
 2510 
 2511     list[0] = k;
 2512 
 2513     return list;
 2514 }
 2515 
 2516 /* For observation s, see if the regression list contains a variable
 2517    that has a single non-zero value at that particular observation.
 2518    We run this check on observations showing an OLS residual of zero.
 2519 */
 2520 
 2521 static int observation_is_dummied (const MODEL *pmod,
 2522                    int *list, const double **Z,
 2523                    int s)
 2524 {
 2525     int i, t, v;
 2526     int ret = 0;
 2527 
 2528     for (i=list[0]; i>=2; i--) {
 2529     v = list[i];
 2530     if (v == 0) {
 2531         continue;
 2532     }
 2533     ret = 1;
 2534     for (t=pmod->t1; t<=pmod->t2; t++) {
 2535         if ((t == s && Z[v][t] == 0.0) || (t != s && Z[v][t] != 0.0)) {
 2536         ret = 0;
 2537         break;
 2538         }
 2539     }
 2540     if (ret) {
 2541         gretl_list_delete_at_pos(list, i);
 2542         break;
 2543     }
 2544     }
 2545 
 2546     return ret;
 2547 }
 2548 
 2549 static int *copy_ensure_const (const int *orig)
 2550 {
 2551     int *list = gretl_list_new(orig[0] + 1);
 2552     int i;
 2553 
 2554     if (list != NULL) {
 2555     list[1] = orig[1];
 2556     list[2] = 0;
 2557     for (i=3; i<=list[0]; i++) {
 2558         list[i] = orig[i-1];
 2559     }
 2560     }
 2561 
 2562     return list;
 2563 }
 2564 
 2565 /* get_hsk_weights: take the residuals from the model @pmod, square
 2566    them and take logs; find the fitted values for this series using an
 2567    auxiliary regression including the original independent variables
 2568    (and their squares, if not given OPT_N); exponentiate the fitted
 2569    values; and add the resulting series to the data set.
 2570 */
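
/* Equivalently (a summary of the procedure implemented below): the
   auxiliary regression fits log(uhat_t^2) on the regressors (plus
   their squares, unless OPT_N is given); the weight series handed
   back for use with WLS is then w_t = exp(-yhat_t), which is
   (roughly) an estimate of 1/sigma_t^2.
*/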
 2571 
 2572 static int get_hsk_weights (MODEL *pmod, DATASET *dset, gretlopt opt)
 2573 {
 2574     int oldv = dset->v;
 2575     int t, t1 = dset->t1, t2 = dset->t2;
 2576     int *lcpy = NULL;
 2577     int *auxlist = NULL;
 2578     int err = 0, shrink = 0;
 2579     double xx;
 2580     MODEL aux;
 2581 
 2582     if (pmod->ifc) {
 2583     lcpy = gretl_list_copy(pmod->list);
 2584     } else {
 2585     lcpy = copy_ensure_const(pmod->list);
 2586     }
 2587 
 2588     if (lcpy == NULL) {
 2589     return E_ALLOC;
 2590     }
 2591 
 2592     /* allocate space for an additional variable */
 2593     if (dataset_add_series(dset, 1)) {
 2594     free(lcpy);
 2595     return E_ALLOC;
 2596     }
 2597 
 2598     /* add transformed residuals to data set */
 2599     for (t=0; t<dset->n; t++) {
 2600     xx = pmod->uhat[t];
 2601     if (na(xx)) {
 2602         dset->Z[oldv][t] = NADBL;
 2603     } else if (xx == 0.0) {
 2604         if (observation_is_dummied(pmod, lcpy, (const double **) dset->Z, t)) {
 2605         dset->Z[oldv][t] = NADBL;
 2606         } else {
 2607         fprintf(stderr, "hsk: got a zero residual, could be a problem!\n");
 2608         dset->Z[oldv][t] = -1.0e16; /* ?? */
 2609         }
 2610     } else {
 2611         dset->Z[oldv][t] = log(xx * xx);
 2612     }
 2613     }
 2614 
 2615     if (opt & OPT_N) {
 2616     /* --no-squares option */
 2617     auxlist = lcpy;
 2618     } else {
 2619     /* build regression list, adding the squares of the original
 2620        independent vars */
 2621     auxlist = augment_regression_list(lcpy, AUX_SQ, dset, &err);
 2622     if (err) {
              /* clean up on failure: free the copied list and
                 drop the series added above */
              free(lcpy);
              dataset_drop_last_variables(dset, 1);
 2623         return err;
 2624     }
 2625     }
 2626 
 2627     auxlist[1] = oldv; /* the newly added log(uhat-squared) */
 2628 
 2629     dset->t1 = pmod->t1;
 2630     dset->t2 = pmod->t2;
 2631 
 2632     aux = lsq(auxlist, dset, OLS, OPT_A);
 2633     err = aux.errcode;
 2634     if (err) {
 2635     shrink = dset->v - oldv;
 2636     } else {
 2637     /* write into the data set the required weights */
 2638     for (t=aux.t1; t<=aux.t2; t++) {
 2639         xx = aux.yhat[t];
 2640         if (na(xx)) {
 2641         dset->Z[oldv][t] = NADBL;
 2642         } else {
 2643         dset->Z[oldv][t] = 1.0 / exp(xx);
 2644         }
 2645     }
 2646     shrink = dset->v - oldv - 1;
 2647     }
 2648 
 2649     dset->t1 = t1;
 2650     dset->t2 = t2;
 2651 
 2652     clear_model(&aux);
 2653 
 2654     if (shrink > 0) {
 2655     dataset_drop_last_variables(dset, shrink);
 2656     }
 2657 
 2658     if (auxlist != lcpy) {
 2659     free(auxlist);
 2660     }
 2661     free(lcpy);
 2662 
 2663     return err;
 2664 }
 2665 
 2666 /**
 2667  * hsk_model:
 2668  * @list: dependent variable plus list of regressors.
 2669  * @dset: dataset struct.
 2670  * @opt: may include OPT_N to suppress use of squares
 2671  * of regressors in the auxiliary regression.
 2672  *
 2673  * Estimate the model given in @list using a correction for
 2674  * heteroskedasticity.
 2675  *
 2676  * Returns: a #MODEL struct, containing the estimates.
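 *
 * A minimal sketch (assuming @list, @dset and a printer prn are
 * already set up):
 *
 * <informalexample><programlisting>
 * MODEL hm = hsk_model(list, dset, OPT_NONE);
 *
 * if (!hm.errcode) {
 *     printmodel(&amp;hm, dset, OPT_NONE, prn);
 * }
 * clear_model(&amp;hm);
 * </programlisting></informalexample>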
 2677  */
 2678 
 2679 MODEL hsk_model (const int *list, DATASET *dset, gretlopt opt)
 2680 {
 2681     int i, err;
 2682     int orig_nvar = dset->v;
 2683     int *hsklist;
 2684     MODEL hsk;
 2685 
 2686     if (dset->Z == NULL) {
 2687     hsk.errcode = E_DATA;
 2688     return hsk;
 2689     }
 2690 
 2691     gretl_error_clear();
 2692 
 2693     /* run initial OLS */
 2694     hsk = lsq(list, dset, OLS, OPT_A);
 2695     if (hsk.errcode) {
 2696     return hsk;
 2697     }
 2698 
 2699     /* form weights based on the OLS residuals */
 2700     err = get_hsk_weights(&hsk, dset, opt);
 2701     if (err) {
 2702     hsk.errcode = err;
 2703     return hsk;
 2704     }
 2705 
 2706     /* allocate regression list for weighted least squares */
 2707     hsklist = gretl_list_new(list[0] + 1);
 2708     if (hsklist == NULL) {
 2709     hsk.errcode = E_ALLOC;
 2710     return hsk;
 2711     }
 2712 
 2713     /* the last variable in the dataset will be the weight var */
 2714     hsklist[1] = dset->v - 1;
 2715 
 2716     /* put the original dependent variable in at position 2 */
 2717     hsklist[2] = list[1];
 2718 
 2719     /* add the original independent vars into the WLS list */
 2720     for (i=3; i<=hsklist[0]; i++) {
 2721     hsklist[i] = list[i-1];
 2722     }
 2723 
 2724     clear_model(&hsk);
 2725     hsk = lsq(hsklist, dset, WLS, OPT_NONE);
 2726     hsk.ci = HSK;
 2727 
 2728     if (opt & OPT_N) {
 2729     gretl_model_set_int(&hsk, "no-squares", 1);
 2730     hsk.opt |= OPT_N;
 2731     }
 2732 
 2733     dataset_drop_last_variables(dset, dset->v - orig_nvar);
 2734 
 2735     free(hsklist);
 2736 
 2737     return hsk;
 2738 }
 2739 
 2740 static void print_HET_1 (double z, double pval, PRN *prn)
 2741 {
 2742     pprintf(prn, "\n%s\n", _("Pesaran-Taylor test for heteroskedasticity"));
 2743     pprintf(prn, "\n%s: HET_1 = %f,\n", _("Test statistic"), z);
 2744     pprintf(prn, "%s = 2 * P(z > %f) = %.3g\n\n",
 2745         _("with p-value"), z, pval);
 2746 }
 2747 
 2748 static void print_whites_test (double LM, int df, double pval,
 2749                    gretlopt opt, PRN *prn)
 2750 {
 2751     if (opt & OPT_B) {
 2752     pprintf(prn, "\n%s\n", _("Breusch-Pagan test for heteroskedasticity"));
 2753     if (opt & OPT_R) {
 2754         pprintf(prn, "%s\n", _("with Koenker robust variance estimator"));
 2755     }
 2756     pprintf(prn, "\n%s: LM = %f,\n", _("Test statistic"), LM);
 2757     } else {
 2758     pprintf(prn, "\n%s", _("White's test for heteroskedasticity"));
 2759     if (opt & OPT_X) {
 2760         pprintf(prn, " (%s)\n", _("squares only"));
 2761     } else {
 2762         pputc(prn, '\n');
 2763     }
 2764     pprintf(prn, "\n%s: TR^2 = %f,\n", _("Test statistic"), LM);
 2765     }
 2766 
 2767     pprintf(prn, "%s = P(%s(%d) > %f) = %f\n\n",
 2768         _("with p-value"), _("Chi-square"),
 2769         df, LM, pval);
 2770 }
 2771 
 2772 /* For White's test, see if we have enough degrees of freedom
 2773    to add squares -- and if so, if we have enough df to add
 2774    cross-products also.
 2775 */
 2776 
 2777 static int get_whites_aux (const MODEL *pmod, const double **Z)
 2778 {
 2779     int aux = AUX_WHITE;
 2780     int rem = pmod->ncoeff - pmod->ifc - 1;
 2781     int nsq = 0, nxpx = 0;
 2782     int i, v;
 2783 
 2784     for (i=2; i<=pmod->list[0]; i++) {
 2785     v = pmod->list[i];
 2786     if (v > 0) {
 2787         if (!gretl_isdummy(pmod->t1, pmod->t2, Z[v])) {
 2788         nsq++;
 2789         }
 2790         nxpx += rem--;
 2791     }
 2792     }
 2793 
 2794     if (pmod->dfd - nsq < 1) {
 2795     aux = AUX_NONE;
 2796     } else if (pmod->dfd - nsq - nxpx < 1) {
 2797     aux = AUX_SQ;
 2798     }
 2799 
 2800     return aux;
 2801 }
 2802 
 2803 #define PT_DEBUG 0
 2804 
 2805 /**
 2806  * tsls_hetero_test:
 2807  * @pmod: pointer to model.
 2808  * @dset: dataset struct.
 2809  * @opt: if flags include OPT_S, save results to model; OPT_Q
 2810  * means don't print the auxiliary regression.
 2811  * @prn: gretl printing struct.
 2812  *
 2813  * Runs Pesaran and Taylor's (1999) HET_1 test for heteroskedasticity
 2814  * on the given tsls model. The statistic is just a t-statistic, so
 2815  * under the null it is asymptotically distributed as standard normal.
 2816  *
 2817  * Returns: 0 on successful completion, error code on error.
 2818  */
 2819 
 2820 static int tsls_hetero_test (MODEL *pmod, DATASET *dset,
 2821                  gretlopt opt, PRN *prn)
 2822 {
 2823     int pos, newv = dset->v;
 2824     int *ptlist = NULL, *testlist = NULL;
 2825     int save_t1 = dset->t1;
 2826     int save_t2 = dset->t2;
 2827     MODEL ptmod;
 2828     double x;
 2829     int i, h, t;
 2830     int err = 0;
 2831 
 2832     pos = gretl_list_separator_position(pmod->list);
 2833     h = pmod->list[0] - pos;
 2834 
 2835 #if PT_DEBUG
 2836     pprintf(prn, "newv = %d, h = %d\n", newv, h);
 2837 #endif
 2838 
 2839     ptlist = gretl_list_new(h + 1);
 2840     testlist = gretl_list_new(3);
 2841 
 2842     if (ptlist == NULL || testlist == NULL) {
 2843     free(ptlist);
 2844     free(testlist);
 2845     return E_ALLOC;
 2846     }
 2847 
 2848     ptlist[1] = pmod->list[1];
 2849     for (i=2; i<=ptlist[0]; i++) {
 2850     ptlist[i] = pmod->list[i + pos - 1];
 2851     }
 2852 
 2853     testlist[1] = newv;
 2854     testlist[2] = 0;
 2855     testlist[3] = newv + 1;
 2856 
 2857 #if PT_DEBUG
 2858     printlist(ptlist, "ptlist");
 2859     printlist(testlist, "testlist");
 2860 #endif
 2861 
 2862     /* reduced form: regress the original dependent variable on all of
 2863        the instruments from the original model
 2864     */
 2865     ptmod = lsq(ptlist, dset, OLS, OPT_A);
 2866     err = ptmod.errcode;
 2867     if (err) {
 2868     goto bailout;
 2869     }
 2870 
 2871 #if PT_DEBUG
 2872     printmodel(&ptmod, dset, OPT_S, prn);
 2873 #endif
 2874 
 2875     /* add two series: (i) the squares of the residuals from the
 2876        original model and (ii) the squares of the fitted values from
 2877        the auxiliary regression above
 2878      */
 2879     err = dataset_add_series(dset, 2);
 2880     if (err) {
 2881     clear_model(&ptmod);
 2882     goto bailout;
 2883     }
 2884 
 2885     strcpy(dset->varname[newv+1], "yhat^2");
 2886 
 2887     for (t=pmod->t1; t<=pmod->t2; t++) {
 2888     x = pmod->uhat[t];
 2889     dset->Z[newv][t] = x*x;   /* squared residual */
 2890     x = ptmod.yhat[t];
 2891     dset->Z[newv+1][t] = x*x; /* squared fitted value */
 2892     }
 2893 
 2894     clear_model(&ptmod);
 2895 
 2896     dset->t1 = pmod->t1;
 2897     dset->t2 = pmod->t2;
 2898 
 2899     /* regress the squared residuals on the squared fitted
 2900        values from the reduced-form auxiliary regression
 2901     */
 2902     ptmod = lsq(testlist, dset, OLS, OPT_A);
 2903     err = ptmod.errcode;
 2904 
 2905     if (!err) {
 2906     double z = fabs(ptmod.coeff[1]) / ptmod.sderr[1];
 2907     double pval = 2.0 * (1 - normal_cdf(z));
 2908 
 2909     if (opt & OPT_Q) {
 2910         print_HET_1(z, pval, prn);
 2911     } else {
 2912         ptmod.aux = AUX_HET_1;
 2913         printmodel(&ptmod, dset, OPT_NONE, prn);
 2914     }
 2915 
 2916     if (opt & OPT_S) {
 2917         ModelTest *test = model_test_new(GRETL_TEST_HET_1);
 2918 
 2919         if (test != NULL) {
 2920         model_test_set_teststat(test, GRETL_STAT_Z);
 2921         model_test_set_value(test, z);
 2922         model_test_set_pvalue(test, pval);
 2923         maybe_add_test_to_model(pmod, test);
 2924         }
 2925     }
 2926 
 2927     record_test_result(z, pval);
 2928     }
 2929 
 2930     clear_model(&ptmod);
 2931 
 2932     dataset_drop_last_variables(dset, 2);
 2933 
 2934  bailout:
 2935 
 2936     free(ptlist);
 2937     free(testlist);
 2938 
 2939     dset->t1 = save_t1;
 2940     dset->t2 = save_t2;
 2941 
 2942     return err;
 2943 }
 2944 
 2945 /* Compute LM statistic as per Breusch and Pagan (Econometrica, 1979),
 2946    with the option to use the robust variance estimator proposed by
 2947    Koenker (Journal of Econometrics, 1981).
 2948 */
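
/* For reference, with s2 = ess/nobs and g_t defined as u_t^2 / s2
   (Breusch-Pagan) or as u_t^2 - s2 (Koenker), the code below regresses
   g_t on the original regressors and forms

       LM = RSS / 2                                      (Breusch-Pagan)
       LM = RSS / V,  V = (1/n) * sum_t (u_t^2 - s2)^2   (Koenker)

   where RSS denotes the explained sum of squares from that auxiliary
   regression.
*/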
 2949 
 2950 static double get_BP_LM (MODEL *pmod, int *list, MODEL *aux,
 2951              DATASET *dset, gretlopt opt, int *err)
 2952 {
 2953     double s2, u2t, gt;
 2954     double V = 0.0, LM = NADBL;
 2955     int t, v = list[1];
 2956 
 2957     /* note: no df adjustment */
 2958     s2 = pmod->ess / pmod->nobs;
 2959 
 2960     if (opt & OPT_R) {
 2961     /* calculate robust variance estimate a la Koenker */
 2962     for (t=pmod->t1; t<=pmod->t2; t++) {
 2963         if (!na(pmod->uhat[t])) {
 2964         u2t = pmod->uhat[t] * pmod->uhat[t];
 2965         V += (u2t - s2) * (u2t - s2);
 2966         }
 2967     }
 2968     V /= pmod->nobs;
 2969     }
 2970 
 2971     for (t=pmod->t1; t<=pmod->t2; t++) {
 2972     if (na(pmod->uhat[t])) {
 2973         dset->Z[v][t] = NADBL;
 2974     } else {
 2975         u2t = pmod->uhat[t] * pmod->uhat[t];
 2976         gt = (opt & OPT_R)? (u2t - s2) : (u2t / s2);
 2977         dset->Z[v][t] = gt;
 2978     }
 2979     }
 2980 
 2981     *aux = lsq(list, dset, OLS, OPT_A);
 2982     *err = aux->errcode;
 2983 
 2984     if (!*err) {
 2985     double RSS = aux->tss - aux->ess;
 2986 
 2987     if (RSS < 0) {
 2988         *err = E_DATA;
 2989     } else {
 2990         if (opt & OPT_R) {
 2991         aux->opt |= OPT_R;
 2992         LM = RSS / V;
 2993         } else {
 2994         LM = .5 * RSS;
 2995         }
 2996         gretl_model_set_double(aux, "BPLM", LM);
 2997         aux->aux = AUX_BP;
 2998     }
 2999     }
 3000 
 3001     return LM;
 3002 }
 3003 
 3004 static int *ensure_const_copy_list (const MODEL *pmod, int *err)
 3005 {
 3006     int *list = NULL;
 3007 
 3008     if (pmod->ifc) {
 3009     list = gretl_list_copy(pmod->list);
 3010     } else {
 3011     int i, nl = pmod->list[0] + 1;
 3012 
 3013     list = gretl_list_new(nl);
 3014     if (list != NULL) {
 3015         list[1] = pmod->list[1];
 3016         list[2] = 0; /* insert const */
 3017         for (i=3; i<=nl; i++) {
 3018         list[i] = pmod->list[i-1];
 3019         }
 3020     }
 3021     }
 3022 
 3023     if (list == NULL) {
 3024     *err = E_ALLOC;
 3025     }
 3026 
 3027     return list;
 3028 }
 3029 
 3030 static int *ensure_const_augment_list (const MODEL *pmod, int aux,
 3031                        DATASET *dset, int *err)
 3032 {
 3033     int *list = NULL;
 3034 
 3035     if (pmod->ifc) {
 3036     list = augment_regression_list(pmod->list, aux, dset, err);
 3037     } else {
 3038     int *tmp = ensure_const_copy_list(pmod, err);
 3039 
 3040     if (!*err) {
 3041         list = augment_regression_list(tmp, aux, dset, err);
 3042         free(tmp);
 3043     }
 3044     }
 3045 
 3046     return list;
 3047 }
 3048 
 3049 /**
 3050  * whites_test:
 3051  * @pmod: pointer to model.
 3052  * @dset: dataset struct.
 3053  * @opt: if flags include OPT_S, save results to model; OPT_Q
 3054  * means don't print the auxiliary regression; OPT_B means
 3055  * do the simpler Breusch-Pagan variant (OPT_R then selects
 3056  * Koenker's robust variance estimator); OPT_X means use squares
 * only, omitting cross-products; OPT_I means run silently.
 3057  * @prn: gretl printing struct.
 3058  *
 3059  * Runs White's test for heteroskedasticity on the given model.
 3060  *
 3061  * Returns: 0 on successful completion, error code on error.
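 *
 * A brief sketch (assuming @pmod comes from lsq() and a printer
 * @prn is available):
 *
 * <informalexample><programlisting>
 * // White's test proper, saving the result to the model
 * int err = whites_test(pmod, dset, OPT_S, prn);
 *
 * // or: the Breusch-Pagan variant with Koenker's robust variance
 * err = whites_test(pmod, dset, OPT_B | OPT_R, prn);
 * </programlisting></informalexample>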
 3062  */
 3063 
 3064 int whites_test (MODEL *pmod, DATASET *dset,
 3065          gretlopt opt, PRN *prn)
 3066 {
 3067     int BP = (opt & OPT_B);
 3068     int aux = AUX_NONE;
 3069     int v = dset->v;
 3070     int *list = NULL;
 3071     int save_t1 = dset->t1;
 3072     int save_t2 = dset->t2;
 3073     double zz, LM = 0;
 3074     MODEL white;
 3075     int t, err = 0;
 3076 
 3077     if (pmod->ci == IVREG) {
 3078     return tsls_hetero_test(pmod, dset, opt, prn);
 3079     }
 3080 
 3081     if (pmod->list == NULL || gretl_list_has_separator(pmod->list)) {
 3082     return E_NOTIMP;
 3083     }
 3084 
 3085     if (pmod->ci == NLS || pmod->ci == MLE ||
 3086     pmod->ci == GMM || pmod->ci == ARMA ||
 3087     pmod->ci == LOGISTIC) {
 3088     return E_NOTIMP;
 3089     }
 3090 
 3091     if ((err = list_members_replaced(pmod, dset))) {
 3092     return err;
 3093     }
 3094 
 3095     impose_model_smpl(pmod, dset);
 3096 
 3097     /* what can we do, with the degrees of freedom available? */
 3098     if (!BP) {
 3099     if (opt & OPT_X) {
 3100         aux = AUX_SQ;
 3101     } else {
 3102         aux = get_whites_aux(pmod, (const double **) dset->Z);
 3103         if (aux == AUX_NONE) {
 3104         dset->t1 = save_t1;
 3105         dset->t2 = save_t2;
 3106         return E_DF;
 3107         }
 3108     }
 3109     }
 3110 
 3111     gretl_model_init(&white, dset);
 3112 
 3113     /* make space in data set */
 3114     if (dataset_add_series(dset, 1)) {
 3115     err = E_ALLOC;
 3116     }
 3117 
 3118     if (!err) {
 3119     /* get residuals, square and add to data set */
 3120     for (t=0; t<dset->n; t++) {
 3121         zz = pmod->uhat[t];
 3122         if (na(zz)) {
 3123         dset->Z[v][t] = NADBL;
 3124         } else {
 3125         dset->Z[v][t] = zz * zz;
 3126         }
 3127     }
 3128     strcpy(dset->varname[v], "uhatsq");
 3129     }
 3130 
 3131     if (!err) {
 3132     if (BP) {
 3133         list = ensure_const_copy_list(pmod, &err);
 3134     } else {
 3135         /* build aux regression list, adding squares and
 3136            (possibly) cross-products of the original
 3137            independent vars */
 3138         list = ensure_const_augment_list(pmod, aux, dset, &err);
 3139     }
 3140     if (!err) {
 3141         list[1] = v; /* the newly added uhat-squared */
 3142     }
 3143     }
 3144 
 3145     if (!err) {
 3146     /* run auxiliary regression */
 3147     if (BP) {
 3148         LM = get_BP_LM(pmod, list, &white, dset, opt, &err);
 3149     } else {
 3150         white = lsq(list, dset, OLS, OPT_A);
 3151         err = white.errcode;
 3152         if (!err) {
 3153         LM = white.rsq * white.nobs;
 3154         white.aux = AUX_WHITE;
 3155         }
 3156     }
 3157     }
 3158 
 3159     if (!err) {
 3160     int df = white.ncoeff - 1;
 3161     double pval = chisq_cdf_comp(df, LM);
 3162     gretlopt testopt = OPT_NONE;
 3163 
 3164     if (BP) {
 3165         if (opt & OPT_R) {
 3166         /* Koenker robust variant */
 3167         testopt = OPT_R;
 3168         }
 3169     } else if (opt & OPT_X) {
 3170         /* squares only */
 3171         testopt = OPT_X;
 3172     }
 3173 
 3174     if (!(opt & OPT_I)) {
 3175         if (opt & OPT_Q) {
 3176         print_whites_test(LM, df, pval, opt, prn);
 3177         } else {
 3178         white.opt |= testopt;
 3179         printmodel(&white, dset, OPT_NONE, prn);
 3180         }
 3181     }
 3182 
 3183     if (opt & OPT_S) {
 3184         ModelTestType mt = BP? GRETL_TEST_BP : GRETL_TEST_WHITES;
 3185         ModelTest *test = model_test_new(mt);
 3186 
 3187         if (test != NULL) {
 3188         model_test_set_teststat(test, GRETL_STAT_LM);
 3189         model_test_set_dfn(test, df);
 3190         model_test_set_value(test, LM);
 3191         model_test_set_pvalue(test, pval);
 3192         model_test_set_opt(test, testopt);
 3193         maybe_add_test_to_model(pmod, test);
 3194         }
 3195     }
 3196 
 3197     record_test_result(LM, pval);
 3198     }
 3199 
 3200     clear_model(&white);
 3201     dataset_drop_last_variables(dset, dset->v - v);
 3202     free(list);
 3203 
 3204     dset->t1 = save_t1;
 3205     dset->t2 = save_t2;
 3206 
 3207     return err;
 3208 }
 3209 
 3210 static int ar_list_max (const int *list)
 3211 {
 3212     int i, lmax = 0;
 3213 
 3214     for (i=1; i<=list[0]; i++) {
 3215     if (list[i] > lmax) {
 3216         lmax = list[i];
 3217     }
 3218     }
 3219 
 3220     return lmax;
 3221 }
 3222 
 3223 /**
 3224  * ar_model:
 3225  * @list: list of lags plus dependent variable and list of regressors.
 3226  * @dset: dataset struct.
 3227  * @opt: may contain OPT_O to print covariance matrix.
 3228  * @prn: gretl printing struct.
 3229  *
 3230  * Estimate the model given in @list using the generalized
 3231  * Cochrane-Orcutt procedure for autoregressive errors.
 3232  *
 3233  * Returns: #MODEL struct containing the results.
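 *
 * The expected form of @list is: the AR lags, then the list separator
 * (LISTSEP), then the dependent variable followed by the regressors.
 * A hypothetical sketch, with yno, x1 and x2 standing for series ID
 * numbers:
 *
 * <informalexample><programlisting>
 * // AR lags 1 and 3; 0 is the constant
 * int list[8] = {7, 1, 3, LISTSEP, yno, 0, x1, x2};
 *
 * MODEL am = ar_model(list, dset, OPT_NONE, prn);
 * int err = am.errcode;
 *
 * clear_model(&amp;am);
 * </programlisting></informalexample>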
 3234  */
 3235 
 3236 MODEL ar_model (const int *list, DATASET *dset,
 3237         gretlopt opt, PRN *prn)
 3238 {
 3239     double diff, ess, tss, xx;
 3240     int i, j, vi, t, t1, t2, vc, yno, ryno, iter;
 3241     int k, maxlag, v = dset->v;
 3242     int *arlist = NULL, *rholist = NULL;
 3243     int *reglist = NULL, *reglist2 = NULL;
 3244     int cpos, nvadd;
 3245     MODEL ar, rhomod;
 3246 
 3247     gretl_error_clear();
 3248     gretl_model_init(&ar, dset);
 3249     gretl_model_init(&rhomod, dset);
 3250 
 3251     ar.errcode = gretl_list_split_on_separator(list, &arlist, &reglist);
 3252 
 3253     if (!ar.errcode && arlist[0] == 1 && arlist[1] == 1) {
 3254     /* special case: ar 1 ; ... => use AR1 apparatus */
 3255     ar = ar1_model(reglist, dset, opt, prn);
 3256     goto bailout;
 3257     }
 3258 
 3259     if (!ar.errcode) {
 3260     reglist2 = gretl_list_new(reglist[0]);
 3261     rholist = gretl_list_new(arlist[0] + 1);
 3262     if (reglist2 == NULL || rholist == NULL) {
 3263         ar.errcode = E_ALLOC;
 3264     }
 3265     }
 3266 
 3267     if (ar.errcode) {
 3268     goto bailout;
 3269     }
 3270 
 3271     maxlag = ar_list_max(arlist);
 3272     cpos = reglist_check_for_const(reglist, dset);
 3273 
 3274     /* first pass: estimate the model via OLS, using OPT_M to flag an
 3275        error in case of missing values within the sample range
 3276     */
 3277     ar = lsq(reglist, dset, OLS, OPT_A | OPT_M);
 3278     if (ar.errcode) {
 3279     goto bailout;
 3280     }
 3281 
 3282     nvadd = arlist[0] + 1 + reglist[0];
 3283 
 3284     /* allocate space for the uhat terms and transformed data */
 3285     if (dataset_add_series(dset, nvadd)) {
 3286     ar.errcode = E_ALLOC;
 3287     goto bailout;
 3288     }
 3289 
 3290     yno = reglist[1];
 3291     t1 = ar.t1;
 3292     t2 = ar.t2;
 3293     rholist[1] = v;
 3294 
 3295     pprintf(prn, "%s\n\n", _("Generalized Cochrane-Orcutt estimation"));
 3296     bufspace(17, prn);
 3297     /* xgettext:no-c-format */
 3298     pputs(prn, _("ITER             ESS           % CHANGE"));
 3299     pputs(prn, "\n\n");
 3300 
 3301     /* now loop while ess is changing */
 3302     diff = 1.0e6;
 3303     ess = 0.0;
 3304 
 3305     for (iter = 1; iter <= 20 && diff > 0.005; iter++) {
 3306     for (t=0; t<dset->n; t++) {
 3307         if (t < t1 || t > t2) {
 3308         dset->Z[v][t] = NADBL;
 3309         } else {
 3310         /* special computation of uhat */
 3311         xx = dset->Z[yno][t];
 3312         for (j=0; j<reglist[0]-1; j++) {
 3313             xx -= ar.coeff[j] * dset->Z[reglist[j+2]][t];
 3314         }
 3315         dset->Z[v][t] = xx;
 3316         }
 3317     }
 3318     for (i=1; i<=arlist[0]; i++) {
 3319         k = arlist[i];
 3320         rholist[1+i] = v + i;
 3321         for (t=0; t<dset->n; t++) {
 3322         if (t < t1 + k || t > t2) {
 3323             dset->Z[v+i][t] = NADBL;
 3324         } else {
 3325             dset->Z[v+i][t] = dset->Z[v][t-k];
 3326         }
 3327         }
 3328     }
 3329 
 3330     /* estimate the rho terms */
 3331     if (iter > 1) {
 3332         clear_model(&rhomod);
 3333     }
 3334     rhomod = lsq(rholist, dset, OLS, OPT_A);
 3335 
 3336     /* and rho-transform the data */
 3337     ryno = vc = v + i;
 3338     for (i=1; i<=reglist[0]; i++) {
 3339         vi = reglist[i];
 3340         for (t=0; t<dset->n; t++) {
 3341         if (t < t1 + maxlag || t > t2) {
 3342             dset->Z[vc][t] = NADBL;
 3343         } else {
 3344             xx = dset->Z[vi][t];
 3345             for (j=1; j<=arlist[0]; j++) {
 3346             k = arlist[j];
 3347             xx -= rhomod.coeff[j-1] * dset->Z[vi][t-k];
 3348             }
 3349             dset->Z[vc][t] = xx;
 3350         }
 3351         }
 3352         reglist2[i] = vc++;
 3353     }
 3354 
 3355     /* estimate the transformed model */
 3356     clear_model(&ar);
 3357     ar = lsq(reglist2, dset, OLS, OPT_A);
 3358 
 3359         if (iter > 1) {
 3360         diff = fabs(100 * (ar.ess - ess) / ess);
 3361     }
 3362 
 3363     ess = ar.ess;
 3364     pprintf(prn, "%16c%3d %20f ", ' ', iter, ess);
 3365 
 3366     if (iter > 1) {
 3367         pprintf(prn, "%13.3f\n", diff);
 3368     } else {
 3369         pputc(prn, '\n');
 3370     }
 3371     } /* end "ess changing" loop */
 3372 
 3373     for (i=0; i<=reglist[0]; i++) {
 3374     ar.list[i] = reglist[i];
 3375     }
 3376     if (cpos > 0) {
 3377     ar.ifc = 1;
 3378     }
 3379     if (ar.ifc) {
 3380     if (!gretl_model_get_int(&ar, "effconst")) {
 3381         ar.dfn -= 1;
 3382     }
 3383     }
 3384     ar.ci = AR;
 3385 
 3386     /* special computation of fitted values */
 3387     for (t=t1; t<=t2; t++) {
 3388     xx = 0.0;
 3389     for (j=2; j<=reglist[0]; j++) {
 3390         xx += ar.coeff[j-2] * dset->Z[reglist[j]][t];
 3391     }
 3392     ar.uhat[t] = dset->Z[yno][t] - xx;
 3393     for (j=1; j<=arlist[0]; j++) {
 3394         if (t - t1 >= arlist[j]) {
 3395         k = arlist[j];
 3396         xx += rhomod.coeff[j-1] * ar.uhat[t-k];
 3397         }
 3398     }
 3399     ar.yhat[t] = xx;
 3400     }
 3401 
 3402     for (t=t1; t<=t2; t++) {
 3403     ar.uhat[t] = dset->Z[yno][t] - ar.yhat[t];
 3404     }
 3405 
 3406     ar.rsq = gretl_corr_rsq(ar.t1, ar.t2, dset->Z[reglist[1]], ar.yhat);
 3407     ar.adjrsq = 1.0 - ((1.0 - ar.rsq) * (ar.nobs - 1.0) / ar.dfd);
 3408 
 3409     /* special computation of TSS */
 3410     xx = gretl_mean(ar.t1, ar.t2, dset->Z[ryno]);
 3411     tss = 0.0;
 3412     for (t=ar.t1; t<=ar.t2; t++) {
 3413     tss += (dset->Z[ryno][t] - xx) * (dset->Z[ryno][t] - xx);
 3414     }
 3415     if (ar.dfn == 0) {
 3416     ar.fstt = NADBL;
 3417     } else {
 3418     ar.fstt = ar.dfd * (tss - ar.ess) / (ar.dfn * ar.ess);
 3419     }
 3420     ar.lnL = NADBL;
 3421     mle_criteria(&ar, 0);
 3422     ar.dw = dwstat(maxlag, &ar, dset);
 3423     ar.rho = rhohat(maxlag, ar.t1, ar.t2, ar.uhat);
 3424 
 3425     dataset_drop_last_variables(dset, nvadd);
 3426 
 3427     if (gretl_model_add_arinfo(&ar, maxlag)) {
 3428     ar.errcode = E_ALLOC;
 3429     } else {
 3430     double *y = dset->Z[reglist[1]];
 3431 
 3432     for (i=0; i<=arlist[0]; i++) {
 3433         ar.arinfo->arlist[i] = arlist[i];
 3434         if (i >= 1) {
 3435         ar.arinfo->rho[i-1] = rhomod.coeff[i-1];
 3436         ar.arinfo->sderr[i-1] = rhomod.sderr[i-1];
 3437         }
 3438     }
 3439     ar.ybar = gretl_mean(ar.t1, ar.t2, y);
 3440     ar.sdy = gretl_stddev(ar.t1, ar.t2, y);
 3441     }
 3442     clear_model(&rhomod);
 3443 
 3444     set_model_id(&ar, opt);
 3445 
 3446  bailout:
 3447 
 3448     free(reglist);
 3449     free(reglist2);
 3450     free(rholist);
 3451     free(arlist);
 3452 
 3453     return ar;
 3454 }
 3455 
 3456 static int modelvar_iszero (const MODEL *pmod, const double *x)
 3457 {
 3458     int t;
 3459 
 3460     for (t=pmod->t1; t<=pmod->t2; t++) {
 3461     if (!model_missing(pmod, t) && floatneq(x[t], 0.0)) {
 3462         return 0;
 3463     }
 3464     }
 3465 
 3466     return 1;
 3467 }
 3468 
 3469 /* From the position of the first regressor to the end of the list,
 3470    omit any variable whose observations are all zero (or all zero
 3471    once weighted, in the WLS case) and repack the remainder. If the
 3472    regression in question is not just an auxiliary one (OPT_A),
 3473    record the list of dropped variables, if any, on @pmod.
 3474 */
 3475 
 3476 static void omitzero (MODEL *pmod, const DATASET *dset,
 3477               gretlopt opt)
 3478 {
 3479     int *zlist = NULL;
 3480     int i, v, offset;
 3481     double x = 0.0;
 3482 
 3483     offset = (pmod->ci == WLS)? 3 : 2;
 3484 
 3485     if (!(opt & OPT_A)) {
 3486     zlist = gretl_null_list();
 3487     }
 3488 
 3489     for (i=pmod->list[0]; i>=offset; i--) {
 3490         v = pmod->list[i];
 3491         if (modelvar_iszero(pmod, dset->Z[v])) {
 3492         if (zlist != NULL) {
 3493         gretl_list_append_term(&zlist, v);
 3494         }
 3495         fprintf(stderr, "Deleting var %d (%s) at list pos %d: all zero\n",
 3496             v, dset->varname[v], i);
 3497         gretl_list_delete_at_pos(pmod->list, i);
 3498     }
 3499     }
 3500 
 3501     if (pmod->nwt) {
 3502     int t, wtzero;
 3503 
 3504     for (i=pmod->list[0]; i>=offset; i--) {
 3505         v = pmod->list[i];
 3506         wtzero = 1;
 3507         for (t=pmod->t1; t<=pmod->t2; t++) {
 3508         x = dset->Z[v][t] * dset->Z[pmod->nwt][t];
 3509         if (!model_missing(pmod, t) && floatneq(x, 0.0)) {
 3510             wtzero = 0;
 3511             break;
 3512         }
 3513         }
 3514         if (wtzero) {
 3515         if (zlist != NULL) {
 3516             gretl_list_append_term(&zlist, v);
 3517         }
 3518         gretl_list_delete_at_pos(pmod->list, i);
 3519         }
 3520     }
 3521     }
 3522 
 3523     if (zlist != NULL) {
 3524     if (zlist[0] > 0) {
 3525         gretl_model_set_list_as_data(pmod, "zerolist", zlist);
 3526     } else {
 3527         free(zlist);
 3528     }
 3529     }
 3530 }
 3531 
 3532 static int not_equal (double y, double x)
 3533 {
 3534     if (na(y) && na(x)) {
 3535     /* y and x are equal for the current purpose */
 3536     return 0;
 3537     } else {
 3538     return y != x;
 3539     }
 3540 }
 3541 
 3542 /* lagdepvar: attempt to detect presence of a lagged dependent
 3543    variable among the regressors -- if found, return the position of
 3544    this lagged var in the list; otherwise return 0.
 3545 
 3546    FIXME: how should this work inside a user-defined function, where
 3547    the dependent variable may have a different name if passed as an argument?
 3548 */
 3549 
 3550 static int lagdepvar (const int *list, const DATASET *dset)
 3551 {
 3552     const char *p, *yname, *xname;
 3553     int xno, yno = list[1];
 3554     int i, t, ret = 0;
 3555 
 3556     yname = dset->varname[yno];
 3557 
 3558     for (i=2; i<=list[0] && !ret; i++) {
 3559     if (list[i] == LISTSEP) {
 3560         break;
 3561     }
 3562     xno = list[i];
 3563     xname = dset->varname[xno];
 3564     p = strrchr(xname, '_');
 3565     if (p != NULL && isdigit(*(p + 1))) {
 3566         /* this looks like a lag */
 3567         size_t len = strlen(xname) - strlen(p);
 3568 
 3569         if (!strncmp(yname, xname, len)) {
 3570         /* strong candidate for lagged depvar, but make sure */
 3571         ret = i;
 3572         for (t=dset->t1+1; t<=dset->t2; t++) {
 3573             if (not_equal(dset->Z[yno][t-1], dset->Z[xno][t])) {
 3574             ret = 0; /* nope */
 3575             break;
 3576             }
 3577         }
 3578         }
 3579     }
 3580     }
 3581 
 3582     return ret;
 3583 }
 3584 
 3585 static void arch_test_print_simple (int order, double LM,
 3586                     double pval, gretlopt opt,
 3587                     PRN *prn)
 3588 {
 3589     if (opt & OPT_M) {
 3590     /* multi-equation system */
 3591     pprintf(prn, "%s: TR^2 = %f,\n", _("Test statistic"), LM);
 3592     } else {
 3593     pprintf(prn, "\n%s %d\n", _("Test for ARCH of order"), order);
 3594     pprintf(prn, "\n%s: TR^2 = %f,\n", _("Test statistic"), LM);
 3595     }
 3596     pprintf(prn, "%s = P(%s(%d) > %f) = %f\n\n",
 3597         _("with p-value"), _("Chi-square"),
 3598         order, LM, pval);
 3599 }
 3600 
 3601 static void print_arch_regression (const gretl_matrix *b,
 3602                    const gretl_matrix *V,
 3603                    int T, int order,
 3604                    gretlopt opt, PRN *prn)
 3605 {
 3606     int i, k = order + 1;
 3607     double *se = malloc(k * sizeof *se);
 3608     char **names;
 3609 
 3610     names = strings_array_new_with_length(k, 16);
 3611 
 3612     if (se != NULL && names != NULL) {
 3613     if (!(opt & OPT_M)) {
 3614         /* not multi-equation */
 3615         pputc(prn, '\n');
 3616         pprintf(prn, _("Test for ARCH of order %d"), order);
 3617         pputs(prn, "\n\n");
 3618     }
 3619 
 3620     for (i=0; i<k; i++) {
 3621         se[i] = sqrt(gretl_matrix_get(V, i, i));
 3622         sprintf(names[i], "alpha(%d)", i);
 3623     }
 3624 
 3625     /* is a df correction wanted here? */
 3626     print_coeffs(b->val, se, (const char **) names,
 3627              k, T - k, ARCH, prn);
 3628     }
 3629 
 3630     free(se);
 3631     strings_array_free(names, k);
 3632 }
 3633 
 3634 static void
 3635 arch_test_save_or_print (const gretl_matrix *b, const gretl_matrix *V,
 3636              int T, int order, double rsq, MODEL *pmod,
 3637              gretlopt opt, PRN *prn)
 3638 {
 3639     double LM = T * rsq;
 3640     double pv = chisq_cdf_comp(order, LM);
 3641 
 3642     record_test_result(LM, pv);
 3643 
 3644     if (!(opt & OPT_I)) {
 3645     if (V != NULL) {
 3646         /* V will be NULL if --quiet is in force */
 3647         print_arch_regression(b, V, T, order, opt, prn);
 3648     }
 3649     if (opt & OPT_Q) {
 3650         arch_test_print_simple(order, LM, pv, opt, prn);
 3651     }
 3652     }
 3653 
 3654     if ((opt & OPT_S) || !(opt & OPT_Q)) {
 3655     ModelTest *test = model_test_new(GRETL_TEST_ARCH);
 3656 
 3657     if (test != NULL) {
 3658         int heading = (V == NULL);
 3659 
 3660         model_test_set_teststat(test, GRETL_STAT_LM);
 3661         model_test_set_order(test, order);
 3662         model_test_set_dfn(test, order);
 3663         model_test_set_value(test, LM);
 3664         model_test_set_pvalue(test, pv);
 3665 
 3666         if (!(opt & OPT_Q)) {
 3667         gretl_model_test_print_direct(test, heading, prn);
 3668         }
 3669 
 3670         if (pmod != NULL && (opt & OPT_S)) {
 3671         maybe_add_test_to_model(pmod, test);
 3672         } else {
 3673         model_test_free(test);
 3674         }
 3675     }
 3676     }
 3677 }
 3678 
 3679 static int real_arch_test (const double *u, int T, int order,
 3680                MODEL *pmod, gretlopt opt,
 3681                PRN *prn)
 3682 {
 3683     gretl_matrix *X = NULL;
 3684     gretl_matrix *y = NULL;
 3685     gretl_matrix *b = NULL;
 3686     gretl_matrix *V = NULL;
 3687     int i, k, s, t;
 3688     double x, s2, rsq;
 3689     double *ps2 = NULL;
 3690     int err = 0;
 3691 
 3692     gretl_error_clear();
 3693 
 3694     if (order < 1 || order > T - 1) {
 3695     gretl_errmsg_sprintf(_("Invalid lag order for arch (%d)"), order);
 3696     return E_DATA;
 3697     }
 3698 
 3699     T -= order;
 3700     k = order + 1;
 3701 
 3702     X = gretl_matrix_alloc(T, k);
 3703     y = gretl_column_vector_alloc(T);
 3704     b = gretl_column_vector_alloc(k);
 3705 
 3706     if (X == NULL || y == NULL || b == NULL) {
 3707     gretl_matrix_free(X);
 3708     gretl_matrix_free(y);
 3709     gretl_matrix_free(b);
 3710     return E_ALLOC;
 3711     }
 3712 
 3713     if (!(opt & OPT_Q)) {
 3714     V = gretl_matrix_alloc(k, k);
 3715     if (V == NULL) {
 3716         err = E_ALLOC;
 3717         goto bailout;
 3718     }
 3719     ps2 = &s2;
 3720     }
 3721 
 3722     /* fill out the matrices with squared residuals
 3723        and lags of same */
 3724 
 3725     for (i=0; i<k; i++) {
 3726     for (t=0; t<T; t++) {
 3727         s = t + order;
 3728         if (i == 0) {
 3729         x = u[s];
 3730         gretl_vector_set(y, t, x * x);
 3731         gretl_matrix_set(X, t, i, 1.0);
 3732         } else {
 3733         x = u[s - i];
 3734         gretl_matrix_set(X, t, i, x * x);
 3735         }
 3736     }
 3737     }
 3738 
 3739     err = gretl_matrix_ols(y, X, b, V, NULL, ps2);
 3740 
 3741     if (!err) {
 3742     rsq = gretl_matrix_r_squared(y, X, b, &err);
 3743     }
 3744 
 3745     if (!err) {
 3746     arch_test_save_or_print(b, V, T, order, rsq, pmod,
 3747                 opt, prn);
 3748     }
 3749 
 3750  bailout:
 3751 
 3752     gretl_matrix_free(X);
 3753     gretl_matrix_free(y);
 3754     gretl_matrix_free(b);
 3755     gretl_matrix_free(V);
 3756 
 3757     return err;
 3758 }
 3759 
 3760 /**
 3761  * arch_test:
 3762  * @pmod: model to be tested.
 3763  * @order: lag order for ARCH process.
 3764  * @dset: dataset struct.
 3765  * @opt: if flags include OPT_S, save test results to model;
 3766  * if OPT_Q, be less verbose; if OPT_I, be silent.
 3767  * @prn: gretl printing struct.
 3768  *
 3769  * Tests @pmod for AutoRegressive Conditional Heteroskedasticity.
 3770  *
 3771  * Returns: 0 on success, non-zero code on error.
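 *
 * For example (a sketch; @pmod is assumed to come from lsq()). Note
 * that passing 0 for @order selects the data frequency as the default.
 *
 * <informalexample><programlisting>
 * // test for ARCH of order 4, attaching the result to the model
 * int err = arch_test(pmod, 4, dset, OPT_S, prn);
 * </programlisting></informalexample>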
 3772  */
 3773 
 3774 int arch_test (MODEL *pmod, int order, const DATASET *dset,
 3775            gretlopt opt, PRN *prn)
 3776 {
 3777     int err;
 3778 
 3779     if (pmod->missmask != NULL) {
 3780     err = E_MISSDATA;
 3781     } else {
 3782     const double *u = pmod->uhat + pmod->t1;
 3783 
 3784     if (order == 0) {
 3785         /* use data frequency as default lag order */
 3786         order = dset->pd;
 3787     }
 3788 
 3789     err = real_arch_test(u, pmod->nobs, order, pmod,
 3790                  opt, prn);
 3791     }
 3792 
 3793     return err;
 3794 }
 3795 
 3796 int array_arch_test (const double *u, int n, int order,
 3797              gretlopt opt, PRN *prn)
 3798 {
 3799     return real_arch_test(u, n, order, NULL, opt, prn);
 3800 }
 3801 
 3802 /**
 3803  * arch_model:
 3804  * @list: dependent variable plus list of regressors.
 3805  * @order: lag order for ARCH process.
 3806  * @dset: dataset struct.
 3807  * @opt: may contain OPT_O to print the covariance matrix.
 3808  *
 3809  * Estimate the model given in @list via weighted least squares,
 3810  * with the weights based on the predicted error variances from
 3811  * an auxiliary regression of the squared residuals on their lagged
 3812  * values.
 3813  *
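       * The sketch below is illustrative only: it assumes the dataset
       * holds at least two series (IDs 1 and 2) and that prn is a valid
       * #PRN. Series 1 is regressed on a constant (ID 0) and series 2,
       * with ARCH order 1.
       *
       * <informalexample><programlisting>
       * int arlist[] = {3, 1, 0, 2}; // depvar, const, regressor
       * MODEL am;
       *
       * am = arch_model(arlist, 1, dset, OPT_NONE);
       * if (!am.errcode) {
       *     printmodel(&amp;am, dset, OPT_NONE, prn);
       * }
       * clear_model(&amp;am);
       * </programlisting></informalexample>
       *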
 3814  * Returns: a #MODEL struct, containing the estimates.
 3815  */
 3816 
 3817 MODEL arch_model (const int *list, int order, DATASET *dset,
 3818           gretlopt opt)
 3819 {
 3820     MODEL amod;
 3821     int *wlist = NULL, *alist = NULL;
 3822     int T = sample_size(dset);
 3823     int oldv = dset->v;
 3824     int i, t, nwt, k, n = dset->n;
 3825     double *a = NULL;
 3826     double *se = NULL;
 3827     double xx;
 3828 
 3829     gretl_error_clear();
 3830     gretl_model_init(&amod, dset);
 3831 
 3832     if (order == 0) {
 3833     /* use data frequency as default lag order */
 3834     order = dset->pd;
 3835     }
 3836 
 3837     if (order < 1 || order > T - list[0]) {
 3838     amod.errcode = E_UNSPEC;
 3839     gretl_errmsg_sprintf(_("Invalid lag order for arch (%d)"), order);
 3840     return amod;
 3841     }
 3842 
 3843     if (dataset_add_series(dset, order + 1)) {
 3844     amod.errcode = E_ALLOC;
 3845     } else {
 3846     alist = gretl_list_new(order + 2);
 3847     if (alist == NULL) {
 3848         amod.errcode = E_ALLOC;
 3849     }
 3850     }
 3851 
 3852     if (amod.errcode) {
 3853     goto bailout;
 3854     }
 3855 
 3856     /* start list for aux regression */
 3857     alist[1] = dset->v - order - 1;
 3858     alist[2] = 0;
 3859 
 3860     /* run initial OLS and get squared residuals */
 3861     amod = lsq(list, dset, OLS, OPT_A | OPT_M);
 3862     if (amod.errcode) {
 3863     goto bailout;
 3864     }
 3865 
 3866     k = dset->v - order - 1;
 3867     strcpy(dset->varname[k], "utsq");
 3868     for (t=0; t<n; t++) {
 3869     dset->Z[k][t] = NADBL;
 3870     }
 3871     for (t=amod.t1; t<=amod.t2; t++) {
 3872     xx = amod.uhat[t];
 3873     dset->Z[k][t] = xx * xx;
 3874     }
 3875     /* also lags of squared resids */
 3876     for (i=1; i<=order; i++) {
 3877     k = dset->v - order + i - 1;
 3878     alist[i+2] = k;
 3879     sprintf(dset->varname[k], "utsq_%d", i);
 3880     for (t=0; t<n; t++) {
 3881         dset->Z[k][t] = NADBL;
 3882     }
 3883     for (t=amod.t1+i; t<=amod.t2; t++) {
 3884         dset->Z[k][t] = dset->Z[alist[1]][t-i];
 3885     }
 3886     }
 3887 
 3888     /* run auxiliary regression */
 3889     clear_model(&amod);
 3890     amod = lsq(alist, dset, OLS, OPT_A);
 3891     if (amod.errcode) {
 3892     goto bailout;
 3893     }
 3894 
 3895     /* steal the coefficients for reference */
 3896     a = amod.coeff;
 3897     amod.coeff = NULL;
 3898     se = amod.sderr;
 3899     amod.sderr = NULL;
 3900 
 3901     /* do weighted estimation */
 3902     wlist = gretl_list_new(list[0] + 1);
 3903 
 3904     if (wlist == NULL) {
 3905     amod.errcode = E_ALLOC;
 3906     } else {
 3907     /* construct the weight variable */
 3908     nwt = wlist[1] = dset->v - 1;
 3909     strcpy(dset->varname[nwt], "1/sigma");
 3910 
 3911     for (i=2; i<=wlist[0]; i++) {
 3912         wlist[i] = list[i-1];
 3913     }
 3914 
 3915     k = dset->v - order - 1;
 3916 
 3917     for (t=amod.t1; t<=amod.t2; t++) {
 3918         xx = amod.yhat[t];
 3919         if (xx <= 0.0) {
 3920         xx = dset->Z[k][t];
 3921         }
 3922         dset->Z[nwt][t] = 1.0 / xx; /* FIXME is this right? */
 3923     }
 3924 
 3925     clear_model(&amod);
 3926     amod = lsq(wlist, dset, WLS, OPT_NONE);
 3927     amod.ci = ARCH;
 3928 
 3929     if (!amod.errcode) {
 3930         gretl_model_set_int(&amod, "arch_order", order);
 3931         gretl_model_set_data(&amod, "arch_coeff", a,
 3932                  GRETL_TYPE_DOUBLE_ARRAY,
 3933                  (order + 1) * sizeof *a);
 3934         gretl_model_set_data(&amod, "arch_sderr", se,
 3935                  GRETL_TYPE_DOUBLE_ARRAY,
 3936                  (order + 1) * sizeof *se);
 3937     }
 3938     }
 3939 
 3940  bailout:
 3941 
 3942     if (alist != NULL) free(alist);
 3943     if (wlist != NULL) free(wlist);
 3944 
 3945     dataset_drop_last_variables(dset, dset->v - oldv);
 3946 
 3947     return amod;
 3948 }
 3949 
 3950 /**
 3951  * lad_model:
 3952  * @list: dependent variable plus list of regressors.
 3953  * @dset: dataset struct.
 3954  * @opt: may include OPT_Q for quiet operation.
 3955  *
 3956  * Estimate the model given in @list using the method of Least
 3957  * Absolute Deviation (LAD).
 3958  *
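       * A minimal sketch (the series IDs are illustrative assumptions):
       *
       * <informalexample><programlisting>
       * int llist[] = {4, 1, 0, 2, 3}; // depvar, const, two regressors
       * MODEL lmod;
       *
       * lmod = lad_model(llist, dset, OPT_Q);
       * if (!lmod.errcode) {
       *     double b0 = lmod.coeff[0]; // coefficient on the constant
       * }
       * clear_model(&amp;lmod);
       * </programlisting></informalexample>
       *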
 3959  * Returns: a #MODEL struct, containing the estimates.
 3960  */
 3961 
 3962 MODEL lad_model (const int *list, DATASET *dset, gretlopt opt)
 3963 {
 3964     MODEL mod;
 3965     int (*lad_driver) (MODEL *, DATASET *, gretlopt);
 3966 
 3967     /* Run an initial OLS to "set the model up" and check for errors.
 3968        The lad_driver function will overwrite the coefficients, etc.
 3969     */
 3970 
 3971     mod = lsq(list, dset, OLS, OPT_A);
 3972 
 3973     if (mod.errcode) {
 3974         return mod;
 3975     }
 3976 
 3977     lad_driver = get_plugin_function("lad_driver");
 3978 
 3979     if (lad_driver == NULL) {
 3980     fputs(I_("Couldn't load plugin function\n"), stderr);
 3981     mod.errcode = E_FOPEN;
 3982     return mod;
 3983     }
 3984 
 3985     (*lad_driver) (&mod, dset, opt);
 3986     set_model_id(&mod, opt);
 3987 
 3988     return mod;
 3989 }
 3990 
 3991 /**
 3992  * quantreg:
 3993  * @tau: vector containing one or more quantile values, in the range
 3994  * 0.01 to 0.99.
 3995  * @list: model specification: dependent var and regressors.
 3996  * @dset: dataset struct.
 3997  * @opt: may contain OPT_R for robust standard errors,
 3998  * OPT_I to produce confidence intervals.
 3999  * @prn: gretl printing struct.
 4000  *
 4001  * Estimate the model given in @list using the method of
 4002  * quantile regression.
 4003  *
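       * An illustrative sketch, estimating the median (tau = 0.5)
       * regression of series 1 on a constant and series 2 (prn is
       * assumed to be a valid #PRN):
       *
       * <informalexample><programlisting>
       * gretl_matrix *tau = gretl_matrix_alloc(1, 1);
       * int qlist[] = {3, 1, 0, 2};
       * MODEL qmod;
       *
       * gretl_vector_set(tau, 0, 0.5);
       * qmod = quantreg(tau, qlist, dset, OPT_NONE, prn);
       * if (!qmod.errcode) {
       *     printmodel(&amp;qmod, dset, OPT_NONE, prn);
       * }
       * clear_model(&amp;qmod);
       * gretl_matrix_free(tau);
       * </programlisting></informalexample>
       *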
 4004  * Returns: a #MODEL struct, containing the estimates.
 4005  */
 4006 
 4007 MODEL quantreg (const gretl_matrix *tau, const int *list,
 4008         DATASET *dset, gretlopt opt, PRN *prn)
 4009 {
 4010     MODEL mod;
 4011     int (*rq_driver) (const gretl_matrix *, MODEL *, DATASET *,
 4012               gretlopt, PRN *);
 4013     gretlopt olsopt = (OPT_A | OPT_M);
 4014 
 4015     /* Run an initial OLS to "set the model up" and check for errors.
 4016        The driver function will overwrite the coefficients, etc.
 4017        For now we make life easier by rejecting within-sample missing
 4018        values (OPT_M).
 4019     */
 4020 
 4021     if (opt & OPT_R) {
 4022     olsopt |= OPT_R;
 4023     }
 4024 
 4025     mod = lsq(list, dset, OLS, olsopt);
 4026 
 4027     if (mod.errcode) {
 4028         return mod;
 4029     }
 4030 
 4031     rq_driver = get_plugin_function("rq_driver");
 4032 
 4033     if (rq_driver == NULL) {
 4034     fputs(I_("Couldn't load plugin function\n"), stderr);
 4035     mod.errcode = E_FOPEN;
 4036     return mod;
 4037     }
 4038 
 4039     (*rq_driver) (tau, &mod, dset, opt, prn);
 4040     set_model_id(&mod, opt);
 4041 
 4042     return mod;
 4043 }
 4044 
 4045 #ifndef WIN32
 4046 
 4047 static void gretl_glib_grab_output (const char *prog,
 4048                     char **sout)
 4049 {
 4050     gchar *argv[] = { (gchar *) prog, NULL };
 4051     gboolean ok;
 4052 
 4053     ok = g_spawn_sync(NULL, argv, NULL, G_SPAWN_SEARCH_PATH,
 4054               NULL, NULL, sout, NULL,
 4055               NULL, NULL);
 4056 
 4057     if (!ok && *sout != NULL) {
 4058     g_free(*sout);
 4059     *sout = NULL;
 4060     }
 4061 }
 4062 
 4063 #endif
 4064 
 4065 /*
 4066  * get_x12a_maxpd:
 4067  *
 4068  * Retrieve the highest data frequency handled by X-12-ARIMA,
 4069  * which may vary depending on how the program was built.
 4070  * This may be relevant when executing the gretl arima
 4071  * command with the option to use X-12-ARIMA.
 4072  */
 4073 
 4074 int get_x12a_maxpd (void)
 4075 {
 4076     static int n;
 4077 
 4078     if (n == 0) {
 4079     const char *x12a = gretl_x12_arima();
 4080     char *sout = NULL;
 4081 
 4082 #ifdef WIN32
 4083     gretl_win32_grab_output(x12a, gretl_dotdir(), &sout);
 4084 #else
 4085     gretl_glib_grab_output(x12a, &sout);
 4086 #endif
 4087     if (sout != NULL) {
 4088         char *p = strstr(sout, "PSP = ");
 4089 
 4090         if (p != NULL) {
 4091         n = atoi(p + 6);
 4092         }
 4093         free(sout);
 4094     }
 4095     if (n <= 0) {
 4096         n = 12;
 4097     }
 4098     }
 4099 
 4100     return n;
 4101 }
 4102 
 4103 /**
 4104  * arma:
 4105  * @list: AR and MA orders, dependent variable, and any exogenous
 4106  * regressors.
 4107  * @pqlags: list giving specific non-seasonal AR/MA lags (or NULL).
 4108  * @dset: dataset struct.
 4109  * @opt: option flags.
 4110  * @prn: for printing details of iterations (or NULL).
 4111  *
 4112  * Calculates ARMA estimates. By default the estimator is
 4113  * exact ML, implemented via gretl's Kalman filter in
 4114  * conjunction with the BFGS maximizer. Other options:
 4115  * if @opt includes OPT_L we use L-BFGS-B rather than standard
 4116  * BFGS; OPT_C (incompatible with OPT_L) calls for use of
 4117  * conditional ML (BHHH algorithm); and OPT_X requests estimation via
 4118  * X-12-ARIMA rather than native code.
 4119  *
 4120  * If @pqlags is non-NULL it should take the form of two
 4121  * gretl sub-lists joined by #LISTSEP: the first sub-list holds
 4122  * a set of AR lags and the second a list of MA lags. If only
 4123  * AR lags are specified, a single list may be given; if only
 4124  * MA lags are specified, use #LISTSEP with a null first sub-list.
 4125  *
 4126  * If the model specification does not include a constant
 4127  * this is added automatically, unless @opt includes OPT_N
 4128  * ("no constant").
 4129  *
 4130  * When the estimation method is native exact ML, two (mutually
 4131  * exclusive) flags in @opt may be used to control the estimator
 4132  * of the covariance matrix: OPT_G specifies use of the outer
 4133  * product of the gradient (OPG), while OPT_H specifies use of
 4134  * the (numerical) Hessian. If neither of these flags is given, the
 4135  * default is to use the Hessian by preference, but to fall back
 4136  * to OPG if computation of the numerical Hessian fails. These
 4137  * flags are ignored if estimation is not via native exact ML.
 4138  *
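       * A sketch of a typical call follows; it is illustrative only and
       * assumes alist has been built in the form described for @list
       * above, with NULL passed for @pqlags and for the iteration
       * printer.
       *
       * <informalexample><programlisting>
       * MODEL m;
       *
       * m = arma(alist, NULL, dset, OPT_NONE, NULL);
       * if (!m.errcode) {
       *     printmodel(&amp;m, dset, OPT_NONE, prn);
       * }
       * clear_model(&amp;m);
       * </programlisting></informalexample>
       *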
 4139  * Returns: a #MODEL struct, containing the estimates.
 4140  */
 4141 
 4142 MODEL arma (const int *list, const int *pqlags,
 4143         DATASET *dset, gretlopt opt,
 4144         PRN *prn)
 4145 {
 4146     MODEL (*arma_model) (const int *, const int *,
 4147              DATASET *, gretlopt, PRN *);
 4148     MODEL (*arma_x12_model) (const int *, const int *,
 4149                  const DATASET *,
 4150                  int, gretlopt, PRN *);
 4151     MODEL armod;
 4152     int err = 0;
 4153 
 4154     gretl_model_init(&armod, dset);
 4155 
 4156     err = incompatible_options(opt, OPT_G | OPT_H);
 4157     if (!err) {
 4158     err = options_incompatible_with(opt, OPT_X, OPT_R | OPT_S);
 4159     }
 4160     if (err) {
 4161     armod.errcode = err;
 4162     return armod;
 4163     }
 4164 
 4165     if (opt & OPT_X) {
 4166     int pdmax = get_x12a_maxpd();
 4167 
 4168     if ((dset->t2 - dset->t1 + 1) > pdmax * 50) {
 4169         gretl_errmsg_sprintf(_("X-12-ARIMA can't handle more than %d observations.\n"
 4170                    "Please select a smaller sample."), pdmax * 50);
 4171         armod.errcode = E_DATA;
 4172         return armod;
 4173     }
 4174 
 4175     arma_x12_model = get_plugin_function("arma_x12_model");
 4176     if (arma_x12_model == NULL) {
 4177         err = E_FOPEN;
 4178     } else {
 4179         armod = (*arma_x12_model) (list, pqlags, dset, pdmax, opt, prn);
 4180     }
 4181     } else {
 4182     arma_model = get_plugin_function("arma_model");
 4183     if (arma_model == NULL) {
 4184         err = E_FOPEN;
 4185     } else {
 4186         armod = (*arma_model) (list, pqlags, dset, opt, prn);
 4187     }
 4188     }
 4189 
 4190     if (err) {
 4191     fputs(I_("Couldn't load plugin function\n"), stderr);
 4192     armod.errcode = err;
 4193     return armod;
 4194     }
 4195 
 4196     set_model_id(&armod, opt);
 4197 
 4198     return armod;
 4199 }
 4200 
 4201 /**
 4202  * garch:
 4203  * @list: dependent variable plus arch and garch orders.
 4204  * @dset: dataset struct.
 4205  * @opt: can specify robust standard errors and VCV.
 4206  * @prn: for printing details of iterations (or NULL).
 4207  *
 4208  * Calculate GARCH estimates.
 4209  *
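       * Illustrative sketch (glist is assumed to have been built as
       * described for @list above; per the code below, OPT_V passes
       * @prn on for printing iteration details):
       *
       * <informalexample><programlisting>
       * MODEL gm;
       *
       * gm = garch(glist, dset, OPT_V, prn);
       * if (!gm.errcode) {
       *     printmodel(&amp;gm, dset, OPT_NONE, prn);
       * }
       * clear_model(&amp;gm);
       * </programlisting></informalexample>
       *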
 4210  * Returns: a #MODEL struct, containing the estimates.
 4211  */
 4212 
 4213 MODEL garch (const int *list, DATASET *dset, gretlopt opt,
 4214          PRN *prn)
 4215 {
 4216     MODEL mod;
 4217     PRN *myprn;
 4218     MODEL (*garch_model) (const int *, DATASET *, PRN *,
 4219               gretlopt);
 4220 
 4221     gretl_error_clear();
 4222 
 4223     garch_model = get_plugin_function("garch_model");
 4224 
 4225     if (garch_model == NULL) {
 4226     gretl_model_init(&mod, dset);
 4227     mod.errcode = E_FOPEN;
 4228     return mod;
 4229     }
 4230 
 4231     if (opt & OPT_V) {
 4232     myprn = prn;
 4233     } else {
 4234     myprn = NULL;
 4235     }
 4236 
 4237     mod = (*garch_model) (list, dset, myprn, opt);
 4238     set_model_id(&mod, opt);
 4239 
 4240     return mod;
 4241 }
 4242 
 4243 /**
 4244  * mp_ols:
 4245  * @list: specification of variables to use.
 4246  * @dset: dataset struct.
 4247  * @opt: may include OPT_Q for quiet operation.
 4248  *
 4249  * Estimate an OLS model using multiple-precision arithmetic
 4250  * via the GMP library.
 4251  *
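       * Minimal sketch (series IDs are illustrative; prn is assumed to
       * be a valid #PRN):
       *
       * <informalexample><programlisting>
       * int mplist[] = {3, 1, 0, 2}; // depvar, const, regressor
       * MODEL mpm;
       *
       * mpm = mp_ols(mplist, dset, OPT_NONE);
       * if (!mpm.errcode) {
       *     printmodel(&amp;mpm, dset, OPT_NONE, prn);
       * }
       * clear_model(&amp;mpm);
       * </programlisting></informalexample>
       *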
 4252  * Returns: a #MODEL struct, containing the estimates.
 4253  */
 4254 
 4255 MODEL mp_ols (const int *list, DATASET *dset, gretlopt opt)
 4256 {
 4257     int (*mplsq)(const int *, const int *, const int *,
 4258          DATASET *, MODEL *,
 4259          gretlopt);
 4260     MODEL mpmod;
 4261 
 4262     gretl_model_init(&mpmod, dset);
 4263 
 4264     mplsq = get_plugin_function("mplsq");
 4265     if (mplsq == NULL) {
 4266     mpmod.errcode = 1;
 4267     return mpmod;
 4268     }
 4269 
 4270     if (gretl_list_has_separator(list)) {
 4271     int *base = NULL;
 4272     int *poly = NULL;
 4273 
 4274     mpmod.errcode = gretl_list_split_on_separator(list, &base, &poly);
 4275     if (mpmod.errcode == 0 && (base == NULL || poly == NULL)) {
 4276         mpmod.errcode = E_ARGS;
 4277     } else {
 4278         mpmod.errcode = (*mplsq)(base, poly, NULL, dset,
 4279                      &mpmod, OPT_S);
 4280     }
 4281     free(base);
 4282     free(poly);
 4283     } else {
 4284     mpmod.errcode = (*mplsq)(list, NULL, NULL, dset,
 4285                  &mpmod, OPT_S);
 4286     }
 4287 
 4288     set_model_id(&mpmod, opt);
 4289 
 4290     return mpmod;
 4291 }
 4292 
 4293 static int check_panel_options (gretlopt opt)
 4294 {
 4295     if ((opt & OPT_U) && (opt & OPT_H)) {
 4296     /* can't specify random effects + weighted least squares */
 4297     return E_BADOPT;
 4298     } else if ((opt & OPT_T) && !(opt & OPT_H)) {
 4299     /* iterate option requires weighted least squares option */
 4300     return E_BADOPT;
 4301     } else if ((opt & OPT_N) && !(opt & OPT_U)) {
 4302     /* the Nerlove option requires random effects */
 4303     return E_BADOPT;
 4304     } else if ((opt & OPT_C) && !(opt & OPT_P)) {
 4305     /* explicit cluster option only OK with pooled OLS */
 4306     return E_BADOPT;
 4307     } else if (incompatible_options(opt, OPT_B | OPT_U | OPT_P)) {
 4308     /* mutually exclusive estimator requests */
 4309     return E_BADOPT;
 4310     }
 4311 
 4312     return 0;
 4313 }
 4314 
 4315 /**
 4316  * panel_model:
 4317  * @list: regression list (dependent variable plus independent
 4318  * variables).
 4319  * @dset: dataset struct.
 4320  * @opt: can include OPT_Q (quiet estimation), OPT_U (random
 4321  * effects model), OPT_H (weights based on the error variance
 4322  * for the respective cross-sectional units), OPT_I (iterate,
 4323  * only available in conjunction with OPT_H).
 4324  * @prn: printing struct (or NULL).
 4325  *
 4326  * Calculate estimates for a panel dataset, using fixed
 4327  * effects (the default), random effects, or weighted
 4328  * least squares based on the respective variances for the
 4329  * cross-sectional units.
 4330  *
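       * Illustrative sketch: random-effects estimation (OPT_U) of
       * series 1 on a constant and series 2, assuming @dset carries
       * panel structure and prn is a valid #PRN.
       *
       * <informalexample><programlisting>
       * int plist[] = {3, 1, 0, 2};
       * MODEL pm;
       *
       * pm = panel_model(plist, dset, OPT_U, prn);
       * if (!pm.errcode) {
       *     printmodel(&amp;pm, dset, OPT_NONE, prn);
       * }
       * clear_model(&amp;pm);
       * </programlisting></informalexample>
       *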
 4331  * Returns: a #MODEL struct, containing the estimates.
 4332  */
 4333 
 4334 MODEL panel_model (const int *list, DATASET *dset,
 4335            gretlopt opt, PRN *prn)
 4336 {
 4337     MODEL mod;
 4338 
 4339     if (check_panel_options(opt)) {
 4340     gretl_model_init(&mod, dset);
 4341     mod.errcode = E_BADOPT;
 4342     } else if (opt & OPT_H) {
 4343     mod = panel_wls_by_unit(list, dset, opt, prn);
 4344     } else {
 4345     mod = real_panel_model(list, dset, opt, prn);
 4346     }
 4347 
 4348     return mod;
 4349 }
 4350 
 4351 /**
 4352  * ivreg:
 4353  * @list: regression list; see the documentation for the "tsls"
 4354  * command.
 4355  * @dset: dataset struct.
 4356  * @opt: option flags.
 4357  *
 4358  * Calculate IV estimates for a linear model, by default via two-stage
 4359  * least squares. The option flags can include OPT_Q for quiet
 4360  * operation, and either OPT_L to specify estimation via Limited
 4361  * Information Maximum Likelihood or OPT_G for estimation via
 4362  * Generalized Method of Moments.
 4363  *
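       * The fragment below is a sketch only: series 1 is regressed on a
       * constant and series 2, with series 2 treated as endogenous and
       * series 3 (plus the constant) serving as instruments; the series
       * IDs and prn are assumptions.
       *
       * <informalexample><programlisting>
       * int ivlist[] = {6, 1, 0, 2, LISTSEP, 0, 3};
       * MODEL ivmod;
       *
       * ivmod = ivreg(ivlist, dset, OPT_NONE);
       * if (!ivmod.errcode) {
       *     printmodel(&amp;ivmod, dset, OPT_NONE, prn);
       * }
       * clear_model(&amp;ivmod);
       * </programlisting></informalexample>
       *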
 4364  * Returns: a #MODEL struct, containing the estimates.
 4365  */
 4366 
 4367 MODEL ivreg (const int *list, DATASET *dset, gretlopt opt)
 4368 {
 4369     MODEL mod;
 4370     int err;
 4371 
 4372     gretl_error_clear();
 4373 
 4374     /* can't have both LIML and GMM options */
 4375     err = incompatible_options(opt, OPT_G | OPT_L);
 4376 
 4377     if (!err) {
 4378     /* two-step, iterate and weights options are GMM-only */
 4379     err = option_prereq_missing(opt, OPT_T | OPT_I | OPT_H, OPT_G);
 4380     }
 4381 
 4382     if (err) {
 4383     gretl_model_init(&mod, dset);
 4384     mod.errcode = err;
 4385     return mod;
 4386     }
 4387 
 4388     if (opt & OPT_L) {
 4389     mod = single_equation_liml(list, dset, opt);
 4390     } else if (opt & OPT_G) {
 4391     mod = ivreg_via_gmm(list, dset, opt);
 4392     } else {
 4393     mod = tsls(list, dset, opt);
 4394     }
 4395 
 4396     return mod;
 4397 }
 4398 
 4399 /**
 4400  * arbond_model:
 4401  * @list: regression list.
 4402  * @ispec: may contain additional instrument specification.
 4403  * @dset: dataset struct.
 4404  * @opt: may include OPT_D to include time dummies,
 4405  * OPT_H to transform the dependent variable via orthogonal
 4406  * deviations rather than first differences, OPT_T for two-step
 4407  * estimation, OPT_A to force production of asymptotic standard
 4408  * errors rather than finite-sample corrected ones.
 4409  * @prn: printing struct.
 4410  *
 4411  * Produces estimates of a dynamic panel-data model in
 4412  * the manner of Arellano and Bond. See the documentation for
 4413  * the "arbond" command in gretl for the construction of the
 4414  * @list argument and also the syntax of @ispec.
 4415  *
 4416  * Returns: a #MODEL struct, containing the estimates.
 4417  */
 4418 
 4419 MODEL arbond_model (const int *list, const char *ispec,
 4420             const DATASET *dset, gretlopt opt,
 4421             PRN *prn)
 4422 {
 4423     MODEL (*arbond_estimate) (const int *, const char *,
 4424                   const DATASET *, gretlopt, PRN *);
 4425     MODEL mod;
 4426 
 4427     gretl_model_init(&mod, dset);
 4428 
 4429     arbond_estimate = get_plugin_function("arbond_estimate");
 4430     if (arbond_estimate == NULL) {
 4431     mod.errcode = 1;
 4432     return mod;
 4433     }
 4434 
 4435     mod = (*arbond_estimate)(list, ispec, dset, opt, prn);
 4436     set_model_id(&mod, opt);
 4437 
 4438     return mod;
 4439 }
 4440 
 4441 /**
 4442  * dpd_model:
 4443  * @list: regression list.
 4444  * @laglist: list of specific lags of the dependent variable, or
 4445  * NULL.
 4446  * @ispec: may contain additional instrument specification.
 4447  * @dset: dataset struct.
 4448  * @opt: to be hooked up.
 4449  * @prn: printing struct.
 4450  *
 4451  * This is at present a secret function, for testing only.
 4452  *
 4453  * Returns: a #MODEL struct, containing the estimates.
 4454  */
 4455 
 4456 MODEL dpd_model (const int *list, const int *laglist,
 4457          const char *ispec, const DATASET *dset,
 4458          gretlopt opt, PRN *prn)
 4459 {
 4460     MODEL (*dpd_estimate) (const int *, const int *,
 4461                const char *, const DATASET *,
 4462                gretlopt, PRN *);
 4463     MODEL mod;
 4464 
 4465     gretl_model_init(&mod, dset);
 4466 
 4467     dpd_estimate = get_plugin_function("dpd_estimate");
 4468     if (dpd_estimate == NULL) {
 4469     mod.errcode = 1;
 4470     return mod;
 4471     }
 4472 
 4473     mod = (*dpd_estimate)(list, laglist, ispec, dset, opt, prn);
 4474     set_model_id(&mod, opt);
 4475 
 4476     return mod;
 4477 }
 4478 
 4479 /**
 4480  * anova:
 4481  * @list: must contain the response and treatment variables.
 4482  * @dset: dataset struct.
 4483  * @opt: unused at present.
 4484  * @prn: printing struct.
 4485  *
 4486  * Does one-way or two-way Analysis of Variance (prints table and F-test).
 4487  *
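       * A minimal sketch (it is assumed that series 1 is the response
       * and series 2 a discrete treatment variable):
       *
       * <informalexample><programlisting>
       * int anlist[] = {2, 1, 2};
       * int err = anova(anlist, dset, OPT_NONE, prn); // non-zero on failure
       * </programlisting></informalexample>
       *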
 4488  * Returns: 0 on success, non-zero on failure.
 4489 */
 4490 
 4491 int anova (const int *list, const DATASET *dset,
 4492        gretlopt opt, PRN *prn)
 4493 {
 4494     int (*gretl_anova) (const int *, const DATASET *,
 4495             gretlopt, PRN *);
 4496 
 4497     gretl_anova = get_plugin_function("gretl_anova");
 4498     if (gretl_anova == NULL) {
 4499     return 1;
 4500     }
 4501 
 4502     return (*gretl_anova)(list, dset, opt, prn);
 4503 }