"Fossies" - the Fresh Open Source Software Archive

Member "stress-ng-0.09.56/stress-matrix-3d.c" (15 Mar 2019, 21395 Bytes) of package /linux/privat/stress-ng-0.09.56.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "stress-matrix-3d.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.09.55_vs_0.09.56.

    1 /*
    2  * Copyright (C) 2013-2019 Canonical, Ltd.
    3  *
    4  * This program is free software; you can redistribute it and/or
    5  * modify it under the terms of the GNU General Public License
    6  * as published by the Free Software Foundation; either version 2
    7  * of the License, or (at your option) any later version.
    8  *
    9  * This program is distributed in the hope that it will be useful,
   10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12  * GNU General Public License for more details.
   13  *
   14  * You should have received a copy of the GNU General Public License
   15  * along with this program; if not, write to the Free Software
   16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
   17  *
   18  * This code is a complete clean re-write of the stress tool by
   19  * Colin Ian King <colin.king@canonical.com> and attempts to be
   20  * backwardly compatible with the stress tool by Amos Waterland
   21  * <apw@rossby.metr.ou.edu> but has more stress tests and more
   22  * functionality.
   23  *
   24  */
   25 #include "stress-ng.h"
   26 
   27 #if defined(HAVE_VLA_ARG) &&    \
   28     !defined(__PCC__)
   29 
   30 typedef float   matrix_3d_type_t;
   31 
   32 /*
   33  *  the matrix stress test has different classes of maxtrix stressor
   34  */
   35 typedef void (*stress_matrix_3d_func)(
   36     const size_t n,
   37     matrix_3d_type_t a[RESTRICT n][n][n],
   38     matrix_3d_type_t b[RESTRICT n][n][n],
   39     matrix_3d_type_t r[RESTRICT n][n][n]);
   40 
   41 typedef struct {
   42     const char          *name;      /* human readable form of stressor */
   43     const stress_matrix_3d_func func[2];    /* method functions, x by y by z, z by y by x */
   44 } stress_matrix_3d_method_info_t;
   45 
   46 static const stress_matrix_3d_method_info_t matrix_3d_methods[];
   47 
   48 int stress_set_matrix_3d_size(const char *opt)
   49 {
   50     size_t matrix_3d_size;
   51 
   52     matrix_3d_size = get_uint64(opt);
   53     check_range("matrix-3d-size", matrix_3d_size,
   54         MIN_MATRIX3D_SIZE, MAX_MATRIX3D_SIZE);
   55     return set_setting("matrix-3d-size", TYPE_ID_SIZE_T, &matrix_3d_size);
   56 }
   57 
   58 int stress_set_matrix_3d_zyx(const char *opt)
   59 {
   60     size_t matrix_3d_zyx = 1;
   61 
   62         (void)opt;
   63 
   64         return set_setting("matrix-3d-zyx", TYPE_ID_SIZE_T, &matrix_3d_zyx);
   65 }
   66 
   67 /*
   68  *  stress_matrix_3d_xyz_add()
   69  *  matrix addition
   70  */
   71 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_add(
   72     const size_t n,
   73     matrix_3d_type_t a[RESTRICT n][n][n],
   74     matrix_3d_type_t b[RESTRICT n][n][n],
   75     matrix_3d_type_t r[RESTRICT n][n][n])
   76 {
   77     register size_t i;
   78 
   79     for (i = 0; i < n; i++) {
   80         register size_t j;
   81 
   82         for (j = 0; j < n; j++) {
   83             register size_t k;
   84 
   85             for (k = 0; k < n; k++) {
   86                 r[i][j][k] = a[i][j][k] + b[i][j][k];
   87             }
   88             if (UNLIKELY(!g_keep_stressing_flag))
   89                 return;
   90         }
   91     }
   92 }
   93 
   94 /*
   95  *  stress_matrix_3d_zyx_add()
   96  *  matrix addition
   97  */
   98 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_add(
   99     const size_t n,
  100     matrix_3d_type_t a[RESTRICT n][n][n],
  101     matrix_3d_type_t b[RESTRICT n][n][n],
  102     matrix_3d_type_t r[RESTRICT n][n][n])
  103 {
  104     register size_t k;
  105 
  106     for (k = 0; k < n; k++) {
  107         register size_t j;
  108 
  109         for (j = 0; j < n; j++) {
  110             register size_t i;
  111 
  112             for (i = 0; i < n; i++) {
  113                 r[i][j][k] = a[i][j][k] + b[i][j][k];
  114             }
  115             if (UNLIKELY(!g_keep_stressing_flag))
  116                 return;
  117         }
  118     }
  119 }
  120 
  121 /*
  122  *  stress_matrix_3d_xyz_sub()
  123  *  matrix subtraction
  124  */
  125 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_sub(
  126     const size_t n,
  127     matrix_3d_type_t a[RESTRICT n][n][n],
  128     matrix_3d_type_t b[RESTRICT n][n][n],
  129     matrix_3d_type_t r[RESTRICT n][n][n])
  130 {
  131     register size_t i;
  132 
  133     for (i = 0; i < n; i++) {
  134         register size_t j;
  135 
  136         for (j = 0; j < n; j++) {
  137             register size_t k;
  138 
  139             for (k = 0; k < n; k++) {
  140                 r[i][j][k] = a[i][j][k] - b[i][j][k];
  141             }
  142             if (UNLIKELY(!g_keep_stressing_flag))
  143                 return;
  144         }
  145     }
  146 }
  147 
  148 /*
  149  *  stress_matrix_3d_zyx_add()
  150  *  matrix subtraction
  151  */
  152 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_sub(
  153     const size_t n,
  154     matrix_3d_type_t a[RESTRICT n][n][n],
  155     matrix_3d_type_t b[RESTRICT n][n][n],
  156     matrix_3d_type_t r[RESTRICT n][n][n])
  157 {
  158     register size_t k;
  159 
  160     for (k = 0; k < n; k++) {
  161         register size_t j;
  162 
  163         for (j = 0; j < n; j++) {
  164             register size_t i;
  165 
  166             for (i = 0; i < n; i++) {
  167                 r[i][j][k] = a[i][j][k] + b[i][j][k];
  168             }
  169             if (UNLIKELY(!g_keep_stressing_flag))
  170                 return;
  171         }
  172     }
  173 }
  174 
  175 /*
  176  *  stress_matrix_3d_trans()
  177  *  matrix transpose
  178  */
  179 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_trans(
  180     const size_t n,
  181     matrix_3d_type_t a[RESTRICT n][n][n],
  182     matrix_3d_type_t b[RESTRICT n][n][n],   /* Ignored */
  183     matrix_3d_type_t r[RESTRICT n][n][n])
  184 {
  185     register size_t i;
  186 
  187     (void)b;
  188 
  189     for (i = 0; i < n; i++) {
  190         register size_t j;
  191 
  192         for (j = 0; j < n; j++) {
  193             register size_t k;
  194 
  195             for (k = 0; k < n; k++) {
  196                 r[i][j][k] = a[k][j][i];
  197             }
  198             if (UNLIKELY(!g_keep_stressing_flag))
  199                 return;
  200         }
  201     }
  202 }
  203 
  204 /*
  205  *  stress_matrix_3d_trans()
  206  *  matrix transpose
  207  */
  208 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_trans(
  209     const size_t n,
  210     matrix_3d_type_t a[RESTRICT n][n][n],
  211     matrix_3d_type_t b[RESTRICT n][n][n],   /* Ignored */
  212     matrix_3d_type_t r[RESTRICT n][n][n])
  213 {
  214     register size_t k;
  215 
  216     (void)b;
  217 
  218     for (k = 0; k < n; k++) {
  219         register size_t j;
  220 
  221         for (j = 0; j < n; j++) {
  222             register size_t i;
  223     
  224             for (i = 0; i < n; i++) {
  225                 r[i][j][k] = a[k][j][i];
  226             }
  227             if (UNLIKELY(!g_keep_stressing_flag))
  228                 return;
  229         }
  230     }
  231 }
  232 
  233 /*
  234  *  stress_matrix_3d_mult()
  235  *  matrix scalar multiply
  236  */
  237 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_mult(
  238     const size_t n,
  239     matrix_3d_type_t a[RESTRICT n][n][n],
  240     matrix_3d_type_t b[RESTRICT n][n][n],
  241     matrix_3d_type_t r[RESTRICT n][n][n])
  242 {
  243     register size_t i;
  244 
  245     (void)b;
  246     matrix_3d_type_t v = b[0][0][0];
  247 
  248     for (i = 0; i < n; i++) {
  249         register size_t j;
  250 
  251         for (j = 0; j < n; j++) {
  252             register size_t k;
  253 
  254             for (k = 0; k < n; k++) {
  255                 r[i][j][k] = v * a[i][j][k];
  256             }
  257             if (UNLIKELY(!g_keep_stressing_flag))
  258                 return;
  259         }
  260     }
  261 }
  262 
  263 /*
  264  *  stress_matrix_3d_mult()
  265  *  matrix scalar multiply
  266  */
  267 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_mult(
  268     const size_t n,
  269     matrix_3d_type_t a[RESTRICT n][n][n],
  270     matrix_3d_type_t b[RESTRICT n][n][n],
  271     matrix_3d_type_t r[RESTRICT n][n][n])
  272 {
  273     register size_t k;
  274 
  275     (void)b;
  276     matrix_3d_type_t v = b[0][0][0];
  277 
  278     for (k = 0; k < n; k++) {
  279         register size_t j;
  280 
  281         for (j = 0; j < n; j++) {
  282             register size_t i;
  283 
  284             for (i = 0; i < n; i++) {
  285                 r[i][j][k] = v * a[i][j][k];
  286             }
  287             if (UNLIKELY(!g_keep_stressing_flag))
  288                 return;
  289         }
  290     }
  291 }
  292 
  293 /*
  294  *  stress_matrix_3d_div()
  295  *  matrix scalar divide
  296  */
  297 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_div(
  298     const size_t n,
  299     matrix_3d_type_t a[RESTRICT n][n][n],
  300     matrix_3d_type_t b[RESTRICT n][n][n],
  301     matrix_3d_type_t r[RESTRICT n][n][n])
  302 {
  303     register size_t i;
  304 
  305     (void)b;
  306     matrix_3d_type_t v = b[0][0][0];
  307 
  308     for (i = 0; i < n; i++) {
  309         register size_t j;
  310 
  311         for (j = 0; j < n; j++) {
  312             register size_t k;
  313 
  314             for (k = 0; k < n; k++) {
  315                 r[i][j][k] = a[i][j][k] / v;
  316             }
  317             if (UNLIKELY(!g_keep_stressing_flag))
  318                 return;
  319         }
  320     }
  321 }
  322 
  323 /*
  324  *  stress_matrix_3d_div()
  325  *  matrix scalar divide
  326  */
  327 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_div(
  328     const size_t n,
  329     matrix_3d_type_t a[RESTRICT n][n][n],
  330     matrix_3d_type_t b[RESTRICT n][n][n],
  331     matrix_3d_type_t r[RESTRICT n][n][n])
  332 {
  333     register size_t k;
  334 
  335     (void)b;
  336     matrix_3d_type_t v = b[0][0][0];
  337 
  338     for (k = 0; k < n; k++) {
  339         register size_t j;
  340 
  341         for (j = 0; j < n; j++) {
  342             register size_t i;
  343 
  344             for (i = 0; i < n; i++) {
  345                 r[i][j][k] = a[i][j][k] / v;
  346             }
  347             if (UNLIKELY(!g_keep_stressing_flag))
  348                 return;
  349         }
  350     }
  351 }
  352 
  353 /*
  354  *  stress_matrix_3d_hadamard()
  355  *  matrix hadamard product
  356  *  (A o B)ij = AijBij
  357  */
  358 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_hadamard(
  359     const size_t n,
  360     matrix_3d_type_t a[RESTRICT n][n][n],
  361     matrix_3d_type_t b[RESTRICT n][n][n],
  362     matrix_3d_type_t r[RESTRICT n][n][n])
  363 {
  364     register size_t i;
  365 
  366     for (i = 0; i < n; i++) {
  367         register size_t j;
  368 
  369         for (j = 0; j < n; j++) {
  370             register size_t k;
  371 
  372             for (k = 0; k < n; k++) {
  373                 r[i][j][k] = a[i][j][k] * b[i][j][k];
  374             }
  375             if (UNLIKELY(!g_keep_stressing_flag))
  376                 return;
  377         }
  378     }
  379 }
  380 
  381 /*
  382  *  stress_matrix_3d_hadamard()
  383  *  matrix hadamard product
  384  *  (A o B)ij = AijBij
  385  */
  386 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_hadamard(
  387     const size_t n,
  388     matrix_3d_type_t a[RESTRICT n][n][n],
  389     matrix_3d_type_t b[RESTRICT n][n][n],
  390     matrix_3d_type_t r[RESTRICT n][n][n])
  391 {
  392     register size_t k;
  393 
  394     for (k = 0; k < n; k++) {
  395         register size_t j;
  396 
  397         for (j = 0; j < n; j++) {
  398             register size_t i;
  399 
  400             for (i = 0; i < n; i++) {
  401                 r[i][j][k] = a[i][j][k] * b[i][j][k];
  402             }
  403             if (UNLIKELY(!g_keep_stressing_flag))
  404                 return;
  405         }
  406     }
  407 }
  408 
  409 /*
  410  *  stress_matrix_3d_frobenius()
  411  *  matrix frobenius product
  412  *  A : B = Sum(AijBij)
  413  */
  414 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_frobenius(
  415     const size_t n,
  416     matrix_3d_type_t a[RESTRICT n][n][n],
  417     matrix_3d_type_t b[RESTRICT n][n][n],
  418     matrix_3d_type_t r[RESTRICT n][n][n])
  419 {
  420     register size_t i;
  421     matrix_3d_type_t sum = 0.0;
  422 
  423     (void)r;
  424 
  425     for (i = 0; i < n; i++) {
  426         register size_t j;
  427 
  428         for (j = 0; j < n; j++) {
  429             register size_t k;
  430 
  431             for (k = 0; k < n; k++) {
  432                 sum += a[i][j][k] * b[i][j][k];
  433             }
  434             if (UNLIKELY(!g_keep_stressing_flag))
  435                 return;
  436         }
  437     }
  438     double_put(sum);
  439 }
  440 
  441 /*
  442  *  stress_matrix_3d_frobenius()
  443  *  matrix frobenius product
  444  *  A : B = Sum(AijBij)
  445  */
  446 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_frobenius(
  447     const size_t n,
  448     matrix_3d_type_t a[RESTRICT n][n][n],
  449     matrix_3d_type_t b[RESTRICT n][n][n],
  450     matrix_3d_type_t r[RESTRICT n][n][n])
  451 {
  452     register size_t k;
  453     matrix_3d_type_t sum = 0.0;
  454 
  455     (void)r;
  456 
  457     for (k = 0; k < n; k++) {
  458         register size_t j;
  459 
  460         for (j = 0; j < n; j++) {
  461             register size_t i;
  462 
  463             for (i = 0; i < n; i++) {
  464                 sum += a[i][j][k] * b[i][j][k];
  465             }
  466             if (UNLIKELY(!g_keep_stressing_flag))
  467                 return;
  468         }
  469     }
  470     double_put(sum);
  471 }
  472 
  473 /*
  474  *  stress_matrix_3d_copy()
  475  *  naive matrix copy, r = a
  476  */
  477 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_copy(
  478     const size_t n,
  479     matrix_3d_type_t a[RESTRICT n][n][n],
  480     matrix_3d_type_t b[RESTRICT n][n][n],
  481     matrix_3d_type_t r[RESTRICT n][n][n])
  482 {
  483     register size_t i;
  484 
  485     (void)b;
  486 
  487     for (i = 0; i < n; i++) {
  488         register size_t j;
  489 
  490         for (j = 0; j < n; j++) {
  491             register size_t k;
  492 
  493             for (k = 0; k < n; k++) {
  494                 r[i][j][k] = a[i][j][k];
  495             }
  496             if (UNLIKELY(!g_keep_stressing_flag))
  497                 return;
  498         }
  499     }
  500 }
  501 
  502 /*
  503  *  stress_matrix_3d_copy()
  504  *  naive matrix copy, r = a
  505  */
  506 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_copy(
  507     const size_t n,
  508     matrix_3d_type_t a[RESTRICT n][n][n],
  509     matrix_3d_type_t b[RESTRICT n][n][n],
  510     matrix_3d_type_t r[RESTRICT n][n][n])
  511 {
  512     register size_t k;
  513 
  514     (void)b;
  515 
  516     for (k = 0; k < n; k++) {
  517         register size_t j;
  518 
  519         for (j = 0; j < n; j++) {
  520             register size_t i;
  521 
  522             for (i = 0; i < n; i++) {
  523                 r[i][j][k] = a[i][j][k];
  524             }
  525             if (UNLIKELY(!g_keep_stressing_flag))
  526                 return;
  527         }
  528     }
  529 }
  530 
  531 /*
  532  *  stress_matrix_3d_mean(void)
  533  *  arithmetic mean
  534  */
  535 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_mean(
  536     const size_t n,
  537     matrix_3d_type_t a[RESTRICT n][n][n],
  538     matrix_3d_type_t b[RESTRICT n][n][n],
  539     matrix_3d_type_t r[RESTRICT n][n][n])
  540 {
  541     register size_t i;
  542 
  543     for (i = 0; i < n; i++) {
  544         register size_t j;
  545 
  546         for (j = 0; j < n; j++) {
  547             register size_t k;
  548 
  549             for (k = 0; k < n; k++) {
  550                 r[i][j][k] = (a[i][j][k] + b[i][j][k]) / 2.0;
  551             }
  552             if (UNLIKELY(!g_keep_stressing_flag))
  553                 return;
  554         }
  555     }
  556 }
  557 
  558 /*
  559  *  stress_matrix_3d_mean(void)
  560  *  arithmetic mean
  561  */
  562 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_mean(
  563     const size_t n,
  564     matrix_3d_type_t a[RESTRICT n][n][n],
  565     matrix_3d_type_t b[RESTRICT n][n][n],
  566     matrix_3d_type_t r[RESTRICT n][n][n])
  567 {
  568     register size_t k;
  569 
  570     for (k = 0; k < n; k++) {
  571         register size_t j;
  572 
  573         for (j = 0; j < n; j++) {
  574             register size_t i;
  575 
  576             for (i = 0; i < n; i++) {
  577                 r[i][j][k] = (a[i][j][k] + b[i][j][k]) / 2.0;
  578             }
  579             if (UNLIKELY(!g_keep_stressing_flag))
  580                 return;
  581         }
  582     }
  583 }
  584 
  585 /*
  586  *  stress_matrix_3d_zero()
  587  *  simply zero the result matrix
  588  */
  589 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_zero(
  590     const size_t n,
  591     matrix_3d_type_t a[RESTRICT n][n][n],
  592     matrix_3d_type_t b[RESTRICT n][n][n],
  593     matrix_3d_type_t r[RESTRICT n][n][n])
  594 {
  595     register size_t i;
  596 
  597     (void)a;
  598     (void)b;
  599 
  600     for (i = 0; i < n; i++) {
  601         register size_t j;
  602 
  603         for (j = 0; j < n; j++) {
  604             register size_t k;
  605 
  606             for (k = 0; k < n; k++) {
  607                 r[i][j][k] = 0.0;
  608             }
  609             if (UNLIKELY(!g_keep_stressing_flag))
  610                 return;
  611         }
  612     }
  613 }
  614 
  615 /*
  616  *  stress_matrix_3d_zero()
  617  *  simply zero the result matrix
  618  */
  619 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_zero(
  620     const size_t n,
  621     matrix_3d_type_t a[RESTRICT n][n][n],
  622     matrix_3d_type_t b[RESTRICT n][n][n],
  623     matrix_3d_type_t r[RESTRICT n][n][n])
  624 {
  625     register size_t k;
  626 
  627     (void)a;
  628     (void)b;
  629 
  630     for (k = 0; k < n; k++) {
  631         register size_t j;
  632 
  633         for (j = 0; j < n; j++) {
  634             register size_t i;
  635 
  636             for (i = 0; i < n; i++) {
  637                 r[i][j][k] = 0.0;
  638             }
  639             if (UNLIKELY(!g_keep_stressing_flag))
  640                 return;
  641         }
  642     }
  643 }
  644 
  645 /*
  646  *  stress_matrix_3d_negate()
  647  *  simply negate the matrix a and put result in r
  648  */
  649 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_negate(
  650     const size_t n,
  651     matrix_3d_type_t a[RESTRICT n][n][n],
  652     matrix_3d_type_t b[RESTRICT n][n][n],
  653     matrix_3d_type_t r[RESTRICT n][n][n])
  654 {
  655     register size_t i;
  656 
  657     (void)a;
  658     (void)b;
  659 
  660     for (i = 0; i < n; i++) {
  661         register size_t j;
  662 
  663         for (j = 0; j < n; j++) {
  664             register size_t k;
  665 
  666             for (k = 0; k < n; k++) {
  667                 r[i][j][k] = -a[i][j][k];
  668             }
  669             if (UNLIKELY(!g_keep_stressing_flag))
  670                 return;
  671         }
  672     }
  673 }
  674 
  675 /*
  676  *  stress_matrix_3d_negate()
  677  *  simply negate the matrix a and put result in r
  678  */
  679 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_negate(
  680     const size_t n,
  681     matrix_3d_type_t a[RESTRICT n][n][n],
  682     matrix_3d_type_t b[RESTRICT n][n][n],
  683     matrix_3d_type_t r[RESTRICT n][n][n])
  684 {
  685     register size_t k;
  686 
  687     (void)a;
  688     (void)b;
  689 
  690     for (k = 0; k < n; k++) {
  691         register size_t j;
  692 
  693         for (j = 0; j < n; j++) {
  694             register size_t i;
  695 
  696             for (i = 0; i < n; i++) {
  697                 r[i][j][k] = -a[i][j][k];
  698             }
  699             if (UNLIKELY(!g_keep_stressing_flag))
  700                 return;
  701         }
  702     }
  703 }
  704 
  705 /*
  706  *  stress_matrix_3d_identity()
  707  *  set r to the identity matrix
  708  */
  709 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_xyz_identity(
  710     const size_t n,
  711     matrix_3d_type_t a[RESTRICT n][n][n],
  712     matrix_3d_type_t b[RESTRICT n][n][n],
  713     matrix_3d_type_t r[RESTRICT n][n][n])
  714 {
  715     register size_t i;
  716 
  717     (void)a;
  718     (void)b;
  719 
  720     for (i = 0; i < n; i++) {
  721         register size_t j;
  722 
  723         for (j = 0; j < n; j++) {
  724             register size_t k;
  725 
  726             for (k = 0; k < n; k++) {
  727                 r[i][j][k] = ((i == j) && (j == k)) ? 1.0 : 0.0;
  728             }
  729             if (UNLIKELY(!g_keep_stressing_flag))
  730                 return;
  731         }
  732     }
  733 }
  734 
  735 /*
  736  *  stress_matrix_3d_identity()
  737  *  set r to the identity matrix
  738  */
  739 static void OPTIMIZE3 TARGET_CLONES stress_matrix_3d_zyx_identity(
  740     const size_t n,
  741     matrix_3d_type_t a[RESTRICT n][n][n],
  742     matrix_3d_type_t b[RESTRICT n][n][n],
  743     matrix_3d_type_t r[RESTRICT n][n][n])
  744 {
  745     register size_t k;
  746 
  747     (void)a;
  748     (void)b;
  749 
  750     for (k = 0; k < n; k++) {
  751         register size_t j;
  752 
  753         for (j = 0; j < n; j++) {
  754             register size_t i;
  755 
  756             for (i = 0; i < n; i++) {
  757                 r[i][j][k] = ((i == j) && (j == k)) ? 1.0 : 0.0;
  758             }
  759             if (UNLIKELY(!g_keep_stressing_flag))
  760                 return;
  761         }
  762     }
  763 }
  764 
  765 /*
  766  *  stress_matrix_3d_all()
  767  *  iterate over all cpu stressors
  768  */
  769 static void OPTIMIZE3 stress_matrix_3d_xyz_all(
  770     const size_t n,
  771     matrix_3d_type_t a[RESTRICT n][n][n],
  772     matrix_3d_type_t b[RESTRICT n][n][n],
  773     matrix_3d_type_t r[RESTRICT n][n][n])
  774 {
  775     static int i = 1;   /* Skip over stress_matrix_3d_all */
  776 
  777     matrix_3d_methods[i++].func[0](n, a, b, r);
  778     if (!matrix_3d_methods[i].name)
  779         i = 1;
  780 }
  781 
  782 /*
  783  *  stress_matrix_3d_all()
  784  *  iterate over all cpu stressors
  785  */
  786 static void OPTIMIZE3 stress_matrix_3d_zyx_all(
  787     const size_t n,
  788     matrix_3d_type_t a[RESTRICT n][n][n],
  789     matrix_3d_type_t b[RESTRICT n][n][n],
  790     matrix_3d_type_t r[RESTRICT n][n][n])
  791 {
  792     static int i = 1;   /* Skip over stress_matrix_3d_all */
  793 
  794     matrix_3d_methods[i++].func[1](n, a, b, r);
  795     if (!matrix_3d_methods[i].name)
  796         i = 1;
  797 }
  798 
  799 
  800 /*
  801  * Table of cpu stress methods, ordered x by y by z and z by y by x
  802  */
  803 static const stress_matrix_3d_method_info_t matrix_3d_methods[] = {
  804     { "all",        { stress_matrix_3d_xyz_all, stress_matrix_3d_zyx_all } },/* Special "all" test */
  805 
  806     { "add",        { stress_matrix_3d_xyz_add, stress_matrix_3d_zyx_add } },
  807     { "copy",       { stress_matrix_3d_xyz_copy,    stress_matrix_3d_zyx_copy } },
  808     { "div",        { stress_matrix_3d_xyz_div, stress_matrix_3d_zyx_div } },
  809     { "frobenius",      { stress_matrix_3d_xyz_frobenius,stress_matrix_3d_zyx_frobenius } },
  810     { "hadamard",       { stress_matrix_3d_xyz_hadamard,    stress_matrix_3d_zyx_hadamard } },
  811     { "identity",       { stress_matrix_3d_xyz_identity,    stress_matrix_3d_zyx_identity } },
  812     { "mean",       { stress_matrix_3d_xyz_mean,    stress_matrix_3d_zyx_mean } },
  813     { "mult",       { stress_matrix_3d_xyz_mult,    stress_matrix_3d_zyx_mult } },
  814     { "negate",     { stress_matrix_3d_xyz_negate,  stress_matrix_3d_zyx_negate } },
  815     { "sub",        { stress_matrix_3d_xyz_sub, stress_matrix_3d_zyx_sub } },
  816     { "trans",      { stress_matrix_3d_xyz_trans,   stress_matrix_3d_zyx_trans } },
  817     { "zero",       { stress_matrix_3d_xyz_zero,    stress_matrix_3d_zyx_zero } },
  818     { NULL,         { NULL, NULL } }
  819 };
  820 
  821 static const stress_matrix_3d_method_info_t *stress_get_matrix_3d_method(
  822     const char *name)
  823 {
  824     const stress_matrix_3d_method_info_t *info;
  825 
  826     for (info = matrix_3d_methods; info->name; info++) {
  827         if (!strcmp(info->name, name)) {
  828             set_setting("matrix-3d-method", TYPE_ID_STR, name);
  829             return info;
  830         }
  831     }
  832     return NULL;
  833 }
  834 
  835 static void stress_matrix_3d_method_error(void)
  836 {
  837     const stress_matrix_3d_method_info_t *info;
  838 
  839     (void)fprintf(stderr, "matrix-3d-method must be one of:");
  840     for (info = matrix_3d_methods; info->name; info++)
  841         (void)fprintf(stderr, " %s", info->name);
  842     (void)fprintf(stderr, "\n");
  843 }
  844 
  845 /*
  846  *  stress_set_matrix_3d_method()
  847  *  set the default matrix stress method
  848  */
  849 int stress_set_matrix_3d_method(const char *name)
  850 {
  851     const stress_matrix_3d_method_info_t *info;
  852 
  853     info = stress_get_matrix_3d_method(name);
  854     if (info) {
  855         set_setting("matrix-3d-method", TYPE_ID_STR, name);
  856         return 0;
  857     }
  858     stress_matrix_3d_method_error();
  859 
  860     return -1;
  861 }
  862 
  863 static inline size_t round_up(size_t page_size, size_t n)
  864 {
  865     page_size = (page_size == 0) ? 4096 : page_size;
  866 
  867     return (n + page_size - 1) & (~(page_size -1));
  868 }
  869 
  870 static inline int stress_matrix_3d_exercise(
  871     const args_t *args,
  872     const stress_matrix_3d_func func,
  873     const size_t n)
  874 {
  875     int ret = EXIT_NO_RESOURCE;
  876     typedef matrix_3d_type_t (*matrix_3d_ptr_t)[n][n];
  877     size_t matrix_3d_size = round_up(args->page_size, (sizeof(matrix_3d_type_t) * n * n * n));
  878 
  879     matrix_3d_ptr_t a, b = NULL, r = NULL;
  880     register size_t i;
  881     const matrix_3d_type_t v = 65535 / (matrix_3d_type_t)((uint64_t)~0);
  882     int flags = MAP_PRIVATE | MAP_ANONYMOUS;
  883 #if defined(MAP_POPULATE)
  884     flags |= MAP_POPULATE;
  885 #endif
  886 
  887     a = (matrix_3d_ptr_t)mmap(NULL, matrix_3d_size,
  888         PROT_READ | PROT_WRITE, flags, -1, 0);
  889     if (a == MAP_FAILED) {
  890         pr_fail("matrix allocation");
  891         goto tidy_ret;
  892     }
  893     b = (matrix_3d_ptr_t)mmap(NULL, matrix_3d_size,
  894         PROT_READ | PROT_WRITE, flags, -1, 0);
  895     if (b == MAP_FAILED) {
  896         pr_fail("matrix allocation");
  897         goto tidy_a;
  898     }
  899     r = (matrix_3d_ptr_t)mmap(NULL, matrix_3d_size,
  900         PROT_READ | PROT_WRITE, flags, -1, 0);
  901     if (r == MAP_FAILED) {
  902         pr_fail("matrix allocation");
  903         goto tidy_b;
  904     }
  905 
  906     /*
  907      *  Initialise matrices
  908      */
  909     for (i = 0; i < n; i++) {
  910         register size_t j;
  911 
  912         for (j = 0; j < n; j++) {
  913             register size_t k;
  914 
  915             for (k = 0; k < n; k++) {
  916                 a[i][j][k] = (matrix_3d_type_t)mwc64() * v;
  917                 b[i][j][k] = (matrix_3d_type_t)mwc64() * v;
  918                 r[i][j][k] = 0.0;
  919             }
  920         }
  921     }
  922 
  923     /*
  924      * Normal use case, 100% load, simple spinning on CPU
  925      */
  926     do {
  927         (void)func(n, a, b, r);
  928         inc_counter(args);
  929     } while (keep_stressing());
  930 
  931     ret = EXIT_SUCCESS;
  932 
  933     munmap((void *)r, matrix_3d_size);
  934 tidy_b:
  935     munmap((void *)b, matrix_3d_size);
  936 tidy_a:
  937     munmap((void *)a, matrix_3d_size);
  938 tidy_ret:
  939     return ret;
  940 }
  941 
  942 /*
  943  *  stress_matrix()
  944  *  stress CPU by doing floating point math ops
  945  */
  946 static int stress_matrix(const args_t *args)
  947 {
  948     char *matrix_3d_method_name;
  949     const stress_matrix_3d_method_info_t *matrix_3d_method;
  950     stress_matrix_3d_func func;
  951     size_t matrix_3d_size = 128;
  952     size_t matrix_3d_yx = 0;
  953 
  954     (void)get_setting("matrix-3d-method", &matrix_3d_method_name);
  955     (void)get_setting("matrix-3d-zyx", &matrix_3d_yx);
  956 
  957     matrix_3d_method = stress_get_matrix_3d_method(matrix_3d_method_name);
  958     if (!matrix_3d_method) {
  959         /* Should *never* get here... */
  960         stress_matrix_3d_method_error();
  961         return EXIT_FAILURE;
  962     }
  963 
  964     func = matrix_3d_method->func[matrix_3d_yx];
  965     if (args->instance == 0)
  966         pr_dbg("%s using method '%s' (%s)\n", args->name, matrix_3d_method->name,
  967             matrix_3d_yx ? "z by y by x" : "x by y by z");
  968 
  969     if (!get_setting("matrix-3d-size", &matrix_3d_size)) {
  970         if (g_opt_flags & OPT_FLAGS_MAXIMIZE)
  971             matrix_3d_size = MAX_MATRIX_SIZE;
  972         if (g_opt_flags & OPT_FLAGS_MINIMIZE)
  973             matrix_3d_size = MIN_MATRIX_SIZE;
  974     }
  975 
  976     return stress_matrix_3d_exercise(args, func, matrix_3d_size);
  977 }
  978 
  979 static void stress_matrix_3d_set_default(void)
  980 {
  981     stress_set_matrix_3d_method("all");
  982 }
  983 
  984 stressor_info_t stress_matrix_3d_info = {
  985     .stressor = stress_matrix,
  986     .set_default = stress_matrix_3d_set_default,
  987     .class = CLASS_CPU | CLASS_CPU_CACHE | CLASS_MEMORY
  988 };
  989 #else
  990 stressor_info_t stress_matrix_3d_info = {
  991     .stressor = stress_not_implemented,
  992     .class = CLASS_CPU | CLASS_CPU_CACHE | CLASS_MEMORY
  993 };
  994 #endif