"Fossies" - the Fresh Open Source Software Archive

Member "stress-ng-0.09.56/stress-matrix.c" (15 Mar 2019, 20563 Bytes) of package /linux/privat/stress-ng-0.09.56.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "stress-matrix.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.09.54_vs_0.09.55.

    1 /*
    2  * Copyright (C) 2013-2019 Canonical, Ltd.
    3  *
    4  * This program is free software; you can redistribute it and/or
    5  * modify it under the terms of the GNU General Public License
    6  * as published by the Free Software Foundation; either version 2
    7  * of the License, or (at your option) any later version.
    8  *
    9  * This program is distributed in the hope that it will be useful,
   10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12  * GNU General Public License for more details.
   13  *
   14  * You should have received a copy of the GNU General Public License
   15  * along with this program; if not, write to the Free Software
   16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
   17  *
   18  * This code is a complete clean re-write of the stress tool by
   19  * Colin Ian King <colin.king@canonical.com> and attempts to be
   20  * backwardly compatible with the stress tool by Amos Waterland
   21  * <apw@rossby.metr.ou.edu> but has more stress tests and more
   22  * functionality.
   23  *
   24  */
   25 #include "stress-ng.h"
   26 
   27 #if defined(HAVE_VLA_ARG)
   28 
   29 typedef float   matrix_type_t;
   30 
   31 /*
   32  *  the matrix stress test has different classes of maxtrix stressor
   33  */
   34 typedef void (*stress_matrix_func)(
   35     const size_t n,
   36     matrix_type_t a[RESTRICT n][n],
   37     matrix_type_t b[RESTRICT n][n],
   38     matrix_type_t r[RESTRICT n][n]);
   39 
   40 typedef struct {
   41     const char          *name;      /* human readable form of stressor */
   42     const stress_matrix_func    func[2];    /* method functions, x by y, y by x */
   43 } stress_matrix_method_info_t;
   44 
   45 static const stress_matrix_method_info_t matrix_methods[];
   46 
   47 int stress_set_matrix_size(const char *opt)
   48 {
   49     size_t matrix_size;
   50 
   51     matrix_size = get_uint64(opt);
   52     check_range("matrix-size", matrix_size,
   53         MIN_MATRIX_SIZE, MAX_MATRIX_SIZE);
   54     return set_setting("matrix-size", TYPE_ID_SIZE_T, &matrix_size);
   55 }
   56 
   57 int stress_set_matrix_yx(const char *opt)
   58 {
   59     size_t matrix_yx = 1;
   60 
   61     (void)opt;
   62 
   63     return set_setting("matrix-yx", TYPE_ID_SIZE_T, &matrix_yx);
   64 }
   65 
   66 /*
   67  *  stress_matrix_xy_prod()
   68  *  matrix product
   69  */
   70 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_prod(
   71     const size_t n,
   72     matrix_type_t a[RESTRICT n][n],
   73     matrix_type_t b[RESTRICT n][n],
   74     matrix_type_t r[RESTRICT n][n])
   75 {
   76     size_t i;
   77 
   78     for (i = 0; i < n; i++) {
   79         register size_t j;
   80 
   81         for (j = 0; j < n; j++) {
   82             register size_t k;
   83 
   84             for (k = 0; k < n; k++) {
   85                 r[i][j] += a[i][k] * b[k][j];
   86             }
   87             if (!g_keep_stressing_flag)
   88                 return;
   89         }
   90     }
   91 }
   92 
   93 /*
   94  *  stress_matrix_yx_prod()
   95  *  matrix product
   96  */
   97 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_prod(
   98     const size_t n,
   99     matrix_type_t a[RESTRICT n][n],
  100     matrix_type_t b[RESTRICT n][n],
  101     matrix_type_t r[RESTRICT n][n])
  102 {
  103     size_t j;
  104 
  105     for (j = 0; j < n; j++) {
  106         register size_t i;
  107 
  108         for (i = 0; i < n; i++) {
  109             register size_t k;
  110 
  111             for (k = 0; k < n; k++) {
  112                 r[i][j] += a[i][k] * b[k][j];
  113             }
  114             if (UNLIKELY(!g_keep_stressing_flag))
  115                 return;
  116         }
  117     }
  118 }
  119 
  120 /*
  121  *  stress_matrix_xy_add()
  122  *  matrix addition
  123  */
  124 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_add(
  125     const size_t n,
  126     matrix_type_t a[RESTRICT n][n],
  127     matrix_type_t b[RESTRICT n][n],
  128     matrix_type_t r[RESTRICT n][n])
  129 {
  130     register size_t i;
  131 
  132     for (i = 0; i < n; i++) {
  133         register size_t j;
  134 
  135         for (j = 0; j < n; j++) {
  136             r[i][j] = a[i][j] + b[i][j];
  137         }
  138         if (UNLIKELY(!g_keep_stressing_flag))
  139             return;
  140     }
  141 }
  142 
  143 /*
  144  *  stress_matrix_yx_add()
  145  *  matrix addition
  146  */
  147 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_add(
  148     const size_t n,
  149     matrix_type_t a[RESTRICT n][n],
  150     matrix_type_t b[RESTRICT n][n],
  151     matrix_type_t r[RESTRICT n][n])
  152 {
  153     register size_t j;
  154 
  155     for (j = 0; j < n; j++) {
  156         register size_t i;
  157 
  158         for (i = 0; i < n; i++) {
  159             r[i][j] = a[i][j] + b[i][j];
  160         }
  161         if (UNLIKELY(!g_keep_stressing_flag))
  162             return;
  163     }
  164 }
  165 
  166 /*
  167  *  stress_matrix_xy_sub()
  168  *  matrix subtraction
  169  */
  170 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_sub(
  171     const size_t n,
  172     matrix_type_t a[RESTRICT n][n],
  173     matrix_type_t b[RESTRICT n][n],
  174     matrix_type_t r[RESTRICT n][n])
  175 {
  176     register size_t i;
  177 
  178     for (i = 0; i < n; i++) {
  179         register size_t j;
  180 
  181         for (j = 0; j < n; j++) {
  182             r[i][j] = a[i][j] - b[i][j];
  183         }
  184         if (UNLIKELY(!g_keep_stressing_flag))
  185             return;
  186     }
  187 }
  188 
  189 /*
  190  *  stress_matrix_xy_sub()
  191  *  matrix subtraction
  192  */
  193 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_sub(
  194     const size_t n,
  195     matrix_type_t a[RESTRICT n][n],
  196     matrix_type_t b[RESTRICT n][n],
  197     matrix_type_t r[RESTRICT n][n])
  198 {
  199     register size_t j;
  200 
  201     for (j = 0; j < n; j++) {
  202 
  203         register size_t i;
  204         for (i = 0; i < n; i++) {
  205             r[i][j] = a[i][j] - b[i][j];
  206         }
  207         if (UNLIKELY(!g_keep_stressing_flag))
  208             return;
  209     }
  210 }
  211 
  212 /*
  213  *  stress_matrix_trans()
  214  *  matrix transpose
  215  */
  216 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_trans(
  217     const size_t n,
  218     matrix_type_t a[RESTRICT n][n],
  219     matrix_type_t b[RESTRICT n][n], /* Ignored */
  220     matrix_type_t r[RESTRICT n][n])
  221 {
  222     register size_t i;
  223 
  224     (void)b;
  225 
  226     for (i = 0; i < n; i++) {
  227         register size_t j;
  228 
  229         for (j = 0; j < n; j++) {
  230             r[i][j] = a[j][i];
  231         }
  232         if (UNLIKELY(!g_keep_stressing_flag))
  233             return;
  234     }
  235 }
  236 
  237 /*
  238  *  stress_matrix_trans()
  239  *  matrix transpose
  240  */
  241 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_trans(
  242     const size_t n,
  243     matrix_type_t a[RESTRICT n][n],
  244     matrix_type_t b[RESTRICT n][n], /* Ignored */
  245     matrix_type_t r[RESTRICT n][n])
  246 {
  247     register size_t j;
  248 
  249     (void)b;
  250 
  251     for (j = 0; j < n; j++) {
  252         register size_t i;
  253 
  254         for (i = 0; i < n; i++) {
  255             r[i][j] = a[j][i];
  256         }
  257         if (UNLIKELY(!g_keep_stressing_flag))
  258             return;
  259     }
  260 }
  261 
  262 /*
  263  *  stress_matrix_mult()
  264  *  matrix scalar multiply
  265  */
  266 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_mult(
  267     const size_t n,
  268     matrix_type_t a[RESTRICT n][n],
  269     matrix_type_t b[RESTRICT n][n],
  270     matrix_type_t r[RESTRICT n][n])
  271 {
  272     register size_t i;
  273 
  274     (void)b;
  275     matrix_type_t v = b[0][0];
  276 
  277     for (i = 0; i < n; i++) {
  278         register size_t j;
  279 
  280         for (j = 0; j < n; j++) {
  281             r[i][j] = v * a[i][j];
  282         }
  283         if (UNLIKELY(!g_keep_stressing_flag))
  284             return;
  285     }
  286 }
  287 
  288 /*
  289  *  stress_matrix_mult()
  290  *  matrix scalar multiply
  291  */
  292 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_mult(
  293     const size_t n,
  294     matrix_type_t a[RESTRICT n][n],
  295     matrix_type_t b[RESTRICT n][n],
  296     matrix_type_t r[RESTRICT n][n])
  297 {
  298     register size_t j;
  299 
  300     (void)b;
  301     matrix_type_t v = b[0][0];
  302 
  303     for (j = 0; j < n; j++) {
  304         register size_t i;
  305 
  306         for (i = 0; i < n; i++) {
  307             r[i][j] = v * a[i][j];
  308         }
  309         if (UNLIKELY(!g_keep_stressing_flag))
  310             return;
  311     }
  312 }
  313 
  314 /*
  315  *  stress_matrix_div()
  316  *  matrix scalar divide
  317  */
  318 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_div(
  319     const size_t n,
  320     matrix_type_t a[RESTRICT n][n],
  321     matrix_type_t b[RESTRICT n][n],
  322     matrix_type_t r[RESTRICT n][n])
  323 {
  324     register size_t i;
  325 
  326     (void)b;
  327     matrix_type_t v = b[0][0];
  328 
  329     for (i = 0; i < n; i++) {
  330         register size_t j;
  331 
  332         for (j = 0; j < n; j++) {
  333             r[i][j] = a[i][j] / v;
  334         }
  335         if (UNLIKELY(!g_keep_stressing_flag))
  336             return;
  337     }
  338 }
  339 
  340 /*
  341  *  stress_matrix_div()
  342  *  matrix scalar divide
  343  */
  344 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_div(
  345     const size_t n,
  346     matrix_type_t a[RESTRICT n][n],
  347     matrix_type_t b[RESTRICT n][n],
  348     matrix_type_t r[RESTRICT n][n])
  349 {
  350     register size_t j;
  351 
  352     (void)b;
  353     matrix_type_t v = b[0][0];
  354 
  355     for (j = 0; j < n; j++) {
  356         register size_t i;
  357 
  358         for (i = 0; i < n; i++) {
  359             r[i][j] = a[i][j] / v;
  360         }
  361         if (UNLIKELY(!g_keep_stressing_flag))
  362             return;
  363     }
  364 }
  365 
  366 /*
  367  *  stress_matrix_hadamard()
  368  *  matrix hadamard product
  369  *  (A o B)ij = AijBij
  370  */
  371 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_hadamard(
  372     const size_t n,
  373     matrix_type_t a[RESTRICT n][n],
  374     matrix_type_t b[RESTRICT n][n],
  375     matrix_type_t r[RESTRICT n][n])
  376 {
  377     register size_t i;
  378 
  379     for (i = 0; i < n; i++) {
  380         register size_t j;
  381 
  382         for (j = 0; j < n; j++) {
  383             r[i][j] = a[i][j] * b[i][j];
  384         }
  385         if (UNLIKELY(!g_keep_stressing_flag))
  386             return;
  387     }
  388 }
  389 
  390 /*
  391  *  stress_matrix_hadamard()
  392  *  matrix hadamard product
  393  *  (A o B)ij = AijBij
  394  */
  395 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_hadamard(
  396     const size_t n,
  397     matrix_type_t a[RESTRICT n][n],
  398     matrix_type_t b[RESTRICT n][n],
  399     matrix_type_t r[RESTRICT n][n])
  400 {
  401     register size_t j;
  402 
  403     for (j = 0; j < n; j++) {
  404         register size_t i;
  405 
  406         for (i = 0; i < n; i++) {
  407             r[i][j] = a[i][j] * b[i][j];
  408         }
  409         if (UNLIKELY(!g_keep_stressing_flag))
  410             return;
  411     }
  412 }
  413 
  414 /*
  415  *  stress_matrix_frobenius()
  416  *  matrix frobenius product
  417  *  A : B = Sum(AijBij)
  418  */
  419 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_frobenius(
  420     const size_t n,
  421     matrix_type_t a[RESTRICT n][n],
  422     matrix_type_t b[RESTRICT n][n],
  423     matrix_type_t r[RESTRICT n][n])
  424 {
  425     register size_t i;
  426     matrix_type_t sum = 0.0;
  427 
  428     (void)r;
  429 
  430     for (i = 0; i < n; i++) {
  431         register size_t j;
  432 
  433         for (j = 0; j < n; j++) {
  434             sum += a[i][j] * b[i][j];
  435         }
  436         if (UNLIKELY(!g_keep_stressing_flag))
  437             return;
  438     }
  439     double_put(sum);
  440 }
  441 
  442 /*
  443  *  stress_matrix_frobenius()
  444  *  matrix frobenius product
  445  *  A : B = Sum(AijBij)
  446  */
  447 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_frobenius(
  448     const size_t n,
  449     matrix_type_t a[RESTRICT n][n],
  450     matrix_type_t b[RESTRICT n][n],
  451     matrix_type_t r[RESTRICT n][n])
  452 {
  453     register size_t j;
  454     matrix_type_t sum = 0.0;
  455 
  456     (void)r;
  457 
  458     for (j = 0; j < n; j++) {
  459         register size_t i;
  460 
  461         for (i = 0; i < n; i++) {
  462             sum += a[i][j] * b[i][j];
  463         }
  464         if (UNLIKELY(!g_keep_stressing_flag))
  465             return;
  466     }
  467     double_put(sum);
  468 }
  469 
  470 /*
  471  *  stress_matrix_copy()
  472  *  naive matrix copy, r = a
  473  */
  474 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_copy(
  475     const size_t n,
  476     matrix_type_t a[RESTRICT n][n],
  477     matrix_type_t b[RESTRICT n][n],
  478     matrix_type_t r[RESTRICT n][n])
  479 {
  480     register size_t i;
  481 
  482     (void)b;
  483 
  484     for (i = 0; i < n; i++) {
  485         register size_t j;
  486 
  487         for (j = 0; j < n; j++)
  488             r[i][j] = a[i][j];
  489 
  490         if (UNLIKELY(!g_keep_stressing_flag))
  491             return;
  492     }
  493 }
  494 
  495 /*
  496  *  stress_matrix_copy()
  497  *  naive matrix copy, r = a
  498  */
  499 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_copy(
  500     const size_t n,
  501     matrix_type_t a[RESTRICT n][n],
  502     matrix_type_t b[RESTRICT n][n],
  503     matrix_type_t r[RESTRICT n][n])
  504 {
  505     register size_t j;
  506 
  507     (void)b;
  508 
  509     for (j = 0; j < n; j++) {
  510         register size_t i;
  511 
  512         for (i = 0; i < n; i++)
  513             r[i][j] = a[i][j];
  514 
  515         if (UNLIKELY(!g_keep_stressing_flag))
  516             return;
  517     }
  518 }
  519 
  520 /*
  521  *  stress_matrix_mean(void)
  522  *  arithmetic mean
  523  */
  524 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_mean(
  525     const size_t n,
  526     matrix_type_t a[RESTRICT n][n],
  527     matrix_type_t b[RESTRICT n][n],
  528     matrix_type_t r[RESTRICT n][n])
  529 {
  530     register size_t i;
  531 
  532     for (i = 0; i < n; i++) {
  533         register size_t j;
  534 
  535         for (j = 0; j < n; j++)
  536             r[i][j] = (a[i][j] + b[i][j]) / 2.0;
  537 
  538         if (UNLIKELY(!g_keep_stressing_flag))
  539             return;
  540     }
  541 }
  542 
  543 /*
  544  *  stress_matrix_mean(void)
  545  *  arithmetic mean
  546  */
  547 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_mean(
  548     const size_t n,
  549     matrix_type_t a[RESTRICT n][n],
  550     matrix_type_t b[RESTRICT n][n],
  551     matrix_type_t r[RESTRICT n][n])
  552 {
  553     register size_t j;
  554 
  555     for (j = 0; j < n; j++) {
  556         register size_t i;
  557 
  558         for (i = 0; i < n; i++)
  559             r[i][j] = (a[i][j] + b[i][j]) / 2.0;
  560 
  561         if (UNLIKELY(!g_keep_stressing_flag))
  562             return;
  563     }
  564 }
  565 
  566 /*
  567  *  stress_matrix_zero()
  568  *  simply zero the result matrix
  569  */
  570 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_zero(
  571     const size_t n,
  572     matrix_type_t a[RESTRICT n][n],
  573     matrix_type_t b[RESTRICT n][n],
  574     matrix_type_t r[RESTRICT n][n])
  575 {
  576     register size_t i;
  577 
  578     (void)a;
  579     (void)b;
  580 
  581     for (i = 0; i < n; i++) {
  582         register size_t j;
  583 
  584         for (j = 0; j < n; j++)
  585             r[i][j] = 0.0;
  586 
  587         if (UNLIKELY(!g_keep_stressing_flag))
  588             return;
  589     }
  590 }
  591 
  592 /*
  593  *  stress_matrix_zero()
  594  *  simply zero the result matrix
  595  */
  596 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_zero(
  597     const size_t n,
  598     matrix_type_t a[RESTRICT n][n],
  599     matrix_type_t b[RESTRICT n][n],
  600     matrix_type_t r[RESTRICT n][n])
  601 {
  602     register size_t j;
  603 
  604     (void)a;
  605     (void)b;
  606 
  607     for (j = 0; j < n; j++) {
  608         register size_t i;
  609 
  610         for (i = 0; i < n; i++)
  611             r[i][j] = 0.0;
  612 
  613         if (UNLIKELY(!g_keep_stressing_flag))
  614             return;
  615     }
  616 }
  617 
  618 /*
  619  *  stress_matrix_negate()
  620  *  simply negate the matrix a and put result in r
  621  */
  622 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_negate(
  623     const size_t n,
  624     matrix_type_t a[RESTRICT n][n],
  625     matrix_type_t b[RESTRICT n][n],
  626     matrix_type_t r[RESTRICT n][n])
  627 {
  628     register size_t i;
  629 
  630     (void)a;
  631     (void)b;
  632 
  633     for (i = 0; i < n; i++) {
  634         register size_t j;
  635 
  636         for (j = 0; j < n; j++)
  637             r[i][j] = -a[i][j];
  638 
  639         if (UNLIKELY(!g_keep_stressing_flag))
  640             return;
  641     }
  642 }
  643 
  644 /*
  645  *  stress_matrix_negate()
  646  *  simply negate the matrix a and put result in r
  647  */
  648 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_negate(
  649     const size_t n,
  650     matrix_type_t a[RESTRICT n][n],
  651     matrix_type_t b[RESTRICT n][n],
  652     matrix_type_t r[RESTRICT n][n])
  653 {
  654     register size_t j;
  655 
  656     (void)a;
  657     (void)b;
  658 
  659     for (j = 0; j < n; j++) {
  660         register size_t i;
  661 
  662         for (i = 0; i < n; i++)
  663             r[i][j] = -a[i][j];
  664 
  665         if (UNLIKELY(!g_keep_stressing_flag))
  666             return;
  667     }
  668 }
  669 
  670 /*
  671  *  stress_matrix_identity()
  672  *  set r to the identity matrix
  673  */
  674 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_identity(
  675     const size_t n,
  676     matrix_type_t a[RESTRICT n][n],
  677     matrix_type_t b[RESTRICT n][n],
  678     matrix_type_t r[RESTRICT n][n])
  679 {
  680     register size_t i;
  681 
  682     (void)a;
  683     (void)b;
  684 
  685     for (i = 0; i < n; i++) {
  686         register size_t j;
  687 
  688         for (j = 0; j < n; j++)
  689             r[i][j] = (i == j) ? 1.0 : 0.0;
  690 
  691         if (UNLIKELY(!g_keep_stressing_flag))
  692             return;
  693     }
  694 }
  695 
  696 /*
  697  *  stress_matrix_identity()
  698  *  set r to the identity matrix
  699  */
  700 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_identity(
  701     const size_t n,
  702     matrix_type_t a[RESTRICT n][n],
  703     matrix_type_t b[RESTRICT n][n],
  704     matrix_type_t r[RESTRICT n][n])
  705 {
  706     register size_t j;
  707 
  708     (void)a;
  709     (void)b;
  710 
  711     for (j = 0; j < n; j++) {
  712         register size_t i;
  713 
  714         for (i = 0; i < n; i++)
  715             r[i][j] = (i == j) ? 1.0 : 0.0;
  716 
  717         if (UNLIKELY(!g_keep_stressing_flag))
  718             return;
  719     }
  720 }
  721 
  722 /*
  723  *  stress_matrix_xy_square()
  724  *  matrix product, r = a x a
  725  */
  726 static void OPTIMIZE3 TARGET_CLONES stress_matrix_xy_square(
  727     const size_t n,
  728     matrix_type_t a[RESTRICT n][n],
  729     matrix_type_t b[RESTRICT n][n],
  730     matrix_type_t r[RESTRICT n][n])
  731 {
  732     size_t i;
  733 
  734     (void)b;
  735 
  736     for (i = 0; i < n; i++) {
  737         register size_t j;
  738 
  739         for (j = 0; j < n; j++) {
  740             register size_t k;
  741 
  742             for (k = 0; k < n; k++) {
  743                 r[i][j] += a[i][k] * a[k][j];
  744             }
  745             if (UNLIKELY(!g_keep_stressing_flag))
  746                 return;
  747         }
  748     }
  749 }
  750 
  751 /*
  752  *  stress_matrix_yx_square()
  753  *  matrix product, r = a x a
  754  */
  755 static void OPTIMIZE3 TARGET_CLONES stress_matrix_yx_square(
  756     const size_t n,
  757     matrix_type_t a[RESTRICT n][n],
  758     matrix_type_t b[RESTRICT n][n],
  759     matrix_type_t r[RESTRICT n][n])
  760 {
  761     size_t j;
  762 
  763     (void)b;
  764 
  765     for (j = 0; j < n; j++) {
  766         register size_t i;
  767 
  768         for (i = 0; i < n; i++) {
  769             register size_t k;
  770 
  771             for (k = 0; k < n; k++) {
  772                 r[i][j] += a[i][k] * a[k][j];
  773             }
  774             if (UNLIKELY(!g_keep_stressing_flag))
  775                 return;
  776         }
  777     }
  778 }
  779 
  780 
  781 /*
  782  *  stress_matrix_all()
  783  *  iterate over all cpu stressors
  784  */
  785 static void OPTIMIZE3 stress_matrix_xy_all(
  786     const size_t n,
  787     matrix_type_t a[RESTRICT n][n],
  788     matrix_type_t b[RESTRICT n][n],
  789     matrix_type_t r[RESTRICT n][n])
  790 {
  791     static int i = 1;   /* Skip over stress_matrix_all */
  792 
  793     matrix_methods[i++].func[0](n, a, b, r);
  794     if (!matrix_methods[i].name)
  795         i = 1;
  796 }
  797 
  798 /*
  799  *  stress_matrix_all()
  800  *  iterate over all cpu stressors
  801  */
  802 static void OPTIMIZE3 stress_matrix_yx_all(
  803     const size_t n,
  804     matrix_type_t a[RESTRICT n][n],
  805     matrix_type_t b[RESTRICT n][n],
  806     matrix_type_t r[RESTRICT n][n])
  807 {
  808     static int i = 1;   /* Skip over stress_matrix_all */
  809 
  810     matrix_methods[i++].func[1](n, a, b, r);
  811     if (!matrix_methods[i].name)
  812         i = 1;
  813 }
  814 
  815 
  816 /*
  817  * Table of cpu stress methods, ordered x by y and y by x
  818  */
  819 static const stress_matrix_method_info_t matrix_methods[] = {
  820     { "all",        { stress_matrix_xy_all,     stress_matrix_yx_all } },/* Special "all" test */
  821 
  822     { "add",        { stress_matrix_xy_add,     stress_matrix_yx_add } },
  823     { "copy",       { stress_matrix_xy_copy,    stress_matrix_yx_copy } },
  824     { "div",        { stress_matrix_xy_div,     stress_matrix_yx_div } },
  825     { "frobenius",      { stress_matrix_xy_frobenius,   stress_matrix_yx_frobenius } },
  826     { "hadamard",       { stress_matrix_xy_hadamard,    stress_matrix_yx_hadamard } },
  827     { "identity",       { stress_matrix_xy_identity,    stress_matrix_yx_identity } },
  828     { "mean",       { stress_matrix_xy_mean,    stress_matrix_yx_mean } },
  829     { "mult",       { stress_matrix_xy_mult,    stress_matrix_yx_mult } },
  830     { "negate",     { stress_matrix_xy_negate,  stress_matrix_yx_negate } },
  831     { "prod",       { stress_matrix_xy_prod,    stress_matrix_yx_prod } },
  832     { "sub",        { stress_matrix_xy_sub,     stress_matrix_yx_sub } },
  833     { "square",     { stress_matrix_xy_square,  stress_matrix_yx_square } },
  834     { "trans",      { stress_matrix_xy_trans,   stress_matrix_yx_trans } },
  835     { "zero",       { stress_matrix_xy_zero,    stress_matrix_yx_zero } },
  836     { NULL,         { NULL, NULL } }
  837 };
  838 
  839 static const stress_matrix_method_info_t *stress_get_matrix_method(
  840     const char *name)
  841 {
  842     const stress_matrix_method_info_t *info;
  843 
  844     for (info = matrix_methods; info->name; info++) {
  845         if (!strcmp(info->name, name)) {
  846             set_setting("matrix-method", TYPE_ID_STR, name);
  847             return info;
  848         }
  849     }
  850     return NULL;
  851 }
  852 
  853 static void stress_matrix_method_error(void)
  854 {
  855     const stress_matrix_method_info_t *info;
  856 
  857     (void)fprintf(stderr, "matrix-method must be one of:");
  858     for (info = matrix_methods; info->name; info++)
  859         (void)fprintf(stderr, " %s", info->name);
  860     (void)fprintf(stderr, "\n");
  861 }
  862 
  863 /*
  864  *  stress_set_matrix_method()
  865  *  set the default matrix stress method
  866  */
  867 int stress_set_matrix_method(const char *name)
  868 {
  869     const stress_matrix_method_info_t *info;
  870 
  871     info = stress_get_matrix_method(name);
  872     if (info) {
  873         set_setting("matrix-method", TYPE_ID_STR, name);
  874         return 0;
  875     }
  876     stress_matrix_method_error();
  877 
  878     return -1;
  879 }
  880 
  881 static inline size_t round_up(size_t page_size, size_t n)
  882 {
  883     page_size = (page_size == 0) ? 4096 : page_size;
  884 
  885     return (n + page_size - 1) & (~(page_size -1));
  886 }
  887 
  888 static inline int stress_matrix_exercise(
  889     const args_t *args,
  890     const stress_matrix_func func,
  891     const size_t n)
  892 {
  893     int ret = EXIT_NO_RESOURCE;
  894     typedef matrix_type_t (*matrix_ptr_t)[n];
  895     size_t matrix_size = round_up(args->page_size, (sizeof(matrix_type_t) * n * n));
  896 
  897     matrix_ptr_t a, b = NULL, r = NULL;
  898     register size_t i;
  899     const matrix_type_t v = 65535 / (matrix_type_t)((uint64_t)~0);
  900     int flags = MAP_PRIVATE | MAP_ANONYMOUS;
  901 #if defined(MAP_POPULATE)
  902     flags |= MAP_POPULATE;
  903 #endif
  904 
  905     a = (matrix_ptr_t)mmap(NULL, matrix_size,
  906         PROT_READ | PROT_WRITE, flags, -1, 0);
  907     if (a == MAP_FAILED) {
  908         pr_fail("matrix allocation");
  909         goto tidy_ret;
  910     }
  911     b = (matrix_ptr_t)mmap(NULL, matrix_size,
  912         PROT_READ | PROT_WRITE, flags, -1, 0);
  913     if (b == MAP_FAILED) {
  914         pr_fail("matrix allocation");
  915         goto tidy_a;
  916     }
  917     r = (matrix_ptr_t)mmap(NULL, matrix_size,
  918         PROT_READ | PROT_WRITE, flags, -1, 0);
  919     if (r == MAP_FAILED) {
  920         pr_fail("matrix allocation");
  921         goto tidy_b;
  922     }
  923 
  924     /*
  925      *  Initialise matrices
  926      */
  927     for (i = 0; i < n; i++) {
  928         register size_t j;
  929 
  930         for (j = 0; j < n; j++) {
  931             a[i][j] = (matrix_type_t)mwc64() * v;
  932             b[i][j] = (matrix_type_t)mwc64() * v;
  933             r[i][j] = 0.0;
  934         }
  935     }
  936 
  937     /*
  938      * Normal use case, 100% load, simple spinning on CPU
  939      */
  940     do {
  941         (void)func(n, a, b, r);
  942         inc_counter(args);
  943     } while (keep_stressing());
  944 
  945     ret = EXIT_SUCCESS;
  946 
  947     munmap((void *)r, matrix_size);
  948 tidy_b:
  949     munmap((void *)b, matrix_size);
  950 tidy_a:
  951     munmap((void *)a, matrix_size);
  952 tidy_ret:
  953     return ret;
  954 }
  955 
  956 /*
  957  *  stress_matrix()
  958  *  stress CPU by doing floating point math ops
  959  */
  960 static int stress_matrix(const args_t *args)
  961 {
  962     char *matrix_method_name;
  963     const stress_matrix_method_info_t *matrix_method;
  964     stress_matrix_func func;
  965     size_t matrix_size = 128;
  966     size_t matrix_yx = 0;
  967 
  968     (void)get_setting("matrix-method", &matrix_method_name);
  969     (void)get_setting("matrix-yx", &matrix_yx);
  970 
  971     matrix_method = stress_get_matrix_method(matrix_method_name);
  972     if (!matrix_method) {
  973         /* Should *never* get here... */
  974         stress_matrix_method_error();
  975         return EXIT_FAILURE;
  976     }
  977 
  978     func = matrix_method->func[matrix_yx];
  979     if (args->instance == 0)
  980         pr_dbg("%s using method '%s' (%s)\n", args->name, matrix_method->name,
  981             matrix_yx ? "y by x" : "x by y");
  982 
  983     if (!get_setting("matrix-size", &matrix_size)) {
  984         if (g_opt_flags & OPT_FLAGS_MAXIMIZE)
  985             matrix_size = MAX_MATRIX_SIZE;
  986         if (g_opt_flags & OPT_FLAGS_MINIMIZE)
  987             matrix_size = MIN_MATRIX_SIZE;
  988     }
  989 
  990     return stress_matrix_exercise(args, func, matrix_size);
  991 }
  992 
  993 static void stress_matrix_set_default(void)
  994 {
  995     stress_set_matrix_method("all");
  996 }
  997 
  998 stressor_info_t stress_matrix_info = {
  999     .stressor = stress_matrix,
 1000     .set_default = stress_matrix_set_default,
 1001     .class = CLASS_CPU | CLASS_CPU_CACHE | CLASS_MEMORY
 1002 };
 1003 
 1004 #else
 1005 stressor_info_t stress_matrix_info = {
 1006     .stressor = stress_not_implemented,
 1007     .class = CLASS_CPU | CLASS_CPU_CACHE | CLASS_MEMORY
 1008 };
 1009 #endif