"Fossies" - the Fresh Open Source Software Archive

Member "stress-ng-0.09.56/stress-stream.c" (15 Mar 2019, 14051 Bytes) of package /linux/privat/stress-ng-0.09.56.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "stress-stream.c", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 0.09.49_vs_0.09.50.

    1 /*
    2  * Copyright (C) 2016-2019 Canonical, Ltd.
    3  *
    4  * This program is free software; you can redistribute it and/or
    5  * modify it under the terms of the GNU General Public License
    6  * as published by the Free Software Foundation; either version 2
    7  * of the License, or (at your option) any later version.
    8  *
    9  * This program is distributed in the hope that it will be useful,
   10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12  * GNU General Public License for more details.
   13  *
   14  * You should have received a copy of the GNU General Public License
   15  * along with this program; if not, write to the Free Software
   16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
   17  *
   18  * This code is a complete clean re-write of the stress tool by
   19  * Colin Ian King <colin.king@canonical.com> and attempts to be
   20  * backwardly compatible with the stress tool by Amos Waterland
   21  * <apw@rossby.metr.ou.edu> but has more stress tests and more
   22  * functionality.
   23  *
   24  * This stressor is loosely based on the STREAM Sustainable
   25  * Memory Bandwidth In High Performance Computers tool.
   26  *   https://www.cs.virginia.edu/stream/
   27  *   https://www.cs.virginia.edu/stream/FTP/Code/stream.c
   28  *
   29  * This is loosely based on a variant of the STREAM benchmark code,
   30  * so DO NOT submit results based on this as it is intended to
   31  * stress memory and compute and NOT intended for STREAM accurate
   32  * tuned or non-tuned benchmarking whatsoever.  I believe this
   33  * conforms to section 3a, 3b of the original License.
   34  *
   35  */
   36 #include "stress-ng.h"
   37 
   38 typedef struct {
   39     const char *name;
   40         const int advice;
   41 } stream_madvise_info_t;
   42 
   43 static const stream_madvise_info_t stream_madvise_info[] = {
   44 #if defined(HAVE_MADVISE)
   45 #if defined(MADV_HUGEPAGE)
   46     { "hugepage",   MADV_HUGEPAGE },
   47 #endif
   48 #if defined(MADV_NOHUGEPAGE)
   49     { "nohugepage", MADV_NOHUGEPAGE },
   50 #endif
   51 #if defined(MADV_NORMAL)
   52     { "normal", MADV_NORMAL },
   53 #endif
   54 #else
   55     /* No MADVISE, default to normal, ignored */
   56     { "normal", 0 },
   57 #endif
   58         { NULL,         0 },
   59 };
   60 
   61 
   62 int stress_set_stream_L3_size(const char *opt)
   63 {
   64     uint64_t stream_L3_size;
   65 
   66     stream_L3_size = get_uint64_byte(opt);
   67     check_range_bytes("stream-L3-size", stream_L3_size,
   68         MIN_STREAM_L3_SIZE, MAX_STREAM_L3_SIZE);
   69     return set_setting("stream-L3-size", TYPE_ID_UINT64, &stream_L3_size);
   70 }
   71 
   72 int stress_set_stream_madvise(const char *opt)
   73 {
   74     const stream_madvise_info_t *info;
   75 
   76     for (info = stream_madvise_info; info->name; info++) {
   77         if (!strcmp(opt, info->name)) {
   78             set_setting("stream-madvise", TYPE_ID_INT, &info->advice);
   79             return 0;
   80         }
   81     }
   82     (void)fprintf(stderr, "invalid stream-madvise advice '%s', allowed advice options are:", opt);
   83     for (info = stream_madvise_info; info->name; info++) {
   84         (void)fprintf(stderr, " %s", info->name);
   85         }
   86     (void)fprintf(stderr, "\n");
   87     return -1;
   88 }
   89 
   90 int stress_set_stream_index(const char *opt)
   91 {
   92     uint32_t stream_index;
   93 
   94     stream_index = get_int32(opt);
   95     check_range("stream-index", stream_index, 0, 3);
   96     return set_setting("stream-index", TYPE_ID_UINT32, &stream_index);
   97 }
   98 
   99 static inline void OPTIMIZE3 stress_stream_copy_index0(
  100     double *RESTRICT c,
  101     const double *RESTRICT a,
  102     const uint64_t n)
  103 {
  104     register uint64_t i;
  105 
  106     for (i = 0; i < n; i++)
  107         c[i] = a[i];
  108 }
  109 
  110 static inline void OPTIMIZE3 stress_stream_copy_index1(
  111     double *RESTRICT c,
  112     const double *RESTRICT a,
  113     size_t *RESTRICT idx1,
  114     const uint64_t n)
  115 {
  116     register uint64_t i;
  117 
  118     for (i = 0; i < n; i++)
  119         c[idx1[i]] = a[idx1[i]];
  120 }
  121 
  122 static inline void OPTIMIZE3 stress_stream_copy_index2(
  123     double *RESTRICT c,
  124     const double *RESTRICT a,
  125     size_t *RESTRICT idx1,
  126     size_t *RESTRICT idx2,
  127     const uint64_t n)
  128 {
  129     register uint64_t i;
  130 
  131     for (i = 0; i < n; i++)
  132         c[idx1[i]] = a[idx2[i]];
  133 }
  134 
  135 static inline void OPTIMIZE3 stress_stream_copy_index3(
  136     double *RESTRICT c,
  137     const double *RESTRICT a,
  138     size_t *RESTRICT idx1,
  139     size_t *RESTRICT idx2,
  140     size_t *RESTRICT idx3,
  141     const uint64_t n)
  142 {
  143     register uint64_t i;
  144 
  145     for (i = 0; i < n; i++)
  146         c[idx3[idx1[i]]] = a[idx2[i]];
  147 }
  148 
  149 static inline void OPTIMIZE3 stress_stream_scale_index0(
  150     double *RESTRICT b,
  151     const double *RESTRICT c,
  152     const double q,
  153     const uint64_t n)
  154 {
  155     register uint64_t i;
  156 
  157     for (i = 0; i < n; i++)
  158         b[i] = q * c[i];
  159 }
  160 
  161 static inline void OPTIMIZE3 stress_stream_scale_index1(
  162     double *RESTRICT b,
  163     const double *RESTRICT c,
  164     const double q,
  165     size_t *RESTRICT idx1,
  166     const uint64_t n)
  167 {
  168     register uint64_t i;
  169 
  170     for (i = 0; i < n; i++)
  171         b[idx1[i]] = q * c[idx1[i]];
  172 }
  173 
  174 static inline void OPTIMIZE3 stress_stream_scale_index2(
  175     double *RESTRICT b,
  176     const double *RESTRICT c,
  177     const double q,
  178     size_t *RESTRICT idx1,
  179     size_t *RESTRICT idx2,
  180     const uint64_t n)
  181 {
  182     register uint64_t i;
  183 
  184     for (i = 0; i < n; i++)
  185         b[idx1[i]] = q * c[idx2[i]];
  186 }
  187 
  188 static inline void OPTIMIZE3 stress_stream_scale_index3(
  189     double *RESTRICT b,
  190     const double *RESTRICT c,
  191     const double q,
  192     size_t *RESTRICT idx1,
  193     size_t *RESTRICT idx2,
  194     size_t *RESTRICT idx3,
  195     const uint64_t n)
  196 {
  197     register uint64_t i;
  198 
  199     for (i = 0; i < n; i++)
  200         b[idx3[idx1[i]]] = q * c[idx2[i]];
  201 }
  202 
  203 static inline void OPTIMIZE3 stress_stream_add_index0(
  204     const double *RESTRICT a,
  205     const double *RESTRICT b,
  206     double *RESTRICT c,
  207     const uint64_t n)
  208 {
  209     register uint64_t i;
  210 
  211     for (i = 0; i < n; i++)
  212         c[i] = a[i] + b[i];
  213 }
  214 
  215 static inline void OPTIMIZE3 stress_stream_add_index1(
  216     const double *RESTRICT a,
  217     const double *RESTRICT b,
  218     double *RESTRICT c,
  219     size_t *RESTRICT idx1,
  220     const uint64_t n)
  221 {
  222     register uint64_t i;
  223 
  224     for (i = 0; i < n; i++)
  225         c[idx1[i]] = a[idx1[i]] + b[idx1[i]];
  226 }
  227 
  228 static inline void OPTIMIZE3 stress_stream_add_index2(
  229     const double *RESTRICT a,
  230     const double *RESTRICT b,
  231     double *RESTRICT c,
  232     size_t *RESTRICT idx1,
  233     size_t *RESTRICT idx2,
  234     const uint64_t n)
  235 {
  236     register uint64_t i;
  237 
  238     for (i = 0; i < n; i++)
  239         c[idx1[i]] = a[idx2[i]] + b[idx1[i]];
  240 }
  241 
  242 static inline void OPTIMIZE3 stress_stream_add_index3(
  243     const double *RESTRICT a,
  244     const double *RESTRICT b,
  245     double *RESTRICT c,
  246     size_t *RESTRICT idx1,
  247     size_t *RESTRICT idx2,
  248     size_t *RESTRICT idx3,
  249     const uint64_t n)
  250 {
  251     register uint64_t i;
  252 
  253     for (i = 0; i < n; i++)
  254         c[idx1[i]] = a[idx2[i]] + b[idx3[i]];
  255 }
  256 
  257 static inline void OPTIMIZE3 stress_stream_triad_index0(
  258     double *RESTRICT a,
  259     const double *RESTRICT b,
  260     const double *RESTRICT c,
  261     const double q,
  262     const uint64_t n)
  263 {
  264     register uint64_t i;
  265 
  266     for (i = 0; i < n; i++)
  267         a[i] = b[i] + (c[i] * q);
  268 }
  269 
  270 static inline void OPTIMIZE3 stress_stream_triad_index1(
  271     double *RESTRICT a,
  272     const double *RESTRICT b,
  273     const double *RESTRICT c,
  274     const double q,
  275     size_t *RESTRICT idx1,
  276     const uint64_t n)
  277 {
  278     register uint64_t i;
  279 
  280     for (i = 0; i < n; i++)
  281         a[idx1[i]] = b[idx1[i]] + (c[idx1[i]] * q);
  282 }
  283 
  284 static inline void OPTIMIZE3 stress_stream_triad_index2(
  285     double *RESTRICT a,
  286     const double *RESTRICT b,
  287     const double *RESTRICT c,
  288     const double q,
  289     size_t *RESTRICT idx1,
  290     size_t *RESTRICT idx2,
  291     const uint64_t n)
  292 {
  293     register uint64_t i;
  294 
  295     for (i = 0; i < n; i++)
  296         a[idx1[i]] = b[idx2[i]] + (c[idx1[i]] * q);
  297 }
  298 
  299 static inline void OPTIMIZE3 stress_stream_triad_index3(
  300     double *RESTRICT a,
  301     const double *RESTRICT b,
  302     const double *RESTRICT c,
  303     const double q,
  304     size_t *RESTRICT idx1,
  305     size_t *RESTRICT idx2,
  306     size_t *RESTRICT idx3,
  307     const uint64_t n)
  308 {
  309     register uint64_t i;
  310 
  311     for (i = 0; i < n; i++)
  312         a[idx1[i]] = b[idx2[i]] + (c[idx3[i]] * q);
  313 }
  314 
  315 static void stress_stream_init_data(
  316     double *RESTRICT data,
  317     const uint64_t n)
  318 {
  319     uint64_t i;
  320 
  321     for (i = 0; i < n; i++)
  322         data[i] = (double)mwc32() / (double)mwc64();
  323 }
  324 
  325 static inline void *stress_stream_mmap(const args_t *args, uint64_t sz)
  326 {
  327     void *ptr;
  328 
  329     ptr = mmap(NULL, (size_t)sz, PROT_READ | PROT_WRITE,
  330 #if defined(MAP_POPULATE)
  331         MAP_POPULATE |
  332 #endif
  333 #if defined(HAVE_MADVISE)
  334         MAP_PRIVATE |
  335 #else
  336         MAP_SHARED |
  337 #endif
  338         MAP_ANONYMOUS, -1, 0);
  339     /* Coverity Scan believes NULL can be returned, doh */
  340     if (!ptr || (ptr == MAP_FAILED)) {
  341         pr_err("%s: cannot allocate %" PRIu64 " bytes\n",
  342             args->name, sz);
  343         ptr = MAP_FAILED;
  344     } else {
  345 #if defined(HAVE_MADVISE)
  346         int ret, advice = MADV_NORMAL;
  347 
  348         (void)get_setting("stream-madvise", &advice);
  349 
  350         ret = madvise(ptr, sz, advice);
  351         (void)ret;
  352 #endif
  353     }
  354     return ptr;
  355 }
  356 
  357 static inline uint64_t get_stream_L3_size(const args_t *args)
  358 {
  359     uint64_t cache_size = MEM_CACHE_SIZE;
  360 #if defined(__linux__)
  361     cpus_t *cpu_caches;
  362     cpu_cache_t *cache = NULL;
  363     uint16_t max_cache_level;
  364 
  365     cpu_caches = get_all_cpu_cache_details();
  366     if (!cpu_caches) {
  367         if (!args->instance)
  368             pr_inf("%s: using built-in defaults as unable to "
  369                 "determine cache details\n", args->name);
  370         return cache_size;
  371     }
  372     max_cache_level = get_max_cache_level(cpu_caches);
  373     if ((max_cache_level > 0) && (max_cache_level < 3) && (!args->instance))
  374         pr_inf("%s: no L3 cache, using L%" PRIu16 " size instead\n",
  375             args->name, max_cache_level);
  376 
  377     cache = get_cpu_cache(cpu_caches, max_cache_level);
  378     if (!cache) {
  379         if (!args->instance)
  380             pr_inf("%s: using built-in defaults as no suitable "
  381                 "cache found\n", args->name);
  382         free_cpu_caches(cpu_caches);
  383         return cache_size;
  384     }
  385     if (!cache->size) {
  386         if (!args->instance)
  387             pr_inf("%s: using built-in defaults as unable to "
  388                 "determine cache size\n", args->name);
  389         free_cpu_caches(cpu_caches);
  390         return cache_size;
  391     }
  392     cache_size = cache->size;
  393 
  394     free_cpu_caches(cpu_caches);
  395 #else
  396     if (!args->instance)
  397         pr_inf("%s: using built-in defaults as unable to "
  398             "determine cache details\n", args->name);
  399 #endif
  400     return cache_size;
  401 }
  402 
  403 static void stress_stream_init_index(
  404     size_t *RESTRICT idx,
  405     const uint64_t n)
  406 {
  407     uint64_t i;
  408 
  409     for (i = 0; i < n; i++)
  410         idx[i] = i;
  411 
  412     for (i = 0; i < n; i++) {
  413         register uint64_t j = mwc64() % n;
  414         register uint64_t tmp;
  415 
  416         tmp = idx[i];
  417         idx[i] = idx[j];
  418         idx[j] = tmp;
  419     }
  420 }
  421 
  422 /*
  423  *  stress_stream()
  424  *  stress cache/memory/CPU with stream stressors
  425  */
  426 static int stress_stream(const args_t *args)
  427 {
  428     int rc = EXIT_FAILURE;
  429     double *a, *b, *c;
  430     size_t *idx1 = NULL, *idx2 = NULL, *idx3 = NULL;
  431     const double q = 3.0;
  432     double mb_rate, mb, fp_rate, fp, t1, t2, dt;
  433     uint32_t stream_index = 0;
  434     uint64_t L3, sz, n, sz_idx;
  435     uint64_t stream_L3_size = DEFAULT_STREAM_L3_SIZE;
  436     bool guess = false;
  437 
  438     if (get_setting("stream-L3-size", &stream_L3_size))
  439         L3 = stream_L3_size;
  440     else
  441         L3 = get_stream_L3_size(args);
  442 
  443     (void)get_setting("stream-index", &stream_index);
  444 
  445     /* Have to take a hunch and badly guess size */
  446     if (!L3) {
  447         guess = true;
  448         L3 = stress_get_processors_configured() * DEFAULT_STREAM_L3_SIZE;
  449     }
  450 
  451     if (args->instance == 0) {
  452         pr_inf("%s: stressor loosely based on a variant of the "
  453             "STREAM benchmark code\n", args->name);
  454         pr_inf("%s: do NOT submit any of these results "
  455             "to the STREAM benchmark results\n", args->name);
  456         if (guess) {
  457             pr_inf("%s: cannot determine CPU L3 cache size, "
  458                 "defaulting to %" PRIu64 "K\n",
  459                 args->name, L3 / 1024);
  460         } else {
  461             pr_inf("%s: Using CPU cache size of %" PRIu64 "K\n",
  462                 args->name, L3 / 1024);
  463         }
  464     }
  465 
  466     /* ..and shared amongst all the STREAM stressor instances */
  467     L3 /= args->num_instances;
  468     if (L3 < args->page_size)
  469         L3 = args->page_size;
  470 
  471     /*
  472      *  Each array must be at least 4 x the
  473      *  size of the L3 cache
  474      */
  475     sz = (L3 * 4);
  476     n = sz / sizeof(*a);
  477 
  478     a = stress_stream_mmap(args, sz);
  479     if (a == MAP_FAILED)
  480         goto err_a;
  481     b = stress_stream_mmap(args, sz);
  482     if (b == MAP_FAILED)
  483         goto err_b;
  484     c = stress_stream_mmap(args, sz);
  485     if (c == MAP_FAILED)
  486         goto err_c;
  487 
  488     sz_idx = n * sizeof(size_t);
  489     switch (stream_index) {
  490     case 3:
  491         idx3 = stress_stream_mmap(args, sz_idx);
  492         if (idx3 == MAP_FAILED)
  493             goto err_idx3;
  494         stress_stream_init_index(idx3, n);
  495         CASE_FALLTHROUGH;
  496     case 2:
  497         idx2 = stress_stream_mmap(args, sz_idx);
  498         if (idx2 == MAP_FAILED)
  499             goto err_idx2;
  500         stress_stream_init_index(idx2, n);
  501         CASE_FALLTHROUGH;
  502     case 1:
  503         idx1 = stress_stream_mmap(args, sz_idx);
  504         if (idx1 == MAP_FAILED)
  505             goto err_idx1;
  506         stress_stream_init_index(idx1, n);
  507         CASE_FALLTHROUGH;
  508     case 0:
  509     default:
  510         break;
  511     }
  512 
  513     stress_stream_init_data(a, n);
  514     stress_stream_init_data(b, n);
  515     stress_stream_init_data(c, n);
  516 
  517     t1 = time_now();
  518     do {
  519         switch (stream_index) {
  520         case 3:
  521             stress_stream_copy_index3(c, a, idx1, idx2, idx3, n);
  522             stress_stream_scale_index3(b, c, q, idx1, idx2, idx3, n);
  523             stress_stream_add_index3(c, b, a, idx1, idx2, idx3, n);
  524             stress_stream_triad_index3(a, b, c, q, idx1, idx2, idx3, n);
  525             break;
  526         case 2:
  527             stress_stream_copy_index2(c, a, idx1, idx2, n);
  528             stress_stream_scale_index2(b, c, q, idx1, idx2, n);
  529             stress_stream_add_index2(c, b, a, idx1, idx2, n);
  530             stress_stream_triad_index2(a, b, c, q, idx1, idx2, n);
  531             break;
  532         case 1:
  533             stress_stream_copy_index1(c, a, idx1, n);
  534             stress_stream_scale_index1(b, c, q, idx1, n);
  535             stress_stream_add_index1(c, b, a, idx1, n);
  536             stress_stream_triad_index1(a, b, c, q, idx1, n);
  537             break;
  538         case 0:
  539         default:
  540             stress_stream_copy_index0(c, a, n);
  541             stress_stream_scale_index0(b, c, q, n);
  542             stress_stream_add_index0(c, b, a, n);
  543             stress_stream_triad_index0(a, b, c, q, n);
  544             break;
  545         }
  546         inc_counter(args);
  547     } while (keep_stressing());
  548     t2 = time_now();
  549 
  550     mb = ((double)(get_counter(args) * 10) * (double)sz) / (double)MB;
  551     fp = ((double)(get_counter(args) * 4) * (double)sz) / (double)MB;
  552     dt = t2 - t1;
  553     if (dt >= 4.5) {
  554         mb_rate = mb / (dt);
  555         fp_rate = fp / (dt);
  556         pr_inf("%s: memory rate: %.2f MB/sec, %.2f Mflop/sec"
  557             " (instance %" PRIu32 ")\n",
  558             args->name, mb_rate, fp_rate, args->instance);
  559     } else {
  560         if (args->instance == 0)
  561             pr_inf("%s: run too short to determine memory rate\n", args->name);
  562     }
  563 
  564     rc = EXIT_SUCCESS;
  565 
  566     if (idx3)
  567         (void)munmap((void *)idx3, sz_idx);
  568 err_idx3:
  569     if (idx2)
  570         (void)munmap((void *)idx2, sz_idx);
  571 err_idx2:
  572     if (idx1)
  573         (void)munmap((void *)idx1, sz_idx);
  574 err_idx1:
  575     (void)munmap((void *)c, sz);
  576 err_c:
  577     (void)munmap((void *)b, sz);
  578 err_b:
  579     (void)munmap((void *)a, sz);
  580 err_a:
  581 
  582     return rc;
  583 }
  584 
  585 stressor_info_t stress_stream_info = {
  586     .stressor = stress_stream,
  587     .class = CLASS_CPU | CLASS_CPU_CACHE | CLASS_MEMORY
  588 };