"Fossies" - the Fresh Open Source Software Archive

Member "stress-ng-0.09.56/stress-vecmath.c" (15 Mar 2019, 7318 Bytes) of package /linux/privat/stress-ng-0.09.56.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "stress-vecmath.c" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 0.09.50_vs_0.09.51.

    1 /*
    2  * Copyright (C) 2013-2019 Canonical, Ltd.
    3  *
    4  * This program is free software; you can redistribute it and/or
    5  * modify it under the terms of the GNU General Public License
    6  * as published by the Free Software Foundation; either version 2
    7  * of the License, or (at your option) any later version.
    8  *
    9  * This program is distributed in the hope that it will be useful,
   10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12  * GNU General Public License for more details.
   13  *
   14  * You should have received a copy of the GNU General Public License
   15  * along with this program; if not, write to the Free Software
   16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
   17  *
   18  * This code is a complete clean re-write of the stress tool by
   19  * Colin Ian King <colin.king@canonical.com> and attempts to be
   20  * backwardly compatible with the stress tool by Amos Waterland
   21  * <apw@rossby.metr.ou.edu> but has more stress tests and more
   22  * functionality.
   23  *
   24  */
   25 #include "stress-ng.h"
   26 
   27 /*
   28  *  Clang 5.0 is the lowest version of clang that
   29  *  can build this without issues (clang 4.0 seems
   30  *  to spend forever optimizing this and causes the build
   31  *  to never complete)
   32  */
   33 #if defined(__clang__) && \
   34     defined(__clang_major__) && \
   35     __clang_major__ < 5
   36 #undef HAVE_VECMATH
   37 #endif
   38 
   39 /*
   40  *  gcc 5.x or earlier breaks on 128 bit vector maths on
   41  *  PPC64 for some reason with some flavours of the toolchain
   42  *  so disable this test for now
   43  */
   44 #if defined(STRESS_PPC64) && \
   45     defined(__GNUC__) && \
   46     __GNUC__ < 6
   47 #undef HAVE_VECMATH
   48 #endif
   49 
   50 #if defined(HAVE_VECMATH)
   51 
   52 typedef int8_t  vint8_t  __attribute__ ((vector_size (16)));
   53 typedef int16_t vint16_t __attribute__ ((vector_size (16)));
   54 typedef int32_t vint32_t __attribute__ ((vector_size (16)));
   55 typedef int64_t vint64_t __attribute__ ((vector_size (16)));
   56 #if defined(HAVE_INT128_T)
   57 typedef __uint128_t vint128_t __attribute__ ((vector_size (16)));
   58 #endif
   59 
   60 #define INT128(hi, lo)  (((__uint128_t)hi << 64) | (__uint128_t)lo)
   61 
   62 #define OPS(a, b, c, s, v23, v3) \
   63     a += b;     \
   64     a |= b;     \
   65     a -= b;     \
   66     a &= ~b;    \
   67     a *= c;     \
   68     a = ~a;     \
   69     a *= s;     \
   70     a ^= c;     \
   71     a <<= 1;    \
   72     b >>= 1;    \
   73     b += c;     \
   74     a %= v23;   \
   75     c /= v3;    \
   76     b = b ^ c;  \
   77     c = b ^ c;  \
   78     b = b ^ c;  \
   79 
   80 /*
   81  *  stress_vecmath()
   82  *  stress GCC vector maths
   83  */
   84 static int HOT TARGET_CLONES stress_vecmath(const args_t *args)
   85 {
   86     vint8_t a8 = {
   87         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   88         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
   89     vint8_t b8 = {
   90         0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
   91         0x0f, 0x1e, 0x2d, 0x3c, 0x4b, 0x5a, 0x69, 0x78 };
   92     vint8_t c8 = {
   93         0x01, 0x02, 0x03, 0x02, 0x01, 0x02, 0x03, 0x02,
   94         0x03, 0x02, 0x01, 0x02, 0x03, 0x02, 0x01, 0x02 };
   95     vint8_t s8 = {
   96         0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02,
   97         0x01, 0x01, 0x02, 0x02, 0x01, 0x01, 0x02, 0x02 };
   98     const vint8_t v23_8 = {
   99         0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
  100         0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17 };
  101     const vint8_t v3_8 = {
  102         0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
  103         0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 };
  104 
  105     vint16_t a16 = {
  106         0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 };
  107     vint16_t b16 = {
  108         0x0123, 0x4567, 0x89ab, 0xcdef, 0x0f1e, 0x2d3c, 0x4b5a, 0x6978 };
  109     vint16_t c16 = {
  110         0x0102, 0x0302, 0x0102, 0x0302, 0x0302, 0x0102, 0x0302, 0x0102 };
  111     vint16_t s16 = {
  112         0x0001, 0x0001, 0x0002, 0x0002, 0x0001, 0x0002, 0x0001, 0x0002 };
  113     const vint16_t v23_16 = {
  114         0x0017, 0x0017, 0x0017, 0x0017, 0x0017, 0x0017, 0x0017, 0x0017 };
  115     const vint16_t v3_16 = {
  116         0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003 };
  117 
  118     vint32_t a32 = {
  119         0x00000000, 0x00000000, 0x00000000, 0x00000000 };
  120     vint32_t b32 = {
  121         0x01234567, 0x89abcdef, 0x0f1e2d3c, 0x4b5a6978 };
  122     vint32_t c32 = {
  123         0x01020302, 0x01020302, 0x03020102, 0x03020102 };
  124     vint32_t s32 = {
  125         0x00000001, 0x00000002, 0x00000002, 0000000001 };
  126     const vint32_t v23_32 = {
  127         0x00000017, 0x00000017, 0x00000017, 0x00000017 };
  128     const vint32_t v3_32 = {
  129         0x00000003, 0x00000003, 0x00000003, 0x00000003 };
  130 
  131     vint64_t a64 = {
  132         0x0000000000000000ULL, 0x0000000000000000ULL };
  133     vint64_t b64 = {
  134         0x0123456789abcdefULL, 0x0f1e2d3c4b5a6979ULL };
  135     vint64_t c64 = {
  136         0x0102030201020302ULL, 0x0302010203020102ULL };
  137     vint64_t s64 = {
  138         0x0000000000000001ULL, 0x0000000000000002ULL };
  139     const vint64_t v23_64 = {
  140         0x0000000000000023ULL, 0x0000000000000023ULL };
  141     const vint64_t v3_64 = {
  142         0x0000000000000003ULL, 0x0000000000000003ULL };
  143 
  144 #if defined(HAVE_INT128_T)
  145     vint128_t a128 = {
  146         INT128(0x0000000000000000ULL, 0x0000000000000000ULL) };
  147     vint128_t b128 = {
  148         INT128(0x0123456789abcdefULL, 0x0f1e2d3c4b5a6979ULL) };
  149     vint128_t c128 = {
  150         INT128(0x0102030201020302ULL, 0x0302010203020102ULL) };
  151     vint128_t s128 = {
  152         INT128(0x0000000000000001ULL, 0x0000000000000002ULL) };
  153     const vint128_t v23_128 = {
  154         INT128(0x0000000000000000ULL, 0x0000000000000023ULL) };
  155     const vint128_t v3_128 = {
  156         INT128(0x0000000000000000ULL, 0x0000000000000003ULL) };
  157 #endif
  158 
  159     do {
  160         int i;
  161         for (i = 1000; i; i--) {
  162             /* Good mix of vector ops */
  163             OPS(a8, b8, c8, s8, v23_8, v3_8);
  164             OPS(a16, b16, c16, s16, v23_16, v3_16);
  165             OPS(a32, b32, c32, s32, v23_32, v3_32);
  166             OPS(a64, b64, c64, s64, v23_64, v3_64);
  167 #if defined(HAVE_INT128_T)
  168             OPS(a128, b128, c128, s128, v23_128, v3_128);
  169 #endif
  170 
  171             OPS(a32, b32, c32, s32, v23_32, v3_32);
  172             OPS(a16, b16, c16, s16, v23_16, v3_16);
  173 #if defined(HAVE_INT128_T)
  174             OPS(a128, b128, c128, s128, v23_128, v3_128);
  175 #endif
  176             OPS(a8, b8, c8, s8, v23_8, v3_8);
  177             OPS(a64, b64, c64, s64, v23_64, v3_64);
  178 
  179             OPS(a8, b8, c8, s8, v23_8, v3_8);
  180             OPS(a8, b8, c8, s8, v23_8, v3_8);
  181             OPS(a8, b8, c8, s8, v23_8, v3_8);
  182             OPS(a8, b8, c8, s8, v23_8, v3_8);
  183 
  184             OPS(a16, b16, c16, s16, v23_16, v3_16);
  185             OPS(a16, b16, c16, s16, v23_16, v3_16);
  186             OPS(a16, b16, c16, s16, v23_16, v3_16);
  187             OPS(a16, b16, c16, s16, v23_16, v3_16);
  188 
  189             OPS(a32, b32, c32, s32, v23_32, v3_32);
  190             OPS(a32, b32, c32, s32, v23_32, v3_32);
  191             OPS(a32, b32, c32, s32, v23_32, v3_32);
  192             OPS(a32, b32, c32, s32, v23_32, v3_32);
  193 
  194             OPS(a64, b64, c64, s64, v23_64, v3_64);
  195             OPS(a64, b64, c64, s64, v23_64, v3_64);
  196             OPS(a64, b64, c64, s64, v23_64, v3_64);
  197             OPS(a64, b64, c64, s64, v23_64, v3_64);
  198 #if defined(HAVE_INT128_T)
  199             OPS(a128, b128, c128, s128, v23_128, v3_128);
  200             OPS(a128, b128, c128, s128, v23_128, v3_128);
  201             OPS(a128, b128, c128, s128, v23_128, v3_128);
  202             OPS(a128, b128, c128, s128, v23_128, v3_128);
  203 #endif
  204         }
  205         inc_counter(args);
  206     } while (keep_stressing());
  207 
  208     /* Forces the compiler to actually compute the terms */
  209     uint64_put(a8[0] + a8[1] + a8[2] + a8[3] +
  210            a8[4] + a8[5] + a8[6] + a8[7] +
  211            a8[8] + a8[9] + a8[10] + a8[11] +
  212            a8[12] + a8[13] + a8[14] + a8[15]);
  213 
  214     uint64_put(a16[0] + a16[1] + a16[2] + a16[3] +
  215            a16[4] + a16[5] + a16[6] + a16[7]);
  216 
  217     uint64_put(a32[0] + a32[1] + a32[2] + a32[3]);
  218 
  219     uint64_put(a64[0] + a64[1]);
  220 
  221 #if defined(HAVE_INT128_T)
  222     uint128_put(a128[0]);
  223 #endif
  224 
  225     return EXIT_SUCCESS;
  226 }
  227 
  228 stressor_info_t stress_vecmath_info = {
  229     .stressor = stress_vecmath,
  230     .class = CLASS_CPU | CLASS_CPU_CACHE
  231 };
  232 #else
  233 stressor_info_t stress_vecmath_info = {
  234     .stressor = stress_not_implemented,
  235     .class = CLASS_CPU | CLASS_CPU_CACHE
  236 };
  237 #endif