"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp" (16 Sep 2020, 15541 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "simdlib_types.hpp" see the Fossies "Dox" file reference documentation.

    1 /****************************************************************************
    2  * Copyright (C) 2017 Intel Corporation.   All Rights Reserved.
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21  * IN THE SOFTWARE.
   22  ****************************************************************************/
   23 #pragma once
   24 
   25 #if !defined(__cplusplus)
   26 #error C++ compilation required
   27 #endif
   28 
   29 #include <immintrin.h>
   30 #include <inttypes.h>
   31 #include <stdint.h>
   32 
   33 #define SIMD_ARCH_AVX 0
   34 #define SIMD_ARCH_AVX2 1
   35 #define SIMD_ARCH_AVX512 2
   36 
   37 #if !defined(SIMD_ARCH)
   38 #define SIMD_ARCH SIMD_ARCH_AVX
   39 #endif
   40 
   41 #if defined(_MSC_VER)
   42 #define SIMDCALL __vectorcall
   43 #define SIMDINLINE __forceinline
   44 #define SIMDALIGN(type_, align_) __declspec(align(align_)) type_
   45 #else
   46 #define SIMDCALL
   47 #define SIMDINLINE inline
   48 #define SIMDALIGN(type_, align_) type_ __attribute__((aligned(align_)))
   49 #endif
   50 
   51 // For documentation, please see the following include...
   52 // #include "simdlib_interface.hpp"
   53 
   54 namespace SIMDImpl
   55 {
   56     enum class CompareType
   57     {
   58         EQ_OQ    = 0x00, // Equal (ordered, nonsignaling)
   59         LT_OS    = 0x01, // Less-than (ordered, signaling)
   60         LE_OS    = 0x02, // Less-than-or-equal (ordered, signaling)
   61         UNORD_Q  = 0x03, // Unordered (nonsignaling)
   62         NEQ_UQ   = 0x04, // Not-equal (unordered, nonsignaling)
   63         NLT_US   = 0x05, // Not-less-than (unordered, signaling)
   64         NLE_US   = 0x06, // Not-less-than-or-equal (unordered, signaling)
   65         ORD_Q    = 0x07, // Ordered (nonsignaling)
   66         EQ_UQ    = 0x08, // Equal (unordered, non-signaling)
   67         NGE_US   = 0x09, // Not-greater-than-or-equal (unordered, signaling)
   68         NGT_US   = 0x0A, // Not-greater-than (unordered, signaling)
   69         FALSE_OQ = 0x0B, // False (ordered, nonsignaling)
   70         NEQ_OQ   = 0x0C, // Not-equal (ordered, non-signaling)
   71         GE_OS    = 0x0D, // Greater-than-or-equal (ordered, signaling)
   72         GT_OS    = 0x0E, // Greater-than (ordered, signaling)
   73         TRUE_UQ  = 0x0F, // True (unordered, non-signaling)
   74         EQ_OS    = 0x10, // Equal (ordered, signaling)
   75         LT_OQ    = 0x11, // Less-than (ordered, nonsignaling)
   76         LE_OQ    = 0x12, // Less-than-or-equal (ordered, nonsignaling)
   77         UNORD_S  = 0x13, // Unordered (signaling)
   78         NEQ_US   = 0x14, // Not-equal (unordered, signaling)
   79         NLT_UQ   = 0x15, // Not-less-than (unordered, nonsignaling)
   80         NLE_UQ   = 0x16, // Not-less-than-or-equal (unordered, nonsignaling)
   81         ORD_S    = 0x17, // Ordered (signaling)
   82         EQ_US    = 0x18, // Equal (unordered, signaling)
   83         NGE_UQ   = 0x19, // Not-greater-than-or-equal (unordered, nonsignaling)
   84         NGT_UQ   = 0x1A, // Not-greater-than (unordered, nonsignaling)
   85         FALSE_OS = 0x1B, // False (ordered, signaling)
   86         NEQ_OS   = 0x1C, // Not-equal (ordered, signaling)
   87         GE_OQ    = 0x1D, // Greater-than-or-equal (ordered, nonsignaling)
   88         GT_OQ    = 0x1E, // Greater-than (ordered, nonsignaling)
   89         TRUE_US  = 0x1F, // True (unordered, signaling)
   90     };
   91 
   92 #if SIMD_ARCH >= SIMD_ARCH_AVX512
   93     enum class CompareTypeInt
   94     {
   95         EQ = _MM_CMPINT_EQ, // Equal
   96         LT = _MM_CMPINT_LT, // Less than
   97         LE = _MM_CMPINT_LE, // Less than or Equal
   98         NE = _MM_CMPINT_NE, // Not Equal
   99         GE = _MM_CMPINT_GE, // Greater than or Equal
  100         GT = _MM_CMPINT_GT, // Greater than
  101     };
  102 #endif // SIMD_ARCH >= SIMD_ARCH_AVX512
  103 
  104     enum class ScaleFactor
  105     {
  106         SF_1 = 1, // No scaling
  107         SF_2 = 2, // Scale offset by 2
  108         SF_4 = 4, // Scale offset by 4
  109         SF_8 = 8, // Scale offset by 8
  110     };
  111 
  112     enum class RoundMode
  113     {
  114         TO_NEAREST_INT = 0x00, // Round to nearest integer == TRUNCATE(value + 0.5)
  115         TO_NEG_INF     = 0x01, // Round to negative infinity
  116         TO_POS_INF     = 0x02, // Round to positive infinity
  117         TO_ZERO        = 0x03, // Round to 0 a.k.a. truncate
  118         CUR_DIRECTION  = 0x04, // Round in direction set in MXCSR register
  119 
  120         RAISE_EXC = 0x00, // Raise exception on overflow
  121         NO_EXC    = 0x08, // Suppress exceptions
  122 
  123         NINT        = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(RAISE_EXC),
  124         NINT_NOEXC  = static_cast<int>(TO_NEAREST_INT) | static_cast<int>(NO_EXC),
  125         FLOOR       = static_cast<int>(TO_NEG_INF) | static_cast<int>(RAISE_EXC),
  126         FLOOR_NOEXC = static_cast<int>(TO_NEG_INF) | static_cast<int>(NO_EXC),
  127         CEIL        = static_cast<int>(TO_POS_INF) | static_cast<int>(RAISE_EXC),
  128         CEIL_NOEXC  = static_cast<int>(TO_POS_INF) | static_cast<int>(NO_EXC),
  129         TRUNC       = static_cast<int>(TO_ZERO) | static_cast<int>(RAISE_EXC),
  130         TRUNC_NOEXC = static_cast<int>(TO_ZERO) | static_cast<int>(NO_EXC),
  131         RINT        = static_cast<int>(CUR_DIRECTION) | static_cast<int>(RAISE_EXC),
  132         NEARBYINT   = static_cast<int>(CUR_DIRECTION) | static_cast<int>(NO_EXC),
  133     };
  134 
  135     struct Traits
  136     {
  137         using CompareType = SIMDImpl::CompareType;
  138         using ScaleFactor = SIMDImpl::ScaleFactor;
  139         using RoundMode   = SIMDImpl::RoundMode;
  140     };
  141 
  142     // Attribute, 4-dimensional attribute in SIMD SOA layout
  143     template <typename Float, typename Integer, typename Double>
  144     union Vec4
  145     {
  146         Float   v[4];
  147         Integer vi[4];
  148         Double  vd[4];
  149         struct
  150         {
  151             Float x;
  152             Float y;
  153             Float z;
  154             Float w;
  155         };
  156         SIMDINLINE Float& SIMDCALL operator[](const int i) { return v[i]; }
  157         SIMDINLINE Float const& SIMDCALL operator[](const int i) const { return v[i]; }
  158         SIMDINLINE Vec4& SIMDCALL operator=(Vec4 const& in)
  159         {
  160             v[0] = in.v[0];
  161             v[1] = in.v[1];
  162             v[2] = in.v[2];
  163             v[3] = in.v[3];
  164             return *this;
  165         }
  166     };
  167 
  168     namespace SIMD128Impl
  169     {
  170         union Float
  171         {
  172             SIMDINLINE Float() = default;
  173             SIMDINLINE Float(__m128 in) : v(in) {}
  174             SIMDINLINE Float& SIMDCALL operator=(__m128 in)
  175             {
  176                 v = in;
  177                 return *this;
  178             }
  179             SIMDINLINE Float& SIMDCALL operator=(Float const& in)
  180             {
  181                 v = in.v;
  182                 return *this;
  183             }
  184             SIMDINLINE SIMDCALL operator __m128() const { return v; }
  185 
  186             SIMDALIGN(__m128, 16) v;
  187         };
  188 
  189         union Integer
  190         {
  191             SIMDINLINE Integer() = default;
  192             SIMDINLINE Integer(__m128i in) : v(in) {}
  193             SIMDINLINE Integer& SIMDCALL operator=(__m128i in)
  194             {
  195                 v = in;
  196                 return *this;
  197             }
  198             SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
  199             {
  200                 v = in.v;
  201                 return *this;
  202             }
  203             SIMDINLINE SIMDCALL operator __m128i() const { return v; }
  204 
  205             SIMDALIGN(__m128i, 16) v;
  206         };
  207 
  208         union Double
  209         {
  210             SIMDINLINE Double() = default;
  211             SIMDINLINE Double(__m128d in) : v(in) {}
  212             SIMDINLINE Double& SIMDCALL operator=(__m128d in)
  213             {
  214                 v = in;
  215                 return *this;
  216             }
  217             SIMDINLINE Double& SIMDCALL operator=(Double const& in)
  218             {
  219                 v = in.v;
  220                 return *this;
  221             }
  222             SIMDINLINE SIMDCALL operator __m128d() const { return v; }
  223 
  224             SIMDALIGN(__m128d, 16) v;
  225         };
  226 
  227         using Vec4 = SIMDImpl::Vec4<Float, Integer, Double>;
  228         using Mask = uint8_t;
  229 
  230         static const uint32_t SIMD_WIDTH = 4;
  231     } // namespace SIMD128Impl
  232 
  233     namespace SIMD256Impl
  234     {
  235         union Float
  236         {
  237             SIMDINLINE Float() = default;
  238             SIMDINLINE Float(__m256 in) : v(in) {}
  239             SIMDINLINE Float(SIMD128Impl::Float const& in_lo,
  240                              SIMD128Impl::Float const& in_hi = _mm_setzero_ps())
  241             {
  242                 v = _mm256_insertf128_ps(_mm256_castps128_ps256(in_lo), in_hi, 0x1);
  243             }
  244             SIMDINLINE Float& SIMDCALL operator=(__m256 in)
  245             {
  246                 v = in;
  247                 return *this;
  248             }
  249             SIMDINLINE Float& SIMDCALL operator=(Float const& in)
  250             {
  251                 v = in.v;
  252                 return *this;
  253             }
  254             SIMDINLINE SIMDCALL operator __m256() const { return v; }
  255 
  256             SIMDALIGN(__m256, 32) v;
  257             SIMD128Impl::Float v4[2];
  258         };
  259 
  260         union Integer
  261         {
  262             SIMDINLINE Integer() = default;
  263             SIMDINLINE Integer(__m256i in) : v(in) {}
  264             SIMDINLINE Integer(SIMD128Impl::Integer const& in_lo,
  265                                SIMD128Impl::Integer const& in_hi = _mm_setzero_si128())
  266             {
  267                 v = _mm256_insertf128_si256(_mm256_castsi128_si256(in_lo), in_hi, 0x1);
  268             }
  269             SIMDINLINE Integer& SIMDCALL operator=(__m256i in)
  270             {
  271                 v = in;
  272                 return *this;
  273             }
  274             SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
  275             {
  276                 v = in.v;
  277                 return *this;
  278             }
  279             SIMDINLINE SIMDCALL operator __m256i() const { return v; }
  280 
  281             SIMDALIGN(__m256i, 32) v;
  282             SIMD128Impl::Integer v4[2];
  283         };
  284 
  285         union Double
  286         {
  287             SIMDINLINE Double() = default;
  288             SIMDINLINE Double(__m256d const& in) : v(in) {}
  289             SIMDINLINE Double(SIMD128Impl::Double const& in_lo,
  290                               SIMD128Impl::Double const& in_hi = _mm_setzero_pd())
  291             {
  292                 v = _mm256_insertf128_pd(_mm256_castpd128_pd256(in_lo), in_hi, 0x1);
  293             }
  294             SIMDINLINE Double& SIMDCALL operator=(__m256d in)
  295             {
  296                 v = in;
  297                 return *this;
  298             }
  299             SIMDINLINE Double& SIMDCALL operator=(Double const& in)
  300             {
  301                 v = in.v;
  302                 return *this;
  303             }
  304             SIMDINLINE SIMDCALL operator __m256d() const { return v; }
  305 
  306             SIMDALIGN(__m256d, 32) v;
  307             SIMD128Impl::Double v4[2];
  308         };
  309 
  310         using Vec4 = SIMDImpl::Vec4<Float, Integer, Double>;
  311         using Mask = uint8_t;
  312 
  313         static const uint32_t SIMD_WIDTH = 8;
  314     } // namespace SIMD256Impl
  315 
  316     namespace SIMD512Impl
  317     {
  318 #if !(defined(__AVX512F__) || defined(_ZMMINTRIN_H_INCLUDED))
  319         // Define AVX512 types if not included via immintrin.h.
  320         // All data members of these types are ONLY to viewed
  321         // in a debugger.  Do NOT access them via code!
  322         union __m512
  323         {
  324         private:
  325             float m512_f32[16];
  326         };
  327         struct __m512d
  328         {
  329         private:
  330             double m512d_f64[8];
  331         };
  332 
  333         union __m512i
  334         {
  335         private:
  336             int8_t   m512i_i8[64];
  337             int16_t  m512i_i16[32];
  338             int32_t  m512i_i32[16];
  339             int64_t  m512i_i64[8];
  340             uint8_t  m512i_u8[64];
  341             uint16_t m512i_u16[32];
  342             uint32_t m512i_u32[16];
  343             uint64_t m512i_u64[8];
  344         };
  345 
  346         using __mmask16 = uint16_t;
  347 #endif
  348 
  349 #if defined(__INTEL_COMPILER) || (SIMD_ARCH >= SIMD_ARCH_AVX512)
  350 #define SIMD_ALIGNMENT_BYTES 64
  351 #else
  352 #define SIMD_ALIGNMENT_BYTES 32
  353 #endif
  354 
  355         union Float
  356         {
  357             SIMDINLINE Float() = default;
  358             SIMDINLINE Float(__m512 in) : v(in) {}
  359             SIMDINLINE Float(SIMD256Impl::Float const& in_lo,
  360                              SIMD256Impl::Float const& in_hi = _mm256_setzero_ps())
  361             {
  362                 v8[0] = in_lo;
  363                 v8[1] = in_hi;
  364             }
  365             SIMDINLINE Float& SIMDCALL operator=(__m512 in)
  366             {
  367                 v = in;
  368                 return *this;
  369             }
  370             SIMDINLINE Float& SIMDCALL operator=(Float const& in)
  371             {
  372 #if SIMD_ARCH >= SIMD_ARCH_AVX512
  373                 v = in.v;
  374 #else
  375                 v8[0] = in.v8[0];
  376                 v8[1] = in.v8[1];
  377 #endif
  378                 return *this;
  379             }
  380             SIMDINLINE SIMDCALL operator __m512() const { return v; }
  381 
  382             SIMDALIGN(__m512, SIMD_ALIGNMENT_BYTES) v;
  383             SIMD256Impl::Float v8[2];
  384         };
  385 
  386         union Integer
  387         {
  388             SIMDINLINE Integer() = default;
  389             SIMDINLINE Integer(__m512i in) : v(in) {}
  390             SIMDINLINE Integer(SIMD256Impl::Integer const& in_lo,
  391                                SIMD256Impl::Integer const& in_hi = _mm256_setzero_si256())
  392             {
  393                 v8[0] = in_lo;
  394                 v8[1] = in_hi;
  395             }
  396             SIMDINLINE Integer& SIMDCALL operator=(__m512i in)
  397             {
  398                 v = in;
  399                 return *this;
  400             }
  401             SIMDINLINE Integer& SIMDCALL operator=(Integer const& in)
  402             {
  403 #if SIMD_ARCH >= SIMD_ARCH_AVX512
  404                 v = in.v;
  405 #else
  406                 v8[0] = in.v8[0];
  407                 v8[1] = in.v8[1];
  408 #endif
  409                 return *this;
  410             }
  411 
  412             SIMDINLINE SIMDCALL operator __m512i() const { return v; }
  413 
  414             SIMDALIGN(__m512i, SIMD_ALIGNMENT_BYTES) v;
  415             SIMD256Impl::Integer v8[2];
  416         };
  417 
  418         union Double
  419         {
  420             SIMDINLINE Double() = default;
  421             SIMDINLINE Double(__m512d in) : v(in) {}
  422             SIMDINLINE Double(SIMD256Impl::Double const& in_lo,
  423                               SIMD256Impl::Double const& in_hi = _mm256_setzero_pd())
  424             {
  425                 v8[0] = in_lo;
  426                 v8[1] = in_hi;
  427             }
  428             SIMDINLINE Double& SIMDCALL operator=(__m512d in)
  429             {
  430                 v = in;
  431                 return *this;
  432             }
  433             SIMDINLINE Double& SIMDCALL operator=(Double const& in)
  434             {
  435 #if SIMD_ARCH >= SIMD_ARCH_AVX512
  436                 v = in.v;
  437 #else
  438                 v8[0] = in.v8[0];
  439                 v8[1] = in.v8[1];
  440 #endif
  441                 return *this;
  442             }
  443 
  444             SIMDINLINE SIMDCALL operator __m512d() const { return v; }
  445 
  446             SIMDALIGN(__m512d, SIMD_ALIGNMENT_BYTES) v;
  447             SIMD256Impl::Double v8[2];
  448         };
  449 
  450         typedef SIMDImpl::Vec4<Float, Integer, Double> SIMDALIGN(Vec4, 64);
  451         using Mask = __mmask16;
  452 
  453         static const uint32_t SIMD_WIDTH = 16;
  454 
  455 #undef SIMD_ALIGNMENT_BYTES
  456     } // namespace SIMD512Impl
  457 } // namespace SIMDImpl