"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/swr/rasterizer/common/simdintrin.h" (16 Sep 2020, 13326 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "simdintrin.h", see the Fossies "Dox" file reference documentation.

    1 /****************************************************************************
    2  * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21  * IN THE SOFTWARE.
   22  ****************************************************************************/
   23 
   24 #ifndef __SWR_SIMDINTRIN_H__
   25 #define __SWR_SIMDINTRIN_H__
   26 
   27 #include "common/intrin.h"
   28 #include "common/simdlib.hpp"
   29 
   30 #if KNOB_SIMD_WIDTH == 8
   31 typedef SIMD256 SIMD;
   32 #else
   33 #error Unsupported vector width
   34 #endif // KNOB_SIMD16_WIDTH == 16
   35 
   36 #define _simd128_maskstore_ps SIMD128::maskstore_ps
   37 #define _simd128_fmadd_ps SIMD128::fmadd_ps
   38 
   39 #define _simd_load_ps SIMD::load_ps
   40 #define _simd_load1_ps SIMD::broadcast_ss
   41 #define _simd_loadu_ps SIMD::loadu_ps
   42 #define _simd_setzero_ps SIMD::setzero_ps
   43 #define _simd_set1_ps SIMD::set1_ps
   44 #define _simd_blend_ps(a, b, i) SIMD::blend_ps<i>(a, b)
   45 #define _simd_blend_epi32(a, b, i) SIMD::blend_epi32<i>(a, b)
   46 #define _simd_blendv_ps SIMD::blendv_ps
   47 #define _simd_store_ps SIMD::store_ps
   48 #define _simd_mul_ps SIMD::mul_ps
   49 #define _simd_add_ps SIMD::add_ps
   50 #define _simd_sub_ps SIMD::sub_ps
   51 #define _simd_rsqrt_ps SIMD::rsqrt_ps
   52 #define _simd_min_ps SIMD::min_ps
   53 #define _simd_max_ps SIMD::max_ps
   54 #define _simd_movemask_ps SIMD::movemask_ps
   55 #define _simd_cvtps_epi32 SIMD::cvtps_epi32
   56 #define _simd_cvttps_epi32 SIMD::cvttps_epi32
   57 #define _simd_cvtepi32_ps SIMD::cvtepi32_ps
   58 #define _simd_cmplt_ps SIMD::cmplt_ps
   59 #define _simd_cmpgt_ps SIMD::cmpgt_ps
   60 #define _simd_cmpneq_ps SIMD::cmpneq_ps
   61 #define _simd_cmpeq_ps SIMD::cmpeq_ps
   62 #define _simd_cmpge_ps SIMD::cmpge_ps
   63 #define _simd_cmple_ps SIMD::cmple_ps
   64 #define _simd_cmp_ps(a, b, imm) SIMD::cmp_ps<SIMD::CompareType(imm)>(a, b)
   65 #define _simd_and_ps SIMD::and_ps
   66 #define _simd_or_ps SIMD::or_ps
   67 #define _simd_rcp_ps SIMD::rcp_ps
   68 #define _simd_div_ps SIMD::div_ps
   69 #define _simd_castsi_ps SIMD::castsi_ps
   70 #define _simd_castps_pd SIMD::castps_pd
   71 #define _simd_castpd_ps SIMD::castpd_ps
   72 #define _simd_andnot_ps SIMD::andnot_ps
   73 #define _simd_round_ps(a, i) SIMD::round_ps<SIMD::RoundMode(i)>(a)
   74 #define _simd_castpd_ps SIMD::castpd_ps
   75 #define _simd_broadcast_ps(a) SIMD::broadcast_ps((SIMD128::Float const*)(a))
   76 #define _simd_stream_ps SIMD::stream_ps
   77 
   78 #define _simd_movemask_pd SIMD::movemask_pd
   79 #define _simd_castsi_pd SIMD::castsi_pd
   80 
   81 #define _simd_mul_epi32 SIMD::mul_epi32
   82 #define _simd_mullo_epi32 SIMD::mullo_epi32
   83 #define _simd_sub_epi32 SIMD::sub_epi32
   84 #define _simd_sub_epi64 SIMD::sub_epi64
   85 #define _simd_min_epi32 SIMD::min_epi32
   86 #define _simd_min_epu32 SIMD::min_epu32
   87 #define _simd_max_epi32 SIMD::max_epi32
   88 #define _simd_max_epu32 SIMD::max_epu32
   89 #define _simd_add_epi32 SIMD::add_epi32
   90 #define _simd_and_si SIMD::and_si
   91 #define _simd_andnot_si SIMD::andnot_si
   92 #define _simd_cmpeq_epi32 SIMD::cmpeq_epi32
   93 #define _simd_cmplt_epi32 SIMD::cmplt_epi32
   94 #define _simd_cmpgt_epi32 SIMD::cmpgt_epi32
   95 #define _simd_or_si SIMD::or_si
   96 #define _simd_xor_si SIMD::xor_si
   97 #define _simd_castps_si SIMD::castps_si
   98 #define _simd_adds_epu8 SIMD::adds_epu8
   99 #define _simd_subs_epu8 SIMD::subs_epu8
  100 #define _simd_add_epi8 SIMD::add_epi8
  101 #define _simd_cmpeq_epi64 SIMD::cmpeq_epi64
  102 #define _simd_cmpgt_epi64 SIMD::cmpgt_epi64
  103 #define _simd_cmpgt_epi8 SIMD::cmpgt_epi8
  104 #define _simd_cmpeq_epi8 SIMD::cmpeq_epi8
  105 #define _simd_cmpgt_epi16 SIMD::cmpgt_epi16
  106 #define _simd_cmpeq_epi16 SIMD::cmpeq_epi16
  107 #define _simd_movemask_epi8 SIMD::movemask_epi8
  108 #define _simd_permute_ps_i(a, i) SIMD::permute_ps<i>(a)
  109 #define _simd_permute_ps SIMD::permute_ps
  110 #define _simd_permute_epi32 SIMD::permute_epi32
  111 #define _simd_srlv_epi32 SIMD::srlv_epi32
  112 #define _simd_sllv_epi32 SIMD::sllv_epi32
  113 
  114 #define _simd_unpacklo_epi8 SIMD::unpacklo_epi8
  115 #define _simd_unpackhi_epi8 SIMD::unpackhi_epi8
  116 #define _simd_unpacklo_epi16 SIMD::unpacklo_epi16
  117 #define _simd_unpackhi_epi16 SIMD::unpackhi_epi16
  118 #define _simd_unpacklo_epi32 SIMD::unpacklo_epi32
  119 #define _simd_unpackhi_epi32 SIMD::unpackhi_epi32
  120 #define _simd_unpacklo_epi64 SIMD::unpacklo_epi64
  121 #define _simd_unpackhi_epi64 SIMD::unpackhi_epi64
  122 
  123 #define _simd_slli_epi32(a, i) SIMD::slli_epi32<i>(a)
  124 #define _simd_srai_epi32(a, i) SIMD::srai_epi32<i>(a)
  125 #define _simd_srli_epi32(a, i) SIMD::srli_epi32<i>(a)
  126 #define _simd_srlisi_ps(a, i) SIMD::srlisi_ps<i>(a)
  127 
  128 #define _simd_fmadd_ps SIMD::fmadd_ps
  129 #define _simd_fmsub_ps SIMD::fmsub_ps
  130 #define _simd_shuffle_epi8 SIMD::shuffle_epi8
  131 
  132 #define _simd_i32gather_ps(p, o, s) SIMD::i32gather_ps<SIMD::ScaleFactor(s)>(p, o)
  133 #define _simd_mask_i32gather_ps(r, p, o, m, s) \
  134     SIMD::mask_i32gather_ps<SIMD::ScaleFactor(s)>(r, p, o, m)
  135 #define _simd_abs_epi32 SIMD::abs_epi32
  136 
  137 #define _simd_cvtepu8_epi16 SIMD::cvtepu8_epi16
  138 #define _simd_cvtepu8_epi32 SIMD::cvtepu8_epi32
  139 #define _simd_cvtepu16_epi32 SIMD::cvtepu16_epi32
  140 #define _simd_cvtepu16_epi64 SIMD::cvtepu16_epi64
  141 #define _simd_cvtepu32_epi64 SIMD::cvtepu32_epi64
  142 
  143 #define _simd_packus_epi16 SIMD::packus_epi16
  144 #define _simd_packs_epi16 SIMD::packs_epi16
  145 #define _simd_packus_epi32 SIMD::packus_epi32
  146 #define _simd_packs_epi32 SIMD::packs_epi32
  147 
  148 #define _simd_unpacklo_ps SIMD::unpacklo_ps
  149 #define _simd_unpackhi_ps SIMD::unpackhi_ps
  150 #define _simd_unpacklo_pd SIMD::unpacklo_pd
  151 #define _simd_unpackhi_pd SIMD::unpackhi_pd
  152 #define _simd_insertf128_ps SIMD::insertf128_ps
  153 #define _simd_insertf128_pd SIMD::insertf128_pd
  154 #define _simd_insertf128_si(a, b, i) SIMD::insertf128_si<i>(a, b)
  155 #define _simd_extractf128_ps(a, i) SIMD::extractf128_ps<i>(a)
  156 #define _simd_extractf128_pd(a, i) SIMD::extractf128_pd<i>(a)
  157 #define _simd_extractf128_si(a, i) SIMD::extractf128_si<i>(a)
  158 #define _simd_permute2f128_ps(a, b, i) SIMD::permute2f128_ps<i>(a, b)
  159 #define _simd_permute2f128_pd(a, b, i) SIMD::permute2f128_pd<i>(a, b)
  160 #define _simd_permute2f128_si(a, b, i) SIMD::permute2f128_si<i>(a, b)
  161 #define _simd_shuffle_ps(a, b, i) SIMD::shuffle_ps<i>(a, b)
  162 #define _simd_shuffle_pd(a, b, i) SIMD::shuffle_pd<i>(a, b)
  163 #define _simd_shuffle_epi32(a, b, imm8) SIMD::shuffle_epi32<imm8>(a, b)
  164 #define _simd_shuffle_epi64(a, b, imm8) SIMD::shuffle_epi64<imm8>(a, b)
  165 #define _simd_set1_epi32 SIMD::set1_epi32
  166 #define _simd_set_epi32 SIMD::set_epi32
  167 #define _simd_set_ps SIMD::set_ps
  168 #define _simd_set1_epi8 SIMD::set1_epi8
  169 #define _simd_setzero_si SIMD::setzero_si
  170 #define _simd_cvttps_epi32 SIMD::cvttps_epi32
  171 #define _simd_store_si SIMD::store_si
  172 #define _simd_broadcast_ss SIMD::broadcast_ss
  173 #define _simd_maskstore_ps SIMD::maskstore_ps
  174 #define _simd_load_si SIMD::load_si
  175 #define _simd_loadu_si SIMD::loadu_si
  176 #define _simd_sub_ps SIMD::sub_ps
  177 #define _simd_testz_ps SIMD::testz_ps
  178 #define _simd_testz_si SIMD::testz_si
  179 #define _simd_xor_ps SIMD::xor_ps
  180 
  181 #define _simd_loadu2_si SIMD::loadu2_si
  182 #define _simd_storeu2_si SIMD::storeu2_si
  183 
  184 #define _simd_blendv_epi32 SIMD::blendv_epi32
  185 #define _simd_vmask_ps SIMD::vmask_ps
  186 
  187 template <int mask>
  188 SIMDINLINE SIMD128::Integer _simd_blend4_epi32(SIMD128::Integer const& a, SIMD128::Integer const& b)
  189 {
  190     return SIMD128::castps_si(
  191         SIMD128::blend_ps<mask>(SIMD128::castsi_ps(a), SIMD128::castsi_ps(b)));
  192 }
  193 
  194 //////////////////////////////////////////////////////////////////////////
  195 /// @brief Compute plane equation vA * vX + vB * vY + vC
  196 SIMDINLINE simdscalar vplaneps(simdscalar const& vA,
  197                                simdscalar const& vB,
  198                                simdscalar const& vC,
  199                                simdscalar const& vX,
  200                                simdscalar const& vY)
  201 {
  202     simdscalar vOut = _simd_fmadd_ps(vA, vX, vC);
  203     vOut            = _simd_fmadd_ps(vB, vY, vOut);
  204     return vOut;
  205 }
  206 
  207 //////////////////////////////////////////////////////////////////////////
  208 /// @brief Compute plane equation vA * vX + vB * vY + vC
  209 SIMDINLINE simd4scalar vplaneps(simd4scalar const& vA,
  210                                 simd4scalar const& vB,
  211                                 simd4scalar const& vC,
  212                                 simd4scalar const& vX,
  213                                 simd4scalar const& vY)
  214 {
  215     simd4scalar vOut = _simd128_fmadd_ps(vA, vX, vC);
  216     vOut             = _simd128_fmadd_ps(vB, vY, vOut);
  217     return vOut;
  218 }
  219 
  220 //////////////////////////////////////////////////////////////////////////
  221 /// @brief Interpolates a single component.
  222 /// @param vI - barycentric I
  223 /// @param vJ - barycentric J
  224 /// @param pInterpBuffer - pointer to attribute barycentric coeffs
  225 template <UINT Attrib, UINT Comp, UINT numComponents = 4>
  226 static SIMDINLINE simdscalar InterpolateComponent(simdscalar const& vI,
  227                                                   simdscalar const& vJ,
  228                                                   const float*      pInterpBuffer)
  229 {
  230     const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
  231     const float* pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
  232     const float* pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
  233 
  234     if ((pInterpA[0] == pInterpB[0]) && (pInterpA[0] == pInterpC[0]))
  235     {
  236         // Ensure constant attribs are constant.  Required for proper
  237         // 3D resource copies.
  238         return _simd_broadcast_ss(pInterpA);
  239     }
  240 
  241     simdscalar vA = _simd_broadcast_ss(pInterpA);
  242     simdscalar vB = _simd_broadcast_ss(pInterpB);
  243     simdscalar vC = _simd_broadcast_ss(pInterpC);
  244 
  245     simdscalar vk = _simd_sub_ps(_simd_sub_ps(_simd_set1_ps(1.0f), vI), vJ);
  246     vC            = _simd_mul_ps(vk, vC);
  247 
  248     return vplaneps(vA, vB, vC, vI, vJ);
  249 }
  250 
  251 //////////////////////////////////////////////////////////////////////////
  252 /// @brief Interpolates a single component (flat shade).
  253 /// @param pInterpBuffer - pointer to attribute barycentric coeffs
  254 template <UINT Attrib, UINT Comp, UINT numComponents = 4>
  255 static SIMDINLINE simdscalar InterpolateComponentFlat(const float* pInterpBuffer)
  256 {
  257     const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
  258 
  259     simdscalar vA = _simd_broadcast_ss(pInterpA);
  260 
  261     return vA;
  262 }
  263 
  264 //////////////////////////////////////////////////////////////////////////
  265 /// @brief Interpolates a single component (flat shade).
  266 /// @param pInterpBuffer - pointer to attribute barycentric coeffs
  267 template <UINT Attrib, UINT Comp, UINT numComponents = 4>
  268 static SIMDINLINE simdscalari InterpolateComponentFlatInt(const uint32_t* pInterpBuffer)
  269 {
  270     const uint32_t interpA = pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
  271 
  272     simdscalari vA = _simd_set1_epi32(interpA);
  273 
  274     return vA;
  275 }
  276 
  277 //////////////////////////////////////////////////////////////////////////
  278 /// @brief Interpolates a single component.
  279 /// @param vI - barycentric I
  280 /// @param vJ - barycentric J
  281 /// @param pInterpBuffer - pointer to attribute barycentric coeffs
  282 template <UINT Attrib, UINT Comp, UINT numComponents = 4>
  283 static SIMDINLINE simd4scalar InterpolateComponent(simd4scalar const& vI,
  284                                                    simd4scalar const& vJ,
  285                                                    const float*       pInterpBuffer)
  286 {
  287     const float* pInterpA = &pInterpBuffer[Attrib * 3 * numComponents + 0 + Comp];
  288     const float* pInterpB = &pInterpBuffer[Attrib * 3 * numComponents + numComponents + Comp];
  289     const float* pInterpC = &pInterpBuffer[Attrib * 3 * numComponents + numComponents * 2 + Comp];
  290 
  291     if ((pInterpA[0] == pInterpB[0]) && (pInterpA[0] == pInterpC[0]))
  292     {
  293         // Ensure constant attribs are constant.  Required for proper
  294         // 3D resource copies.
  295         return SIMD128::broadcast_ss(pInterpA);
  296     }
  297 
  298     simd4scalar vA = SIMD128::broadcast_ss(pInterpA);
  299     simd4scalar vB = SIMD128::broadcast_ss(pInterpB);
  300     simd4scalar vC = SIMD128::broadcast_ss(pInterpC);
  301 
  302     simd4scalar vk = SIMD128::sub_ps(SIMD128::sub_ps(SIMD128::set1_ps(1.0f), vI), vJ);
  303     vC             = SIMD128::mul_ps(vk, vC);
  304 
  305     return vplaneps(vA, vB, vC, vI, vJ);
  306 }
  307 
  308 static SIMDINLINE simd4scalar _simd128_abs_ps(simd4scalar const& a)
  309 {
  310     simd4scalari ai = SIMD128::castps_si(a);
  311     return SIMD128::castsi_ps(SIMD128::and_si(ai, SIMD128::set1_epi32(0x7fffffff)));
  312 }
  313 
  314 static SIMDINLINE simdscalar _simd_abs_ps(simdscalar const& a)
  315 {
  316     simdscalari ai = _simd_castps_si(a);
  317     return _simd_castsi_ps(_simd_and_si(ai, _simd_set1_epi32(0x7fffffff)));
  318 }
  319 
  320 #include "simd16intrin.h"
  321 
  322 #endif //__SWR_SIMDINTRIN_H__