"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/swr/rasterizer/core/backend_singlesample.cpp" (16 Sep 2020, 18551 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "backend_singlesample.cpp" see the Fossies "Dox" file reference documentation.

    1 /****************************************************************************
    2  * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21  * IN THE SOFTWARE.
   22  *
   23  * @file backend_singlesample.cpp
   24  *
   25  * @brief Backend handles rasterization, pixel shading and output merger
   26  *        operations.
   27  *
   28  ******************************************************************************/
   29 
   30 #include <smmintrin.h>
   31 
   32 #include "backend.h"
   33 #include "backend_impl.h"
   34 #include "tilemgr.h"
   35 #include "memory/tilingtraits.h"
   36 #include "core/multisample.h"
   37 
   38 #include <algorithm>
   39 
   40 template <typename T>
   41 void BackendSingleSample(DRAW_CONTEXT*        pDC,
   42                          uint32_t             workerId,
   43                          uint32_t             x,
   44                          uint32_t             y,
   45                          SWR_TRIANGLE_DESC&   work,
   46                          RenderOutputBuffers& renderBuffers)
   47 {
   48     RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESingleSampleBackend, pDC->drawId);
   49     RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);
   50 
   51     void* pWorkerData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
   52 
   53     const API_STATE& state = GetApiState(pDC);
   54 
   55     BarycentricCoeffs coeffs;
   56     SetupBarycentricCoeffs(&coeffs, work);
   57 
   58     SWR_PS_CONTEXT             psContext;
   59     const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
   60     SetupPixelShaderContext<T>(&psContext, samplePos, work);
   61 
   62     uint8_t *pDepthBuffer, *pStencilBuffer;
   63     SetupRenderBuffers(psContext.pColorBuffer,
   64                        &pDepthBuffer,
   65                        &pStencilBuffer,
   66                        state.colorHottileEnable,
   67                        renderBuffers);
   68 
   69     // Indicates backend rendered something to the color buffer
   70     bool isTileDirty = false;
   71 
   72     RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 1);
   73 
   74     psContext.vY.UL     = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));
   75     psContext.vY.center = _simd_add_ps(vCenterOffsetsY, _simd_set1_ps(static_cast<float>(y)));
   76 
   77     const simdscalar dy = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
   78 
   79     for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
   80     {
   81         psContext.vX.UL     = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));
   82         psContext.vX.center = _simd_add_ps(vCenterOffsetsX, _simd_set1_ps(static_cast<float>(x)));
   83 
   84         const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));
   85 
   86         for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
   87         {
   88             const bool useAlternateOffset = ((xx & SIMD_TILE_X_DIM) != 0);
   89 
   90 
   91             simdmask coverageMask = work.coverageMask[0] & MASK;
   92 
   93             if (coverageMask)
   94             {
   95                 if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
   96                 {
   97                     static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
   98                                   "Unsupported depth hot tile format");
   99 
  100                     const simdscalar z =
  101                         _simd_load_ps(reinterpret_cast<const float*>(pDepthBuffer));
  102 
  103                     const float minz = state.depthBoundsState.depthBoundsTestMinValue;
  104                     const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;
  105 
  106                     coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
  107                 }
  108 
  109                 if (T::InputCoverage != SWR_INPUT_COVERAGE_NONE)
  110                 {
  111                     const uint64_t* pCoverageMask =
  112                         (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
  113                             ? &work.innerCoverageMask
  114                             : &work.coverageMask[0];
  115 
  116                     generateInputCoverage<T, T::InputCoverage>(
  117                         pCoverageMask, psContext.inputMask, state.blendState.sampleMask);
  118                 }
  119 
  120                 RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);
  121 
  122                 CalcPixelBarycentrics(coeffs, psContext);
  123 
  124                 CalcCentroid<T, true>(
  125                     &psContext, samplePos, coeffs, work.coverageMask, state.blendState.sampleMask);
  126 
  127                 // interpolate and quantize z
  128                 psContext.vZ = vplaneps(
  129                     coeffs.vZa, coeffs.vZb, coeffs.vZc, psContext.vI.center, psContext.vJ.center);
  130                 psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);
  131 
  132                 RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 1);
  133 
  134                 // interpolate user clip distance if available
  135                 if (state.backendState.clipDistanceMask)
  136                 {
  137                     coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask,
  138                                                          work.pUserClipBuffer,
  139                                                          psContext.vI.center,
  140                                                          psContext.vJ.center);
  141                 }
  142 
  143                 simdscalar vCoverageMask   = _simd_vmask_ps(coverageMask);
  144                 simdscalar depthPassMask   = vCoverageMask;
  145                 simdscalar stencilPassMask = vCoverageMask;
  146 
  147                 // Early-Z?
  148                 if (T::bCanEarlyZ)
  149                 {
  150                     RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId);
  151                     depthPassMask = DepthStencilTest(&state,
  152                                                      work.triFlags.frontFacing,
  153                                                      work.triFlags.viewportIndex,
  154                                                      psContext.vZ,
  155                                                      pDepthBuffer,
  156                                                      vCoverageMask,
  157                                                      pStencilBuffer,
  158                                                      &stencilPassMask);
  159                     AR_EVENT(EarlyDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
  160                                                                _simd_movemask_ps(stencilPassMask),
  161                                                                _simd_movemask_ps(vCoverageMask)));
  162                     RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0);
  163 
  164                     // early-exit if no pixels passed depth or earlyZ is forced on
  165                     if (state.psState.forceEarlyZ || !_simd_movemask_ps(depthPassMask))
  166                     {
  167                         DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
  168                                           &state.depthStencilState,
  169                                           work.triFlags.frontFacing,
  170                                           psContext.vZ,
  171                                           pDepthBuffer,
  172                                           depthPassMask,
  173                                           vCoverageMask,
  174                                           pStencilBuffer,
  175                                           stencilPassMask);
  176 
  177                         if (!_simd_movemask_ps(depthPassMask))
  178                         {
  179                             goto Endtile;
  180                         }
  181                     }
  182                 }
  183 
  184                 psContext.sampleIndex = 0;
  185                 psContext.activeMask  = _simd_castps_si(vCoverageMask);
  186 
  187                 // execute pixel shader
  188                 RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEPixelShader, pDC->drawId);
  189                 state.psState.pfnPixelShader(GetPrivateState(pDC), pWorkerData, &psContext);
  190                 RDTSC_END(pDC->pContext->pBucketMgr, BEPixelShader, 0);
  191 
  192                 // update stats
  193                 UPDATE_STAT_BE(PsInvocations, _mm_popcnt_u32(_simd_movemask_ps(vCoverageMask)));
  194                 AR_EVENT(PSStats((HANDLE)&psContext.stats));
  195 
  196                 vCoverageMask = _simd_castsi_ps(psContext.activeMask);
  197 
  198                 if (_simd_movemask_ps(vCoverageMask))
  199                 {
  200                     isTileDirty = true;
  201                 }
  202 
  203                 // late-Z
  204                 if (!T::bCanEarlyZ)
  205                 {
  206                     RDTSC_BEGIN(pDC->pContext->pBucketMgr, BELateDepthTest, pDC->drawId);
  207                     depthPassMask = DepthStencilTest(&state,
  208                                                      work.triFlags.frontFacing,
  209                                                      work.triFlags.viewportIndex,
  210                                                      psContext.vZ,
  211                                                      pDepthBuffer,
  212                                                      vCoverageMask,
  213                                                      pStencilBuffer,
  214                                                      &stencilPassMask);
  215                     AR_EVENT(LateDepthStencilInfoSingleSample(_simd_movemask_ps(depthPassMask),
  216                                                               _simd_movemask_ps(stencilPassMask),
  217                                                               _simd_movemask_ps(vCoverageMask)));
  218                     RDTSC_END(pDC->pContext->pBucketMgr, BELateDepthTest, 0);
  219 
  220                     if (!_simd_movemask_ps(depthPassMask))
  221                     {
  222                         // need to call depth/stencil write for stencil write
  223                         DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
  224                                           &state.depthStencilState,
  225                                           work.triFlags.frontFacing,
  226                                           psContext.vZ,
  227                                           pDepthBuffer,
  228                                           depthPassMask,
  229                                           vCoverageMask,
  230                                           pStencilBuffer,
  231                                           stencilPassMask);
  232                         goto Endtile;
  233                     }
  234                 }
  235                 else
  236                 {
  237                     // for early z, consolidate discards from shader
  238                     // into depthPassMask
  239                     depthPassMask = _simd_and_ps(depthPassMask, vCoverageMask);
  240                 }
  241 
  242                 uint32_t statMask  = _simd_movemask_ps(depthPassMask);
  243                 uint32_t statCount = _mm_popcnt_u32(statMask);
  244                 UPDATE_STAT_BE(DepthPassCount, statCount);
  245 
  246                 // output merger
  247                 RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEOutputMerger, pDC->drawId);
  248 
  249                 OutputMerger8x2(pDC,
  250                                 psContext,
  251                                 psContext.pColorBuffer,
  252                                 0,
  253                                 &state.blendState,
  254                                 state.pfnBlendFunc,
  255                                 vCoverageMask,
  256                                 depthPassMask,
  257                                 state.psState.renderTargetMask,
  258                                 useAlternateOffset,
  259                                 workerId);
  260 
  261                 // do final depth write after all pixel kills
  262                 if (!state.psState.forceEarlyZ)
  263                 {
  264                     DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
  265                                       &state.depthStencilState,
  266                                       work.triFlags.frontFacing,
  267                                       psContext.vZ,
  268                                       pDepthBuffer,
  269                                       depthPassMask,
  270                                       vCoverageMask,
  271                                       pStencilBuffer,
  272                                       stencilPassMask);
  273                 }
  274                 RDTSC_END(pDC->pContext->pBucketMgr, BEOutputMerger, 0);
  275             }
  276 
  277         Endtile:
  278             RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEndTile, pDC->drawId);
  279 
  280             work.coverageMask[0] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
  281             if (T::InputCoverage == SWR_INPUT_COVERAGE_INNER_CONSERVATIVE)
  282             {
  283                 work.innerCoverageMask >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
  284             }
  285 
  286             if (useAlternateOffset)
  287             {
  288                 DWORD    rt;
  289                 uint32_t rtMask = state.colorHottileEnable;
  290                 while (_BitScanForward(&rt, rtMask))
  291                 {
  292                     rtMask &= ~(1 << rt);
  293                     psContext.pColorBuffer[rt] +=
  294                         (2 * KNOB_SIMD_WIDTH * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp) / 8;
  295                 }
  296             }
  297 
  298             pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
  299             pStencilBuffer +=
  300                 (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;
  301 
  302             RDTSC_END(pDC->pContext->pBucketMgr, BEEndTile, 0);
  303 
  304             psContext.vX.UL     = _simd_add_ps(psContext.vX.UL, dx);
  305             psContext.vX.center = _simd_add_ps(psContext.vX.center, dx);
  306         }
  307 
  308         psContext.vY.UL     = _simd_add_ps(psContext.vY.UL, dy);
  309         psContext.vY.center = _simd_add_ps(psContext.vY.center, dy);
  310     }
  311 
  312     if (isTileDirty)
  313     {
  314         SetRenderHotTilesDirty(pDC, renderBuffers);
  315     }
  316 
  317     RDTSC_END(pDC->pContext->pBucketMgr, BESingleSampleBackend, 0);
  318 }
  319 
  320 // Recursive template used to auto-nest conditionals.  Converts dynamic enum function
  321 // arguments to static template arguments.
  322 template <uint32_t... ArgsT>
  323 struct BEChooserSingleSample
  324 {
  325     // Last Arg Terminator
  326     static PFN_BACKEND_FUNC GetFunc(SWR_BACKEND_FUNCS tArg)
  327     {
  328         switch (tArg)
  329         {
  330         case SWR_BACKEND_SINGLE_SAMPLE:
  331             return BackendSingleSample<SwrBackendTraits<ArgsT...>>;
  332             break;
  333         case SWR_BACKEND_MSAA_PIXEL_RATE:
  334         case SWR_BACKEND_MSAA_SAMPLE_RATE:
  335         default:
  336             SWR_ASSERT(0 && "Invalid backend func\n");
  337             return nullptr;
  338             break;
  339         }
  340     }
  341 
  342     // Recursively parse args
  343     template <typename... TArgsT>
  344     static PFN_BACKEND_FUNC GetFunc(SWR_INPUT_COVERAGE tArg, TArgsT... remainingArgs)
  345     {
  346         switch (tArg)
  347         {
  348         case SWR_INPUT_COVERAGE_NONE:
  349             return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
  350                 remainingArgs...);
  351             break;
  352         case SWR_INPUT_COVERAGE_NORMAL:
  353             return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NORMAL>::GetFunc(
  354                 remainingArgs...);
  355             break;
  356         case SWR_INPUT_COVERAGE_INNER_CONSERVATIVE:
  357             return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_INNER_CONSERVATIVE>::GetFunc(
  358                 remainingArgs...);
  359             break;
  360         default:
  361             SWR_ASSERT(0 && "Invalid sample pattern\n");
  362             return BEChooserSingleSample<ArgsT..., SWR_INPUT_COVERAGE_NONE>::GetFunc(
  363                 remainingArgs...);
  364             break;
  365         }
  366     }
  367 
  368     // Recursively parse args
  369     template <typename... TArgsT>
  370     static PFN_BACKEND_FUNC GetFunc(SWR_MULTISAMPLE_COUNT tArg, TArgsT... remainingArgs)
  371     {
  372         switch (tArg)
  373         {
  374         case SWR_MULTISAMPLE_1X:
  375             return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
  376             break;
  377         case SWR_MULTISAMPLE_2X:
  378             return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_2X>::GetFunc(remainingArgs...);
  379             break;
  380         case SWR_MULTISAMPLE_4X:
  381             return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_4X>::GetFunc(remainingArgs...);
  382             break;
  383         case SWR_MULTISAMPLE_8X:
  384             return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_8X>::GetFunc(remainingArgs...);
  385             break;
  386         case SWR_MULTISAMPLE_16X:
  387             return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_16X>::GetFunc(remainingArgs...);
  388             break;
  389         default:
  390             SWR_ASSERT(0 && "Invalid sample count\n");
  391             return BEChooserSingleSample<ArgsT..., SWR_MULTISAMPLE_1X>::GetFunc(remainingArgs...);
  392             break;
  393         }
  394     }
  395 
  396     // Recursively parse args
  397     template <typename... TArgsT>
  398     static PFN_BACKEND_FUNC GetFunc(bool tArg, TArgsT... remainingArgs)
  399     {
  400         if (tArg == true)
  401         {
  402             return BEChooserSingleSample<ArgsT..., 1>::GetFunc(remainingArgs...);
  403         }
  404 
  405         return BEChooserSingleSample<ArgsT..., 0>::GetFunc(remainingArgs...);
  406     }
  407 };
  408 
  409 void InitBackendSingleFuncTable(PFN_BACKEND_FUNC (&table)[SWR_INPUT_COVERAGE_COUNT][2][2])
  410 {
  411     for (uint32_t inputCoverage = 0; inputCoverage < SWR_INPUT_COVERAGE_COUNT; inputCoverage++)
  412     {
  413         for (uint32_t isCentroid = 0; isCentroid < 2; isCentroid++)
  414         {
  415             for (uint32_t canEarlyZ = 0; canEarlyZ < 2; canEarlyZ++)
  416             {
  417                 table[inputCoverage][isCentroid][canEarlyZ] =
  418                     BEChooserSingleSample<>::GetFunc(SWR_MULTISAMPLE_1X,
  419                                                      false,
  420                                                      (SWR_INPUT_COVERAGE)inputCoverage,
  421                                                      (isCentroid > 0),
  422                                                      false,
  423                                                      (canEarlyZ > 0),
  424                                                      SWR_BACKEND_SINGLE_SAMPLE);
  425             }
  426         }
  427     }
  428 }