"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/swr/rasterizer/core/backend.cpp" (16 Sep 2020, 18168 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "backend.cpp", see the Fossies "Dox" file reference documentation.

    1 /****************************************************************************
    2  * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21  * IN THE SOFTWARE.
   22  *
   23  * @file backend.cpp
   24  *
   25  * @brief Backend handles rasterization, pixel shading and output merger
   26  *        operations.
   27  *
   28  ******************************************************************************/
   29 
   30 #include <smmintrin.h>
   31 
   32 #include "backend.h"
   33 #include "backend_impl.h"
   34 #include "tilemgr.h"
   35 #include "memory/tilingtraits.h"
   36 #include "core/multisample.h"
   37 #include "backends/gen_BackendPixelRate.hpp"
   38 
   39 #include <algorithm>
   40 
   41 
   42 //////////////////////////////////////////////////////////////////////////
   43 /// @brief Process compute work.
   44 /// @param pDC - pointer to draw context (dispatch).
   45 /// @param workerId - The unique worker ID that is assigned to this thread.
   46 /// @param threadGroupId - the linear index for the thread group within the dispatch.
   47 void ProcessComputeBE(DRAW_CONTEXT* pDC,
   48                       uint32_t      workerId,
   49                       uint32_t      threadGroupId,
   50                       void*&        pSpillFillBuffer,
   51                       void*&        pScratchSpace)
   52 {
   53     SWR_CONTEXT* pContext = pDC->pContext;
   54 
   55     RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEDispatch, pDC->drawId);
   56 
   57     const COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pDispatch->GetTasksData();
   58     SWR_ASSERT(pTaskData != nullptr);
   59 
   60     // Ensure spill fill memory has been allocated.
   61     size_t spillFillSize = pDC->pState->state.totalSpillFillSize;
   62     if (spillFillSize && pSpillFillBuffer == nullptr)
   63     {
   64         pSpillFillBuffer = pDC->pArena->AllocAlignedSync(spillFillSize, KNOB_SIMD16_BYTES);
   65     }
   66 
   67     size_t scratchSpaceSize =
   68         pDC->pState->state.scratchSpaceSizePerWarp * pDC->pState->state.scratchSpaceNumWarps;
   69     if (scratchSpaceSize && pScratchSpace == nullptr)
   70     {
   71         pScratchSpace = pDC->pArena->AllocAlignedSync(scratchSpaceSize, KNOB_SIMD16_BYTES);
   72     }
   73 
   74     const API_STATE& state = GetApiState(pDC);
   75 
   76     SWR_CS_CONTEXT csContext{0};
   77     csContext.tileCounter         = threadGroupId;
   78     csContext.dispatchDims[0]     = pTaskData->threadGroupCountX;
   79     csContext.dispatchDims[1]     = pTaskData->threadGroupCountY;
   80     csContext.dispatchDims[2]     = pTaskData->threadGroupCountZ;
   81     csContext.pTGSM               = pContext->ppScratch[workerId];
   82     csContext.pSpillFillBuffer    = (uint8_t*)pSpillFillBuffer;
   83     csContext.pScratchSpace       = (uint8_t*)pScratchSpace;
   84     csContext.scratchSpacePerWarp = pDC->pState->state.scratchSpaceSizePerWarp;
   85 
   86     state.pfnCsFunc(GetPrivateState(pDC),
   87                     pContext->threadPool.pThreadData[workerId].pWorkerPrivateData,
   88                     &csContext);
   89 
   90     UPDATE_STAT_BE(CsInvocations, state.totalThreadsInGroup);
   91     AR_EVENT(CSStats((HANDLE)&csContext.stats));
   92 
   93     RDTSC_END(pDC->pContext->pBucketMgr, BEDispatch, 1);
   94 }
   95 
//////////////////////////////////////////////////////////////////////////
/// @brief Process shutdown. Intentionally empty; none of the arguments are used.
/// @param pDC - pointer to draw context (dispatch).
/// @param workerId - The unique worker ID that is assigned to this thread.
/// @param macroTile - macrotile id of the work item (unused).
/// @param pUserData - user data attached to the work item (unused).
void ProcessShutdownBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
{
    // Dummy function
}
  105 
//////////////////////////////////////////////////////////////////////////
/// @brief Process a sync work item. Only validates the macrotile id.
/// @param pDC - pointer to draw context (not referenced in the body).
/// @param workerId - worker ID of the calling thread (not referenced in the body).
/// @param macroTile - macrotile id; its decoded x/y indices are asserted to be 0.
/// @param pUserData - user data of the work item (not referenced in the body).
void ProcessSyncBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pUserData)
{
    // Decode the macrotile id into its x/y tile indices.
    uint32_t x, y;
    MacroTileMgr::getTileIndices(macroTile, x, y);
    // The only check performed here: both indices must be zero.
    SWR_ASSERT(x == 0 && y == 0);
}
  112 
  113 void ProcessStoreTileBE(DRAW_CONTEXT*               pDC,
  114                         uint32_t                    workerId,
  115                         uint32_t                    macroTile,
  116                         STORE_TILES_DESC*           pDesc,
  117                         SWR_RENDERTARGET_ATTACHMENT attachment)
  118 {
  119     SWR_CONTEXT* pContext           = pDC->pContext;
  120     HANDLE       hWorkerPrivateData = pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
  121 
  122     RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEStoreTiles, pDC->drawId);
  123 
  124     SWR_FORMAT srcFormat;
  125     switch (attachment)
  126     {
  127     case SWR_ATTACHMENT_COLOR0:
  128     case SWR_ATTACHMENT_COLOR1:
  129     case SWR_ATTACHMENT_COLOR2:
  130     case SWR_ATTACHMENT_COLOR3:
  131     case SWR_ATTACHMENT_COLOR4:
  132     case SWR_ATTACHMENT_COLOR5:
  133     case SWR_ATTACHMENT_COLOR6:
  134     case SWR_ATTACHMENT_COLOR7:
  135         srcFormat = KNOB_COLOR_HOT_TILE_FORMAT;
  136         break;
  137     case SWR_ATTACHMENT_DEPTH:
  138         srcFormat = KNOB_DEPTH_HOT_TILE_FORMAT;
  139         break;
  140     case SWR_ATTACHMENT_STENCIL:
  141         srcFormat = KNOB_STENCIL_HOT_TILE_FORMAT;
  142         break;
  143     default:
  144         SWR_INVALID("Unknown attachment: %d", attachment);
  145         srcFormat = KNOB_COLOR_HOT_TILE_FORMAT;
  146         break;
  147     }
  148 
  149     uint32_t x, y;
  150     MacroTileMgr::getTileIndices(macroTile, x, y);
  151 
  152     // Only need to store the hottile if it's been rendered to...
  153     HOTTILE* pHotTile =
  154         pContext->pHotTileMgr->GetHotTileNoLoad(pContext, pDC, macroTile, attachment, false);
  155     if (pHotTile)
  156     {
  157         // clear if clear is pending (i.e., not rendered to), then mark as dirty for store.
  158         if (pHotTile->state == HOTTILE_CLEAR)
  159         {
  160             PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[srcFormat];
  161             SWR_ASSERT(pfnClearTiles != nullptr);
  162 
  163             pfnClearTiles(pDC,
  164                           hWorkerPrivateData,
  165                           attachment,
  166                           macroTile,
  167                           pHotTile->renderTargetArrayIndex,
  168                           pHotTile->clearData,
  169                           pDesc->rect);
  170         }
  171 
  172         if (pHotTile->state == HOTTILE_DIRTY ||
  173             pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY)
  174         {
  175             int32_t destX = KNOB_MACROTILE_X_DIM * x;
  176             int32_t destY = KNOB_MACROTILE_Y_DIM * y;
  177 
  178             pContext->pfnStoreTile(pDC,
  179                                    hWorkerPrivateData,
  180                                    srcFormat,
  181                                    attachment,
  182                                    destX,
  183                                    destY,
  184                                    pHotTile->renderTargetArrayIndex,
  185                                    pHotTile->pBuffer);
  186         }
  187 
  188         if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_RESOLVED) 
  189         {
  190             if (!(pDesc->postStoreTileState == (SWR_TILE_STATE)HOTTILE_DIRTY &&
  191                   pHotTile->state == HOTTILE_RESOLVED))
  192             {
  193                 pHotTile->state = (HOTTILE_STATE)pDesc->postStoreTileState;
  194             }
  195         }
  196     }
  197     RDTSC_END(pDC->pContext->pBucketMgr, BEStoreTiles, 1);
  198 }
  199 
  200 void ProcessStoreTilesBE(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroTile, void* pData)
  201 {
  202     STORE_TILES_DESC* pDesc = (STORE_TILES_DESC*)pData;
  203 
  204     unsigned long rt   = 0;
  205     uint32_t      mask = pDesc->attachmentMask;
  206     while (_BitScanForward(&rt, mask))
  207     {
  208         mask &= ~(1 << rt);
  209         ProcessStoreTileBE(pDC, workerId, macroTile, pDesc, (SWR_RENDERTARGET_ATTACHMENT)rt);
  210     }
  211 }
  212 
  213 void ProcessDiscardInvalidateTilesBE(DRAW_CONTEXT* pDC,
  214                                      uint32_t      workerId,
  215                                      uint32_t      macroTile,
  216                                      void*         pData)
  217 {
  218     DISCARD_INVALIDATE_TILES_DESC* pDesc    = (DISCARD_INVALIDATE_TILES_DESC*)pData;
  219     SWR_CONTEXT*                   pContext = pDC->pContext;
  220 
  221     const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
  222 
  223     for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; ++i)
  224     {
  225         if (pDesc->attachmentMask & (1 << i))
  226         {
  227             HOTTILE* pHotTile =
  228                 pContext->pHotTileMgr->GetHotTileNoLoad(pContext,
  229                                                         pDC,
  230                                                         macroTile,
  231                                                         (SWR_RENDERTARGET_ATTACHMENT)i,
  232                                                         pDesc->createNewTiles,
  233                                                         numSamples);
  234             if (pHotTile)
  235             {
  236                 HOTTILE_STATE newState = (HOTTILE_STATE)pDesc->newTileState;;
  237                 if (pHotTile->state == HOTTILE_DIRTY || pHotTile->state == HOTTILE_CLEAR)
  238                 {
  239                     if (newState == HOTTILE_INVALID)
  240                     {
  241                         // This is OK for APIs that explicitly allow discards
  242                         // (for e.g. depth / stencil data)
  243                         //SWR_INVALID("Discarding valid data!");
  244                     }
  245                 }
  246                 pHotTile->state = newState;
  247             }
  248         }
  249     }
  250 }
  251 
//////////////////////////////////////////////////////////////////////////
/// @brief Backend used when no pixel shader runs: for each SIMD tile of the
///        raster tile and each enabled sample it interpolates Z, applies the
///        optional depth bounds test and user clip distances, then performs
///        the depth/stencil test and write and updates the DepthPassCount stat.
/// @tparam sampleCountT - multisample type this instantiation is registered
///         under; it is not referenced in the body (samples are taken from
///         state.blendState.sampleMask).
/// @param pDC - pointer to draw context.
/// @param workerId - The unique worker ID that is assigned to this thread.
/// @param x - x position of the raster tile; added to the per-lane X offsets.
/// @param y - y position of the raster tile; added to the per-lane Y offsets.
/// @param work - triangle descriptor; its per-sample coverage masks are
///        consumed (shifted) in place as SIMD tiles are processed.
/// @param renderBuffers - source of the depth and stencil buffer pointers.
template <uint32_t sampleCountT>
void BackendNullPS(DRAW_CONTEXT*        pDC,
                   uint32_t             workerId,
                   uint32_t             x,
                   uint32_t             y,
                   SWR_TRIANGLE_DESC&   work,
                   RenderOutputBuffers& renderBuffers)
{
    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BENullBackend, pDC->drawId);
    ///@todo: handle center multisample pattern
    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BESetup, pDC->drawId);

    const API_STATE& state = GetApiState(pDC);

    BarycentricCoeffs coeffs;
    SetupBarycentricCoeffs(&coeffs, work);

    // No color buffers are requested (NULL, count 0); only depth and stencil.
    uint8_t *pDepthBuffer, *pStencilBuffer;
    SetupRenderBuffers(NULL, &pDepthBuffer, &pStencilBuffer, 0, renderBuffers);

    // Left uninitialized on purpose; this function only writes the sample
    // positions and vZ, and reads vI.sample / vJ.sample after
    // CalcSampleBarycentrics (presumably filled in by that call — TODO confirm).
    SWR_PS_CONTEXT psContext;
    // skip SetupPixelShaderContext(&psContext, ...); // not needed here

    RDTSC_END(pDC->pContext->pBucketMgr, BESetup, 0);

    simdscalar vYSamplePosUL = _simd_add_ps(vULOffsetsY, _simd_set1_ps(static_cast<float>(y)));

    const simdscalar           dy        = _simd_set1_ps(static_cast<float>(SIMD_TILE_Y_DIM));
    const SWR_MULTISAMPLE_POS& samplePos = state.rastState.samplePositions;
    // Walk the raster tile one SIMD tile at a time, rows first.
    for (uint32_t yy = y; yy < y + KNOB_TILE_Y_DIM; yy += SIMD_TILE_Y_DIM)
    {
        // Restart the X positions at the left edge for every row of SIMD tiles.
        simdscalar vXSamplePosUL = _simd_add_ps(vULOffsetsX, _simd_set1_ps(static_cast<float>(x)));

        const simdscalar dx = _simd_set1_ps(static_cast<float>(SIMD_TILE_X_DIM));

        for (uint32_t xx = x; xx < x + KNOB_TILE_X_DIM; xx += SIMD_TILE_X_DIM)
        {
            // iterate over active samples
            unsigned long sample     = 0;
            uint32_t      sampleMask = state.blendState.sampleMask;
            while (_BitScanForward(&sample, sampleMask))
            {
                // Clear the bit just found so the scan advances to the next sample.
                sampleMask &= ~(1 << sample);

                // Coverage bits of the current SIMD tile for this sample.
                simdmask coverageMask = work.coverageMask[sample] & MASK;

                if (coverageMask)
                {
                    // offset depth/stencil buffers current sample
                    uint8_t* pDepthSample   = pDepthBuffer + RasterTileDepthOffset(sample);
                    uint8_t* pStencilSample = pStencilBuffer + RasterTileStencilOffset(sample);

                    // Optional depth bounds test against the depth values
                    // already stored in the hot tile.
                    if (state.depthHottileEnable && state.depthBoundsState.depthBoundsTestEnable)
                    {
                        static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT,
                                      "Unsupported depth hot tile format");

                        const simdscalar z =
                            _simd_load_ps(reinterpret_cast<const float*>(pDepthSample));

                        const float minz = state.depthBoundsState.depthBoundsTestMinValue;
                        const float maxz = state.depthBoundsState.depthBoundsTestMaxValue;

                        coverageMask &= CalcDepthBoundsAcceptMask(z, minz, maxz);
                    }

                    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEBarycentric, pDC->drawId);

                    // calculate per sample positions
                    psContext.vX.sample = _simd_add_ps(vXSamplePosUL, samplePos.vX(sample));
                    psContext.vY.sample = _simd_add_ps(vYSamplePosUL, samplePos.vY(sample));

                    CalcSampleBarycentrics(coeffs, psContext);

                    // interpolate and quantize z
                    psContext.vZ = vplaneps(coeffs.vZa,
                                            coeffs.vZb,
                                            coeffs.vZc,
                                            psContext.vI.sample,
                                            psContext.vJ.sample);
                    psContext.vZ = state.pfnQuantizeDepth(psContext.vZ);

                    RDTSC_END(pDC->pContext->pBucketMgr, BEBarycentric, 0);

                    // interpolate user clip distance if available
                    if (state.backendState.clipDistanceMask)
                    {
                        // Lanes flagged by the user clip mask are removed from coverage.
                        coverageMask &= ~ComputeUserClipMask(state.backendState.clipDistanceMask,
                                                             work.pUserClipBuffer,
                                                             psContext.vI.sample,
                                                             psContext.vJ.sample);
                    }

                    // The stencil pass mask starts out as the coverage mask and
                    // is passed by pointer so DepthStencilTest can update it.
                    simdscalar vCoverageMask   = _simd_vmask_ps(coverageMask);
                    simdscalar stencilPassMask = vCoverageMask;

                    RDTSC_BEGIN(pDC->pContext->pBucketMgr, BEEarlyDepthTest, pDC->drawId);
                    simdscalar depthPassMask = DepthStencilTest(&state,
                                                                work.triFlags.frontFacing,
                                                                work.triFlags.viewportIndex,
                                                                psContext.vZ,
                                                                pDepthSample,
                                                                vCoverageMask,
                                                                pStencilSample,
                                                                &stencilPassMask);
                    AR_EVENT(EarlyDepthStencilInfoNullPS(_simd_movemask_ps(depthPassMask),
                                                         _simd_movemask_ps(stencilPassMask),
                                                         _simd_movemask_ps(vCoverageMask)));
                    DepthStencilWrite(&state.vp[work.triFlags.viewportIndex],
                                      &state.depthStencilState,
                                      work.triFlags.frontFacing,
                                      psContext.vZ,
                                      pDepthSample,
                                      depthPassMask,
                                      vCoverageMask,
                                      pStencilSample,
                                      stencilPassMask);
                    RDTSC_END(pDC->pContext->pBucketMgr, BEEarlyDepthTest, 0);

                    // Count the lanes that passed the depth test for the stats.
                    uint32_t statMask  = _simd_movemask_ps(depthPassMask);
                    uint32_t statCount = _mm_popcnt_u32(statMask);
                    UPDATE_STAT_BE(DepthPassCount, statCount);
                }

            // NOTE(review): no goto in this function targets this label;
            // ATTR_UNUSED presumably silences the unused-label warning.
            Endtile:
                ATTR_UNUSED;
                // Drop the bits of the SIMD tile just processed so the next
                // iteration sees the following tile's coverage.
                work.coverageMask[sample] >>= (SIMD_TILE_Y_DIM * SIMD_TILE_X_DIM);
            }

            // Advance the depth/stencil pointers by one SIMD width of pixels.
            pDepthBuffer += (KNOB_SIMD_WIDTH * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp) / 8;
            pStencilBuffer +=
                (KNOB_SIMD_WIDTH * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp) / 8;

            vXSamplePosUL = _simd_add_ps(vXSamplePosUL, dx);
        }

        vYSamplePosUL = _simd_add_ps(vYSamplePosUL, dy);
    }

    RDTSC_END(pDC->pContext->pBucketMgr, BENullBackend, 0);
}
  393 
// Global backend dispatch tables.

// Clear function per SWR format; indexed by the hot tile source format in
// ProcessStoreTileBE. Entries are filled in outside this part of the file.
PFN_CLEAR_TILES  gClearTilesTable[NUM_SWR_FORMATS] = {};
// Null pixel shader backend per multisample type; filled in by
// InitBackendFuncTables().
PFN_BACKEND_FUNC gBackendNullPs[SWR_MULTISAMPLE_TYPE_COUNT];
// Single sample backends; passed to InitBackendSingleFuncTable().
PFN_BACKEND_FUNC gBackendSingleSample[SWR_INPUT_COVERAGE_COUNT][2] // centroid
                                     [2]                           // canEarlyZ
    = {};
// Pixel rate backends; not written in this file's visible code (presumably
// populated by InitBackendPixelRate() — TODO confirm).
PFN_BACKEND_FUNC gBackendPixelRateTable[SWR_MULTISAMPLE_TYPE_COUNT][2] // isCenterPattern
                                       [SWR_INPUT_COVERAGE_COUNT][2]   // centroid
                                       [2]                             // forcedSampleCount
                                       [2]                             // canEarlyZ
    = {};
// Sample rate backends; passed to InitBackendSampleFuncTable().
PFN_BACKEND_FUNC gBackendSampleRateTable[SWR_MULTISAMPLE_TYPE_COUNT][SWR_INPUT_COVERAGE_COUNT]
                                        [2] // centroid
                                        [2] // canEarlyZ
    = {};
  408 
  409 void InitBackendFuncTables()
  410 {
  411     InitBackendPixelRate();
  412     InitBackendSingleFuncTable(gBackendSingleSample);
  413     InitBackendSampleFuncTable(gBackendSampleRateTable);
  414 
  415     gBackendNullPs[SWR_MULTISAMPLE_1X]  = &BackendNullPS<SWR_MULTISAMPLE_1X>;
  416     gBackendNullPs[SWR_MULTISAMPLE_2X]  = &BackendNullPS<SWR_MULTISAMPLE_2X>;
  417     gBackendNullPs[SWR_MULTISAMPLE_4X]  = &BackendNullPS<SWR_MULTISAMPLE_4X>;
  418     gBackendNullPs[SWR_MULTISAMPLE_8X]  = &BackendNullPS<SWR_MULTISAMPLE_8X>;
  419     gBackendNullPs[SWR_MULTISAMPLE_16X] = &BackendNullPS<SWR_MULTISAMPLE_16X>;
  420 }