"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/gallium/drivers/swr/rasterizer/core/tilemgr.cpp" (16 Sep 2020, 17730 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "tilemgr.cpp" see the Fossies "Dox" file reference documentation.

    1 /****************************************************************************
    2  * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
    3  *
    4  * Permission is hereby granted, free of charge, to any person obtaining a
    5  * copy of this software and associated documentation files (the "Software"),
    6  * to deal in the Software without restriction, including without limitation
    7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
    8  * and/or sell copies of the Software, and to permit persons to whom the
    9  * Software is furnished to do so, subject to the following conditions:
   10  *
   11  * The above copyright notice and this permission notice (including the next
   12  * paragraph) shall be included in all copies or substantial portions of the
   13  * Software.
   14  *
   15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   21  * IN THE SOFTWARE.
   22  *
   23  * @file tilemgr.cpp
   24  *
   25  * @brief Implementation for Macro Tile Manager which provides the facilities
   26  *        for threads to work on an macro tile.
   27  *
   28  ******************************************************************************/
   29 #include <unordered_map>
   30 
   31 #include "fifo.hpp"
   32 #include "core/tilemgr.h"
   33 #include "core/multisample.h"
   34 #include "rdtsc_core.h"
   35 
   36 MacroTileMgr::MacroTileMgr(CachingArena& arena) : mArena(arena) {}
   37 
   38 void MacroTileMgr::enqueue(uint32_t x, uint32_t y, BE_WORK* pWork)
   39 {
   40     // Should not enqueue more then what we have backing for in the hot tile manager.
   41     SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
   42     SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
   43 
   44     if ((x & ~(KNOB_NUM_HOT_TILES_X - 1)) | (y & ~(KNOB_NUM_HOT_TILES_Y - 1)))
   45     {
   46         return;
   47     }
   48 
   49     uint32_t id = getTileId(x, y);
   50 
   51     if (id >= mTiles.size())
   52     {
   53         mTiles.resize((16 + id) * 2);
   54     }
   55 
   56     MacroTileQueue* pTile = mTiles[id];
   57     if (!pTile)
   58     {
   59         pTile = mTiles[id] = new MacroTileQueue();
   60     }
   61     pTile->mWorkItemsFE++;
   62     pTile->mId = id;
   63 
   64     if (pTile->mWorkItemsFE == 1)
   65     {
   66         pTile->clear(mArena);
   67         mDirtyTiles.push_back(pTile);
   68     }
   69 
   70     mWorkItemsProduced++;
   71     pTile->enqueue_try_nosync(mArena, pWork);
   72 }
   73 
   74 void MacroTileMgr::markTileComplete(uint32_t id)
   75 {
   76     SWR_ASSERT(mTiles.size() > id);
   77     MacroTileQueue& tile     = *mTiles[id];
   78     uint32_t        numTiles = tile.mWorkItemsFE;
   79     InterlockedExchangeAdd(&mWorkItemsConsumed, numTiles);
   80 
   81     _ReadWriteBarrier();
   82     tile.mWorkItemsBE += numTiles;
   83     SWR_ASSERT(tile.mWorkItemsFE == tile.mWorkItemsBE);
   84 
   85     // clear out tile, but defer fifo clear until the next DC first queues to it.
   86     // this prevents worker threads from constantly locking a completed macro tile
   87     tile.mWorkItemsFE = 0;
   88     tile.mWorkItemsBE = 0;
   89 }
   90 
   91 HOTTILE* HotTileMgr::GetHotTile(SWR_CONTEXT*                pContext,
   92                                 DRAW_CONTEXT*               pDC,
   93                                 HANDLE                      hWorkerPrivateData,
   94                                 uint32_t                    macroID,
   95                                 SWR_RENDERTARGET_ATTACHMENT attachment,
   96                                 bool                        create,
   97                                 uint32_t                    numSamples,
   98                                 uint32_t                    renderTargetArrayIndex)
   99 {
  100     uint32_t x, y;
  101     MacroTileMgr::getTileIndices(macroID, x, y);
  102 
  103     SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
  104     SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
  105 
  106     HotTileSet& tile    = mHotTiles[x][y];
  107     HOTTILE&    hotTile = tile.Attachment[attachment];
  108     if (hotTile.pBuffer == NULL)
  109     {
  110         if (create)
  111         {
  112             uint32_t size     = numSamples * mHotTileSize[attachment];
  113             uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
  114             hotTile.pBuffer =
  115                 (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
  116             hotTile.state                  = HOTTILE_INVALID;
  117             hotTile.numSamples             = numSamples;
  118             hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
  119         }
  120         else
  121         {
  122             return NULL;
  123         }
  124     }
  125     else
  126     {
  127         // free the old tile and create a new one with enough space to hold all samples
  128         if (numSamples > hotTile.numSamples)
  129         {
  130             // tile should be either uninitialized or resolved if we're deleting and switching to a
  131             // new sample count
  132             SWR_ASSERT((hotTile.state == HOTTILE_INVALID) || (hotTile.state == HOTTILE_RESOLVED) ||
  133                        (hotTile.state == HOTTILE_CLEAR));
  134             FreeHotTileMem(hotTile.pBuffer);
  135 
  136             uint32_t size     = numSamples * mHotTileSize[attachment];
  137             uint32_t numaNode = ((x ^ y) & pContext->threadPool.numaMask);
  138             hotTile.pBuffer =
  139                 (uint8_t*)AllocHotTileMem(size, 64, numaNode + pContext->threadInfo.BASE_NUMA_NODE);
  140             hotTile.state      = HOTTILE_INVALID;
  141             hotTile.numSamples = numSamples;
  142         }
  143 
  144         // if requested render target array index isn't currently loaded, need to store out the
  145         // current hottile and load the requested array slice
  146         if (renderTargetArrayIndex != hotTile.renderTargetArrayIndex)
  147         {
  148             SWR_FORMAT format;
  149             switch (attachment)
  150             {
  151             case SWR_ATTACHMENT_COLOR0:
  152             case SWR_ATTACHMENT_COLOR1:
  153             case SWR_ATTACHMENT_COLOR2:
  154             case SWR_ATTACHMENT_COLOR3:
  155             case SWR_ATTACHMENT_COLOR4:
  156             case SWR_ATTACHMENT_COLOR5:
  157             case SWR_ATTACHMENT_COLOR6:
  158             case SWR_ATTACHMENT_COLOR7:
  159                 format = KNOB_COLOR_HOT_TILE_FORMAT;
  160                 break;
  161             case SWR_ATTACHMENT_DEPTH:
  162                 format = KNOB_DEPTH_HOT_TILE_FORMAT;
  163                 break;
  164             case SWR_ATTACHMENT_STENCIL:
  165                 format = KNOB_STENCIL_HOT_TILE_FORMAT;
  166                 break;
  167             default:
  168                 SWR_INVALID("Unknown attachment: %d", attachment);
  169                 format = KNOB_COLOR_HOT_TILE_FORMAT;
  170                 break;
  171             }
  172 
  173             if (hotTile.state == HOTTILE_CLEAR)
  174             {
  175                 if (attachment == SWR_ATTACHMENT_STENCIL)
  176                     ClearStencilHotTile(&hotTile);
  177                 else if (attachment == SWR_ATTACHMENT_DEPTH)
  178                     ClearDepthHotTile(&hotTile);
  179                 else
  180                     ClearColorHotTile(&hotTile);
  181 
  182                 hotTile.state = HOTTILE_DIRTY;
  183             }
  184 
  185             if (hotTile.state == HOTTILE_DIRTY)
  186             {
  187                 pContext->pfnStoreTile(pDC,
  188                                        hWorkerPrivateData,
  189                                        format,
  190                                        attachment,
  191                                        x * KNOB_MACROTILE_X_DIM,
  192                                        y * KNOB_MACROTILE_Y_DIM,
  193                                        hotTile.renderTargetArrayIndex,
  194                                        hotTile.pBuffer);
  195             }
  196 
  197             pContext->pfnLoadTile(pDC,
  198                                   hWorkerPrivateData,
  199                                   format,
  200                                   attachment,
  201                                   x * KNOB_MACROTILE_X_DIM,
  202                                   y * KNOB_MACROTILE_Y_DIM,
  203                                   renderTargetArrayIndex,
  204                                   hotTile.pBuffer);
  205 
  206             hotTile.renderTargetArrayIndex = renderTargetArrayIndex;
  207             hotTile.state = HOTTILE_RESOLVED;
  208         }
  209     }
  210     return &tile.Attachment[attachment];
  211 }
  212 
  213 HOTTILE* HotTileMgr::GetHotTileNoLoad(SWR_CONTEXT*                pContext,
  214                                       DRAW_CONTEXT*               pDC,
  215                                       uint32_t                    macroID,
  216                                       SWR_RENDERTARGET_ATTACHMENT attachment,
  217                                       bool                        create,
  218                                       uint32_t                    numSamples)
  219 {
  220     uint32_t x, y;
  221     MacroTileMgr::getTileIndices(macroID, x, y);
  222 
  223     SWR_ASSERT(x < KNOB_NUM_HOT_TILES_X);
  224     SWR_ASSERT(y < KNOB_NUM_HOT_TILES_Y);
  225 
  226     HotTileSet& tile    = mHotTiles[x][y];
  227     HOTTILE&    hotTile = tile.Attachment[attachment];
  228     if (hotTile.pBuffer == NULL)
  229     {
  230         if (create)
  231         {
  232             uint32_t size                  = numSamples * mHotTileSize[attachment];
  233             hotTile.pBuffer                = (uint8_t*)AlignedMalloc(size, 64);
  234             hotTile.state                  = HOTTILE_INVALID;
  235             hotTile.numSamples             = numSamples;
  236             hotTile.renderTargetArrayIndex = 0;
  237         }
  238         else
  239         {
  240             return NULL;
  241         }
  242     }
  243 
  244     return &hotTile;
  245 }
  246 
  247 void HotTileMgr::ClearColorHotTile(
  248     const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
  249 {
  250     // Load clear color into SIMD register...
  251     float*       pClearData = (float*)(pHotTile->clearData);
  252     simd16scalar valR       = _simd16_broadcast_ss(&pClearData[0]);
  253     simd16scalar valG       = _simd16_broadcast_ss(&pClearData[1]);
  254     simd16scalar valB       = _simd16_broadcast_ss(&pClearData[2]);
  255     simd16scalar valA       = _simd16_broadcast_ss(&pClearData[3]);
  256 
  257     float*   pfBuf      = (float*)pHotTile->pBuffer;
  258     uint32_t numSamples = pHotTile->numSamples;
  259 
  260     for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
  261     {
  262         for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
  263         {
  264             for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
  265                  si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
  266             {
  267                 _simd16_store_ps(pfBuf, valR);
  268                 pfBuf += KNOB_SIMD16_WIDTH;
  269 
  270                 _simd16_store_ps(pfBuf, valG);
  271                 pfBuf += KNOB_SIMD16_WIDTH;
  272 
  273                 _simd16_store_ps(pfBuf, valB);
  274                 pfBuf += KNOB_SIMD16_WIDTH;
  275 
  276                 _simd16_store_ps(pfBuf, valA);
  277                 pfBuf += KNOB_SIMD16_WIDTH;
  278             }
  279         }
  280     }
  281 }
  282 
  283 void HotTileMgr::ClearDepthHotTile(
  284     const HOTTILE* pHotTile) // clear a macro tile from float4 clear data.
  285 {
  286     // Load clear color into SIMD register...
  287     float*       pClearData = (float*)(pHotTile->clearData);
  288     simd16scalar valZ       = _simd16_broadcast_ss(&pClearData[0]);
  289 
  290     float*   pfBuf      = (float*)pHotTile->pBuffer;
  291     uint32_t numSamples = pHotTile->numSamples;
  292 
  293     for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
  294     {
  295         for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
  296         {
  297             for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
  298                  si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM)
  299             {
  300                 _simd16_store_ps(pfBuf, valZ);
  301                 pfBuf += KNOB_SIMD16_WIDTH;
  302             }
  303         }
  304     }
  305 }
  306 
  307 void HotTileMgr::ClearStencilHotTile(const HOTTILE* pHotTile)
  308 {
  309     // convert from F32 to U8.
  310     uint8_t clearVal = (uint8_t)(pHotTile->clearData[0]);
  311     // broadcast 32x into __m256i...
  312     simd16scalari valS = _simd16_set1_epi8(clearVal);
  313 
  314     simd16scalari* pBuf       = (simd16scalari*)pHotTile->pBuffer;
  315     uint32_t       numSamples = pHotTile->numSamples;
  316 
  317     for (uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
  318     {
  319         for (uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
  320         {
  321             // We're putting 4 pixels in each of the 32-bit slots, so increment 4 times as quickly.
  322             for (uint32_t si = 0; si < (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * numSamples);
  323                  si += SIMD16_TILE_X_DIM * SIMD16_TILE_Y_DIM * 4)
  324             {
  325                 _simd16_store_si(pBuf, valS);
  326                 pBuf += 1;
  327             }
  328         }
  329     }
  330 }
  331 
  332 //////////////////////////////////////////////////////////////////////////
  333 /// @brief InitializeHotTiles
  334 /// for draw calls, we initialize the active hot tiles and perform deferred
  335 /// load on them if tile is in invalid state. we do this in the outer thread
  336 /// loop instead of inside the draw routine itself mainly for performance,
  337 /// to avoid unnecessary setup every triangle
  338 /// @todo support deferred clear
  339 /// @param pCreateInfo - pointer to creation info.
  340 void HotTileMgr::InitializeHotTiles(SWR_CONTEXT*  pContext,
  341                                     DRAW_CONTEXT* pDC,
  342                                     uint32_t      workerId,
  343                                     uint32_t      macroID)
  344 {
  345     const API_STATE& state    = GetApiState(pDC);
  346     HANDLE hWorkerPrivateData = pDC->pContext->threadPool.pThreadData[workerId].pWorkerPrivateData;
  347 
  348     uint32_t x, y;
  349     MacroTileMgr::getTileIndices(macroID, x, y);
  350     x *= KNOB_MACROTILE_X_DIM;
  351     y *= KNOB_MACROTILE_Y_DIM;
  352 
  353     uint32_t numSamples = GetNumSamples(state.rastState.sampleCount);
  354 
  355     // check RT if enabled
  356     unsigned long rtSlot                 = 0;
  357     uint32_t      colorHottileEnableMask = state.colorHottileEnable;
  358     while (_BitScanForward(&rtSlot, colorHottileEnableMask))
  359     {
  360         HOTTILE* pHotTile =
  361             GetHotTile(pContext,
  362                        pDC,
  363                        hWorkerPrivateData,
  364                        macroID,
  365                        (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
  366                        true,
  367                        numSamples);
  368 
  369         if (pHotTile->state == HOTTILE_INVALID)
  370         {
  371             RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
  372             // invalid hottile before draw requires a load from surface before we can draw to it
  373             pContext->pfnLoadTile(pDC,
  374                                   hWorkerPrivateData,
  375                                   KNOB_COLOR_HOT_TILE_FORMAT,
  376                                   (SWR_RENDERTARGET_ATTACHMENT)(SWR_ATTACHMENT_COLOR0 + rtSlot),
  377                                   x,
  378                                   y,
  379                                   pHotTile->renderTargetArrayIndex,
  380                                   pHotTile->pBuffer);
  381             pHotTile->state = HOTTILE_RESOLVED;
  382             RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
  383         }
  384         else if (pHotTile->state == HOTTILE_CLEAR)
  385         {
  386             RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
  387             // Clear the tile.
  388             ClearColorHotTile(pHotTile);
  389             pHotTile->state = HOTTILE_DIRTY;
  390             RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
  391         }
  392         colorHottileEnableMask &= ~(1 << rtSlot);
  393     }
  394 
  395     // check depth if enabled
  396     if (state.depthHottileEnable)
  397     {
  398         HOTTILE* pHotTile = GetHotTile(
  399             pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_DEPTH, true, numSamples);
  400         if (pHotTile->state == HOTTILE_INVALID)
  401         {
  402             RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
  403             // invalid hottile before draw requires a load from surface before we can draw to it
  404             pContext->pfnLoadTile(pDC,
  405                                   hWorkerPrivateData,
  406                                   KNOB_DEPTH_HOT_TILE_FORMAT,
  407                                   SWR_ATTACHMENT_DEPTH,
  408                                   x,
  409                                   y,
  410                                   pHotTile->renderTargetArrayIndex,
  411                                   pHotTile->pBuffer);
  412             pHotTile->state = HOTTILE_DIRTY;
  413             RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
  414         }
  415         else if (pHotTile->state == HOTTILE_CLEAR)
  416         {
  417             RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
  418             // Clear the tile.
  419             ClearDepthHotTile(pHotTile);
  420             pHotTile->state = HOTTILE_DIRTY;
  421             RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
  422         }
  423     }
  424 
  425     // check stencil if enabled
  426     if (state.stencilHottileEnable)
  427     {
  428         HOTTILE* pHotTile = GetHotTile(
  429             pContext, pDC, hWorkerPrivateData, macroID, SWR_ATTACHMENT_STENCIL, true, numSamples);
  430         if (pHotTile->state == HOTTILE_INVALID)
  431         {
  432             RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
  433             // invalid hottile before draw requires a load from surface before we can draw to it
  434             pContext->pfnLoadTile(pDC,
  435                                   hWorkerPrivateData,
  436                                   KNOB_STENCIL_HOT_TILE_FORMAT,
  437                                   SWR_ATTACHMENT_STENCIL,
  438                                   x,
  439                                   y,
  440                                   pHotTile->renderTargetArrayIndex,
  441                                   pHotTile->pBuffer);
  442             pHotTile->state = HOTTILE_DIRTY;
  443             RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
  444         }
  445         else if (pHotTile->state == HOTTILE_CLEAR)
  446         {
  447             RDTSC_BEGIN(pContext->pBucketMgr, BELoadTiles, pDC->drawId);
  448             // Clear the tile.
  449             ClearStencilHotTile(pHotTile);
  450             pHotTile->state = HOTTILE_DIRTY;
  451             RDTSC_END(pContext->pBucketMgr, BELoadTiles, 0);
  452         }
  453     }
  454 }