"Fossies" - the Fresh Open Source Software Archive

Member "mesa-20.1.8/src/amd/common/ac_surface.c" (16 Sep 2020, 68690 Bytes) of package /linux/misc/mesa-20.1.8.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ac_surface.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 20.1.5_vs_20.2.0-rc1.

    1 /*
    2  * Copyright © 2011 Red Hat All Rights Reserved.
    3  * Copyright © 2017 Advanced Micro Devices, Inc.
    4  * All Rights Reserved.
    5  *
    6  * Permission is hereby granted, free of charge, to any person obtaining
    7  * a copy of this software and associated documentation files (the
    8  * "Software"), to deal in the Software without restriction, including
    9  * without limitation the rights to use, copy, modify, merge, publish,
   10  * distribute, sub license, and/or sell copies of the Software, and to
   11  * permit persons to whom the Software is furnished to do so, subject to
   12  * the following conditions:
   13  *
   14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
   16  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   17  * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
   18  * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
   22  *
   23  * The above copyright notice and this permission notice (including the
   24  * next paragraph) shall be included in all copies or substantial portions
   25  * of the Software.
   26  */
   27 
   28 #include "ac_surface.h"
   29 #include "amd_family.h"
   30 #include "addrlib/src/amdgpu_asic_addr.h"
   31 #include "ac_gpu_info.h"
   32 #include "util/hash_table.h"
   33 #include "util/macros.h"
   34 #include "util/simple_mtx.h"
   35 #include "util/u_atomic.h"
   36 #include "util/u_math.h"
   37 #include "util/u_memory.h"
   38 #include "sid.h"
   39 
   40 #include <errno.h>
   41 #include <stdio.h>
   42 #include <stdlib.h>
   43 #include <amdgpu.h>
   44 #include "drm-uapi/amdgpu_drm.h"
   45 
   46 #include "addrlib/inc/addrinterface.h"
   47 
   /* Fallback values for the addrlib "chip engine" identifiers. They are only
    * defined here when the headers included above did not provide them.
    */
   #if !defined(CIASICIDGFXENGINE_SOUTHERNISLAND)
   #  define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
   #endif

   #if !defined(CIASICIDGFXENGINE_ARCTICISLAND)
   #  define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
   #endif
   55 
   56 struct ac_addrlib {
   57     ADDR_HANDLE handle;
   58 
   59     /* The cache of DCC retile maps for reuse when allocating images of
   60      * similar sizes.
   61      */
   62     simple_mtx_t dcc_retile_map_lock;
   63     struct hash_table *dcc_retile_maps;
   64 };
   65 
   66 struct dcc_retile_map_key {
   67     enum radeon_family family;
   68     unsigned retile_width;
   69     unsigned retile_height;
   70     bool rb_aligned;
   71     bool pipe_aligned;
   72     unsigned dcc_retile_num_elements;
   73     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT input;
   74 };
   75 
   76 static uint32_t dcc_retile_map_hash_key(const void *key)
   77 {
   78     return _mesa_hash_data(key, sizeof(struct dcc_retile_map_key));
   79 }
   80 
   81 static bool dcc_retile_map_keys_equal(const void *a, const void *b)
   82 {
   83     return memcmp(a, b, sizeof(struct dcc_retile_map_key)) == 0;
   84 }
   85 
   86 static void dcc_retile_map_free(struct hash_entry *entry)
   87 {
   88     free((void*)entry->key);
   89     free(entry->data);
   90 }
   91 
   92 static uint32_t *ac_compute_dcc_retile_map(struct ac_addrlib *addrlib,
   93                        const struct radeon_info *info,
   94                        unsigned retile_width, unsigned retile_height,
   95                        bool rb_aligned, bool pipe_aligned, bool use_uint16,
   96                        unsigned dcc_retile_num_elements,
   97                        const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT *in)
   98 {
   99     unsigned dcc_retile_map_size = dcc_retile_num_elements * (use_uint16 ? 2 : 4);
  100     struct dcc_retile_map_key key;
  101 
  102     assert(in->numFrags == 1 && in->numSlices == 1 && in->numMipLevels == 1);
  103 
  104     memset(&key, 0, sizeof(key));
  105     key.family = info->family;
  106     key.retile_width = retile_width;
  107     key.retile_height = retile_height;
  108     key.rb_aligned = rb_aligned;
  109     key.pipe_aligned = pipe_aligned;
  110     key.dcc_retile_num_elements = dcc_retile_num_elements;
  111     memcpy(&key.input, in, sizeof(*in));
  112 
  113     simple_mtx_lock(&addrlib->dcc_retile_map_lock);
  114 
  115     /* If we have already computed this retile map, get it from the hash table. */
  116     struct hash_entry *entry = _mesa_hash_table_search(addrlib->dcc_retile_maps, &key);
  117     if (entry) {
  118         uint32_t *map = entry->data;
  119         simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
  120         return map;
  121     }
  122 
  123     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin;
  124     memcpy(&addrin, in, sizeof(*in));
  125 
  126     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT addrout = {};
  127     addrout.size = sizeof(addrout);
  128 
  129     void *dcc_retile_map = malloc(dcc_retile_map_size);
  130     if (!dcc_retile_map) {
  131         simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
  132         return NULL;
  133     }
  134 
  135     unsigned index = 0;
  136 
  137     for (unsigned y = 0; y < retile_height; y += in->compressBlkHeight) {
  138         addrin.y = y;
  139 
  140         for (unsigned x = 0; x < retile_width; x += in->compressBlkWidth) {
  141             addrin.x = x;
  142 
  143             /* Compute src DCC address */
  144             addrin.dccKeyFlags.pipeAligned = pipe_aligned;
  145             addrin.dccKeyFlags.rbAligned = rb_aligned;
  146             addrout.addr = 0;
  147 
  148             if (Addr2ComputeDccAddrFromCoord(addrlib->handle, &addrin, &addrout) != ADDR_OK) {
  149                 simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
  150                 return NULL;
  151             }
  152 
  153             if (use_uint16)
  154                 ((uint16_t*)dcc_retile_map)[index * 2] = addrout.addr;
  155             else
  156                 ((uint32_t*)dcc_retile_map)[index * 2] = addrout.addr;
  157 
  158             /* Compute dst DCC address */
  159             addrin.dccKeyFlags.pipeAligned = 0;
  160             addrin.dccKeyFlags.rbAligned = 0;
  161             addrout.addr = 0;
  162 
  163             if (Addr2ComputeDccAddrFromCoord(addrlib->handle, &addrin, &addrout) != ADDR_OK) {
  164                 simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
  165                 return NULL;
  166             }
  167 
  168             if (use_uint16)
  169                 ((uint16_t*)dcc_retile_map)[index * 2 + 1] = addrout.addr;
  170             else
  171                 ((uint32_t*)dcc_retile_map)[index * 2 + 1] = addrout.addr;
  172 
  173             assert(index * 2 + 1 < dcc_retile_num_elements);
  174             index++;
  175         }
  176     }
  177     /* Fill the remaining pairs with the last one (for the compute shader). */
  178     for (unsigned i = index * 2; i < dcc_retile_num_elements; i++) {
  179         if (use_uint16)
  180             ((uint16_t*)dcc_retile_map)[i] = ((uint16_t*)dcc_retile_map)[i - 2];
  181         else
  182             ((uint32_t*)dcc_retile_map)[i] = ((uint32_t*)dcc_retile_map)[i - 2];
  183     }
  184 
  185     /* Insert the retile map into the hash table, so that it can be reused and
  186      * the computation can be skipped for similar image sizes.
  187      */
  188     _mesa_hash_table_insert(addrlib->dcc_retile_maps,
  189                 mem_dup(&key, sizeof(key)), dcc_retile_map);
  190 
  191     simple_mtx_unlock(&addrlib->dcc_retile_map_lock);
  192     return dcc_retile_map;
  193 }
  194 
  195 static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
  196 {
  197     return malloc(pInput->sizeInBytes);
  198 }
  199 
  200 static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput)
  201 {
  202     free(pInput->pVirtAddr);
  203     return ADDR_OK;
  204 }
  205 
  206 struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
  207                      const struct amdgpu_gpu_info *amdinfo,
  208                      uint64_t *max_alignment)
  209 {
  210     ADDR_CREATE_INPUT addrCreateInput = {0};
  211     ADDR_CREATE_OUTPUT addrCreateOutput = {0};
  212     ADDR_REGISTER_VALUE regValue = {0};
  213     ADDR_CREATE_FLAGS createFlags = {{0}};
  214     ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
  215     ADDR_E_RETURNCODE addrRet;
  216 
  217     addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
  218     addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
  219 
  220     regValue.gbAddrConfig = amdinfo->gb_addr_cfg;
  221     createFlags.value = 0;
  222 
  223     addrCreateInput.chipFamily = info->family_id;
  224     addrCreateInput.chipRevision = info->chip_external_rev;
  225 
  226     if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
  227         return NULL;
  228 
  229     if (addrCreateInput.chipFamily >= FAMILY_AI) {
  230         addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
  231     } else {
  232         regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3;
  233         regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2;
  234 
  235         regValue.backendDisables = amdinfo->enabled_rb_pipes_mask;
  236         regValue.pTileConfig = amdinfo->gb_tile_mode;
  237         regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode);
  238         if (addrCreateInput.chipFamily == FAMILY_SI) {
  239             regValue.pMacroTileConfig = NULL;
  240             regValue.noOfMacroEntries = 0;
  241         } else {
  242             regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode;
  243             regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode);
  244         }
  245 
  246         createFlags.useTileIndex = 1;
  247         createFlags.useHtileSliceAlign = 1;
  248 
  249         addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
  250     }
  251 
  252     addrCreateInput.callbacks.allocSysMem = allocSysMem;
  253     addrCreateInput.callbacks.freeSysMem = freeSysMem;
  254     addrCreateInput.callbacks.debugPrint = 0;
  255     addrCreateInput.createFlags = createFlags;
  256     addrCreateInput.regValue = regValue;
  257 
  258     addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
  259     if (addrRet != ADDR_OK)
  260         return NULL;
  261 
  262     if (max_alignment) {
  263         addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
  264         if (addrRet == ADDR_OK){
  265             *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
  266         }
  267     }
  268 
  269     struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));
  270     if (!addrlib) {
  271         AddrDestroy(addrCreateOutput.hLib);
  272         return NULL;
  273     }
  274 
  275     addrlib->handle = addrCreateOutput.hLib;
  276     simple_mtx_init(&addrlib->dcc_retile_map_lock, mtx_plain);
  277     addrlib->dcc_retile_maps = _mesa_hash_table_create(NULL, dcc_retile_map_hash_key,
  278                                dcc_retile_map_keys_equal);
  279     return addrlib;
  280 }
  281 
  282 void ac_addrlib_destroy(struct ac_addrlib *addrlib)
  283 {
  284     AddrDestroy(addrlib->handle);
  285     simple_mtx_destroy(&addrlib->dcc_retile_map_lock);
  286     _mesa_hash_table_destroy(addrlib->dcc_retile_maps, dcc_retile_map_free);
  287     free(addrlib);
  288 }
  289 
  290 static int surf_config_sanity(const struct ac_surf_config *config,
  291                   unsigned flags)
  292 {
  293     /* FMASK is allocated together with the color surface and can't be
  294      * allocated separately.
  295      */
  296     assert(!(flags & RADEON_SURF_FMASK));
  297     if (flags & RADEON_SURF_FMASK)
  298         return -EINVAL;
  299 
  300     /* all dimension must be at least 1 ! */
  301     if (!config->info.width || !config->info.height || !config->info.depth ||
  302         !config->info.array_size || !config->info.levels)
  303         return -EINVAL;
  304 
  305     switch (config->info.samples) {
  306     case 0:
  307     case 1:
  308     case 2:
  309     case 4:
  310     case 8:
  311         break;
  312     case 16:
  313         if (flags & RADEON_SURF_Z_OR_SBUFFER)
  314             return -EINVAL;
  315         break;
  316     default:
  317         return -EINVAL;
  318     }
  319 
  320     if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
  321         switch (config->info.storage_samples) {
  322         case 0:
  323         case 1:
  324         case 2:
  325         case 4:
  326         case 8:
  327             break;
  328         default:
  329             return -EINVAL;
  330         }
  331     }
  332 
  333     if (config->is_3d && config->info.array_size > 1)
  334         return -EINVAL;
  335     if (config->is_cube && config->info.depth > 1)
  336         return -EINVAL;
  337 
  338     return 0;
  339 }
  340 
  341 static int gfx6_compute_level(ADDR_HANDLE addrlib,
  342                   const struct ac_surf_config *config,
  343                   struct radeon_surf *surf, bool is_stencil,
  344                   unsigned level, bool compressed,
  345                   ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
  346                   ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
  347                   ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
  348                   ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
  349                   ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
  350                   ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
  351 {
  352     struct legacy_surf_level *surf_level;
  353     ADDR_E_RETURNCODE ret;
  354 
  355     AddrSurfInfoIn->mipLevel = level;
  356     AddrSurfInfoIn->width = u_minify(config->info.width, level);
  357     AddrSurfInfoIn->height = u_minify(config->info.height, level);
  358 
  359     /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
  360      * because GFX9 needs linear alignment of 256 bytes.
  361      */
  362     if (config->info.levels == 1 &&
  363         AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
  364         AddrSurfInfoIn->bpp &&
  365         util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {
  366         unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);
  367 
  368         AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
  369     }
  370 
  371     /* addrlib assumes the bytes/pixel is a divisor of 64, which is not
  372      * true for r32g32b32 formats. */
  373     if (AddrSurfInfoIn->bpp == 96) {
  374         assert(config->info.levels == 1);
  375         assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);
  376 
  377         /* The least common multiple of 64 bytes and 12 bytes/pixel is
  378          * 192 bytes, or 16 pixels. */
  379         AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);
  380     }
  381 
  382     if (config->is_3d)
  383         AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
  384     else if (config->is_cube)
  385         AddrSurfInfoIn->numSlices = 6;
  386     else
  387         AddrSurfInfoIn->numSlices = config->info.array_size;
  388 
  389     if (level > 0) {
  390         /* Set the base level pitch. This is needed for calculation
  391          * of non-zero levels. */
  392         if (is_stencil)
  393             AddrSurfInfoIn->basePitch = surf->u.legacy.stencil_level[0].nblk_x;
  394         else
  395             AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;
  396 
  397         /* Convert blocks to pixels for compressed formats. */
  398         if (compressed)
  399             AddrSurfInfoIn->basePitch *= surf->blk_w;
  400     }
  401 
  402     ret = AddrComputeSurfaceInfo(addrlib,
  403                      AddrSurfInfoIn,
  404                      AddrSurfInfoOut);
  405     if (ret != ADDR_OK) {
  406         return ret;
  407     }
  408 
  409     surf_level = is_stencil ? &surf->u.legacy.stencil_level[level] : &surf->u.legacy.level[level];
  410     surf_level->offset = align64(surf->surf_size, AddrSurfInfoOut->baseAlign);
  411     surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
  412     surf_level->nblk_x = AddrSurfInfoOut->pitch;
  413     surf_level->nblk_y = AddrSurfInfoOut->height;
  414 
  415     switch (AddrSurfInfoOut->tileMode) {
  416     case ADDR_TM_LINEAR_ALIGNED:
  417         surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
  418         break;
  419     case ADDR_TM_1D_TILED_THIN1:
  420         surf_level->mode = RADEON_SURF_MODE_1D;
  421         break;
  422     case ADDR_TM_2D_TILED_THIN1:
  423         surf_level->mode = RADEON_SURF_MODE_2D;
  424         break;
  425     default:
  426         assert(0);
  427     }
  428 
  429     if (is_stencil)
  430         surf->u.legacy.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
  431     else
  432         surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;
  433 
  434     surf->surf_size = surf_level->offset + AddrSurfInfoOut->surfSize;
  435 
  436     /* Clear DCC fields at the beginning. */
  437     surf_level->dcc_offset = 0;
  438 
  439     /* The previous level's flag tells us if we can use DCC for this level. */
  440     if (AddrSurfInfoIn->flags.dccCompatible &&
  441         (level == 0 || AddrDccOut->subLvlCompressible)) {
  442         bool prev_level_clearable = level == 0 ||
  443                         AddrDccOut->dccRamSizeAligned;
  444 
  445         AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
  446         AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
  447         AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
  448         AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
  449         AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
  450 
  451         ret = AddrComputeDccInfo(addrlib,
  452                      AddrDccIn,
  453                      AddrDccOut);
  454 
  455         if (ret == ADDR_OK) {
  456             surf_level->dcc_offset = surf->dcc_size;
  457             surf->num_dcc_levels = level + 1;
  458             surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
  459             surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);
  460 
  461             /* If the DCC size of a subresource (1 mip level or 1 slice)
  462              * is not aligned, the DCC memory layout is not contiguous for
  463              * that subresource, which means we can't use fast clear.
  464              *
  465              * We only do fast clears for whole mipmap levels. If we did
  466              * per-slice fast clears, the same restriction would apply.
  467              * (i.e. only compute the slice size and see if it's aligned)
  468              *
  469              * The last level can be non-contiguous and still be clearable
  470              * if it's interleaved with the next level that doesn't exist.
  471              */
  472             if (AddrDccOut->dccRamSizeAligned ||
  473                 (prev_level_clearable && level == config->info.levels - 1))
  474                 surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
  475             else
  476                 surf_level->dcc_fast_clear_size = 0;
  477 
  478             /* Compute the DCC slice size because addrlib doesn't
  479              * provide this info. As DCC memory is linear (each
  480              * slice is the same size) it's easy to compute.
  481              */
  482             surf->dcc_slice_size = AddrDccOut->dccRamSize / config->info.array_size;
  483 
  484             /* For arrays, we have to compute the DCC info again
  485              * with one slice size to get a correct fast clear
  486              * size.
  487              */
  488             if (config->info.array_size > 1) {
  489                 AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize;
  490                 AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
  491                 AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
  492                 AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
  493                 AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
  494 
  495                 ret = AddrComputeDccInfo(addrlib,
  496                              AddrDccIn, AddrDccOut);
  497                 if (ret == ADDR_OK) {
  498                     /* If the DCC memory isn't properly
  499                      * aligned, the data are interleaved
  500                      * accross slices.
  501                      */
  502                     if (AddrDccOut->dccRamSizeAligned)
  503                         surf_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;
  504                     else
  505                         surf_level->dcc_slice_fast_clear_size = 0;
  506                 }
  507             } else {
  508                 surf_level->dcc_slice_fast_clear_size = surf_level->dcc_fast_clear_size;
  509             }
  510         }
  511     }
  512 
  513     /* HTILE. */
  514     if (!is_stencil &&
  515         AddrSurfInfoIn->flags.depth &&
  516         surf_level->mode == RADEON_SURF_MODE_2D &&
  517         level == 0 &&
  518         !(surf->flags & RADEON_SURF_NO_HTILE)) {
  519         AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible;
  520         AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
  521         AddrHtileIn->height = AddrSurfInfoOut->height;
  522         AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
  523         AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
  524         AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
  525         AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
  526         AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
  527         AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
  528 
  529         ret = AddrComputeHtileInfo(addrlib,
  530                        AddrHtileIn,
  531                        AddrHtileOut);
  532 
  533         if (ret == ADDR_OK) {
  534             surf->htile_size = AddrHtileOut->htileBytes;
  535             surf->htile_slice_size = AddrHtileOut->sliceSize;
  536             surf->htile_alignment = AddrHtileOut->baseAlign;
  537         }
  538     }
  539 
  540     return 0;
  541 }
  542 
  543 static void gfx6_set_micro_tile_mode(struct radeon_surf *surf,
  544                      const struct radeon_info *info)
  545 {
  546     uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
  547 
  548     if (info->chip_class >= GFX7)
  549         surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
  550     else
  551         surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
  552 }
  553 
  554 static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
  555 {
  556     unsigned index, tileb;
  557 
  558     tileb = 8 * 8 * surf->bpe;
  559     tileb = MIN2(surf->u.legacy.tile_split, tileb);
  560 
  561     for (index = 0; tileb > 64; index++)
  562         tileb >>= 1;
  563 
  564     assert(index < 16);
  565     return index;
  566 }
  567 
  568 static bool get_display_flag(const struct ac_surf_config *config,
  569                  const struct radeon_surf *surf)
  570 {
  571     unsigned num_channels = config->info.num_channels;
  572     unsigned bpe = surf->bpe;
  573 
  574     if (!config->is_3d &&
  575         !config->is_cube &&
  576         !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
  577         surf->flags & RADEON_SURF_SCANOUT &&
  578         config->info.samples <= 1 &&
  579         surf->blk_w <= 2 && surf->blk_h == 1) {
  580         /* subsampled */
  581         if (surf->blk_w == 2 && surf->blk_h == 1)
  582             return true;
  583 
  584         if  (/* RGBA8 or RGBA16F */
  585              (bpe >= 4 && bpe <= 8 && num_channels == 4) ||
  586              /* R5G6B5 or R5G5B5A1 */
  587              (bpe == 2 && num_channels >= 3) ||
  588              /* C8 palette */
  589              (bpe == 1 && num_channels == 1))
  590             return true;
  591     }
  592     return false;
  593 }
  594 
  595 /**
  596  * This must be called after the first level is computed.
  597  *
  598  * Copy surface-global settings like pipe/bank config from level 0 surface
  599  * computation, and compute tile swizzle.
  600  */
  601 static int gfx6_surface_settings(ADDR_HANDLE addrlib,
  602                  const struct radeon_info *info,
  603                  const struct ac_surf_config *config,
  604                  ADDR_COMPUTE_SURFACE_INFO_OUTPUT* csio,
  605                  struct radeon_surf *surf)
  606 {
  607     surf->surf_alignment = csio->baseAlign;
  608     surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
  609     gfx6_set_micro_tile_mode(surf, info);
  610 
  611     /* For 2D modes only. */
  612     if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
  613         surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
  614         surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
  615         surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
  616         surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
  617         surf->u.legacy.num_banks = csio->pTileInfo->banks;
  618         surf->u.legacy.macro_tile_index = csio->macroModeIndex;
  619     } else {
  620         surf->u.legacy.macro_tile_index = 0;
  621     }
  622 
  623     /* Compute tile swizzle. */
  624     /* TODO: fix tile swizzle with mipmapping for GFX6 */
  625     if ((info->chip_class >= GFX7 || config->info.levels == 1) &&
  626         config->info.surf_index &&
  627         surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
  628         !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
  629         !get_display_flag(config, surf)) {
  630         ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
  631         ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
  632 
  633         AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
  634         AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
  635 
  636         AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
  637         AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
  638         AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
  639         AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
  640         AddrBaseSwizzleIn.tileMode = csio->tileMode;
  641 
  642         int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn,
  643                            &AddrBaseSwizzleOut);
  644         if (r != ADDR_OK)
  645             return r;
  646 
  647         assert(AddrBaseSwizzleOut.tileSwizzle <=
  648                u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
  649         surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
  650     }
  651     return 0;
  652 }
  653 
  654 static void ac_compute_cmask(const struct radeon_info *info,
  655                  const struct ac_surf_config *config,
  656                  struct radeon_surf *surf)
  657 {
  658     unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
  659     unsigned num_pipes = info->num_tile_pipes;
  660     unsigned cl_width, cl_height;
  661 
  662     if (surf->flags & RADEON_SURF_Z_OR_SBUFFER ||
  663         (config->info.samples >= 2 && !surf->fmask_size))
  664         return;
  665 
  666     assert(info->chip_class <= GFX8);
  667 
  668     switch (num_pipes) {
  669     case 2:
  670         cl_width = 32;
  671         cl_height = 16;
  672         break;
  673     case 4:
  674         cl_width = 32;
  675         cl_height = 32;
  676         break;
  677     case 8:
  678         cl_width = 64;
  679         cl_height = 32;
  680         break;
  681     case 16: /* Hawaii */
  682         cl_width = 64;
  683         cl_height = 64;
  684         break;
  685     default:
  686         assert(0);
  687         return;
  688     }
  689 
  690     unsigned base_align = num_pipes * pipe_interleave_bytes;
  691 
  692     unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width*8);
  693     unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height*8);
  694     unsigned slice_elements = (width * height) / (8*8);
  695 
  696     /* Each element of CMASK is a nibble. */
  697     unsigned slice_bytes = slice_elements / 2;
  698 
  699     surf->u.legacy.cmask_slice_tile_max = (width * height) / (128*128);
  700     if (surf->u.legacy.cmask_slice_tile_max)
  701         surf->u.legacy.cmask_slice_tile_max -= 1;
  702 
  703     unsigned num_layers;
  704     if (config->is_3d)
  705         num_layers = config->info.depth;
  706     else if (config->is_cube)
  707         num_layers = 6;
  708     else
  709         num_layers = config->info.array_size;
  710 
  711     surf->cmask_alignment = MAX2(256, base_align);
  712     surf->cmask_slice_size = align(slice_bytes, base_align);
  713     surf->cmask_size = surf->cmask_slice_size * num_layers;
  714 }
  715 
/**
 * Fill in the tiling information in \p surf based on the given surface config.
 *
 * The following fields of \p surf must be initialized by the caller:
 * blk_w, blk_h, bpe, flags.
 *
 * The function additionally reads u.legacy.{bankw, bankh, mtilea, tile_split,
 * num_banks, pipe_config, stencil_tile_split} from \p surf: when bankw, bankh,
 * mtilea and tile_split are all non-zero and 2D tiling is requested, they are
 * passed to addrlib as the preferred macrotile parameters.
 *
 * \param addrlib  addrlib handle used for all layout queries
 * \param info     chip information (chip_class, family, has_graphics)
 * \param config   surface dimensions, sample counts, level count, etc.
 * \param mode     requested tiling mode; may be raised for MSAA and for
 *                 depth/stencil surfaces
 * \param surf     surface to fill in; surf->flags may also be modified
 *                 (RADEON_SURF_TC_COMPATIBLE_HTILE is cleared when addrlib
 *                 reports the base level as not TC-compatible)
 *
 * \return 0 on success, otherwise the non-zero error code returned by the
 *         failing helper/addrlib call, or ADDR_ERROR if the resulting micro
 *         tile mode is RADEON_MICRO_MODE_RENDER.
 */
static int gfx6_compute_surface(ADDR_HANDLE addrlib,
                const struct radeon_info *info,
                const struct ac_surf_config *config,
                enum radeon_surf_mode mode,
                struct radeon_surf *surf)
{
    unsigned level;
    bool compressed;
    ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
    ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
    ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
    ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
    ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
    ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
    ADDR_TILEINFO AddrTileInfoIn = {0};
    ADDR_TILEINFO AddrTileInfoOut = {0};
    int r;

    /* Every addrlib input/output structure carries its own size. */
    AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
    AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
    AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
    AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
    AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
    AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
    /* Give addrlib storage for the tile info it reports back. */
    AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;

    /* A 4x4 block size is treated as a block-compressed format; the exact
     * format (BC1 or BC3) is derived from bpe below.
     */
    compressed = surf->blk_w == 4 && surf->blk_h == 4;

    /* MSAA requires 2D tiling. */
    if (config->info.samples > 1)
        mode = RADEON_SURF_MODE_2D;

    /* DB doesn't support linear layouts. */
    if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) &&
        mode < RADEON_SURF_MODE_1D)
        mode = RADEON_SURF_MODE_1D;

    /* Set the requested tiling mode. */
    switch (mode) {
    case RADEON_SURF_MODE_LINEAR_ALIGNED:
        AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
        break;
    case RADEON_SURF_MODE_1D:
        AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
        break;
    case RADEON_SURF_MODE_2D:
        AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
        break;
    default:
        /* Any other mode is a caller error. */
        assert(0);
    }

    /* The format must be set correctly for the allocation of compressed
     * textures to work. In other cases, setting the bpp is sufficient.
     */
    if (compressed) {
        switch (surf->bpe) {
        case 8:
            AddrSurfInfoIn.format = ADDR_FMT_BC1;
            break;
        case 16:
            AddrSurfInfoIn.format = ADDR_FMT_BC3;
            break;
        default:
            /* Only 8- and 16-byte compressed blocks are handled. */
            assert(0);
        }
    }
    else {
        AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
    }

    /* The sample count is at least 1 for both the surface and DCC. */
    AddrDccIn.numSamples = AddrSurfInfoIn.numSamples =
        MAX2(1, config->info.samples);
    /* NOTE(review): -1 presumably lets addrlib choose the tile index;
     * confirm against the addrlib documentation.
     */
    AddrSurfInfoIn.tileIndex = -1;

    /* For color surfaces, the fragment count comes from storage_samples,
     * and it also replaces the DCC sample count set above.
     */
    if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
        AddrDccIn.numSamples = AddrSurfInfoIn.numFrags =
            MAX2(1, config->info.storage_samples);
    }

    /* Set the micro tile type. */
    if (surf->flags & RADEON_SURF_SCANOUT)
        AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
    else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
        AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
    else
        AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;

    /* Translate the surface flags and config into addrlib surface flags. */
    AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
    AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
    AddrSurfInfoIn.flags.cube = config->is_cube;
    AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
    AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;
    AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;

    /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
     * requested, because TC-compatible HTILE requires 2D tiling.
     */
    AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
                     !AddrSurfInfoIn.flags.fmask &&
                     config->info.samples <= 1 &&
                     !(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE);

    /* DCC notes:
     * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
     *   with samples >= 4.
     * - Mipmapped array textures have low performance (discovered by a closed
     *   driver team).
     */
    AddrSurfInfoIn.flags.dccCompatible =
        info->chip_class >= GFX8 &&
        info->has_graphics && /* disable DCC on compute-only chips */
        !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
        !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
        !compressed &&
        ((config->info.array_size == 1 && config->info.depth == 1) ||
         config->info.levels == 1);

    AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
    AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);

    /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
     * for Z and stencil. This can cause a number of problems which we work
     * around here:
     *
     * - a depth part that is incompatible with mipmapped texturing
     * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
     *   incorrect tiling applied to the stencil part, stencil buffer
     *   memory accesses that go out of bounds) even without mipmapping
     *
     * Some piglit tests that are prone to different types of related
     * failures:
     *  ./bin/ext_framebuffer_multisample-upsample 2 stencil
     *  ./bin/framebuffer-blit-levels {draw,read} stencil
     *  ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
     *  ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
     *  ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
     */
    /* Stays -1 unless a matching stencil tile index is computed below. */
    int stencil_tile_idx = -1;

    if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
        (config->info.levels > 1 || info->family == CHIP_STONEY)) {
        /* Compute stencilTileIdx that is compatible with the (depth)
         * tileIdx. This degrades the depth surface if necessary to
         * ensure that a matching stencilTileIdx exists. */
        AddrSurfInfoIn.flags.matchStencilTileCfg = 1;

        /* Keep the depth mip-tail compatible with texturing. */
        AddrSurfInfoIn.flags.noStencil = 1;
    }

    /* Set preferred macrotile parameters. This is usually required
     * for shared resources. This is for 2D tiling only. */
    if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 &&
        surf->u.legacy.bankw && surf->u.legacy.bankh &&
        surf->u.legacy.mtilea && surf->u.legacy.tile_split) {
        /* If any of these parameters are incorrect, the calculation
         * will fail. */
        AddrTileInfoIn.banks = surf->u.legacy.num_banks;
        AddrTileInfoIn.bankWidth = surf->u.legacy.bankw;
        AddrTileInfoIn.bankHeight = surf->u.legacy.bankh;
        AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea;
        AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split;
        AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */
        /* The layout is dictated by the caller, so don't let addrlib
         * trade it for space. */
        AddrSurfInfoIn.flags.opt4Space = 0;
        AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;

        /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
         * the tile index, because we are expected to know it if
         * we know the other parameters.
         *
         * This is something that can easily be fixed in Addrlib.
         * For now, just figure it out here.
         * Note that only 2D_TILE_THIN1 is handled here.
         */
        assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
        assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);

        if (info->chip_class == GFX6) {
            /* On GFX6 the tile index depends on both the micro tile
             * type and the element size. */
            if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
                if (surf->bpe == 2)
                    AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
                else
                    AddrSurfInfoIn.tileIndex = 12; /* 32bpp */
            } else {
                if (surf->bpe == 1)
                    AddrSurfInfoIn.tileIndex = 14; /* 8bpp */
                else if (surf->bpe == 2)
                    AddrSurfInfoIn.tileIndex = 15; /* 16bpp */
                else if (surf->bpe == 4)
                    AddrSurfInfoIn.tileIndex = 16; /* 32bpp */
                else
                    AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
            }
        } else {
            /* GFX7 - GFX8 */
            if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
                AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
            else
                AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */

            /* Addrlib doesn't set this if tileIndex is forced like above. */
            AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
        }
    }

    /* Start from a clean state for the sizes and alignments computed by
     * this function. */
    surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
    surf->num_dcc_levels = 0;
    surf->surf_size = 0;
    surf->dcc_size = 0;
    surf->dcc_alignment = 1;
    surf->htile_size = 0;
    surf->htile_slice_size = 0;
    surf->htile_alignment = 1;

    /* Stencil requested without depth. */
    const bool only_stencil = (surf->flags & RADEON_SURF_SBUFFER) &&
                  !(surf->flags & RADEON_SURF_ZBUFFER);

    /* Calculate texture layout information. */
    if (!only_stencil) {
        for (level = 0; level < config->info.levels; level++) {
            r = gfx6_compute_level(addrlib, config, surf, false, level, compressed,
                           &AddrSurfInfoIn, &AddrSurfInfoOut,
                           &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
            if (r)
                return r;

            /* Only the base level drives the surface-wide settings below. */
            if (level > 0)
                continue;

            /* Addrlib reported the base level as not TC-compatible: stop
             * requesting it for the remaining levels and drop the flag. */
            if (!AddrSurfInfoOut.tcCompatible) {
                AddrSurfInfoIn.flags.tcCompatible = 0;
                surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
            }

            /* Pin the depth tile index chosen for the base level for the
             * remaining levels, and remember the matching stencil tile
             * index for the stencil pass below. */
            if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
                AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
                AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
                stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;

                assert(stencil_tile_idx >= 0);
            }

            r = gfx6_surface_settings(addrlib, info, config,
                          &AddrSurfInfoOut, surf);
            if (r)
                return r;
        }
    }

    /* Calculate texture layout information for stencil. */
    if (surf->flags & RADEON_SURF_SBUFFER) {
        /* Reuse the surface input, retargeted at an 8bpp stencil aspect.
         * tileIndex is -1 unless a matching index was found above. */
        AddrSurfInfoIn.tileIndex = stencil_tile_idx;
        AddrSurfInfoIn.bpp = 8;
        AddrSurfInfoIn.flags.depth = 0;
        AddrSurfInfoIn.flags.stencil = 1;
        AddrSurfInfoIn.flags.tcCompatible = 0;
        /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
        AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split;

        for (level = 0; level < config->info.levels; level++) {
            /* No HTILE structures are passed for the stencil aspect. */
            r = gfx6_compute_level(addrlib, config, surf, true, level, compressed,
                           &AddrSurfInfoIn, &AddrSurfInfoOut,
                           &AddrDccIn, &AddrDccOut,
                           NULL, NULL);
            if (r)
                return r;

            /* DB uses the depth pitch for both stencil and depth. */
            if (!only_stencil) {
                /* Record that the stencil pitch differs from depth. */
                if (surf->u.legacy.stencil_level[level].nblk_x !=
                    surf->u.legacy.level[level].nblk_x)
                    surf->u.legacy.stencil_adjusted = true;
            } else {
                /* Without depth, the stencil pitch is the surface pitch. */
                surf->u.legacy.level[level].nblk_x =
                    surf->u.legacy.stencil_level[level].nblk_x;
            }

            if (level == 0) {
                /* With no depth pass, the surface-wide settings come from
                 * the stencil base level instead. */
                if (only_stencil) {
                    r = gfx6_surface_settings(addrlib, info, config,
                                  &AddrSurfInfoOut, surf);
                    if (r)
                        return r;
                }

                /* For 2D modes only. */
                if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
                    surf->u.legacy.stencil_tile_split =
                        AddrSurfInfoOut.pTileInfo->tileSplitBytes;
                }
            }
        }
    }

    /* Compute FMASK. */
    if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color &&
        info->has_graphics && !(surf->flags & RADEON_SURF_NO_FMASK)) {
        ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0};
        ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
        ADDR_TILEINFO fmask_tile_info = {};

        fin.size = sizeof(fin);
        fout.size = sizeof(fout);

        /* FMASK is derived from the last computed surface layout. */
        fin.tileMode = AddrSurfInfoOut.tileMode;
        fin.pitch = AddrSurfInfoOut.pitch;
        fin.height = config->info.height;
        fin.numSlices = AddrSurfInfoIn.numSlices;
        fin.numSamples = AddrSurfInfoIn.numSamples;
        fin.numFrags = AddrSurfInfoIn.numFrags;
        fin.tileIndex = -1;
        fout.pTileInfo = &fmask_tile_info;

        r = AddrComputeFmaskInfo(addrlib, &fin, &fout);
        if (r)
            return r;

        surf->fmask_size = fout.fmaskBytes;
        surf->fmask_alignment = fout.baseAlign;
        surf->fmask_tile_swizzle = 0;

        /* Number of 64-pixel tiles per slice, minus one (clamped at 0). */
        surf->u.legacy.fmask.slice_tile_max =
            (fout.pitch * fout.height) / 64;
        if (surf->u.legacy.fmask.slice_tile_max)
            surf->u.legacy.fmask.slice_tile_max -= 1;

        surf->u.legacy.fmask.tiling_index = fout.tileIndex;
        surf->u.legacy.fmask.bankh = fout.pTileInfo->bankHeight;
        surf->u.legacy.fmask.pitch_in_pixels = fout.pitch;
        surf->u.legacy.fmask.slice_size = fout.sliceSize;

        /* Compute tile swizzle for FMASK. */
        if (config->info.fmask_surf_index &&
            !(surf->flags & RADEON_SURF_SHAREABLE)) {
            ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0};
            ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0};

            xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
            xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);

            /* This counter starts from 1 instead of 0. */
            xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
            xin.tileIndex = fout.tileIndex;
            xin.macroModeIndex = fout.macroModeIndex;
            xin.pTileInfo = fout.pTileInfo;
            xin.tileMode = fin.tileMode;

            /* NOTE(review): this declaration shadows the function-scope
             * "r"; the outer variable is not modified here. */
            int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);
            if (r != ADDR_OK)
                return r;

            /* NOTE(review): the bound is derived from the size of
             * tile_swizzle, while the value is stored in
             * fmask_tile_swizzle; presumably both fields have the same
             * width - confirm against the struct definition. */
            assert(xout.tileSwizzle <=
                   u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
            surf->fmask_tile_swizzle = xout.tileSwizzle;
        }
    }

    /* Recalculate the whole DCC miptree size including disabled levels.
     * This is what addrlib does, but calling addrlib would be a lot more
     * complicated.
     */
    if (surf->dcc_size && config->info.levels > 1) {
        /* The smallest miplevels that are never compressed by DCC
         * still read the DCC buffer via TC if the base level uses DCC,
         * and for some reason the DCC buffer needs to be larger if
         * the miptree uses non-zero tile_swizzle. Otherwise there are
         * VM faults.
         *
         * "dcc_alignment * 4" was determined by trial and error.
         */
        surf->dcc_size = align64(surf->surf_size >> 8,
                     surf->dcc_alignment * 4);
    }

    /* Make sure HTILE covers the whole miptree, because the shader reads
     * TC-compatible HTILE even for levels where it's disabled by DB.
     */
    if (surf->htile_size && config->info.levels > 1 &&
        surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
        /* MSAA can't occur with levels > 1, so ignore the sample count. */
        const unsigned total_pixels = surf->surf_size / surf->bpe;
        const unsigned htile_block_size = 8 * 8;
        const unsigned htile_element_size = 4;

        /* One 4-byte HTILE element per 8x8 block of pixels. */
        surf->htile_size = (total_pixels / htile_block_size) *
                   htile_element_size;
        surf->htile_size = align(surf->htile_size, surf->htile_alignment);
    }

    /* Derive the summary flags from the base level and micro tile mode. */
    surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
    surf->is_displayable = surf->is_linear ||
                   surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
                   surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER;

    /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
     * used at the same time. This case is not currently expected to occur
     * because we don't use rotated. Enforce this restriction on all chips
     * to facilitate testing.
     */
    if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) {
        assert(!"rotate micro tile mode is unsupported");
        return ADDR_ERROR;
    }

    /* CMASK is computed last, once the surface layout is known. */
    ac_compute_cmask(info, config, surf);
    return 0;
}
 1130 
 1131 /* This is only called when expecting a tiled layout. */
 1132 static int
 1133 gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib,
 1134                 struct radeon_surf *surf,
 1135                 ADDR2_COMPUTE_SURFACE_INFO_INPUT *in,
 1136                 bool is_fmask, AddrSwizzleMode *swizzle_mode)
 1137 {
 1138     ADDR_E_RETURNCODE ret;
 1139     ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
 1140     ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};
 1141 
 1142     sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
 1143     sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);
 1144 
 1145     sin.flags = in->flags;
 1146     sin.resourceType = in->resourceType;
 1147     sin.format = in->format;
 1148     sin.resourceLoction = ADDR_RSRC_LOC_INVIS;
 1149     /* TODO: We could allow some of these: */
 1150     sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
 1151     sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle modes */
 1152     sin.bpp = in->bpp;
 1153     sin.width = in->width;
 1154     sin.height = in->height;
 1155     sin.numSlices = in->numSlices;
 1156     sin.numMipLevels = in->numMipLevels;
 1157     sin.numSamples = in->numSamples;
 1158     sin.numFrags = in->numFrags;
 1159 
 1160     if (is_fmask) {
 1161         sin.flags.display = 0;
 1162         sin.flags.color = 0;
 1163         sin.flags.fmask = 1;
 1164     }
 1165 
 1166     if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {
 1167         sin.forbiddenBlock.linear = 1;
 1168 
 1169         if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
 1170             sin.preferredSwSet.sw_D = 1;
 1171         else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)
 1172             sin.preferredSwSet.sw_S = 1;
 1173         else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)
 1174             sin.preferredSwSet.sw_Z = 1;
 1175         else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)
 1176             sin.preferredSwSet.sw_R = 1;
 1177     }
 1178 
 1179     ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
 1180     if (ret != ADDR_OK)
 1181         return ret;
 1182 
 1183     *swizzle_mode = sout.swizzleMode;
 1184     return 0;
 1185 }
 1186 
 1187 static bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode)
 1188 {
 1189     if (info->chip_class >= GFX10)
 1190         return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;
 1191 
 1192     return sw_mode != ADDR_SW_LINEAR;
 1193 }
 1194 
 1195 ASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info,
 1196                         const struct radeon_surf *surf)
 1197 {
 1198     if (info->chip_class <= GFX9) {
 1199         /* Only independent 64B blocks are supported. */
 1200         return surf->u.gfx9.dcc.independent_64B_blocks &&
 1201                !surf->u.gfx9.dcc.independent_128B_blocks &&
 1202                surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;
 1203     }
 1204 
 1205     if (info->family == CHIP_NAVI10) {
 1206         /* Only independent 128B blocks are supported. */
 1207         return !surf->u.gfx9.dcc.independent_64B_blocks &&
 1208                surf->u.gfx9.dcc.independent_128B_blocks &&
 1209                surf->u.gfx9.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
 1210     }
 1211 
 1212     if (info->family == CHIP_NAVI12 ||
 1213         info->family == CHIP_NAVI14) {
 1214         /* Either 64B or 128B can be used, but not both.
 1215          * If 64B is used, DCC image stores are unsupported.
 1216          */
 1217         return surf->u.gfx9.dcc.independent_64B_blocks !=
 1218                surf->u.gfx9.dcc.independent_128B_blocks &&
 1219                (!surf->u.gfx9.dcc.independent_64B_blocks ||
 1220             surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B) &&
 1221                (!surf->u.gfx9.dcc.independent_128B_blocks ||
 1222             surf->u.gfx9.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B);
 1223     }
 1224 
 1225     unreachable("unhandled chip");
 1226     return false;
 1227 }
 1228 
 1229 static bool is_dcc_supported_by_DCN(const struct radeon_info *info,
 1230                     const struct ac_surf_config *config,
 1231                     const struct radeon_surf *surf,
 1232                     bool rb_aligned, bool pipe_aligned)
 1233 {
 1234     if (!info->use_display_dcc_unaligned &&
 1235         !info->use_display_dcc_with_retile_blit)
 1236         return false;
 1237 
 1238     /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
 1239     if (surf->bpe != 4)
 1240         return false;
 1241 
 1242     /* Handle unaligned DCC. */
 1243     if (info->use_display_dcc_unaligned &&
 1244         (rb_aligned || pipe_aligned))
 1245         return false;
 1246 
 1247     switch (info->chip_class) {
 1248     case GFX9:
 1249         /* There are more constraints, but we always set
 1250          * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
 1251          * which always works.
 1252          */
 1253         assert(surf->u.gfx9.dcc.independent_64B_blocks &&
 1254                surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
 1255         return true;
 1256     case GFX10:
 1257         /* DCN requires INDEPENDENT_128B_BLOCKS = 0.
 1258          * For 4K, it also requires INDEPENDENT_64B_BLOCKS = 1.
 1259          */
 1260         return !surf->u.gfx9.dcc.independent_128B_blocks &&
 1261                ((config->info.width <= 2560 &&
 1262              config->info.height <= 2560) ||
 1263             (surf->u.gfx9.dcc.independent_64B_blocks &&
 1264              surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
 1265     default:
 1266         unreachable("unhandled chip");
 1267         return false;
 1268     }
 1269 }
 1270 
 1271 static int gfx9_compute_miptree(struct ac_addrlib *addrlib,
 1272                 const struct radeon_info *info,
 1273                 const struct ac_surf_config *config,
 1274                 struct radeon_surf *surf, bool compressed,
 1275                 ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
 1276 {
 1277     ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {};
 1278     ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
 1279     ADDR_E_RETURNCODE ret;
 1280 
 1281     out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
 1282     out.pMipInfo = mip_info;
 1283 
 1284     ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out);
 1285     if (ret != ADDR_OK)
 1286         return ret;
 1287 
 1288     if (in->flags.stencil) {
 1289         surf->u.gfx9.stencil.swizzle_mode = in->swizzleMode;
 1290         surf->u.gfx9.stencil.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 :
 1291                                    out.mipChainPitch - 1;
 1292         surf->surf_alignment = MAX2(surf->surf_alignment, out.baseAlign);
 1293         surf->u.gfx9.stencil_offset = align(surf->surf_size, out.baseAlign);
 1294         surf->surf_size = surf->u.gfx9.stencil_offset + out.surfSize;
 1295         return 0;
 1296     }
 1297 
 1298     surf->u.gfx9.surf.swizzle_mode = in->swizzleMode;
 1299     surf->u.gfx9.surf.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 :
 1300                             out.mipChainPitch - 1;
 1301 
 1302     /* CMASK fast clear uses these even if FMASK isn't allocated.
 1303      * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.
 1304      */
 1305     surf->u.gfx9.fmask.swizzle_mode = surf->u.gfx9.surf.swizzle_mode & ~0x3;
 1306     surf->u.gfx9.fmask.epitch = surf->u.gfx9.surf.epitch;
 1307 
 1308     surf->u.gfx9.surf_slice_size = out.sliceSize;
 1309     surf->u.gfx9.surf_pitch = out.pitch;
 1310     surf->u.gfx9.surf_height = out.height;
 1311     surf->surf_size = out.surfSize;
 1312     surf->surf_alignment = out.baseAlign;
 1313 
 1314     if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
 1315         surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR) {
 1316         /* Adjust surf_pitch to be in elements units not in pixels */
 1317         surf->u.gfx9.surf_pitch =
 1318             align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe);
 1319         surf->u.gfx9.surf.epitch = MAX2(surf->u.gfx9.surf.epitch,
 1320                         surf->u.gfx9.surf_pitch * surf->blk_w - 1);
 1321         /* The surface is really a surf->bpe bytes per pixel surface even if we
 1322          * use it as a surf->bpe bytes per element one.
 1323          * Adjust surf_slice_size and surf_size to reflect the change
 1324          * made to surf_pitch.
 1325          */
 1326         surf->u.gfx9.surf_slice_size = MAX2(
 1327             surf->u.gfx9.surf_slice_size,
 1328             surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w);
 1329         surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;
 1330     }
 1331 
 1332     if (in->swizzleMode == ADDR_SW_LINEAR) {
 1333         for (unsigned i = 0; i < in->numMipLevels; i++) {
 1334             surf->u.gfx9.offset[i] = mip_info[i].offset;
 1335             surf->u.gfx9.pitch[i] = mip_info[i].pitch;
 1336         }
 1337     }
 1338 
 1339     if (in->flags.depth) {
 1340         assert(in->swizzleMode != ADDR_SW_LINEAR);
 1341 
 1342         if (surf->flags & RADEON_SURF_NO_HTILE)
 1343             return 0;
 1344 
 1345         /* HTILE */
 1346         ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
 1347         ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
 1348 
 1349         hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
 1350         hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
 1351 
 1352         hin.hTileFlags.pipeAligned = !in->flags.metaPipeUnaligned;
 1353         hin.hTileFlags.rbAligned = !in->flags.metaRbUnaligned;
 1354         hin.depthFlags = in->flags;
 1355         hin.swizzleMode = in->swizzleMode;
 1356         hin.unalignedWidth = in->width;
 1357         hin.unalignedHeight = in->height;
 1358         hin.numSlices = in->numSlices;
 1359         hin.numMipLevels = in->numMipLevels;
 1360         hin.firstMipIdInTail = out.firstMipIdInTail;
 1361 
 1362         ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout);
 1363         if (ret != ADDR_OK)
 1364             return ret;
 1365 
 1366         surf->u.gfx9.htile.rb_aligned = hin.hTileFlags.rbAligned;
 1367         surf->u.gfx9.htile.pipe_aligned = hin.hTileFlags.pipeAligned;
 1368         surf->htile_size = hout.htileBytes;
 1369         surf->htile_slice_size = hout.sliceSize;
 1370         surf->htile_alignment = hout.baseAlign;
 1371         return 0;
 1372     }
 1373 
 1374     {
 1375         /* Compute tile swizzle for the color surface.
 1376          * All *_X and *_T modes can use the swizzle.
 1377          */
 1378         if (config->info.surf_index &&
 1379             in->swizzleMode >= ADDR_SW_64KB_Z_T &&
 1380             !out.mipChainInTail &&
 1381             !(surf->flags & RADEON_SURF_SHAREABLE) &&
 1382             !in->flags.display) {
 1383             ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
 1384             ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
 1385 
 1386             xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
 1387             xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
 1388 
 1389             xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
 1390             xin.flags = in->flags;
 1391             xin.swizzleMode = in->swizzleMode;
 1392             xin.resourceType = in->resourceType;
 1393             xin.format = in->format;
 1394             xin.numSamples = in->numSamples;
 1395             xin.numFrags = in->numFrags;
 1396 
 1397             ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
 1398             if (ret != ADDR_OK)
 1399                 return ret;
 1400 
 1401             assert(xout.pipeBankXor <=
 1402                    u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
 1403             surf->tile_swizzle = xout.pipeBankXor;
 1404         }
 1405 
 1406         /* DCC */
 1407         if (info->has_graphics &&
 1408             !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
 1409             !compressed &&
 1410             is_dcc_supported_by_CB(info, in->swizzleMode) &&
 1411             (!in->flags.display ||
 1412              is_dcc_supported_by_DCN(info, config, surf,
 1413                          !in->flags.metaRbUnaligned,
 1414                          !in->flags.metaPipeUnaligned))) {
 1415             ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
 1416             ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
 1417             ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};
 1418 
 1419             din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
 1420             dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
 1421             dout.pMipInfo = meta_mip_info;
 1422 
 1423             din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
 1424             din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
 1425             din.colorFlags = in->flags;
 1426             din.resourceType = in->resourceType;
 1427             din.swizzleMode = in->swizzleMode;
 1428             din.bpp = in->bpp;
 1429             din.unalignedWidth = in->width;
 1430             din.unalignedHeight = in->height;
 1431             din.numSlices = in->numSlices;
 1432             din.numFrags = in->numFrags;
 1433             din.numMipLevels = in->numMipLevels;
 1434             din.dataSurfaceSize = out.surfSize;
 1435             din.firstMipIdInTail = out.firstMipIdInTail;
 1436 
 1437             ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
 1438             if (ret != ADDR_OK)
 1439                 return ret;
 1440 
 1441             surf->u.gfx9.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
 1442             surf->u.gfx9.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
 1443             surf->dcc_size = dout.dccRamSize;
 1444             surf->dcc_alignment = dout.dccRamBaseAlign;
 1445             surf->num_dcc_levels = in->numMipLevels;
 1446 
 1447             /* Disable DCC for levels that are in the mip tail.
 1448              *
 1449              * There are two issues that this is intended to
 1450              * address:
 1451              *
 1452              * 1. Multiple mip levels may share a cache line. This
 1453              *    can lead to corruption when switching between
 1454              *    rendering to different mip levels because the
 1455              *    RBs don't maintain coherency.
 1456              *
 1457              * 2. Texturing with metadata after rendering sometimes
 1458              *    fails with corruption, probably for a similar
 1459              *    reason.
 1460              *
 1461              * Working around these issues for all levels in the
 1462              * mip tail may be overly conservative, but it's what
 1463              * Vulkan does.
 1464              *
 1465              * Alternative solutions that also work but are worse:
 1466              * - Disable DCC entirely.
 1467              * - Flush TC L2 after rendering.
 1468              */
 1469             for (unsigned i = 0; i < in->numMipLevels; i++) {
 1470                 if (meta_mip_info[i].inMiptail) {
 1471                     surf->num_dcc_levels = i;
 1472                     break;
 1473                 }
 1474             }
 1475 
 1476             if (!surf->num_dcc_levels)
 1477                 surf->dcc_size = 0;
 1478 
 1479             surf->u.gfx9.display_dcc_size = surf->dcc_size;
 1480             surf->u.gfx9.display_dcc_alignment = surf->dcc_alignment;
 1481             surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;
 1482 
 1483             /* Compute displayable DCC. */
 1484             if (in->flags.display &&
 1485                 surf->num_dcc_levels &&
 1486                 info->use_display_dcc_with_retile_blit) {
 1487                 /* Compute displayable DCC info. */
 1488                 din.dccKeyFlags.pipeAligned = 0;
 1489                 din.dccKeyFlags.rbAligned = 0;
 1490 
 1491                 assert(din.numSlices == 1);
 1492                 assert(din.numMipLevels == 1);
 1493                 assert(din.numFrags == 1);
 1494                 assert(surf->tile_swizzle == 0);
 1495                 assert(surf->u.gfx9.dcc.pipe_aligned ||
 1496                        surf->u.gfx9.dcc.rb_aligned);
 1497 
 1498                 ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
 1499                 if (ret != ADDR_OK)
 1500                     return ret;
 1501 
 1502                 surf->u.gfx9.display_dcc_size = dout.dccRamSize;
 1503                 surf->u.gfx9.display_dcc_alignment = dout.dccRamBaseAlign;
 1504                 surf->u.gfx9.display_dcc_pitch_max = dout.pitch - 1;
 1505                 assert(surf->u.gfx9.display_dcc_size <= surf->dcc_size);
 1506 
 1507                 surf->u.gfx9.dcc_retile_use_uint16 =
 1508                     surf->u.gfx9.display_dcc_size <= UINT16_MAX + 1 &&
 1509                     surf->dcc_size <= UINT16_MAX + 1;
 1510 
 1511                 /* Align the retile map size to get more hash table hits and
 1512                  * decrease the maximum memory footprint when all retile maps
 1513                  * are cached in the hash table.
 1514                  */
 1515                 unsigned retile_dim[2] = {in->width, in->height};
 1516 
 1517                 for (unsigned i = 0; i < 2; i++) {
 1518                     /* Increase the alignment as the size increases.
 1519                      * Greater alignment increases retile compute work,
 1520                      * but decreases maximum memory footprint for the cache.
 1521                      *
 1522                      * With this alignment, the worst case memory footprint of
 1523                      * the cache is:
 1524                      *   1920x1080: 55 MB
 1525                      *   2560x1440: 99 MB
 1526                      *   3840x2160: 305 MB
 1527                      *
 1528                      * The worst case size in MB can be computed in Haskell as follows:
 1529                      *   (sum (map get_retile_size (map get_dcc_size (deduplicate (map align_pair
 1530                      *       [(i*16,j*16) | i <- [1..maxwidth`div`16], j <- [1..maxheight`div`16]]))))) `div` 1024^2
 1531                      *     where
 1532                      *       alignment x = if x <= 512 then 16 else if x <= 1024 then 32 else if x <= 2048 then 64 else 128
 1533                      *       align x = (x + (alignment x) - 1) `div` (alignment x) * (alignment x)
 1534                      *       align_pair e = (align (fst e), align (snd e))
 1535                      *       deduplicate = map head . groupBy (\ a b -> ((fst a) == (fst b)) && ((snd a) == (snd b))) . sortBy compare
 1536                      *       get_dcc_size e = ((fst e) * (snd e) * bpp) `div` 256
 1537                      *       get_retile_size dcc_size = dcc_size * 2 * (if dcc_size <= 2^16 then 2 else 4)
 1538                      *       bpp = 4; maxwidth = 3840; maxheight = 2160
 1539                      */
 1540                     if (retile_dim[i] <= 512)
 1541                         retile_dim[i] = align(retile_dim[i], 16);
 1542                     else if (retile_dim[i] <= 1024)
 1543                         retile_dim[i] = align(retile_dim[i], 32);
 1544                     else if (retile_dim[i] <= 2048)
 1545                         retile_dim[i] = align(retile_dim[i], 64);
 1546                     else
 1547                         retile_dim[i] = align(retile_dim[i], 128);
 1548 
 1549                     /* Don't align more than the DCC pixel alignment. */
 1550                     assert(dout.metaBlkWidth >= 128 && dout.metaBlkHeight >= 128);
 1551                 }
 1552 
 1553                 surf->u.gfx9.dcc_retile_num_elements =
 1554                     DIV_ROUND_UP(retile_dim[0], dout.compressBlkWidth) *
 1555                     DIV_ROUND_UP(retile_dim[1], dout.compressBlkHeight) * 2;
 1556                 /* Align the size to 4 (for the compute shader). */
 1557                 surf->u.gfx9.dcc_retile_num_elements =
 1558                     align(surf->u.gfx9.dcc_retile_num_elements, 4);
 1559 
 1560                 if (!(surf->flags & RADEON_SURF_IMPORTED)) {
 1561                     /* Compute address mapping from non-displayable to displayable DCC. */
 1562                     ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT addrin;
 1563                     memset(&addrin, 0, sizeof(addrin));
 1564                     addrin.size             = sizeof(addrin);
 1565                     addrin.swizzleMode      = din.swizzleMode;
 1566                     addrin.resourceType     = din.resourceType;
 1567                     addrin.bpp              = din.bpp;
 1568                     addrin.numSlices        = 1;
 1569                     addrin.numMipLevels     = 1;
 1570                     addrin.numFrags         = 1;
 1571                     addrin.pitch            = dout.pitch;
 1572                     addrin.height           = dout.height;
 1573                     addrin.compressBlkWidth = dout.compressBlkWidth;
 1574                     addrin.compressBlkHeight = dout.compressBlkHeight;
 1575                     addrin.compressBlkDepth = dout.compressBlkDepth;
 1576                     addrin.metaBlkWidth     = dout.metaBlkWidth;
 1577                     addrin.metaBlkHeight    = dout.metaBlkHeight;
 1578                     addrin.metaBlkDepth     = dout.metaBlkDepth;
 1579                     addrin.dccRamSliceSize  = 0; /* Don't care for non-layered images. */
 1580 
 1581                     surf->u.gfx9.dcc_retile_map =
 1582                         ac_compute_dcc_retile_map(addrlib, info,
 1583                                       retile_dim[0], retile_dim[1],
 1584                                       surf->u.gfx9.dcc.rb_aligned,
 1585                                       surf->u.gfx9.dcc.pipe_aligned,
 1586                                       surf->u.gfx9.dcc_retile_use_uint16,
 1587                                       surf->u.gfx9.dcc_retile_num_elements,
 1588                                       &addrin);
 1589                     if (!surf->u.gfx9.dcc_retile_map)
 1590                         return ADDR_OUTOFMEMORY;
 1591                 }
 1592             }
 1593         }
 1594 
 1595         /* FMASK */
 1596         if (in->numSamples > 1 && info->has_graphics &&
 1597             !(surf->flags & RADEON_SURF_NO_FMASK)) {
 1598             ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0};
 1599             ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
 1600 
 1601             fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT);
 1602             fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT);
 1603 
 1604             ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, in,
 1605                                   true, &fin.swizzleMode);
 1606             if (ret != ADDR_OK)
 1607                 return ret;
 1608 
 1609             fin.unalignedWidth = in->width;
 1610             fin.unalignedHeight = in->height;
 1611             fin.numSlices = in->numSlices;
 1612             fin.numSamples = in->numSamples;
 1613             fin.numFrags = in->numFrags;
 1614 
 1615             ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout);
 1616             if (ret != ADDR_OK)
 1617                 return ret;
 1618 
 1619             surf->u.gfx9.fmask.swizzle_mode = fin.swizzleMode;
 1620             surf->u.gfx9.fmask.epitch = fout.pitch - 1;
 1621             surf->fmask_size = fout.fmaskBytes;
 1622             surf->fmask_alignment = fout.baseAlign;
 1623 
 1624             /* Compute tile swizzle for the FMASK surface. */
 1625             if (config->info.fmask_surf_index &&
 1626                 fin.swizzleMode >= ADDR_SW_64KB_Z_T &&
 1627                 !(surf->flags & RADEON_SURF_SHAREABLE)) {
 1628                 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
 1629                 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
 1630 
 1631                 xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
 1632                 xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
 1633 
 1634                 /* This counter starts from 1 instead of 0. */
 1635                 xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
 1636                 xin.flags = in->flags;
 1637                 xin.swizzleMode = fin.swizzleMode;
 1638                 xin.resourceType = in->resourceType;
 1639                 xin.format = in->format;
 1640                 xin.numSamples = in->numSamples;
 1641                 xin.numFrags = in->numFrags;
 1642 
 1643                 ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
 1644                 if (ret != ADDR_OK)
 1645                     return ret;
 1646 
 1647                 assert(xout.pipeBankXor <=
 1648                        u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8));
 1649                 surf->fmask_tile_swizzle = xout.pipeBankXor;
 1650             }
 1651         }
 1652 
 1653         /* CMASK -- on GFX10 only for FMASK */
 1654         if (in->swizzleMode != ADDR_SW_LINEAR &&
 1655             in->resourceType == ADDR_RSRC_TEX_2D &&
 1656             ((info->chip_class <= GFX9 && in->numSamples == 1) ||
 1657              (surf->fmask_size && in->numSamples >= 2))) {
 1658             ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
 1659             ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};
 1660 
 1661             cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
 1662             cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
 1663 
 1664             if (in->numSamples > 1) {
 1665                 /* FMASK is always aligned. */
 1666                 cin.cMaskFlags.pipeAligned = 1;
 1667                 cin.cMaskFlags.rbAligned = 1;
 1668             } else {
 1669                 cin.cMaskFlags.pipeAligned = !in->flags.metaPipeUnaligned;
 1670                 cin.cMaskFlags.rbAligned = !in->flags.metaRbUnaligned;
 1671             }
 1672             cin.colorFlags = in->flags;
 1673             cin.resourceType = in->resourceType;
 1674             cin.unalignedWidth = in->width;
 1675             cin.unalignedHeight = in->height;
 1676             cin.numSlices = in->numSlices;
 1677 
 1678             if (in->numSamples > 1)
 1679                 cin.swizzleMode = surf->u.gfx9.fmask.swizzle_mode;
 1680             else
 1681                 cin.swizzleMode = in->swizzleMode;
 1682 
 1683             ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);
 1684             if (ret != ADDR_OK)
 1685                 return ret;
 1686 
 1687             surf->u.gfx9.cmask.rb_aligned = cin.cMaskFlags.rbAligned;
 1688             surf->u.gfx9.cmask.pipe_aligned = cin.cMaskFlags.pipeAligned;
 1689             surf->cmask_size = cout.cmaskBytes;
 1690             surf->cmask_alignment = cout.baseAlign;
 1691         }
 1692     }
 1693 
 1694     return 0;
 1695 }
 1696 
 1697 static int gfx9_compute_surface(struct ac_addrlib *addrlib,
 1698                 const struct radeon_info *info,
 1699                 const struct ac_surf_config *config,
 1700                 enum radeon_surf_mode mode,
 1701                 struct radeon_surf *surf)
 1702 {
 1703     bool compressed;
 1704     ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
 1705     int r;
 1706 
 1707     AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
 1708 
 1709     compressed = surf->blk_w == 4 && surf->blk_h == 4;
 1710 
 1711     /* The format must be set correctly for the allocation of compressed
 1712      * textures to work. In other cases, setting the bpp is sufficient. */
 1713     if (compressed) {
 1714         switch (surf->bpe) {
 1715         case 8:
 1716             AddrSurfInfoIn.format = ADDR_FMT_BC1;
 1717             break;
 1718         case 16:
 1719             AddrSurfInfoIn.format = ADDR_FMT_BC3;
 1720             break;
 1721         default:
 1722             assert(0);
 1723         }
 1724     } else {
 1725         switch (surf->bpe) {
 1726         case 1:
 1727             assert(!(surf->flags & RADEON_SURF_ZBUFFER));
 1728             AddrSurfInfoIn.format = ADDR_FMT_8;
 1729             break;
 1730         case 2:
 1731             assert(surf->flags & RADEON_SURF_ZBUFFER ||
 1732                    !(surf->flags & RADEON_SURF_SBUFFER));
 1733             AddrSurfInfoIn.format = ADDR_FMT_16;
 1734             break;
 1735         case 4:
 1736             assert(surf->flags & RADEON_SURF_ZBUFFER ||
 1737                    !(surf->flags & RADEON_SURF_SBUFFER));
 1738             AddrSurfInfoIn.format = ADDR_FMT_32;
 1739             break;
 1740         case 8:
 1741             assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
 1742             AddrSurfInfoIn.format = ADDR_FMT_32_32;
 1743             break;
 1744         case 12:
 1745             assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
 1746             AddrSurfInfoIn.format = ADDR_FMT_32_32_32;
 1747             break;
 1748         case 16:
 1749             assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
 1750             AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32;
 1751             break;
 1752         default:
 1753             assert(0);
 1754         }
 1755         AddrSurfInfoIn.bpp = surf->bpe * 8;
 1756     }
 1757 
 1758     bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
 1759     AddrSurfInfoIn.flags.color = is_color_surface &&
 1760                                  !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
 1761     AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
 1762     AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
 1763     /* flags.texture currently refers to TC-compatible HTILE */
 1764     AddrSurfInfoIn.flags.texture = is_color_surface ||
 1765                        surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
 1766     AddrSurfInfoIn.flags.opt4space = 1;
 1767 
 1768     AddrSurfInfoIn.numMipLevels = config->info.levels;
 1769     AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
 1770     AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;
 1771 
 1772     if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
 1773         AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
 1774 
 1775     /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
 1776      * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
 1777      * must sample 1D textures as 2D. */
 1778     if (config->is_3d)
 1779         AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
 1780     else if (info->chip_class != GFX9 && config->is_1d)
 1781         AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
 1782     else
 1783         AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;
 1784 
 1785     AddrSurfInfoIn.width = config->info.width;
 1786     AddrSurfInfoIn.height = config->info.height;
 1787 
 1788     if (config->is_3d)
 1789         AddrSurfInfoIn.numSlices = config->info.depth;
 1790     else if (config->is_cube)
 1791         AddrSurfInfoIn.numSlices = 6;
 1792     else
 1793         AddrSurfInfoIn.numSlices = config->info.array_size;
 1794 
 1795     /* This is propagated to HTILE/DCC/CMASK. */
 1796     AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
 1797     AddrSurfInfoIn.flags.metaRbUnaligned = 0;
 1798 
 1799     /* Optimal values for the L2 cache. */
 1800     if (info->chip_class == GFX9) {
 1801         surf->u.gfx9.dcc.independent_64B_blocks = 1;
 1802         surf->u.gfx9.dcc.independent_128B_blocks = 0;
 1803         surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
 1804     } else if (info->chip_class >= GFX10) {
 1805         surf->u.gfx9.dcc.independent_64B_blocks = 0;
 1806         surf->u.gfx9.dcc.independent_128B_blocks = 1;
 1807         surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
 1808     }
 1809 
 1810     if (AddrSurfInfoIn.flags.display) {
 1811         /* The display hardware can only read DCC with RB_ALIGNED=0 and
 1812          * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
 1813          *
 1814          * The CB block requires RB_ALIGNED=1 except 1 RB chips.
 1815          * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
 1816          * after rendering, so PIPE_ALIGNED=1 is recommended.
 1817          */
 1818         if (info->use_display_dcc_unaligned) {
 1819             AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
 1820             AddrSurfInfoIn.flags.metaRbUnaligned = 1;
 1821         }
 1822 
 1823         /* Adjust DCC settings to meet DCN requirements. */
 1824         if (info->use_display_dcc_unaligned ||
 1825             info->use_display_dcc_with_retile_blit) {
 1826             /* Only Navi12/14 support independent 64B blocks in L2,
 1827              * but without DCC image stores.
 1828              */
 1829             if (info->family == CHIP_NAVI12 ||
 1830                 info->family == CHIP_NAVI14) {
 1831                 surf->u.gfx9.dcc.independent_64B_blocks = 1;
 1832                 surf->u.gfx9.dcc.independent_128B_blocks = 0;
 1833                 surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
 1834             }
 1835         }
 1836     }
 1837 
 1838     switch (mode) {
 1839     case RADEON_SURF_MODE_LINEAR_ALIGNED:
 1840         assert(config->info.samples <= 1);
 1841         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
 1842         AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
 1843         break;
 1844 
 1845     case RADEON_SURF_MODE_1D:
 1846     case RADEON_SURF_MODE_2D:
 1847         if (surf->flags & RADEON_SURF_IMPORTED ||
 1848                     (info->chip_class >= GFX10 &&
 1849                      surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
 1850             AddrSurfInfoIn.swizzleMode = surf->u.gfx9.surf.swizzle_mode;
 1851             break;
 1852         }
 1853 
 1854         r = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, &AddrSurfInfoIn,
 1855                             false, &AddrSurfInfoIn.swizzleMode);
 1856         if (r)
 1857             return r;
 1858         break;
 1859 
 1860     default:
 1861         assert(0);
 1862     }
 1863 
 1864     surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;
 1865     surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
 1866 
 1867     surf->num_dcc_levels = 0;
 1868     surf->surf_size = 0;
 1869     surf->fmask_size = 0;
 1870     surf->dcc_size = 0;
 1871     surf->htile_size = 0;
 1872     surf->htile_slice_size = 0;
 1873     surf->u.gfx9.surf_offset = 0;
 1874     surf->u.gfx9.stencil_offset = 0;
 1875     surf->cmask_size = 0;
 1876     surf->u.gfx9.dcc_retile_use_uint16 = false;
 1877     surf->u.gfx9.dcc_retile_num_elements = 0;
 1878     surf->u.gfx9.dcc_retile_map = NULL;
 1879 
 1880     /* Calculate texture layout information. */
 1881     r = gfx9_compute_miptree(addrlib, info, config, surf, compressed,
 1882                  &AddrSurfInfoIn);
 1883     if (r)
 1884         return r;
 1885 
 1886     /* Calculate texture layout information for stencil. */
 1887     if (surf->flags & RADEON_SURF_SBUFFER) {
 1888         AddrSurfInfoIn.flags.stencil = 1;
 1889         AddrSurfInfoIn.bpp = 8;
 1890         AddrSurfInfoIn.format = ADDR_FMT_8;
 1891 
 1892         if (!AddrSurfInfoIn.flags.depth) {
 1893             r = gfx9_get_preferred_swizzle_mode(addrlib->handle, surf, &AddrSurfInfoIn,
 1894                                 false, &AddrSurfInfoIn.swizzleMode);
 1895             if (r)
 1896                 return r;
 1897         } else
 1898             AddrSurfInfoIn.flags.depth = 0;
 1899 
 1900         r = gfx9_compute_miptree(addrlib, info, config, surf, compressed,
 1901                      &AddrSurfInfoIn);
 1902         if (r)
 1903             return r;
 1904     }
 1905 
 1906     surf->is_linear = surf->u.gfx9.surf.swizzle_mode == ADDR_SW_LINEAR;
 1907 
 1908     /* Query whether the surface is displayable. */
 1909     /* This is only useful for surfaces that are allocated without SCANOUT. */
 1910     bool displayable = false;
 1911     if (!config->is_3d && !config->is_cube) {
 1912         r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.surf.swizzle_mode,
 1913                            surf->bpe * 8, &displayable);
 1914         if (r)
 1915             return r;
 1916 
 1917         /* Display needs unaligned DCC. */
 1918         if (surf->num_dcc_levels &&
 1919             !is_dcc_supported_by_DCN(info, config, surf,
 1920                          surf->u.gfx9.dcc.rb_aligned,
 1921                          surf->u.gfx9.dcc.pipe_aligned))
 1922             displayable = false;
 1923     }
 1924     surf->is_displayable = displayable;
 1925 
 1926     /* Validate that we allocated a displayable surface if requested. */
 1927     assert(!AddrSurfInfoIn.flags.display || surf->is_displayable);
 1928 
 1929     /* Validate that DCC is set up correctly. */
 1930     if (surf->num_dcc_levels) {
 1931         assert(is_dcc_supported_by_L2(info, surf));
 1932         if (AddrSurfInfoIn.flags.color)
 1933             assert(is_dcc_supported_by_CB(info, surf->u.gfx9.surf.swizzle_mode));
 1934         if (AddrSurfInfoIn.flags.display) {
 1935             assert(is_dcc_supported_by_DCN(info, config, surf,
 1936                                surf->u.gfx9.dcc.rb_aligned,
 1937                                surf->u.gfx9.dcc.pipe_aligned));
 1938         }
 1939     }
 1940 
 1941     if (info->has_graphics &&
 1942         !compressed &&
 1943         !config->is_3d &&
 1944         config->info.levels == 1 &&
 1945         AddrSurfInfoIn.flags.color &&
 1946         !surf->is_linear &&
 1947         surf->surf_alignment >= 64 * 1024 && /* 64KB tiling */
 1948         !(surf->flags & (RADEON_SURF_DISABLE_DCC |
 1949                  RADEON_SURF_FORCE_SWIZZLE_MODE |
 1950                  RADEON_SURF_FORCE_MICRO_TILE_MODE))) {
 1951         /* Validate that DCC is enabled if DCN can do it. */
 1952         if ((info->use_display_dcc_unaligned ||
 1953              info->use_display_dcc_with_retile_blit) &&
 1954             AddrSurfInfoIn.flags.display &&
 1955             surf->bpe == 4) {
 1956             assert(surf->num_dcc_levels);
 1957         }
 1958 
 1959         /* Validate that non-scanout DCC is always enabled. */
 1960         if (!AddrSurfInfoIn.flags.display)
 1961             assert(surf->num_dcc_levels);
 1962     }
 1963 
 1964     switch (surf->u.gfx9.surf.swizzle_mode) {
 1965         /* S = standard. */
 1966         case ADDR_SW_256B_S:
 1967         case ADDR_SW_4KB_S:
 1968         case ADDR_SW_64KB_S:
 1969         case ADDR_SW_64KB_S_T:
 1970         case ADDR_SW_4KB_S_X:
 1971         case ADDR_SW_64KB_S_X:
 1972             surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD;
 1973             break;
 1974 
 1975         /* D = display. */
 1976         case ADDR_SW_LINEAR:
 1977         case ADDR_SW_256B_D:
 1978         case ADDR_SW_4KB_D:
 1979         case ADDR_SW_64KB_D:
 1980         case ADDR_SW_64KB_D_T:
 1981         case ADDR_SW_4KB_D_X:
 1982         case ADDR_SW_64KB_D_X:
 1983             surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
 1984             break;
 1985 
 1986         /* R = rotated (gfx9), render target (gfx10). */
 1987         case ADDR_SW_256B_R:
 1988         case ADDR_SW_4KB_R:
 1989         case ADDR_SW_64KB_R:
 1990         case ADDR_SW_64KB_R_T:
 1991         case ADDR_SW_4KB_R_X:
 1992         case ADDR_SW_64KB_R_X:
 1993         case ADDR_SW_VAR_R_X:
 1994             /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
 1995              * used at the same time. We currently do not use rotated
 1996              * in gfx9.
 1997              */
 1998             assert(info->chip_class >= GFX10 ||
 1999                    !"rotate micro tile mode is unsupported");
 2000             surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;
 2001             break;
 2002 
 2003         /* Z = depth. */
 2004         case ADDR_SW_4KB_Z:
 2005         case ADDR_SW_64KB_Z:
 2006         case ADDR_SW_64KB_Z_T:
 2007         case ADDR_SW_4KB_Z_X:
 2008         case ADDR_SW_64KB_Z_X:
 2009         case ADDR_SW_VAR_Z_X:
 2010             surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
 2011             break;
 2012 
 2013         default:
 2014             assert(0);
 2015     }
 2016 
 2017     return 0;
 2018 }
 2019 
 2020 int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
 2021                const struct ac_surf_config *config,
 2022                enum radeon_surf_mode mode,
 2023                struct radeon_surf *surf)
 2024 {
 2025     int r;
 2026 
 2027     r = surf_config_sanity(config, surf->flags);
 2028     if (r)
 2029         return r;
 2030 
 2031     if (info->chip_class >= GFX9)
 2032         r = gfx9_compute_surface(addrlib, info, config, mode, surf);
 2033     else
 2034         r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf);
 2035 
 2036     if (r)
 2037         return r;
 2038 
 2039     /* Determine the memory layout of multiple allocations in one buffer. */
 2040     surf->total_size = surf->surf_size;
 2041 
 2042     if (surf->htile_size) {
 2043         surf->htile_offset = align64(surf->total_size, surf->htile_alignment);
 2044         surf->total_size = surf->htile_offset + surf->htile_size;
 2045     }
 2046 
 2047     if (surf->fmask_size) {
 2048         assert(config->info.samples >= 2);
 2049         surf->fmask_offset = align64(surf->total_size, surf->fmask_alignment);
 2050         surf->total_size = surf->fmask_offset + surf->fmask_size;
 2051     }
 2052 
 2053     /* Single-sample CMASK is in a separate buffer. */
 2054     if (surf->cmask_size && config->info.samples >= 2) {
 2055         surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
 2056         surf->total_size = surf->cmask_offset + surf->cmask_size;
 2057     }
 2058 
 2059     if (surf->is_displayable)
 2060         surf->flags |= RADEON_SURF_SCANOUT;
 2061 
 2062     if (surf->dcc_size &&
 2063         /* dcc_size is computed on GFX9+ only if it's displayable. */
 2064         (info->chip_class >= GFX9 || !get_display_flag(config, surf))) {
 2065         /* It's better when displayable DCC is immediately after
 2066          * the image due to hw-specific reasons.
 2067          */
 2068         if (info->chip_class >= GFX9 &&
 2069             surf->u.gfx9.dcc_retile_num_elements) {
 2070             /* Add space for the displayable DCC buffer. */
 2071             surf->display_dcc_offset =
 2072                 align64(surf->total_size, surf->u.gfx9.display_dcc_alignment);
 2073             surf->total_size = surf->display_dcc_offset +
 2074                        surf->u.gfx9.display_dcc_size;
 2075 
 2076             /* Add space for the DCC retile buffer. (16-bit or 32-bit elements) */
 2077             surf->dcc_retile_map_offset =
 2078                 align64(surf->total_size, info->tcc_cache_line_size);
 2079 
 2080             if (surf->u.gfx9.dcc_retile_use_uint16) {
 2081                 surf->total_size = surf->dcc_retile_map_offset +
 2082                            surf->u.gfx9.dcc_retile_num_elements * 2;
 2083             } else {
 2084                 surf->total_size = surf->dcc_retile_map_offset +
 2085                            surf->u.gfx9.dcc_retile_num_elements * 4;
 2086             }
 2087         }
 2088 
 2089         surf->dcc_offset = align64(surf->total_size, surf->dcc_alignment);
 2090         surf->total_size = surf->dcc_offset + surf->dcc_size;
 2091     }
 2092 
 2093     return 0;
 2094 }